Berechnung von Entropie von Zeichen, Tupeln und Wörtern
This commit is contained in:
117
P1/main.py
Normal file
117
P1/main.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import math
|
||||
from operator import concat
|
||||
|
||||
|
||||
def read_text_file(path='/Users/safak/Documents/UNI/IT/P1/text.txt'):
    """Read the input text file and return its entire content as a string.

    Args:
        path: File to read (UTF-8). Defaults to the original hard-coded
              location so existing zero-argument callers keep working.

    Returns:
        The file's full content as a single str.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()
|
||||
|
||||
|
||||
def Z_statistik():
    """Single-character statistics: count each character's frequency in the
    text and hand the counts to calculate() for entropy computation.

    Results can be cross-checked with https://de.planetcalc.com/2476/
    """
    message = read_text_file()

    # Number of characters in the message X.
    m = len(message)

    # Character frequencies in the message X.
    # dict.get with a default replaces the original
    # char_count.__contains__ branch — same counts, idiomatic form.
    char_count = {}
    for char in message:
        char_count[char] = char_count.get(char, 0) + 1

    calculate(char_count, m, message)
|
||||
|
||||
|
||||
def Z2_statistik():
    """Character-pair (2-tuple) statistics: count each adjacent character
    pair in the text and hand the counts to calculate().
    """
    message = read_text_file()

    # Number of pairs in the message: the last character cannot form
    # a pair with "nothing", hence len - 1.
    m = len(message) - 1

    # Pair frequencies. Renamed local from `tuple` — the original
    # shadowed the builtin. For an empty message m == -1 and range(-1)
    # is empty, matching the original's no-iteration behavior.
    pair_count = {}
    for i in range(m):
        pair = message[i:i + 2]
        pair_count[pair] = pair_count.get(pair, 0) + 1

    calculate(pair_count, m, message)
|
||||
|
||||
def W_statistik():
    """Word statistics: count each maximal run of alphabetic characters
    (a "word") in the text and hand the counts to calculate().
    """
    message = read_text_file()

    # Number of characters in the message. NOTE(review): calculate()
    # divides the *word* counts by this *character* count — preserved
    # from the original design; confirm this is intended.
    m = len(message)

    # Word frequencies.
    word_count = {}

    i = 0
    while i < m:
        if message[i].isalpha():
            word = message[i]
            # Extend the word while the next character is alphabetic.
            # The i + 1 < m guard fixes an IndexError the original
            # raised whenever the text ended with a letter.
            while i + 1 < m and message[i + 1].isalpha():
                i += 1
                word += message[i]
            word_count[word] = word_count.get(word, 0) + 1
        i += 1

    calculate(word_count, m, message)
|
||||
|
||||
|
||||
|
||||
def calculate(collection, m, message):
    """Compute and print information content and entropy of a message.

    Args:
        collection: dict mapping symbol (char / pair / word) -> count.
        m: total number of symbols the counts are relative to.
        message: the original text, echoed in the output.

    Returns:
        The entropy H(X) in bits. (The original returned None; existing
        callers ignore the return value, so this is backward compatible.)
    """
    # Probability p(x_i) of each symbol.
    p_X = {x_i: count / m for x_i, count in collection.items()}

    # Information content I(x_i) = -log_2(p(x_i)).
    i_X = {x_i: -math.log2(p) for x_i, p in p_X.items()}

    # Entropy H(X) = sum_{i=1}^{M} p(x_i) * I(x_i).
    h_X = sum(p_X[x_i] * i_X[x_i] for x_i in i_X)

    # Output (German strings preserved — they are runtime behavior).
    print("Nachricht:")
    print(message)

    for x_i in sorted(i_X):
        print(f"{x_i:<20}: {collection[x_i]:<5} : {i_X[x_i]:<10}")
    print("Entropie der Nachricht = " + str(h_X))

    return h_X
|
||||
|
||||
|
||||
def main():
    """Run all three entropy statistics over the text file, in order:
    characters, character pairs, words."""
    for statistik in (Z_statistik, Z2_statistik, W_statistik):
        statistik()
|
||||
|
||||
|
||||
# Entry-point guard: run only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user