From 5a0e37580a7beb79a66223884653ffabaca118f4 Mon Sep 17 00:00:00 2001 From: Safak Date: Wed, 23 Apr 2025 15:47:17 +0200 Subject: [PATCH] Reduzierung auf eine Dict in der die Eigenschaften count, p(x_i) und I(x_i) geschrieben werden --- P1/main.py | 82 +++++++++++++++++++++--------------------------------- 1 file changed, 31 insertions(+), 51 deletions(-) diff --git a/P1/main.py b/P1/main.py index 17b92f7..5ceeff5 100644 --- a/P1/main.py +++ b/P1/main.py @@ -1,6 +1,11 @@ import math from operator import concat +empty_element = { + "count" : 1, + "p(x_i)" : 0, + "I(x_i)" : 0 +} def read_text_file(): path = '/Users/safak/Documents/UNI/IT/P1/text.txt' @@ -9,58 +14,47 @@ def read_text_file(): content = file.read() return content - def Z_statistik(): # Ergebnisse mit https://de.planetcalc.com/2476/ prüfbar message = read_text_file() - # Anzahl der Zeichen in der Nachricht X m = len(message) - # Häufigkeiten der Zeichen in der Nachricht X - char_count = {} #Dictionary + elements = {} for char in message: - if char_count.__contains__(char): - char_count[char] += 1 + if elements.__contains__(char): + elements[char]["count"] += 1 else: - char_count[char] = 1 - - calculate(char_count, m, message) + elements[char] = empty_element.copy() + calculate(elements, m, message) def Z2_statistik(): message = read_text_file() - # Anzahl der Tupel in der Nachricht m = len(message) - 1 # Letztes Zeichen kann kein Tupel mit "nichts" bilden - # welche Zeichen kommen wie oft vor? - char_count = {} #Dictionary + elements = {} for i, char in enumerate(message): if i != m: tuple = char + message[i+1] - if char_count.__contains__(tuple): - char_count[tuple] += 1 + if elements.__contains__(tuple): + elements[tuple]["count"] += 1 else: - char_count[tuple] = 1 + elements[tuple] = empty_element.copy() - - calculate(char_count, m, message) + calculate(elements, m, message) def W_statistik(): message = read_text_file() - # Anzahl der Wörter in der Nachricht m = 0 - - # welche Zeichen kommen wie oft vor? - word_count = {} #Dictionary - - enumerate(message) i = 0 + elements = {} + while i != len(message): word = "" @@ -77,48 +71,34 @@ def W_statistik(): m += 1 # Zähle die Häufigkeit der Wörter - if word_count.__contains__(word): - word_count[word] += 1 + if elements.__contains__(word): + elements[word]["count"] += 1 else: - word_count[word] = 1 + elements[word] = empty_element.copy() # gehe weiter in der Nachricht i += 1 - calculate(word_count, m, message) + calculate(elements, m, message) +def calculate(elements, m, message): + for x_i in elements: + elements[x_i]["p(x_i)"] = elements[x_i]["count"] / m + elements[x_i]["I(x_i)"] = -1 * math.log2(elements[x_i]["p(x_i)"]) - -def calculate(collection, m, message): - # Wahrscheinlichkeit von x_i - p_X = {} - for x_i in collection: - p_X[x_i] = collection[x_i] / m - - # Informationsgehalt I(x_i) = -log_{2}(p(x_i)) - i_X = {} - for x_i in p_X: - i_X[x_i] = -1 * math.log2(p_X[x_i]) - - # Entropie H(X) = sum_{i=1}{M}(p(x_i)*I(x_i)) - h_X = 0 - for x_i in i_X: - h_X += p_X[x_i] * i_X[x_i] + h = 0 + for x_i in elements: + h += elements[x_i]["p(x_i)"] * elements[x_i]["I(x_i)"] # Ausgabe - print("Nachricht:") - print(message) - - for x_i in sorted(i_X): - print(f"{x_i:<20}: {collection[x_i]:<5} : {i_X[x_i]:<10}") - print("Entropie der Nachricht = " + h_X.__str__()) - + for x_i in elements: + print(f"{elements[x_i]["count"]:3.0f} | {elements[x_i]["I(x_i)"]:15.12f} | »{x_i}«") + print("Entropie der Nachricht = " + h.__str__()) def main(): Z_statistik() Z2_statistik() W_statistik() - if __name__ == '__main__': main() \ No newline at end of file