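"""Frequency, information content and entropy statistics for a text file.

The script evaluates the same text three ways: per single character, per pair
of adjacent characters, and per word. For every distinct symbol it prints the
count, the relative frequency p(x_i) and the information content I(x_i),
followed by the entropy of the message.
"""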
import math
|
|
|
|
# Template for one row of a statistics table. Callers take a `.copy()` the
# first time a symbol is seen, which is why "count" starts at 1; the other two
# fields are placeholders that `calculate` overwrites.
empty_element = {
    "count": 1,
    "p(x_i)": 0,
    "I(x_i)": 0,
}
|
|
|
|
def read_text_file(path='/Users/safak/Documents/UNI/IT/P1/text.txt'):
    """Return the complete contents of a UTF-8 encoded text file.

    Args:
        path: Location of the file to read. Defaults to the path the script
            was originally hard-wired to, so calls without arguments behave
            as before.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()
|
|
|
|
def Z_statistik():
    """Print the single-character statistics of the text file.

    Reads the message with ``read_text_file``, counts how often each
    character occurs and passes the table to ``calculate``, using the message
    length as the total number of symbols.
    """
    # Results can be cross-checked with https://de.planetcalc.com/2476/
    message = read_text_file()

    m = len(message)

    elements = {}

    for char in message:
        if char in elements:
            elements[char]["count"] += 1
        else:
            # A fresh copy of the template already carries count == 1.
            elements[char] = empty_element.copy()

    calculate(elements, m, message)
|
|
|
|
def Z2_statistik():
    """Print the statistics of adjacent character pairs in the text file.

    Every character is paired with its direct successor (overlapping pairs),
    the pairs are counted and the table is passed to ``calculate`` together
    with the number of pairs.
    """
    message = read_text_file()

    # The last character has no successor to form a pair with.
    m = len(message) - 1

    elements = {}

    # zip stops at the shorter sequence, so exactly the pairs
    # (message[i], message[i + 1]) are produced and nothing is read out of
    # range.
    for first, second in zip(message, message[1:]):
        pair = first + second
        if pair in elements:
            elements[pair]["count"] += 1
        else:
            # A fresh copy of the template already carries count == 1.
            elements[pair] = empty_element.copy()

    calculate(elements, m, message)
|
|
|
|
def W_statistik():
    """Print the word statistics of the text file.

    A word is a maximal run of characters for which ``str.isalpha`` is true;
    everything else acts as a separator. The words are counted and the table
    is passed to ``calculate`` together with the total number of words.
    """
    message = read_text_file()

    m = 0
    i = 0
    end = len(message)

    elements = {}

    while i < end:
        if message[i].isalpha():
            # Start of a word.
            start = i

            # Advance to the last character of the word. The bounds check
            # keeps a text that ends with a letter from indexing past the
            # end of the message.
            while i + 1 < end and message[i + 1].isalpha():
                i += 1

            word = message[start:i + 1]

            # One more word in the message.
            m += 1

            # Track how often this word occurs.
            if word in elements:
                elements[word]["count"] += 1
            else:
                # A fresh copy of the template already carries count == 1.
                elements[word] = empty_element.copy()

        # Move on in the message.
        i += 1

    calculate(elements, m, message)
|
|
|
|
def calculate(elements, m, message):
    """Fill in probability and information content, then print the table.

    For every entry, ``p(x_i)`` is set to ``count / m`` and ``I(x_i)`` to
    ``-log2(p(x_i))``. The entropy is the sum of ``p(x_i) * I(x_i)`` over all
    entries. One line per symbol is printed in sorted key order, followed by
    the entropy.

    Args:
        elements: Mapping from symbol to a dict with the keys ``"count"``,
            ``"p(x_i)"`` and ``"I(x_i)"``. The dicts are updated in place.
        m: Total number of symbols, used as the divisor for the
            probabilities.
        message: Not used by the computation; kept so existing callers keep
            working.

    Raises:
        ZeroDivisionError: If ``m`` is 0 while ``elements`` is not empty.
    """
    h = 0
    for entry in elements.values():
        probability = entry["count"] / m
        entry["p(x_i)"] = probability
        entry["I(x_i)"] = -1 * math.log2(probability)
        h += entry["p(x_i)"] * entry["I(x_i)"]

    # Output
    for x_i in sorted(elements):
        entry = elements[x_i]
        count = entry["count"]
        probability = entry["p(x_i)"]
        information = entry["I(x_i)"]
        # The values are pulled into locals first: reusing double quotes
        # inside a double-quoted f-string only parses on Python 3.12+.
        print(f"{count:3.0f} | {probability:10.7f} | {information:10.7f} | »{x_i}«")
    print("Entropie der Nachricht = " + str(h) + "\n\n")
|
|
|
|
def main():
    """Print the character, character-pair and word statistics in that order."""
    Z_statistik()
    Z2_statistik()
    W_statistik()
|
|
|
|
# Run the statistics only when executed as a script, not when imported.
if __name__ == '__main__':
    main()