Berechnung von Entropie von Zeichen, Tupeln und Wörtern
This commit is contained in:
117
P1/main.py
Normal file
117
P1/main.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import math
|
||||
from operator import concat
|
||||
|
||||
|
||||
def read_text_file(path='/Users/safak/Documents/UNI/IT/P1/text.txt'):
    """Read the input text file and return its entire content as a string.

    Args:
        path: File to read (UTF-8). Defaults to the original hard-coded
              location so existing zero-argument callers keep working.

    Returns:
        The file's full content as a single str.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()
|
||||
|
||||
|
||||
def Z_statistik():
    """Single-character statistics: count each character's frequency in the
    text and hand the counts to calculate() for entropy computation.

    Results can be cross-checked with https://de.planetcalc.com/2476/
    """
    message = read_text_file()

    # Number of characters in the message X.
    m = len(message)

    # Character frequencies in the message X.
    # dict.get with a default replaces the original
    # char_count.__contains__ branch — same counts, idiomatic form.
    char_count = {}
    for char in message:
        char_count[char] = char_count.get(char, 0) + 1

    calculate(char_count, m, message)
|
||||
|
||||
|
||||
def Z2_statistik():
    """Character-pair (2-tuple) statistics: count each adjacent character
    pair in the text and hand the counts to calculate().
    """
    message = read_text_file()

    # Number of pairs in the message: the last character cannot form
    # a pair with "nothing", hence len - 1.
    m = len(message) - 1

    # Pair frequencies. Renamed local from `tuple` — the original
    # shadowed the builtin. For an empty message m == -1 and range(-1)
    # is empty, matching the original's no-iteration behavior.
    pair_count = {}
    for i in range(m):
        pair = message[i:i + 2]
        pair_count[pair] = pair_count.get(pair, 0) + 1

    calculate(pair_count, m, message)
|
||||
|
||||
def W_statistik():
    """Word statistics: count each maximal run of alphabetic characters
    (a "word") in the text and hand the counts to calculate().
    """
    message = read_text_file()

    # Number of characters in the message. NOTE(review): calculate()
    # divides the *word* counts by this *character* count — preserved
    # from the original design; confirm this is intended.
    m = len(message)

    # Word frequencies.
    word_count = {}

    i = 0
    while i < m:
        if message[i].isalpha():
            word = message[i]
            # Extend the word while the next character is alphabetic.
            # The i + 1 < m guard fixes an IndexError the original
            # raised whenever the text ended with a letter.
            while i + 1 < m and message[i + 1].isalpha():
                i += 1
                word += message[i]
            word_count[word] = word_count.get(word, 0) + 1
        i += 1

    calculate(word_count, m, message)
|
||||
|
||||
|
||||
|
||||
def calculate(collection, m, message):
    """Compute and print information content and entropy of a message.

    Args:
        collection: dict mapping symbol (char / pair / word) -> count.
        m: total number of symbols the counts are relative to.
        message: the original text, echoed in the output.

    Returns:
        The entropy H(X) in bits. (The original returned None; existing
        callers ignore the return value, so this is backward compatible.)
    """
    # Probability p(x_i) of each symbol.
    p_X = {x_i: count / m for x_i, count in collection.items()}

    # Information content I(x_i) = -log_2(p(x_i)).
    i_X = {x_i: -math.log2(p) for x_i, p in p_X.items()}

    # Entropy H(X) = sum_{i=1}^{M} p(x_i) * I(x_i).
    h_X = sum(p_X[x_i] * i_X[x_i] for x_i in i_X)

    # Output (German strings preserved — they are runtime behavior).
    print("Nachricht:")
    print(message)

    for x_i in sorted(i_X):
        print(f"{x_i:<20}: {collection[x_i]:<5} : {i_X[x_i]:<10}")
    print("Entropie der Nachricht = " + str(h_X))

    return h_X
|
||||
|
||||
|
||||
def main():
    """Run all three entropy statistics over the text file, in order:
    characters, character pairs, words."""
    for statistik in (Z_statistik, Z2_statistik, W_statistik):
        statistik()
|
||||
|
||||
|
||||
# Entry-point guard: run only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user