Reduzierung auf ein Dict, in das die Eigenschaften count, p(x_i) und I(x_i) geschrieben werden
This commit is contained in:
82
P1/main.py
82
P1/main.py
@@ -1,6 +1,11 @@
|
|||||||
import math
|
import math
|
||||||
from operator import concat
|
from operator import concat
|
||||||
|
|
||||||
|
# Template record for one symbol (character, character pair or word).
# It is copied with ``.copy()`` the first time a symbol is seen, which is why
# "count" starts at 1 rather than 0. "p(x_i)" and "I(x_i)" are placeholders
# that calculate() overwrites.
empty_element = {
    "count": 1,
    "p(x_i)": 0,
    "I(x_i)": 0,
}
|
||||||
|
|
||||||
def read_text_file():
|
def read_text_file():
|
||||||
path = '/Users/safak/Documents/UNI/IT/P1/text.txt'
|
path = '/Users/safak/Documents/UNI/IT/P1/text.txt'
|
||||||
@@ -9,58 +14,47 @@ def read_text_file():
|
|||||||
content = file.read()
|
content = file.read()
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
def Z_statistik():
    """Compute and print single-character statistics for the input text.

    Reads the message with ``read_text_file()``, counts how often each
    character occurs and hands the per-character records to ``calculate()``.

    Results can be cross-checked with https://de.planetcalc.com/2476/.
    """
    message = read_text_file()

    # Number of characters in the message.
    m = len(message)

    # Maps each character to its own record copied from ``empty_element``.
    elements = {}
    for char in message:
        if char in elements:
            elements[char]["count"] += 1
        else:
            # First occurrence: the copied template already has count == 1.
            elements[char] = empty_element.copy()

    calculate(elements, m, message)
|
||||||
|
|
||||||
def Z2_statistik():
    """Compute and print statistics over adjacent character pairs.

    Reads the message with ``read_text_file()``, counts every pair made of a
    character and its direct successor, and hands the per-pair records to
    ``calculate()``.
    """
    message = read_text_file()

    # Number of pairs: the last character has no successor to pair with.
    m = len(message) - 1

    # Maps each pair to its own record copied from ``empty_element``.
    elements = {}
    # zip() stops at the shorter sequence, so every character except the last
    # one is combined with the character that follows it.
    for first, second in zip(message, message[1:]):
        pair = first + second
        if pair in elements:
            elements[pair]["count"] += 1
        else:
            # First occurrence: the copied template already has count == 1.
            elements[pair] = empty_element.copy()

    calculate(elements, m, message)
|
|
||||||
|
|
||||||
def W_statistik():
|
def W_statistik():
|
||||||
message = read_text_file()
|
message = read_text_file()
|
||||||
|
|
||||||
# Anzahl der Wörter in der Nachricht
|
|
||||||
m = 0
|
m = 0
|
||||||
|
|
||||||
# welche Zeichen kommen wie oft vor?
|
|
||||||
word_count = {} #Dictionary
|
|
||||||
|
|
||||||
enumerate(message)
|
|
||||||
i = 0
|
i = 0
|
||||||
|
|
||||||
|
elements = {}
|
||||||
|
|
||||||
while i != len(message):
|
while i != len(message):
|
||||||
word = ""
|
word = ""
|
||||||
|
|
||||||
@@ -77,48 +71,34 @@ def W_statistik():
|
|||||||
m += 1
|
m += 1
|
||||||
|
|
||||||
# Zähle die Häufigkeit der Wörter
|
# Zähle die Häufigkeit der Wörter
|
||||||
if word_count.__contains__(word):
|
if elements.__contains__(word):
|
||||||
word_count[word] += 1
|
elements[word]["count"] += 1
|
||||||
else:
|
else:
|
||||||
word_count[word] = 1
|
elements[word] = empty_element.copy()
|
||||||
|
|
||||||
# gehe weiter in der Nachricht
|
# gehe weiter in der Nachricht
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
calculate(word_count, m, message)
|
calculate(elements, m, message)
|
||||||
|
|
||||||
|
def calculate(elements, m, message):
    """Fill in probability and information content, then print the results.

    Args:
        elements: Dict mapping each symbol to a record with the keys
            "count", "p(x_i)" and "I(x_i)". The records are updated in place.
        m: Total number of symbols; used as the divisor for the relative
            frequency p(x_i) = count / m.
        message: The original message. Not used here; kept so existing
            callers can keep passing it.

    Returns:
        None. One line per symbol and the entropy are written to stdout.
    """
    for entry in elements.values():
        # Relative frequency of the symbol.
        entry["p(x_i)"] = entry["count"] / m
        # Information content I(x_i) = -log2(p(x_i)).
        entry["I(x_i)"] = -math.log2(entry["p(x_i)"])

    # Entropy H(X) = sum over all symbols of p(x_i) * I(x_i).
    h = sum(entry["p(x_i)"] * entry["I(x_i)"] for entry in elements.values())

    # Output. The record is bound to a local name so the f-string needs no
    # nested double quotes, which only parse on Python 3.12 and later.
    for x_i, entry in elements.items():
        count = entry["count"]
        info = entry["I(x_i)"]
        print(f"{count:3.0f} | {info:15.12f} | »{x_i}«")
    print(f"Entropie der Nachricht = {h}")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run the character, character-pair and word statistics in that order."""
    Z_statistik()
    Z2_statistik()
    W_statistik()
|
||||||
|
|
||||||
|
|
||||||
# Run the statistics only when the file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
|
||||||
Reference in New Issue
Block a user