184 lines
3.6 KiB
Python
184 lines
3.6 KiB
Python
# --- From P1 ---
|
|
import math
|
|
|
|
# Module-wide state shared by the functions below.

# Element objects, one per distinct symbol; filled by Z_statistik().
ELEMENTS: list = []

# Cumulative symbol probabilities; filled by calcIntervalls().
INTERVALLS: list = []

# Length of a code word; createBlocks() compares calcBitCount() against it.
N: int = 8
|
|
|
|
|
|
class Element:
    """A single distinct symbol of the message and the statistics kept for it."""

    # Class-level defaults; instances overwrite them as values become known.
    id: str = ""        # the symbol itself
    count: int = 0      # number of occurrences counted so far
    p_x_i: float = 0    # relative frequency, assigned in calculate()
    I_x_i: float = 0    # information content, assigned in calculate()
    bin: str = ""       # binary digit string of the code point, assigned in charToBin()

    def __init__(self, id):
        """Create the entry for symbol ``id``; it starts with one occurrence."""
        self.id, self.count = id, 1
|
|
|
|
|
|
def find_element(list, target_it):
    """Locate the first entry of ``list`` whose ``id`` equals ``target_it``.

    Returns:
        A tuple ``(index, element)`` for the first match, or ``(None, None)``
        when no entry matches.
    """
    matches = (
        (position, candidate)
        for position, candidate in enumerate(list)
        if candidate.id == target_it
    )
    return next(matches, (None, None))
|
|
|
|
|
|
def read_text_file(path='./text.txt'):
    """Return the complete content of a UTF-8 encoded text file.

    Args:
        path: File to read. Defaults to ``./text.txt`` (relative to the
            current working directory), which is what a call without
            arguments has always read.

    Returns:
        The file content as one string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()
|
|
|
|
|
|
def Z_statistik(message):
    """Count how often each symbol occurs in ``message`` and derive its statistics.

    Every distinct symbol gets one ``Element`` in the module-level ``ELEMENTS``
    list (entries already present are incremented, new ones are appended in
    order of first occurrence). Afterwards ``calculate`` is called with the
    message length to fill in probabilities and information content.

    Results can be checked with https://de.planetcalc.com/2476/.

    Args:
        message: The text whose symbols are counted.
    """
    m = len(message)

    # Index the existing entries by symbol so each lookup is O(1) instead of a
    # linear scan per character. setdefault keeps the FIRST entry per symbol,
    # which is the one a linear search over ELEMENTS would have found.
    by_symbol = {}
    for element in ELEMENTS:
        by_symbol.setdefault(element.id, element)

    for char in message:
        entry = by_symbol.get(char)
        if entry is not None:
            entry.count += 1
        else:
            entry = Element(char)
            ELEMENTS.append(entry)
            by_symbol[char] = entry

    calculate(m, message)
|
|
|
|
|
|
def calculate(m, message):
    """Fill in probability and information content for every entry of ELEMENTS.

    For each element, ``p_x_i`` is set to ``count / m`` and ``I_x_i`` to
    ``-log2(p_x_i)``. Afterwards ``ELEMENTS`` is sorted in place by ``count``
    in descending order (the sort is stable, so ties keep their order).

    Args:
        m: Total number of symbols; used as the denominator for ``p_x_i``.
        message: Not used by the computation; kept so existing calls still work.

    Returns:
        The entropy ``sum(p_x_i * I_x_i)`` over all elements. Previously this
        value was computed and then discarded.

    Raises:
        ZeroDivisionError: If ``m`` is 0 while ELEMENTS is not empty.
    """
    h = 0
    for element in ELEMENTS:
        element.p_x_i = element.count / m
        element.I_x_i = -1 * math.log2(element.p_x_i)
        h += element.p_x_i * element.I_x_i

    # Most frequent symbols first.
    ELEMENTS.sort(key=lambda element: element.count, reverse=True)

    return h
|
|
|
|
|
|
# --- From P1 ---
|
|
|
|
def charToBin():
    """Store each symbol's code point as a binary digit string in ``element.bin``."""
    for entry in ELEMENTS:
        code_point = ord(entry.id)
        entry.bin = f"{code_point:b}"
|
|
|
|
|
|
def calcBitCount(block):
    """Return the rounded number of bits corresponding to the probability of ``block``.

    The probability of the block is the product of the ``p_x_i`` values of its
    symbols (looked up in ELEMENTS); the result is ``-log2`` of that product,
    rounded to the nearest whole number.

    The base-2 logarithm is used so the result is in bits, consistent with
    the information content computed in ``calculate``. (The natural logarithm
    used previously yields nats, which underestimates the bit count.)

    Args:
        block: Sequence of symbols, each of which must be present in ELEMENTS.

    Returns:
        The bit count as a float with no fractional part (``round(x, 0)``).

    Raises:
        AttributeError: If a symbol of ``block`` is not present in ELEMENTS.
        ValueError: If the block probability is 0 (logarithm undefined).
    """
    p = 1
    for char in block:
        _, element = find_element(ELEMENTS, char)
        p *= element.p_x_i

    # Use a distinct local name; do not shadow the module-level constant N.
    return round(-math.log2(p), 0)
|
|
|
|
|
|
def calcIntervalls():
    """Append the running sum of the symbol probabilities to INTERVALLS.

    After the call, INTERVALLS has gained one upper bound per entry of
    ELEMENTS, in ELEMENTS order; each bound is the previous bound plus that
    element's ``p_x_i``. Existing content of INTERVALLS is kept.
    """
    upper_bound = 0
    for entry in ELEMENTS:
        upper_bound = upper_bound + entry.p_x_i
        INTERVALLS.append(upper_bound)
|
|
|
|
|
|
def createBlocks(message):
    """Split ``message`` into consecutive blocks of symbols.

    A symbol joins the current block while the block is empty or while
    ``calcBitCount`` of the block as it stands (before the symbol is added)
    is at most ``N``; otherwise the current block is closed and the symbol
    starts a new one. The block in progress at the end is always appended,
    so an empty message yields ``[[]]``.

    Args:
        message: Iterable of symbols, all of which must be known to ELEMENTS.

    Returns:
        A list of blocks, each block being a list of symbols.
    """
    finished = []
    current = []

    for symbol in message:
        if not current or calcBitCount(current) <= N:
            current.append(symbol)
        else:
            finished.append(current)
            current = [symbol]

    finished.append(current)
    # TODO: add an EOF symbol, keeping the block length in mind.

    return finished
|
|
|
|
|
|
def _subdivide(low, high):
    """Split [low, high) into one sub-interval per ELEMENTS entry, sized by its p_x_i."""
    width = high - low
    parts = []
    for element in ELEMENTS:
        new_high = low + width * element.p_x_i
        parts.append((low, new_high))
        low = new_high
    return parts


def createCodeWord(block):
    """Narrow the coding interval symbol by symbol and report the final interval.

    The first symbol selects its interval from INTERVALLS (lower bound 0 for
    the first entry, otherwise the previous cumulative bound). That interval
    is subdivided proportionally to the symbol probabilities; each further
    symbol selects its sub-interval, which is subdivided again. Each symbol
    and each subdivision is printed, followed by the final interval.

    Args:
        block: Non-empty sequence of symbols, all present in ELEMENTS.

    Returns:
        The final interval as a tuple ``(low, high)``. Previously the interval
        was only printed.

    Raises:
        IndexError: If ``block`` is empty.
        AttributeError: If a symbol of ``block`` is not present in ELEMENTS.
    """
    index, element = find_element(ELEMENTS, block[0])
    print(element.id)

    low = 0 if index == 0 else INTERVALLS[index - 1]
    high = INTERVALLS[index]

    # Rescale the symbol table into the interval of the first symbol.
    intervalls = _subdivide(low, high)
    print(intervalls)

    for char in block[1:]:
        index, element = find_element(ELEMENTS, char)
        print(element.id)

        low, high = intervalls[index]
        intervalls = _subdivide(low, high)
        print(intervalls)

    # The outer bounds of the last subdivision form the resulting interval.
    low = intervalls[0][0]
    high = intervalls[-1][1]
    print(f"[{low}, {high})")
    return low, high
|
|
|
|
|
|
def AC_Encoder():
    """Run the encoding pipeline on the default input file.

    Steps: read the message with ``read_text_file``, build the symbol
    statistics (``Z_statistik``), derive the cumulative intervals
    (``calcIntervalls``), split the message into blocks (``createBlocks``),
    print INTERVALLS and finally process the first block with
    ``createCodeWord``.

    Raises:
        OSError: If the input file cannot be read.
    """
    message = read_text_file()

    Z_statistik(message)
    # charToBin() is not called here: only the code word itself has to be
    # converted to binary.
    calcIntervalls()

    blocks = createBlocks(message)

    print(INTERVALLS)

    createCodeWord(blocks[0])
|
|
|
|
|
|
# Script entry: the encoder runs whenever this module is executed or imported.
AC_Encoder()
|