Funktionierende Version des Decoders
This commit is contained in:
251
P3/main.py
251
P3/main.py
@@ -2,8 +2,8 @@
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
ELEMENTS = []
|
ELEMENTS = []
|
||||||
INTERVALLS = []
|
N = 50 # länge eines Codeworts
|
||||||
N = 8 # länge eines Codeworts
|
EOF = "$"
|
||||||
|
|
||||||
|
|
||||||
class Element:
|
class Element:
|
||||||
@@ -11,7 +11,6 @@ class Element:
|
|||||||
count = 0
|
count = 0
|
||||||
p_x_i = 0
|
p_x_i = 0
|
||||||
I_x_i = 0
|
I_x_i = 0
|
||||||
bin = ""
|
|
||||||
|
|
||||||
def __init__(self, id):
|
def __init__(self, id):
|
||||||
self.id = id
|
self.id = id
|
||||||
@@ -34,12 +33,8 @@ def read_text_file():
|
|||||||
|
|
||||||
|
|
||||||
def Z_statistik(message):
|
def Z_statistik(message):
|
||||||
# Ergebnisse mit https://de.planetcalc.com/2476/ prüfbar
|
|
||||||
# message = read_text_file()
|
|
||||||
|
|
||||||
m = len(message)
|
|
||||||
|
|
||||||
global ELEMENTS
|
global ELEMENTS
|
||||||
|
m = len(message)
|
||||||
|
|
||||||
for char in message:
|
for char in message:
|
||||||
i, entry = find_element(ELEMENTS, char)
|
i, entry = find_element(ELEMENTS, char)
|
||||||
@@ -48,11 +43,6 @@ def Z_statistik(message):
|
|||||||
else:
|
else:
|
||||||
ELEMENTS.append(Element(char))
|
ELEMENTS.append(Element(char))
|
||||||
|
|
||||||
calculate(m, message)
|
|
||||||
|
|
||||||
|
|
||||||
def calculate(m, message):
|
|
||||||
global ELEMENTS
|
|
||||||
h = 0
|
h = 0
|
||||||
for element in ELEMENTS:
|
for element in ELEMENTS:
|
||||||
element.p_x_i = element.count / m
|
element.p_x_i = element.count / m
|
||||||
@@ -61,31 +51,53 @@ def calculate(m, message):
|
|||||||
|
|
||||||
# Ausgabe
|
# Ausgabe
|
||||||
ELEMENTS.sort(key=lambda element: element.count, reverse=True)
|
ELEMENTS.sort(key=lambda element: element.count, reverse=True)
|
||||||
"""for element in elements:
|
|
||||||
print(f"{element.count:3.0f} | {element.p_x_i:10.7f} | {element.I_x_i:10.7f} | »{element.id}«")
|
|
||||||
|
|
||||||
print("Entropie = " + h.__str__() + "\n\n")"""
|
|
||||||
|
|
||||||
|
|
||||||
# --- Aus P1 ---
|
# --- Aus P1 ---
|
||||||
|
|
||||||
def charToBin():
|
def binary_to_decimal(nkst_bits):
    """Convert a sequence of fractional binary digits to its numeric value.

    The digit at position i (counting from 1) contributes ``2 ** -i`` when it
    is the character '1'; every other character contributes nothing.

    Args:
        nkst_bits: Iterable of one-character strings (a str or a list), most
            significant fractional digit first.

    Returns:
        The represented value as a float; 0.0 for empty input.
    """
    # fsum adds the terms without intermediate rounding, so the result stays
    # correctly rounded even when the input is longer than a float mantissa.
    return math.fsum(
        2.0 ** -position
        for position, bit in enumerate(nkst_bits, start=1)
        if bit == '1'
    )
|
||||||
|
|
||||||
|
|
||||||
def calcBitCount(block):
|
def decimal_to_binary(decimal_val, n_bits, min_val, max_val):
    """Return an n_bits-digit binary fraction for decimal_val inside an interval.

    decimal_val is truncated to n_bits fractional binary digits. If the
    truncated value falls outside ``[min_val, max_val)``, the two neighbouring
    n_bits grid points are tried (next higher first, then next lower) and the
    first one inside the interval is used instead.

    Args:
        decimal_val: Value to encode, expected in [0, 1).
        n_bits: Number of fractional binary digits to produce.
        min_val: Inclusive lower bound the encoded value should respect.
        max_val: Exclusive upper bound the encoded value should respect.

    Returns:
        A string of exactly n_bits characters '0'/'1' (empty if n_bits <= 0).
        Warning: if the interval is so narrow that no neighbouring grid point
        lies inside it, the plain truncation is returned and the code word is
        inaccurate.
    """
    if n_bits <= 0:
        return ""

    # Collect the first n_bits digits of the binary expansion as an integer.
    numerator = 0
    val = decimal_val
    for _ in range(n_bits):
        val *= 2
        numerator <<= 1
        if val >= 1:
            numerator |= 1
            val -= 1

    scale = 1 << n_bits

    def in_interval(candidate):
        return 0 <= candidate < scale and min_val <= candidate / scale < max_val

    if not in_interval(numerator):
        # Truncation rounds down, so the usual miss is "just below min_val";
        # stepping one grid point up (with carry) repairs that. Stepping down
        # covers a decimal_val that was at or above max_val to begin with.
        for candidate in (numerator + 1, numerator - 1):
            if in_interval(candidate):
                numerator = candidate
                break

    return format(numerator, f"0{n_bits}b")
|
||||||
|
|
||||||
|
|
||||||
|
def calc_bit_count(block):
    """Return the number of bits needed to encode *block*.

    The probabilities ``p_x_i`` of all characters in the block are multiplied
    and the result is ``ceil(-log2(p))``.

    Args:
        block: Iterable of characters; each is looked up in ELEMENTS via
            find_element, whose result is unpacked as ``(index, element)``.

    Returns:
        The bit count as an int (0 for an empty block).

    Raises:
        ValueError: If the combined probability is 0 (log of zero).
    """
    probability = 1
    for char in block:
        _, element = find_element(ELEMENTS, char)
        probability *= element.p_x_i

    # log2 is exact for powers of two, whereas log(p, 2) divides two rounded
    # natural logarithms and can end up just above an integer, which ceil
    # would then bump by a whole bit.
    return math.ceil(-math.log2(probability))
|
||||||
|
|
||||||
|
|
||||||
def calcIntervalls():
|
def calc_intervalls():
|
||||||
global INTERVALLS
|
global INTERVALLS
|
||||||
current_limit = 0
|
current_limit = 0
|
||||||
for element in ELEMENTS:
|
for element in ELEMENTS:
|
||||||
@@ -94,90 +106,163 @@ def calcIntervalls():
|
|||||||
INTERVALLS.append(new_limit)
|
INTERVALLS.append(new_limit)
|
||||||
|
|
||||||
|
|
||||||
def createBlocks(message):
|
def calc_block_size():
    """Return how many reference symbols fit into one code word of N bits.

    The reference symbol is the second-to-last entry of ELEMENTS (main's
    report labels it the second-rarest symbol). The result is the largest
    repetition count of that symbol whose calc_bit_count does not exceed N.

    Returns:
        The block size as an int; 0 if even a single reference symbol needs
        more than N bits.
    """
    reference_id = ELEMENTS[-2].id
    size = 0
    # Grow the trial block by one symbol at a time until it no longer fits.
    while calc_bit_count([reference_id] * (size + 1)) <= N:
        size += 1
    return size
|
||||||
# TODO EOF Symbol setzen und auf Länge des Blocks dabei achten
|
|
||||||
|
|
||||||
|
def create_blocks(message):
    """Split *message* into consecutive chunks of calc_block_size() symbols.

    Args:
        message: Sequence to split; it is sliced, so a str yields str chunks.

    Returns:
        List of the chunks in their original order; the last one may be
        shorter than the block size.

    Raises:
        ValueError: If calc_block_size() reports that not even one symbol
            fits into a code word.
    """
    block_size = calc_block_size()
    if block_size < 1:
        # range() would reject a zero step too, but without naming the cause.
        raise ValueError(
            f"block size must be at least 1, got {block_size}; "
            "N is too small for the symbol statistics"
        )
    return [
        message[start:start + block_size]
        for start in range(0, len(message), block_size)
    ]
|
||||||
|
|
||||||
|
|
||||||
def createCodeWord(block):
|
def create_code_word(block, verbose=False):
    """Arithmetically encode *block* into a binary code word of N digits.

    Starting from [0, 1), the current interval is subdivided for every
    character in proportion to the ``p_x_i`` of the entries in ELEMENTS (in
    list order) and narrowed to the sub-interval of that character. The
    midpoint of the final interval is then converted with decimal_to_binary.

    Args:
        block: Iterable of characters to encode.
        verbose: Accepted for compatibility; currently has no effect.

    Returns:
        The code word as a string of binary digits, or None if a character of
        the block has no entry in ELEMENTS.
    """
    current_low = 0.0
    current_high = 1.0

    for char in block:
        current_range = current_high - current_low
        low = current_low
        # Walk the sub-intervals [low, high) until the one for char is found.
        for element in ELEMENTS:
            high = low + current_range * element.p_x_i
            if element.id == char:
                current_low, current_high = low, high
                break
            low = high
        else:
            # No entry in ELEMENTS carries this character.
            return None

    final_value = current_low + (current_high - current_low) / 2
    return decimal_to_binary(final_value, N, current_low, current_high)
|
||||||
|
|
||||||
|
|
||||||
for char in block[1:]:
|
def decode_single_block(decimal_value, max_chars=N):
    """Decode one code-word value back into the characters of its block.

    The interval [0, 1) is partitioned according to the ``p_x_i`` of the
    entries in ELEMENTS (in list order). Repeatedly, the character whose
    interval contains the current value is emitted and the value is rescaled
    from that interval back onto [0, 1).

    Args:
        decimal_value: Value of the code word, as a fraction in [0, 1].
        max_chars: Upper bound on the number of characters to decode.

    Returns:
        The decoded characters as a string, or None if ELEMENTS is empty.
        Decoding stops early at the EOF symbol (which is not emitted), when
        no interval contains the value, or after a zero-width interval.
    """
    if not ELEMENTS:
        return None

    # The partition of [0, 1) does not change while decoding, so build it
    # once instead of once per decoded character.
    char_intervals = []
    low = 0.0
    for element in ELEMENTS:
        high = low + element.p_x_i
        char_intervals.append((element.id, low, high))
        low = high

    decoded_chars = []
    current_value = decimal_value

    for _ in range(max_chars):
        match = next(
            (
                entry for entry in char_intervals
                if entry[1] <= current_value < entry[2]
            ),
            None,
        )

        # Special case: the value sits exactly on the upper bound of the last
        # interval, which the half-open test above excludes.
        if match is None and current_value == 1.0:
            match = char_intervals[-1]

        if match is None or match[0] == EOF:
            break

        found_char, interval_low, interval_high = match
        decoded_chars.append(found_char)

        interval_range = interval_high - interval_low
        if interval_range == 0:
            break

        # Normalisation: map the value from the chosen interval onto [0, 1).
        current_value = (current_value - interval_low) / interval_range

    return ''.join(decoded_chars)
|
||||||
|
|
||||||
|
|
||||||
def AC_Encoder():
|
def AC_Decoder(code):
    """Decode a string of binary digits back into text.

    *code* is consumed in code words of N characters. Each word is converted
    to a fraction with binary_to_decimal and decoded by decode_single_block
    into at most calc_block_size() characters.

    Args:
        code: Concatenated code words as a string of '0'/'1' characters.

    Returns:
        The concatenation of all decoded blocks.
    """
    block_size = calc_block_size()
    # Slicing clamps at the end of the string, so a shorter trailing word
    # needs no separate branch.
    words = (code[start:start + N] for start in range(0, len(code), N))
    return "".join(
        decode_single_block(binary_to_decimal(word), block_size)
        for word in words
    )
|
||||||
|
|
||||||
|
|
||||||
|
def AC_Encoder(message):
    """Encode *message* into one string of concatenated code words.

    Args:
        message: Text to encode.

    Returns:
        The code words of all blocks, joined in block order.
    """
    # The symbol statistics have to exist before blocks are sized and encoded.
    Z_statistik(message)
    return "".join(create_code_word(block) for block in create_blocks(message))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run an encode/decode round trip on the input text and print a report."""
    message = read_text_file()

    encoded = AC_Encoder(message)
    decoded = AC_Decoder(encoded)

    # Size comparison: 8 bits per original character versus the code length.
    print(f"Original: {len(message)} Zeichen = {len(message) * 8} Bits")
    print(f"Komprimiert: {len(encoded)} Bits")

    # Statistics gathered while encoding.
    print(f"Maximale Blockgröße: {calc_block_size()}")
    print(f"Anzahl verschiedener Zeichen: {len(ELEMENTS)}")
    print(f"Zweitseltenste: '{ELEMENTS[-2].id}' kommt {ELEMENTS[-2].count}x vor")

    # The final character of the message is left out of the comparison --
    # presumably the EOF marker, which the decoder never emits (verify
    # against the input file).
    print(f"\nIst die Nachricht richtig en- & dekodiert worden? {message[:-1] == decoded}")
|
||||||
# charToBin() # unnötig, da nur das Codewort in binär umgewandelt werden muss
|
|
||||||
calcIntervalls()
|
|
||||||
# INTERVALLS = [0.1, 0.8, 1]
|
|
||||||
|
|
||||||
blocks = createBlocks(message)
|
|
||||||
|
|
||||||
print(INTERVALLS)
|
|
||||||
|
|
||||||
createCodeWord(blocks[0])
|
|
||||||
|
|
||||||
|
|
||||||
AC_Encoder()
|
main()
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
SWISS
|
qwewqwqwewqwewqwewwweeqeqwqwqewqwqwqeqweqwqweqewqwweqeqwwqeeqwqwwqeeqwwwqeeeeqewweqeeewwwwqeeqwwwqeeqewwqwwwqeeqeeeeeqwwwwqwwqeqwqwqwqwwwq$
|
||||||
Reference in New Issue
Block a user