Funktionierende Version des Decoders

This commit is contained in:
2025-06-27 00:51:27 +02:00
parent 0ffdf5af24
commit ace0d97b82
2 changed files with 169 additions and 84 deletions

View File

@@ -2,8 +2,8 @@
import math import math
ELEMENTS = [] ELEMENTS = []
INTERVALLS = [] N = 50 # länge eines Codeworts
N = 8 # länge eines Codeworts EOF = "$"
class Element: class Element:
@@ -11,7 +11,6 @@ class Element:
count = 0 count = 0
p_x_i = 0 p_x_i = 0
I_x_i = 0 I_x_i = 0
bin = ""
def __init__(self, id): def __init__(self, id):
self.id = id self.id = id
@@ -34,12 +33,8 @@ def read_text_file():
def Z_statistik(message): def Z_statistik(message):
# Ergebnisse mit https://de.planetcalc.com/2476/ prüfbar
# message = read_text_file()
m = len(message)
global ELEMENTS global ELEMENTS
m = len(message)
for char in message: for char in message:
i, entry = find_element(ELEMENTS, char) i, entry = find_element(ELEMENTS, char)
@@ -48,11 +43,6 @@ def Z_statistik(message):
else: else:
ELEMENTS.append(Element(char)) ELEMENTS.append(Element(char))
calculate(m, message)
def calculate(m, message):
global ELEMENTS
h = 0 h = 0
for element in ELEMENTS: for element in ELEMENTS:
element.p_x_i = element.count / m element.p_x_i = element.count / m
@@ -61,31 +51,53 @@ def calculate(m, message):
# Ausgabe # Ausgabe
ELEMENTS.sort(key=lambda element: element.count, reverse=True) ELEMENTS.sort(key=lambda element: element.count, reverse=True)
"""for element in elements:
print(f"{element.count:3.0f} | {element.p_x_i:10.7f} | {element.I_x_i:10.7f} | »{element.id}«")
print("Entropie = " + h.__str__() + "\n\n")"""
# --- Aus P1 --- # --- Aus P1 ---
def binary_to_decimal(nkst_bits):
    """Return the value of the binary fraction ``0.b1 b2 b3 ...``.

    The digit at (1-based) position k carries the weight ``2 ** -k``, so
    ``"101"`` evaluates to 0.5 + 0.125 = 0.625.

    Args:
        nkst_bits: Iterable of digit characters (fractional part only).
            Every ``'1'`` adds its positional weight; any other character
            contributes nothing.

    Returns:
        The accumulated value as a float; 0.0 for an empty input.
    """
    total = 0.0
    for exponent, digit in enumerate(nkst_bits, start=1):
        if digit != '1':
            continue
        total += 2 ** -exponent
    return total
def decimal_to_binary(decimal_val, n_bits, min_val, max_val):
    """Encode a fraction as ``n_bits`` binary digits that stay inside an interval.

    ``decimal_val`` is truncated to ``n_bits`` fractional binary digits. The
    truncated value is never larger than ``decimal_val``, so the only way it
    can leave ``[min_val, max_val)`` is by dropping below ``min_val``. In that
    case the code word is incremented by one unit in the last place (with
    carry) and the result is used if it lies inside the interval.

    The previous implementation merely flipped the last digit, which moved
    the value *further* below ``min_val`` whenever that digit was ``'1'``.

    Args:
        decimal_val: Value to encode; expected to lie in ``[min_val, max_val)``.
        n_bits: Number of fractional binary digits to emit.
        min_val: Inclusive lower bound the decoded code word should respect.
        max_val: Exclusive upper bound the decoded code word should respect.

    Returns:
        A string of ``n_bits`` characters ``'0'``/``'1'`` (empty when
        ``n_bits`` is not positive). If the interval is too narrow for any
        ``n_bits``-digit value to fit, the plain truncation is returned as a
        best effort.
    """
    digits = []
    remainder = decimal_val
    for _ in range(n_bits):
        remainder *= 2
        if remainder >= 1:
            digits.append('1')
            remainder -= 1
        else:
            digits.append('0')

    if not digits:
        # Nothing to correct; also avoids indexing into an empty list.
        return ""

    truncated = "".join(digits)
    scale = 1 << n_bits
    as_int = int(truncated, 2)
    if min_val <= as_int / scale < max_val:
        return truncated

    # Step up to the next representable code word. Guard against overflowing
    # the available digits and against overshooting the upper bound.
    rounded_up = as_int + 1
    if rounded_up < scale and min_val <= rounded_up / scale < max_val:
        return format(rounded_up, f"0{n_bits}b")

    # Interval narrower than the n_bits resolution: no exact fit exists.
    return truncated
def calc_bit_count(block):
    """Return the number of bits needed to encode ``block`` as one code word.

    The probabilities ``p_x_i`` of all symbols in ``block`` are multiplied and
    the result is ``ceil(-log2(product))``.

    Args:
        block: Iterable of symbols. Each symbol is looked up with
            ``find_element(ELEMENTS, char)``, whose result is unpacked as an
            ``(index, element)`` pair.

    Returns:
        The bit count as an int; 0 for an empty block.

    Raises:
        ValueError: If the probability product is 0 (log of zero).
    """
    probability = 1
    for char in block:
        _, element = find_element(ELEMENTS, char)
        probability *= element.p_x_i
    # math.log2 is exact for powers of two. math.log(p, 2) divides two rounded
    # natural logarithms and can land just above an integer, which math.ceil
    # would then round up to one bit too many.
    return math.ceil(-math.log2(probability))
def calcIntervalls(): def calc_intervalls():
global INTERVALLS global INTERVALLS
current_limit = 0 current_limit = 0
for element in ELEMENTS: for element in ELEMENTS:
@@ -94,90 +106,163 @@ def calcIntervalls():
INTERVALLS.append(new_limit) INTERVALLS.append(new_limit)
def calc_block_size():
    """Return how many symbols fit into one code word of ``N`` bits.

    The second-to-last entry of ``ELEMENTS`` is used as the reference symbol.
    The block is grown one copy of that symbol at a time for as long as
    ``calc_bit_count`` reports at most ``N`` bits.

    NOTE(review): presumably ``ELEMENTS`` is sorted by descending count and
    its last entry is the EOF marker, making the second-to-last entry the
    rarest regular symbol - confirm against the caller.

    Returns:
        The largest length (possibly 0) whose bit count does not exceed ``N``.

    Raises:
        IndexError: If ``ELEMENTS`` holds fewer than two entries.
    """
    reference_symbol = ELEMENTS[-2].id
    length = 0
    # Probe the next longer block; stop as soon as it would need more than N bits.
    while calc_bit_count([reference_symbol] * (length + 1)) <= N:
        length += 1
    return length
# TODO EOF Symbol setzen und auf Länge des Blocks dabei achten
def create_blocks(message):
    """Split ``message`` into consecutive slices of ``calc_block_size()`` symbols.

    Args:
        message: Sliceable sequence (the visible caller passes a string).

    Returns:
        A list of slices in original order; the last one may be shorter.

    Raises:
        ValueError: If ``calc_block_size()`` returns 0. Previously this
            surfaced as the opaque "range() arg 3 must not be zero".
    """
    block_size = calc_block_size()
    if block_size < 1:
        raise ValueError(
            "calc_block_size() returned 0; cannot split the message into blocks"
        )
    return [
        message[start:start + block_size]
        for start in range(0, len(message), block_size)
    ]
def create_code_word(block, verbose=False):
    """Arithmetic-encode ``block`` into a binary code word of ``N`` digits.

    Starting from ``[0.0, 1.0)``, the interval is narrowed once per symbol.
    The current interval is divided among ``ELEMENTS`` in list order,
    proportionally to each ``p_x_i``, and the sub-interval of the symbol
    becomes the new interval. The midpoint of the final interval is converted
    with ``decimal_to_binary``.

    Args:
        block: Iterable of symbols to encode.
        verbose: Accepted for compatibility; not used.

    Returns:
        The code word string, or ``None`` if ``find_element`` returns ``None``
        for a symbol or no entry of ``ELEMENTS`` has the symbol as its ``id``.
    """
    low_bound, high_bound = 0.0, 1.0

    for char in block:
        if find_element(ELEMENTS, char) is None:
            return None

        # Walk the cumulative sub-intervals of the current range until the
        # one belonging to this symbol is reached.
        width = high_bound - low_bound
        cursor = low_bound
        narrowed = None
        for element in ELEMENTS:
            upper = cursor + width * element.p_x_i
            if element.id == char:
                narrowed = (cursor, upper)
                break
            cursor = upper

        if narrowed is None:
            return None
        low_bound, high_bound = narrowed

    midpoint = low_bound + (high_bound - low_bound) / 2
    return decimal_to_binary(midpoint, N, low_bound, high_bound)
def decode_single_block(decimal_value, max_chars=N):
    """Decode up to ``max_chars`` symbols from one arithmetic-coded value.

    Each round finds the symbol whose cumulative probability interval
    contains the current value, emits it, and rescales the value from that
    interval back onto ``[0, 1)``.

    Args:
        decimal_value: The code word interpreted as a fraction.
        max_chars: Upper bound on the number of symbols to decode.

    Returns:
        The decoded symbols joined into a string, or ``None`` when
        ``ELEMENTS`` is empty. Decoding stops early when no interval matches,
        when the ``EOF`` symbol is found (it is not emitted), or when the
        matched interval has zero width.
    """
    if not ELEMENTS:
        return None

    # Cumulative intervals over [0, 1) in ELEMENTS order; they do not depend
    # on the decoding position, so they are built once.
    table = []
    lower = 0.0
    for element in ELEMENTS:
        upper = lower + element.p_x_i
        table.append((element.id, lower, upper))
        lower = upper

    symbols = []
    value = decimal_value
    for _ in range(max_chars):
        match = None
        for entry in table:
            if entry[1] <= value < entry[2]:
                match = entry
                break

        # Special case: the value sits exactly on the upper bound of the
        # last interval.
        if match is None and value == 1.0:
            match = table[-1]

        if match is None or match[0] == EOF:
            break

        symbol, lower, upper = match
        symbols.append(symbol)

        span = upper - lower
        if span == 0:
            break

        # Normalisation: map the value from the matched interval onto [0, 1).
        value = (value - lower) / span

    return ''.join(symbols)
def AC_Decoder(code):
    """Decode a concatenation of ``N``-digit code words back into text.

    ``code`` is cut into consecutive pieces of ``N`` characters (the last
    piece may be shorter). Each piece is converted with ``binary_to_decimal``
    and decoded with ``decode_single_block``, limited to
    ``calc_block_size()`` symbols.

    Args:
        code: String of binary digits as produced by the encoder.

    Returns:
        The decoded pieces concatenated in order.
    """
    symbols_per_block = calc_block_size()
    pieces = []
    for start in range(0, len(code), N):
        # Slicing already clamps at the end of the string.
        code_word = code[start:start + N]
        pieces.append(
            decode_single_block(binary_to_decimal(code_word), symbols_per_block)
        )
    return "".join(pieces)
def AC_Encoder(message):
    """Encode ``message`` into one string of concatenated code words.

    First runs ``Z_statistik(message)``, then splits the message with
    ``create_blocks`` and encodes every block with ``create_code_word``.

    Args:
        message: The text to encode.

    Returns:
        All code words joined in block order.
    """
    Z_statistik(message)
    return "".join(create_code_word(block) for block in create_blocks(message))
def main():
    """Encode and decode the input text, then print a short report.

    Reads the message with ``read_text_file``, runs it through ``AC_Encoder``
    and ``AC_Decoder``, and prints the sizes, the block size, the alphabet
    size, the second-to-last entry of ``ELEMENTS`` and whether the round trip
    succeeded.
    """
    message = read_text_file()
    encoded = AC_Encoder(message)
    decoded = AC_Decoder(encoded)

    print(f"Original: {len(message)} Zeichen = {len(message) * 8} Bits")
    print(f"Komprimiert: {len(encoded)} Bits")
    print(f"Maximale Blockgröße: {calc_block_size()}")
    print(f"Anzahl verschiedener Zeichen: {len(ELEMENTS)}")
    print(f"Zweitseltenste: '{ELEMENTS[-2].id}' kommt {ELEMENTS[-2].count}x vor")

    # The comparison drops the last character of the message.
    # NOTE(review): presumably that is the EOF marker, which the decoder
    # does not emit - confirm against the input file.
    print(f"\nIst die Nachricht richtig en- & dekodiert worden? {message[:-1] == decoded}")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -1 +1 @@
SWISS qwewqwqwewqwewqwewwweeqeqwqwqewqwqwqeqweqwqweqewqwweqeqwwqeeqwqwwqeeqwwwqeeeeqewweqeeewwwwqeeqwwwqeeqewwqwwwqeeqeeeeeqwwwwqwwqeqwqwqwqwwwq$