From ace0d97b822cd71cbf16fe7d7d352d3abe602b1e Mon Sep 17 00:00:00 2001 From: Safak Date: Fri, 27 Jun 2025 00:51:27 +0200 Subject: [PATCH] Funktionierende Version des Decoders --- P3/main.py | 251 +++++++++++++++++++++++++++++++++++----------------- P3/text.txt | 2 +- 2 files changed, 169 insertions(+), 84 deletions(-) diff --git a/P3/main.py b/P3/main.py index c2df937..57d10ae 100644 --- a/P3/main.py +++ b/P3/main.py @@ -2,8 +2,8 @@ import math ELEMENTS = [] -INTERVALLS = [] -N = 8 # länge eines Codeworts +N = 50 # länge eines Codeworts +EOF = "$" class Element: @@ -11,7 +11,6 @@ class Element: count = 0 p_x_i = 0 I_x_i = 0 - bin = "" def __init__(self, id): self.id = id @@ -34,12 +33,8 @@ def read_text_file(): def Z_statistik(message): - # Ergebnisse mit https://de.planetcalc.com/2476/ prüfbar - # message = read_text_file() - - m = len(message) - global ELEMENTS + m = len(message) for char in message: i, entry = find_element(ELEMENTS, char) @@ -48,11 +43,6 @@ def Z_statistik(message): else: ELEMENTS.append(Element(char)) - calculate(m, message) - - -def calculate(m, message): - global ELEMENTS h = 0 for element in ELEMENTS: element.p_x_i = element.count / m @@ -61,31 +51,53 @@ def calculate(m, message): # Ausgabe ELEMENTS.sort(key=lambda element: element.count, reverse=True) - """for element in elements: - print(f"{element.count:3.0f} | {element.p_x_i:10.7f} | {element.I_x_i:10.7f} | »{element.id}«") - - print("Entropie = " + h.__str__() + "\n\n")""" # --- Aus P1 --- -def charToBin(): - global ELEMENTS - for element in ELEMENTS: - element.bin = format(ord(element.id), 'b') +def binary_to_decimal(nkst_bits): + result = 0.0 + for i, bit in enumerate(nkst_bits): + if bit == '1': + result += 2 ** (-(i + 1)) + return result -def calcBitCount(block): +def decimal_to_binary(decimal_val, n_bits, min_val, max_val): + nkst_bits = [] + val = decimal_val + + for _ in range(n_bits): + val *= 2 + if val >= 1: + nkst_bits.append('1') + val -= 1 + else: + nkst_bits.append('0') + + converted_val = binary_to_decimal(nkst_bits) + + # !!! Führt bei zu kleinen Intervallen zu Ungenauigkeiten + if not (min_val <= converted_val < max_val): + if nkst_bits[-1] == '1': + nkst_bits[-1] = '0' + else: + nkst_bits[-1] = '1' + + return "".join(nkst_bits) + + +def calc_bit_count(block): p = 1 for char in block: i, element = find_element(ELEMENTS, char) p *= element.p_x_i - N = round(-math.log(p), 0) + N = math.ceil(-math.log(p, 2)) return N -def calcIntervalls(): +def calc_intervalls(): global INTERVALLS current_limit = 0 for element in ELEMENTS: @@ -94,90 +106,163 @@ def calcIntervalls(): INTERVALLS.append(new_limit) -def createBlocks(message): - blocks = [] - +def calc_block_size(): + last_element = ELEMENTS[-2] block = [] - for char in message: - if len(block) == 0: - block.append(char) - continue - if calcBitCount(block) <= N: - block.append(char) + while True: + tmp = block.copy() + tmp.append(last_element.id) + if calc_bit_count(tmp) <= N: + block.append(last_element.id) else: - blocks.append(block) - block = [char] + break - blocks.append(block) - # TODO EOF Symbol setzen und auf Länge des Blocks dabei achten + return len(block) + + +def create_blocks(message): + blocks = [] + block = [] + block_size = calc_block_size() + + for i in range(0, len(message), block_size): + block = message[i:i + block_size] + blocks.append(block) return blocks -def createCodeWord(block): - intervalls = [] +def create_code_word(block, verbose=False): + current_low = 0.0 + current_high = 1.0 - i, element = find_element(ELEMENTS, block[0]) - print(element.id) + for position, char in enumerate(block): + target_element = find_element(ELEMENTS, char) - if i == 0: - low = 0 - else: - low = INTERVALLS[i - 1] - high = INTERVALLS[i] - range = high - low + if target_element is None: + return None - # Normierung - for i, element in enumerate(ELEMENTS): - new_high = low + range * element.p_x_i - intervalls.append((low, new_high)) - low = new_high + # Erstelle Intervalle für aktuelle Position + char_intervals = [] + current_range = current_high - current_low + low = current_low - print(intervalls) + for element in ELEMENTS: + # Intervall für dieses Zeichen: [low, high) + high = low + current_range * element.p_x_i + char_intervals.append((element.id, (low, high))) + low = high + + # Finde das Intervall für das aktuelle Zeichen + found_interval = None + for element_char, interval in char_intervals: + if element_char == char: + found_interval = interval + break + + if found_interval is None: + return None + + interval_low, interval_high = found_interval + current_low = interval_low + current_high = interval_high + + final_value = current_low + (current_high - current_low) / 2 + binary_representation = decimal_to_binary(final_value, N, current_low, current_high) + + return binary_representation - for char in block[1:]: - i, element = find_element(ELEMENTS, char) - print(element.id) +def decode_single_block(decimal_value, max_chars=N): + if not ELEMENTS: + return None - low, high = intervalls[i] + decoded_chars = [] + current_value = decimal_value - range = high - low + for position in range(max_chars): + char_intervals = [] + low = 0.0 - intervalls = [] + for element in ELEMENTS: + high = low + element.p_x_i + char_intervals.append((element.id, (low, high))) + low = high - for i, element in enumerate(ELEMENTS): - new_high = low + range * element.p_x_i - intervalls.append((low, new_high)) - low = new_high + found_char = None + found_interval = None - print(intervalls) + for char, interval in char_intervals: + interval_low, interval_high = interval + if interval_low <= current_value < interval_high: + found_char = char + found_interval = interval + break - low, x = intervalls[0] - x, high = intervalls[-1] - print(f"[{low}, {high})") + # Spezialfall: Wert liegt genau auf der oberen Grenze des letzten Intervalls + if found_char is None and current_value == 1.0: + found_char = char_intervals[-1][0] + found_interval = char_intervals[-1][1] + + if found_char is None or found_char == EOF: + break + + decoded_chars.append(found_char) + + interval_low, interval_high = found_interval + interval_range = interval_high - interval_low + + if interval_range == 0: + break + + # Normalisierung: Transformiere den Wert vom aktuellen Intervall auf [0, 1) + current_value = (current_value - interval_low) / interval_range + + return ''.join(decoded_chars) -def AC_Encoder(): - global ELEMENTS - global INTERVALLS +def AC_Decoder(code): + decoded = "" + l = calc_block_size() + + for i in range(0, len(code), N): + if i + N > len(code): + block = code[i:] + else: + block = code[i:i + N] + + decoded += decode_single_block(binary_to_decimal(block), l) + + return decoded + + +def AC_Encoder(message): + Z_statistik(message) + + blocks = create_blocks(message) + + code = "" + + for block in blocks: + code += create_code_word(block) + + return code + + +def main(): message = read_text_file() - l = len(message) + encoded = AC_Encoder(message) + decoded = AC_Decoder(encoded) - l_bin = l * 8 + print(f"Original: {len(message)} Zeichen = {len(message) * 8} Bits") + print(f"Komprimiert: {len(encoded)} Bits") + print(f"Maximale Blockgröße: {calc_block_size()}") + print(f"Anzahl verschiedener Zeichen: {len(ELEMENTS)}") + print(f"Zweitseltenste: '{ELEMENTS[-2].id}' kommt {ELEMENTS[-2].count}x vor") - Z_statistik(message) - # charToBin() # unnötig, da nur das Codewort in binär umgewandelt werden muss - calcIntervalls() - # INTERVALLS = [0.1, 0.8, 1] - - blocks = createBlocks(message) - - print(INTERVALLS) - - createCodeWord(blocks[0]) + print(f"\nIst die Nachricht richtig en- & dekodiert worden? {message[:-1] == decoded}") -AC_Encoder() +main() diff --git a/P3/text.txt b/P3/text.txt index 9748b0f..47d696c 100644 --- a/P3/text.txt +++ b/P3/text.txt @@ -1 +1 @@ -SWISS \ No newline at end of file +qwewqwqwewqwewqwewwweeqeqwqwqewqwqwqeqweqwqweqewqwweqeqwwqeeqwqwwqeeqwwwqeeeeqewweqeeewwwwqeeqwwwqeeqewwqwwwqeeqeeeeeqwwwwqwwqeqwqwqwqwwwq$ \ No newline at end of file