"""Block-wise arithmetic coder using a static symbol-frequency model.

The message is split into blocks; every block is mapped to a sub-interval of
[0, 1) and emitted as a fixed-width code word of ``N`` binary fraction digits.
All interval arithmetic uses Python floats, so blocks whose interval width
approaches 2**-N are subject to rounding errors.
"""
import math
from collections import Counter

# --- From P1 ---
ELEMENTS = []  # symbol table, filled and sorted by Z_statistik()
INTERVALLS = []  # cumulative upper interval limits, filled by calc_intervalls()
N = 50  # length of one code word in bits
EOF = "$"  # symbol at which the decoder stops decoding a block


class Element:
    """Statistics record for one distinct symbol of the message."""

    id = ""  # the symbol itself
    count = 0  # number of occurrences in the message
    p_x_i = 0  # relative frequency (count / message length)
    I_x_i = 0  # information content, -log2(p_x_i)

    def __init__(self, id):
        self.id = id
        self.count = 1


def find_element(list, target_it):
    """Return ``(index, element)`` of the first element whose id matches.

    Returns ``(None, None)`` when no element in ``list`` has ``target_it``
    as its id.
    """
    for i, element in enumerate(list):
        if element.id == target_it:
            return i, element
    return None, None


def read_text_file():
    """Read and return the complete content of ``./text.txt`` (UTF-8)."""
    path = './text.txt'
    with open(path, 'r', encoding='utf-8') as file:
        content = file.read()
    return content


def Z_statistik(message):
    """Rebuild the global symbol table ``ELEMENTS`` from ``message``.

    Every distinct symbol gets its count, relative frequency and information
    content. The table is reset on each call so repeated calls do not
    accumulate counts, and it is mutated in place so existing references to
    ``ELEMENTS`` stay valid. Afterwards the table is sorted by descending
    count; the sort is stable, so symbols with equal counts keep the order of
    their first appearance.

    Returns the entropy of the message in bits per symbol.
    """
    m = len(message)
    ELEMENTS.clear()
    # Counter keeps first-seen order, matching the original append order.
    for char, count in Counter(message).items():
        element = Element(char)
        element.count = count
        element.p_x_i = count / m
        element.I_x_i = -1 * math.log2(element.p_x_i)
        ELEMENTS.append(element)

    h = 0
    for element in ELEMENTS:
        h += element.p_x_i * element.I_x_i

    ELEMENTS.sort(key=lambda element: element.count, reverse=True)
    return h
# --- From P1 ---


def binary_to_decimal(nkst_bits):
    """Interpret a sequence of '0'/'1' as binary fraction digits.

    The first digit has weight 2**-1, the second 2**-2 and so on. Returns a
    float in [0, 1).
    """
    result = 0.0
    for i, bit in enumerate(nkst_bits):
        if bit == '1':
            result += 2 ** (-(i + 1))
    return result


def decimal_to_binary(decimal_val, n_bits, min_val, max_val):
    """Return the first ``n_bits`` binary fraction digits of ``decimal_val``.

    If the truncated value falls outside ``[min_val, max_val)`` it is moved by
    one unit in the last place towards the interval (with carry/borrow).
    With intervals narrower than 2**-n_bits the result can still lie outside
    the interval; in that case the closest attempt is returned.
    """
    nkst_bits = []
    val = decimal_val
    for _ in range(n_bits):
        val *= 2
        if val >= 1:
            nkst_bits.append('1')
            val -= 1
        else:
            nkst_bits.append('0')

    converted_val = binary_to_decimal(nkst_bits)
    if not nkst_bits or min_val <= converted_val < max_val:
        return "".join(nkst_bits)

    # Step one unit in the last place in the direction of the interval.
    # Merely flipping the last bit would move away from the interval whenever
    # that bit already points the wrong way.
    as_int = int("".join(nkst_bits), 2)
    if converted_val < min_val:
        as_int += 1
    else:
        as_int -= 1
    if 0 <= as_int < 2 ** n_bits:
        return format(as_int, f"0{n_bits}b")
    # The step would leave the representable range; keep the truncated digits.
    return "".join(nkst_bits)


def calc_bit_count(block):
    """Return ``ceil(-log2(p))`` where ``p`` is the product of the symbol
    probabilities in ``block``.

    Every symbol of ``block`` must be present in ``ELEMENTS``.
    """
    p = 1
    for char in block:
        _, element = find_element(ELEMENTS, char)
        p *= element.p_x_i
    # math.log2 is exact for powers of two, unlike math.log(p, 2), which can
    # be off by one ulp and push the ceiling up by a whole bit.
    return math.ceil(-math.log2(p))


def calc_intervalls():
    """Fill ``INTERVALLS`` with the cumulative upper limits of all symbol
    intervals, in the order of ``ELEMENTS``."""
    INTERVALLS.clear()
    current_limit = 0
    for element in ELEMENTS:
        current_limit = current_limit + element.p_x_i
        INTERVALLS.append(current_limit)


def calc_block_size():
    """Return how many repetitions of the second-to-last symbol of
    ``ELEMENTS`` still fit into ``N`` bits according to calc_bit_count().

    Raises IndexError when ``ELEMENTS`` holds fewer than two symbols.
    """
    reference = ELEMENTS[-2].id
    size = 0
    while calc_bit_count([reference] * (size + 1)) <= N:
        size += 1
    return size


def create_blocks(message):
    """Split ``message`` into consecutive slices of calc_block_size() symbols;
    the last slice may be shorter."""
    block_size = calc_block_size()
    return [
        message[i:i + block_size]
        for i in range(0, len(message), block_size)
    ]


def create_code_word(block, verbose=False):
    """Encode ``block`` into a code word of ``N`` binary digits.

    The interval [0, 1) is narrowed symbol by symbol according to the
    probabilities in ``ELEMENTS``; the midpoint of the final interval is then
    converted to binary. Returns None if ``block`` contains a symbol that is
    not in ``ELEMENTS``. ``verbose`` is accepted but not used.
    """
    current_low = 0.0
    current_high = 1.0

    for char in block:
        current_range = current_high - current_low
        low = current_low
        # Walk the sub-intervals [low, high) until the one of `char` is found.
        for element in ELEMENTS:
            high = low + current_range * element.p_x_i
            if element.id == char:
                break
            low = high
        else:
            return None  # symbol is not part of the model
        current_low, current_high = low, high

    final_value = current_low + (current_high - current_low) / 2
    return decimal_to_binary(final_value, N, current_low, current_high)


def decode_single_block(decimal_value, max_chars=N):
    """Decode up to ``max_chars`` symbols from ``decimal_value``.

    Decoding stops early at the ``EOF`` symbol (which is not part of the
    result) or when the value lies in no symbol interval. Returns the decoded
    string, or None when ``ELEMENTS`` is empty.
    """
    if not ELEMENTS:
        return None

    # The value is renormalised to [0, 1) after every symbol, so the interval
    # table is identical for every position and is built only once.
    char_intervals = []
    low = 0.0
    for element in ELEMENTS:
        high = low + element.p_x_i
        char_intervals.append((element.id, low, high))
        low = high

    decoded_chars = []
    current_value = decimal_value
    for _ in range(max_chars):
        for found_char, interval_low, interval_high in char_intervals:
            if interval_low <= current_value < interval_high:
                break
        else:
            # Special case: the value sits exactly on the upper limit of the
            # last interval.
            if current_value != 1.0:
                break
            found_char, interval_low, interval_high = char_intervals[-1]

        if found_char == EOF:
            break
        decoded_chars.append(found_char)

        interval_range = interval_high - interval_low
        if interval_range == 0:
            break
        # Normalisation: map the value from the current interval onto [0, 1).
        current_value = (current_value - interval_low) / interval_range

    return ''.join(decoded_chars)


def AC_Decoder(code):
    """Decode a bit string produced by AC_Encoder().

    ``code`` is cut into code words of ``N`` digits; each is decoded with the
    current ``ELEMENTS`` table, which must match the one used for encoding.
    """
    l = calc_block_size()
    return "".join(
        decode_single_block(binary_to_decimal(code[i:i + N]), l)
        for i in range(0, len(code), N)
    )


def AC_Encoder(message):
    """Build the symbol statistics for ``message`` and return the
    concatenated code words of all its blocks."""
    Z_statistik(message)
    blocks = create_blocks(message)
    return "".join(create_code_word(block) for block in blocks)


def main():
    """Encode and decode ``./text.txt`` and print a short report.

    The final check compares the decoded text with the message minus its last
    character, i.e. it expects the message to end with the ``EOF`` symbol.
    """
    message = read_text_file()
    encoded = AC_Encoder(message)
    decoded = AC_Decoder(encoded)
    print(decoded)

    print(f"Original: {len(message)} Zeichen = {len(message) * 8} Bits")
    print(f"Komprimiert: {len(encoded)} Bits")
    print(f"Maximale Blockgröße: {calc_block_size()}")
    print(f"Anzahl verschiedener Zeichen: {len(ELEMENTS)}")
    print(f"Zweitseltenste: '{ELEMENTS[-2].id}' kommt {ELEMENTS[-2].count}x vor")
    print(f"\nIst die Nachricht richtig en- & dekodiert worden? {message[:-1] == decoded}")


if __name__ == "__main__":
    main()