From 3f9a424e92bf49e616378e5dd8c657aa079939aa Mon Sep 17 00:00:00 2001 From: Safak Date: Mon, 15 Dec 2025 15:05:21 +0100 Subject: [PATCH] 6.2.1 Matrix Darstellung auf LaTeX umgestellt --- ...> HazinedarSafak_3108590_BDA_II_P_2.ipynb} | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) rename P2/{HazinedarSafak_3108590_BDA II_P_2.ipynb => HazinedarSafak_3108590_BDA_II_P_2.ipynb} (99%) diff --git a/P2/HazinedarSafak_3108590_BDA II_P_2.ipynb b/P2/HazinedarSafak_3108590_BDA_II_P_2.ipynb similarity index 99% rename from P2/HazinedarSafak_3108590_BDA II_P_2.ipynb rename to P2/HazinedarSafak_3108590_BDA_II_P_2.ipynb index bfa935a..2b91bad 100644 --- a/P2/HazinedarSafak_3108590_BDA II_P_2.ipynb +++ b/P2/HazinedarSafak_3108590_BDA_II_P_2.ipynb @@ -27,8 +27,8 @@ "id": "29c253cc-3060-4da3-bc9b-1aa5f5874db1", "metadata": { "ExecuteTime": { - "end_time": "2025-12-15T13:56:24.782957150Z", - "start_time": "2025-12-15T13:56:24.757679243Z" + "end_time": "2025-12-15T14:01:01.846362881Z", + "start_time": "2025-12-15T14:01:01.832960844Z" } }, "source": [ @@ -42,7 +42,7 @@ " r\"./Sherlock_card.txt\"]" ], "outputs": [], - "execution_count": 1 + "execution_count": 8 }, { "cell_type": "markdown", @@ -57,8 +57,8 @@ "id": "dee15bba-e43b-4e4b-b1a2-d798411820cb", "metadata": { "ExecuteTime": { - "end_time": "2025-12-15T13:56:24.879680892Z", - "start_time": "2025-12-15T13:56:24.784572818Z" + "end_time": "2025-12-15T14:01:01.904231100Z", + "start_time": "2025-12-15T14:01:01.872546121Z" } }, "source": [ @@ -66,7 +66,7 @@ "vectorizer = CountVectorizer(input=\"filename\")" ], "outputs": [], - "execution_count": 2 + "execution_count": 9 }, { "cell_type": "markdown", @@ -84,8 +84,8 @@ "id": "f94a5742-9b26-40ff-a093-b5f0f0bce12f", "metadata": { "ExecuteTime": { - "end_time": "2025-12-15T13:56:25.090650734Z", - "start_time": "2025-12-15T13:56:24.882138584Z" + "end_time": "2025-12-15T14:01:02.160184470Z", + "start_time": "2025-12-15T14:01:01.928765726Z" } }, "source": [ @@ -9017,7 +9017,7 @@ ] } ], - "execution_count": 3 + "execution_count": 10 }, { "cell_type": "markdown", @@ -9028,17 +9028,23 @@ "Es werden über alle Dokumente hinweg alle einzigartigen Features, in diesem Fall die unterschiedlichen Wörter gezählt. Dies gibt uns die `bag_of_words`.\n", "\n", "Der Aufbau der Matrix ist wie folgt\n", - "```\n", - " -> axis 1\n", - "V - axis 0 - V\n", - "\n", - "| | Wort1 | Wort2 | Wort3 | ... |\n", - "|------|-------|-------|-------|-----|\n", - "| Dok1 | | | | |\n", - "| Dok2 | | | | |\n", - "| ... | | | | |\n", - "```\n", - "\n", + "$$\n", + "\\begin{array}{cc}\n", + " % Obere Zeile: Leerecke links, Pfeil für Axis 1 rechts\n", + " & \\xrightarrow{\\hspace{1cm} \\text{axis 1 (Wörter)} \\hspace{1cm}} \\\\\n", + " % Untere Zeile: Pfeil für Axis 0 links, Matrix rechts\n", + " \\begin{array}{c} \\text{axis 0} \\\\ \\text{(Docs)} \\\\ \\downarrow \\end{array} &\n", + " \\begin{array}{|c|cccc|}\n", + " \\hline\n", + " & \\text{Wort}_1 & \\text{Wort}_2 & \\text{Wort}_3 & \\dots \\\\\n", + " \\hline\n", + " \\text{Dok}_1 & n_{1,1} & n_{1,2} & n_{1,3} & \\dots \\\\\n", + " \\text{Dok}_2 & n_{2,1} & n_{2,2} & n_{2,3} & \\dots \\\\\n", + " \\vdots & \\vdots & \\vdots & \\vdots & \\ddots \\\\\n", + " \\hline\n", + " \\end{array}\n", + "\\end{array}\n", + "$$\n", "#### Gesamtzahl der (einzigartigen) Wörter\n", "Die Anzahl der Spalten gibt uns die Anzahl der Wörter über alle Texte hinweg, da jede Spalte ein Wort darstellt, d.h. ein Wort wird nicht doppelt gelistet werden.\n", "So erhalten wir eine Anzahl von **8879** Wörtern, die in den Texten vorkommen.\n", @@ -9094,8 +9100,8 @@ "id": "a43e2e80", "metadata": { "ExecuteTime": { - "end_time": "2025-12-15T13:56:25.132343345Z", - "start_time": "2025-12-15T13:56:25.106818886Z" + "end_time": "2025-12-15T14:01:02.198425286Z", + "start_time": "2025-12-15T14:01:02.167549542Z" } }, "source": [ @@ -9123,7 +9129,7 @@ ] } ], - "execution_count": 4 + "execution_count": 11 }, { "cell_type": "markdown", @@ -9156,8 +9162,8 @@ "id": "b0de993a-7aad-4126-938d-86bc4bd26d8e", "metadata": { "ExecuteTime": { - "end_time": "2025-12-15T13:56:26.904835204Z", - "start_time": "2025-12-15T13:56:25.135153350Z" + "end_time": "2025-12-15T14:01:04.058166213Z", + "start_time": "2025-12-15T14:01:02.203172264Z" } }, "source": [ @@ -9223,7 +9229,7 @@ ] } ], - "execution_count": 5 + "execution_count": 12 }, { "metadata": {}, @@ -9317,8 +9323,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-12-15T13:56:26.992285254Z", - "start_time": "2025-12-15T13:56:26.965360172Z" + "end_time": "2025-12-15T14:01:04.103470898Z", + "start_time": "2025-12-15T14:01:04.073400774Z" } }, "cell_type": "code", @@ -9399,7 +9405,7 @@ ] } ], - "execution_count": 6 + "execution_count": 13 }, { "cell_type": "markdown", @@ -9414,8 +9420,8 @@ "id": "1cff3622-5a62-49bb-903f-4f98d9b044fb", "metadata": { "ExecuteTime": { - "end_time": "2025-12-15T13:56:27.029415899Z", - "start_time": "2025-12-15T13:56:26.995416559Z" + "end_time": "2025-12-15T14:01:04.139311264Z", + "start_time": "2025-12-15T14:01:04.107288791Z" } }, "source": [ @@ -9461,7 +9467,7 @@ ] } ], - "execution_count": 7 + "execution_count": 14 }, { "metadata": {},