add notebook with basic flexiconc example

Change-Id: I09b6cb872d1bd74d173541dec8bf5ac9bd63d468
diff --git a/examples/example_flexiconc.ipynb b/examples/example_flexiconc.ipynb
new file mode 100644
index 0000000..3957b9d
--- /dev/null
+++ b/examples/example_flexiconc.ipynb
@@ -0,0 +1,435 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "R[write to console]: Welcome to KorAP API for DeLiKo@DNB!\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Searching \"[tt/l=Korpus]\" in \"\"\u001b[0m by this KorAP instance.\u001b[0m\u001b[32m\u001b[32m: 771 hits\u001b[0m\u001b[32m, took 0.11682251 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 1/16 in 0.31968938 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 2/16 in 0.384494092 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 3/16 in 0.229332277 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 3/16 in 0.229332277 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 4/16 in 0.229332277 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 5/16 in 0.397982977 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 6/16 in 0.467999036 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 7/16 in 1.06042362 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 8/16 in 0.504681485 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 9/16 in 0.436383899 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 10/16 in 0.621623505 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 11/16 in 0.258548927 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 12/16 in 0.433153669 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 13/16 in 0.432334822 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 14/16 in 0.399952292 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 15/16 in 0.400414105 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 16/16 in 0.731605508 s\n",
+      "\u001b[0m\u001b[32mUsing fields API: \u001b[0m\u001b[32mRetrieved page 17/16 in 0.173677668 s\n",
+      "\u001b[0m"
+     ]
+    }
+   ],
+   "source": [
+    "from KorAPClient import KorAPConnection\n",
+    "from rpy2.robjects import r\n",
+    "\n",
+    "# Connect to the KorAP instance serving the fiction corpus DeLiKo@DNB (see <https://doi.org/10.5281/zenodo.14943116>)\n",
+    "kcon = KorAPConnection(KorAPUrl=\"https://korap.dnb.de/\", verbose=True).auth()\n",
+    "\n",
+    "# Seed R's random number generator for reproducibility; KorAPClient will export this in future\n",
+    "r['set.seed'](42)\n",
+    "q = kcon.corpusQuery(\"[tt/l=Korpus]\", metadataOnly=False).fetchNext(maxFetch=1000, randomizePageOrder=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/76/__9t5rnd5k94skg1118jhpw00000gn/T/ipykernel_77388/1758551592.py:36: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
+      "  matches = tokens[tokens['offset'] == 0].groupby('line_id').apply(\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Matches collected by the query in the previous cell\n",
+    "results = q.slots['collectedMatches']\n",
+    "# use index as line_id\n",
+    "results[\"line_id\"] = results.index\n",
+    "\n",
+    "# take all columns except snippet, tokens.left, tokens.match, tokens.right, matchStart, matchEnd\n",
+    "drop_columns = ['snippet', 'tokens.left', 'tokens.match', 'tokens.right', 'matchStart', 'matchEnd']\n",
+    "metadata = results.drop(columns=drop_columns)\n",
+    "\n",
+    "# Build one row per token. Each context column holds a tab-separated string of tokens;\n",
+    "# an empty left/right context is an empty string and must not produce an empty token.\n",
+    "# Zipping the columns avoids the slow, dtype-coercing DataFrame.iterrows().\n",
+    "token_rows = []\n",
+    "for line_id, left, match, right in zip(\n",
+    "    results['line_id'], results['tokens.left'], results['tokens.match'], results['tokens.right']\n",
+    "):\n",
+    "    left_context = left.split('\\t') if left else []\n",
+    "    match_tokens = match.split('\\t')\n",
+    "    right_context = right.split('\\t') if right else []\n",
+    "    # offset: negative in the left context, 0 for every match token, positive in the right context\n",
+    "    offsets = [*range(-len(left_context), 0), *([0] * len(match_tokens)), *range(1, len(right_context) + 1)]\n",
+    "    words = left_context + match_tokens + right_context\n",
+    "    # id_in_line (token position within the line) will be obsolete in later version of flexiconc\n",
+    "    for id_in_line, (offset, word) in enumerate(zip(offsets, words)):\n",
+    "        token_rows.append([offset, word, line_id, id_in_line])\n",
+    "\n",
+    "# create a dataframe from the token rows\n",
+    "tokens = pd.DataFrame(token_rows, columns=[\"offset\", \"word\", \"line_id\", \"id_in_line\"])\n",
+    "\n",
+    "# matches df will be obsolete in later version of flexiconc as \"offset\" is included in tokens.\n",
+    "# match_start / match_end are the first / last position of a line's match tokens in the tokens index.\n",
+    "# Named aggregation replaces groupby().apply(), which pandas deprecates when it operates on the grouping column.\n",
+    "matches = (\n",
+    "    tokens.loc[tokens['offset'] == 0]\n",
+    "    .rename_axis('token_pos')\n",
+    "    .reset_index()\n",
+    "    .groupby('line_id')['token_pos']\n",
+    "    .agg(match_start='min', match_end='max')\n",
+    "    .reset_index()\n",
+    ")\n",
+    "\n",
+    "# Add 'slot' column to the matches DataFrame and populate it with 0's\n",
+    "matches['slot'] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from flexiconc.concordance import Concordance\n",
+    "\n",
+    "# Start from an empty Concordance and load the three data frames built above:\n",
+    "#   metadata - the KorAP result columns without snippet, token and match position columns\n",
+    "#   tokens   - one row per token with offset, word, line_id and id_in_line\n",
+    "#   matches  - match_start / match_end per line_id, plus the constant slot 0\n",
+    "# The tokens frame already carries 'offset'; matches is passed as well\n",
+    "# (it is expected to become obsolete in a later flexiconc version).\n",
+    "\n",
+    "c = Concordance()\n",
+    "c.load(metadata=metadata, tokens=tokens, matches=matches)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style='margin-bottom:10px;'><strong>Query:</strong> </div>\n",
+       "<ul style='list-style-type:none;'>\n",
+       "<li>[1] 🔎 subset (813): </li>\n",
+       "</ul>\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from IPython.display import HTML, display\n",
+    "from flexiconc.visualization.html_visualizer import generate_concordance_html, generate_analysis_tree_html\n",
+    "display(HTML(generate_analysis_tree_html(c)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <style>\n",
+       "        table.concordance {\n",
+       "            border-collapse: collapse;\n",
+       "            width: 100%;\n",
+       "            table-layout: auto;\n",
+       "        }\n",
+       "        table.concordance th, table.concordance td {\n",
+       "            border: 1px solid #dddddd;\n",
+       "            padding: 4px;\n",
+       "            vertical-align: top;\n",
+       "            white-space: nowrap;\n",
+       "            overflow: hidden;\n",
+       "            text-overflow: ellipsis;\n",
+       "        }\n",
+       "        table.concordance th {\n",
+       "            background-color: #f2f2f2;\n",
+       "            text-align: center;\n",
+       "        }\n",
+       "        table.concordance th.line-id, table.concordance td.line-id {\n",
+       "            text-align: center;\n",
+       "            white-space: nowrap;\n",
+       "        }\n",
+       "        table.concordance th.metadata, table.concordance td.metadata {\n",
+       "            text-align: center;\n",
+       "            white-space: nowrap;\n",
+       "        }\n",
+       "        table.concordance th.left-context, table.concordance td.left-context {\n",
+       "            text-align: right;\n",
+       "            overflow: hidden;\n",
+       "            white-space: nowrap;\n",
+       "            width: 40%;\n",
+       "            max-width: 0px;\n",
+       "        }\n",
+       "        table.concordance th.node, table.concordance td.node {\n",
+       "            text-align: center;\n",
+       "            font-weight: bold;\n",
+       "            white-space: nowrap;\n",
+       "        }\n",
+       "        table.concordance th.right-context, table.concordance td.right-context {\n",
+       "            text-align: left;\n",
+       "            overflow: hidden;\n",
+       "            white-space: nowrap;\n",
+       "            width: 40%;\n",
+       "            max-width: 0px;\n",
+       "        }\n",
+       "        table.concordance div.left-context {\n",
+       "            float: right;\n",
+       "            white-space: nowrap;\n",
+       "        }\n",
+       "        table.concordance div.right-context {\n",
+       "            float: left;\n",
+       "            white-space: nowrap;\n",
+       "        }\n",
+       "    </style>\n",
+       "    <table class=\"concordance\">\n",
+       "        <colgroup>\n",
+       "            <col>\n",
+       "                <col>\n",
+       "            <col>\n",
+       "            <col>\n",
+       "        </colgroup>\n",
+       "        <tr>\n",
+       "            <th class=\"line-id\">Line ID</th>\n",
+       "                <th class=\"left-context\">Left Context</th>\n",
+       "            <th class=\"node\">Node</th>\n",
+       "            <th class=\"right-context\">Right Context</th>\n",
+       "        </tr>\n",
+       "    \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">0</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">des Attila, der seinen stattlichen</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">viel zu eng umspannt hielt</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">1</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">– einem mannshohen Streichinstrument mit gekrümmtem</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">in Form eines Halbmonds,</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">2</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">an ihren Trommelfellen und brachte den</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">des Wagens zum Wackeln.</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">3</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">an den Schnittstellen zwischen Tür und</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">. Genau da, wo</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">4</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">ein halbes Dutzend Klingen aus dem</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">schnellte. Und dann kam</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">5</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">gebürstete Oberfläche seiner Gliedmaßen verlieh dem</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">etwas Edles, während die</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">6</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">Multitool, ein Reparaturbot mit flachem</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">, der seine sechs flexiblen</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">7</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">gehabt, die Kennung an seinem</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">auszumachen, würde aber schnellstmöglich</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">8</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">Art von Reparaturbots, mit flachem</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">und mehreren Gliedmaßen, die</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">9</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">und mit gesplittertem Visor, der</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">eines Roboters, dessen Arme</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">10</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">ihn ein, dass der metallene</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">sich innerhalb weniger Schläge bis</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">11</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">ab und stieß den übrig gebliebenen</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">mit dem Fuß hinüber zu</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">12</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">Während die meisten dieser Schüsse vom</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">des Exoskeletts abprallten, erschienen</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">13</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">. Mehr nicht. Als der</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">des Roboters neben dem ÜberBot</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">14</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">Multitool, ein Reparaturbot mit flachem</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">und sechs flexiblen ausfahrbaren Gliedmaßen</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">15</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">heißt, wenn du deinen eigenen</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">in Sicherheit bringen möchtest.</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">16</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">der erstmals im Jahre 1706 gesammelte</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">von Tang-Lyrik aufweist: Fast</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">17</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">Gesicht auf einem wie Espenlaub bebenden</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">, – ja in der</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">18</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">dreißig Fatras sind das größte bekannte</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">. In einem mittelalterlichen Manuskript</div></td>\n",
+       "            </tr>\n",
+       "            \n",
+       "            <tr>\n",
+       "                <td class=\"line-id\">19</td>\n",
+       "                \n",
+       "                <td class=\"left-context\"><div class=\"left-context\">– also die Hälfte des bekannten</div></td>\n",
+       "                <td class=\"node\">Korpus</td>\n",
+       "                <td class=\"right-context\"><div class=\"right-context\">– wurden aufgenommen. Sie</div></td>\n",
+       "            </tr>\n",
+       "            </table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "display(HTML(generate_concordance_html(c, c.root, n=20)))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "KorApClient",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}