Browse Source

add notebook for cooccorrenze

Federica 2 years ago
parent
commit
f83826b22b

+ 3 - 0
.idea/.gitignore

@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml

+ 10 - 0
.idea/Ricerche.iml

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/TIgrO" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 4 - 0
.idea/misc.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (Ricerche)" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Ricerche.iml" filepath="$PROJECT_DIR$/.idea/Ricerche.iml" />
+    </modules>
+  </component>
+</project>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

BIN
flask_be/.DS_Store


BIN
flask_be/engine/__pycache__/simple_query_test_pandas.cpython-39.pyc


BIN
flask_be/engine/__pycache__/test_occorrenzario_pandas.cpython-39.pyc


+ 11 - 0
flask_be/engine/flask_be.code-workspace

@@ -0,0 +1,11 @@
+{
+	"folders": [
+		{
+			"path": ".."
+		},
+		{
+			"path": "../.."
+		}
+	],
+	"settings": {}
+}

+ 5 - 5
flask_be/engine/simple_query_test_pandas.py

@@ -273,9 +273,9 @@ def ricercalemmiforme (entries, path, espansa, raddoppiata):
         return finalresults"""
 
 #%% ricercaforme(interpreter(entry), "../")
-entry = "proferire*"
-print ("Ricerca di: " + entry)
-df=ricercalemmi(interpreter(entry), "../", 1, 0)
-print (counter(df))
-dtale.show(df)
+#entry = "proferire*"
+#print ("Ricerca di: " + entry)
+#df=ricercalemmi(interpreter(entry), "../", 1, 0)
+#print (counter(df))
+#dtale.show(df)
 # %%

+ 377 - 0
flask_be/engine/test_cooccorrenze_notebook.ipynb

@@ -0,0 +1,377 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sqlite3\n",
+    "import pandas as pd\n",
+    "import dtale\n",
+    "import unicodedata\n",
+    "from simple_query_test_pandas import ricercaforme\n",
+    "from simple_query_test_pandas import ricercalemmi\n",
+    "from simple_query_test_pandas import ricercaformelemmi \n",
+    "from simple_query_test_pandas import ricercalemmiforme\n",
+    "from simple_query_test_pandas import inizialeraddoppiata\n",
+    "from simple_query_test_pandas import interpreter\n",
+    "from test_occorrenzario_pandas import findtexts\n",
+    "from test_occorrenzario_pandas import findcontexts\n",
+    "from test_occorrenzario_pandas import findbib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def ricerca_cooccorrenze (listaricerche, intervallo, periodo):\n",
+    "    \"\"\"Return the occurrences of the first search that co-occur with every other search.\n",
+    "\n",
+    "    Args:\n",
+    "        listaricerche: list of [pattern, tipo, espansa, raddoppiata] entries, where tipo\n",
+    "            is \"L\" (ricercalemmi) or \"F\" (ricercaforme). The first entry is the pivot\n",
+    "            whose occurrences are filtered and returned.\n",
+    "        intervallo: largest absolute difference between the 'mappa' values of the pivot\n",
+    "            and of an occurrence of each further entry inside the same text ('ntx').\n",
+    "        periodo: currently unused; kept so existing callers keep working.\n",
+    "\n",
+    "    Returns:\n",
+    "        The DataFrame built by findbib for the pivot occurrences that survive all filters.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if an entry has a tipo other than \"L\" or \"F\".\n",
+    "    \"\"\"\n",
+    "    listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "    charOffsetConst = 100\n",
+    "    # tipo -> (search function, table kind expected by findtexts)\n",
+    "    ricerche = {\"L\": (ricercalemmi, \"lemmi\"), \"F\": (ricercaforme, \"forme\")}\n",
+    "\n",
+    "    def _occorrenze(voce):\n",
+    "        # Run a single search entry and return the occurrence rows found by findtexts.\n",
+    "        pattern, tipo, espansa, raddoppiata = voce[0], voce[1], voce[2], voce[3]\n",
+    "        if tipo not in ricerche:\n",
+    "            # Ignoring the entry silently would return misleading co-occurrences.\n",
+    "            raise ValueError(\"unknown search type: \" + repr(tipo))\n",
+    "        cerca, tabella = ricerche[tipo]\n",
+    "        trovati = cerca(interpreter(pattern), \"../\", espansa, raddoppiata)\n",
+    "        return findtexts(tabella, trovati, listOcc, \"../\")\n",
+    "\n",
+    "    listatesti = _occorrenze(listaricerche[0])\n",
+    "    for voce in listaricerche[1:]:\n",
+    "        textlist = _occorrenze(voce)\n",
+    "        # Group the positions of the other term by text, so that a pivot row is only\n",
+    "        # compared with occurrences coming from its own text.\n",
+    "        mappe_per_testo = {}\n",
+    "        for ntx, mappa in zip(textlist['ntx'], textlist['mappa']):\n",
+    "            mappe_per_testo.setdefault(ntx, []).append(mappa)\n",
+    "        # Each pivot row is kept at most once (the nested loop used to append it once\n",
+    "        # per matching neighbour) and the window is symmetric: range(-n, n) left out +n.\n",
+    "        vicine = [\n",
+    "            pos\n",
+    "            for pos, (ntx, mappa) in enumerate(zip(listatesti['ntx'], listatesti['mappa']))\n",
+    "            if any(abs(mappa - altra) <= intervallo for altra in mappe_per_testo.get(ntx, ()))\n",
+    "        ]\n",
+    "        # Positional selection replaces DataFrame.append, which pandas 2.0 removed;\n",
+    "        # copy() gives findcontexts an independent frame to add its column to.\n",
+    "        listatesti = listatesti.iloc[vicine].copy()\n",
+    "    contexts = findcontexts(listatesti, charOffsetConst)\n",
+    "    return findbib(contexts, \"../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "     cod  ntx   pitxt  elemlen  mappa  numperiod  links  numorg sigla vol  \\\n",
+      "0     70    1     590        5    105          8      0       2   od2       \n",
+      "1     70    1    1403        5    257         17      0       3   od2       \n",
+      "2     70    1    2997        5    522         49      0       5   od2       \n",
+      "3     70    1    4226        5    723         63      0       6   od2       \n",
+      "4     70    1    5893        5   1020         92      0       8   od2       \n",
+      "..   ...  ...     ...      ...    ...        ...    ...     ...   ...  ..   \n",
+      "222   70    5  239919        5  40060       3271      0      36    dk       \n",
+      "223   70    5  242687        5  40518       3310      0      36    dk       \n",
+      "0     70    7    6590        5   1179         58      0       8   i51       \n",
+      "1     70    7    7735        5   1394         69      0      10   i51       \n",
+      "2     70    7   10778        5   1956         91      0      16   i51       \n",
+      "\n",
+      "     pag  riga  col  tipostanza  stanza  verso  lemma   cat_gr disambiguatore  \n",
+      "0     84    15    0           0       0      0                                 \n",
+      "1     86     1    0           0       0      0                                 \n",
+      "2     87    11    0           0       0      0                                 \n",
+      "3     88    13    0           0       0      0                                 \n",
+      "4     90     7    0           0       0      0                                 \n",
+      "..   ...   ...  ...         ...     ...    ...    ...      ...            ...  \n",
+      "222  291     4    0           8      23   1312                                 \n",
+      "223  296    11    0           8      38   1371                                 \n",
+      "0    641     7    0           1      58      7  quale  interr.                 \n",
+      "1    643     9    0           1      58      9                                 \n",
+      "2    650     4    0           1      58      4  quale     rel.                 \n",
+      "\n",
+      "[341 rows x 19 columns]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n",
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    cod ntx   pitxt elemlen  mappa numperiod links numorg sigla vol  pag riga  \\\n",
+      "15   98   1    2942       2    514        48     0      5   od2       87   10   \n",
+      "16   98   1    2994       2    521        49     0      5   od2       87   11   \n",
+      "17   98   1    3014       2    525        49     0      5   od2       87   11   \n",
+      "25   98   1    4223       2    722        63     0      6   od2       88   13   \n",
+      "52   98   1   13689       2   2336       198     0     11   od2      108   13   \n",
+      "53   98   1   13703       2   2338       198     0     11   od2      108   13   \n",
+      "126  98   1   33715       2   5780       493     0     22   od2      142   17   \n",
+      "135  98   1   37593       2   6448       551     0     27   od2      147    7   \n",
+      "1    98   3     479       2     86         7     2      2   aa1   1    3   12   \n",
+      "5    98   3    3182       2    585        42     2      2   aa1   1    6    6   \n",
+      "13   98   3   12468       2   2255       151     2      2   aa1   1   14   17   \n",
+      "25   98   3   20089       2   3632       252     2      2   aa1   1   21   13   \n",
+      "52   98   3   35406       2   6429       439     2      2   aa1   1   35    8   \n",
+      "53   98   3   36535       2   6635       450     2      2   aa1   1   36   11   \n",
+      "62   98   3   43231       2   7830       539     2      2   aa1   1   42    4   \n",
+      "105  98   3   85721       2  15570      1109     2      5   aa1   1   86   11   \n",
+      "116  98   3   95933       2  17392      1262     2      5   aa1   1   95   17   \n",
+      "131  98   3  116836       2  21138      1557     2      8   aa1   1  121    1   \n",
+      "135  98   3  124086       2  22412      1671     2      8   aa1   1  127    9   \n",
+      "141  98   3  137581       2  24859      1857     2      8   aa1   1  138   26   \n",
+      "157  98   3  153460       2  27660      2064     2     11   aa1   1  159    2   \n",
+      "163  98   3  158168       2  28494      2127     2     11   aa1   4    3    4   \n",
+      "163  98   3  158168       2  28494      2127     2     11   aa1   4    3    4   \n",
+      "170  98   3  168589       2  30354      2270     2     11   aa1   1  167   22   \n",
+      "173  98   3  176397       2  31730      2373     2     11   aa1   1  174   21   \n",
+      "190  98   3  199282       2  35815      2664     2     14   aa1   1  200    6   \n",
+      "\n",
+      "    col tipostanza stanza verso lemma cat_gr disambiguatore  \n",
+      "15    0          0      0     0                              \n",
+      "16    0          0      0     0                              \n",
+      "17    0          0      0     0                              \n",
+      "25    0          0      0     0                              \n",
+      "52    0          0      0     0                              \n",
+      "53    0          0      0     0                              \n",
+      "126   0          1      0     0                              \n",
+      "135   0          1      0     0                              \n",
+      "1     0          0      0     0                              \n",
+      "5     0          0      0     0                              \n",
+      "13    0          0      0     0                              \n",
+      "25    0          0      0     0                              \n",
+      "52    0          0      0     0                              \n",
+      "53    0          0      0     0                              \n",
+      "62    0          0      0     0                              \n",
+      "105   0          0      0     0                              \n",
+      "116   0          0      0     0                              \n",
+      "131   0          0      0     0                              \n",
+      "135   0          0      0     0                              \n",
+      "141   0          0      0     0                              \n",
+      "157   0          0      0     0                              \n",
+      "163   0          0      0     0                              \n",
+      "163   0          0      0     0                              \n",
+      "170   0          0      0     0                              \n",
+      "173   0          0      0     0                              \n",
+      "190   0          0      0     0                              \n",
+      "    cod ntx   pitxt elemlen  mappa numperiod links numorg sigla vol  pag riga  \\\n",
+      "0  4096   2   56578       6   9142       757     2     11   lb1       81   20   \n",
+      "1  4096   2  106523       6  17381      1370     2     19   lb1      121    1   \n",
+      "2  4096   2  177663       6  29048      2299     2     29   lb1      174   18   \n",
+      "3  4096   2  183976       6  30099      2384     2     31   lb1      180    5   \n",
+      "4  4096   2  246755       6  40389      3201     2     45   lb1      236   17   \n",
+      "0  4096   3   20098       6   3634       252     2      2   aa1   1   21   13   \n",
+      "1  4096   3   87442       6  15882      1134     2      5   aa1   1   87   24   \n",
+      "2  4096   3   87469       6  15886      1135     2      5   aa1   1   87   24   \n",
+      "3  4096   3  106376       6  19265      1407     2      8   aa1   1  111   15   \n",
+      "4  4096   3  123432       6  22289      1657     2      8   aa1   1  126   20   \n",
+      "\n",
+      "  col tipostanza stanza verso     lemma cat_gr disambiguatore  \n",
+      "0   0          0      0     0                                  \n",
+      "1   0          0      0     0                                  \n",
+      "2   0          0      0     0                                  \n",
+      "3   0          0      0     0                                  \n",
+      "4   0          0      0     0                                  \n",
+      "0   0          0      0     0                                  \n",
+      "1   0          0      0     0                                  \n",
+      "2   0          0      0     0                                  \n",
+      "3   0          0      0     0                                  \n",
+      "4   0          0      0     0  crescere     v.                 \n",
+      "   cod ntx  pitxt elemlen mappa numperiod links numorg sigla vol pag riga col  \\\n",
+      "25  98   3  20089       2  3632       252     2      2   aa1   1  21   13   0   \n",
+      "\n",
+      "   tipostanza stanza verso lemma cat_gr disambiguatore  \n",
+      "25          0      0     0                              \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_12436/3689161262.py:31: FutureWarning:\n",
+      "\n",
+      "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <iframe\n",
+       "            width=\"100%\"\n",
+       "            height=\"475\"\n",
+       "            src=\"http://MBP-di-Federica.wind3.hub:40001/dtale/iframe/2\"\n",
+       "            frameborder=\"0\"\n",
+       "            allowfullscreen\n",
+       "        ></iframe>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.lib.display.IFrame at 0x7f7a23ef1910>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Example run: pivot form \"il\" (third field, espansa, set to 1) filtered by the\n",
+    "# forms \"quale\" and \"cresce\", with intervallo = 10 and periodo = 0.\n",
+    "ricerca = [\n",
+    "    [\"il\", \"F\", 1, 0],\n",
+    "    [\"quale\", \"F\", 0, 0],\n",
+    "    [\"cresce\", \"F\", 0, 0],\n",
+    "]\n",
+    "cooccorrenze = ricerca_cooccorrenze(ricerca, 10, 0)\n",
+    "dtale.show(cooccorrenze)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 366 - 246
flask_be/engine/test_occorrenziario_notebook.ipynb

@@ -2,285 +2,236 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 67,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
     "import sqlite3\n",
     "import pandas as pd\n",
-    "import dtale"
+    "import dtale\n",
+    "import unicodedata\n",
+    "from simple_query_test_pandas import ricercaforme\n",
+    "from simple_query_test_pandas import ricercalemmi\n",
+    "from simple_query_test_pandas import ricercaformelemmi \n",
+    "from simple_query_test_pandas import ricercalemmiforme\n",
+    "from simple_query_test_pandas import inizialeraddoppiata\n",
+    "from simple_query_test_pandas import interpreter"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 68,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
-    "def combinations(s):\n",
-    "  result = []\n",
-    "  start = s.find(\"<\")\n",
-    "  end = s.find(\">\")\n",
-    "  if start == -1 or end == -1:\n",
-    "    return [s]\n",
-    "  items = s[start + 1:end].split(\",\")\n",
-    "  for item in items:\n",
-    "    result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])])\n",
-    "  return result"
+    "#%% Step 1: in the occurrence tables, find the text references ('itxt' version) of the\n",
+    "# entries retrieved by the search, together with the document sigla joined on ntx\n",
+    "def findtexts (type, df, listOcc, path):\n",
+    "    \"\"\"Collect the occurrence rows of the searched forms or lemmas.\n",
+    "\n",
+    "    Args:\n",
+    "        type: \"forme\" to match df[\"cod\"] against tab.cod, \"lemmi\" to match it\n",
+    "            against tab.indlem.\n",
+    "        df: DataFrame with a \"cod\" column holding the integer codes to look up.\n",
+    "        listOcc: names of the occurrence tables to query.\n",
+    "        path: directory that contains db/test1.db.\n",
+    "\n",
+    "    Returns:\n",
+    "        The concatenation of the rows found in every table of listOcc (an empty\n",
+    "        DataFrame when listOcc is empty).\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if type is neither \"forme\" nor \"lemmi\".\n",
+    "    \"\"\"\n",
+    "    colonne = {\"forme\": \"tab.cod\", \"lemmi\": \"tab.indlem\"}\n",
+    "    if type not in colonne:\n",
+    "        # Previously this left the query undefined and failed with an unrelated error.\n",
+    "        raise ValueError(\"type must be 'forme' or 'lemmi', got \" + repr(type))\n",
+    "    # int() validates every code before it is placed in the SQL text; IN (...) also\n",
+    "    # accepts an empty list, where the old OR chain produced invalid SQL.\n",
+    "    codici = \", \".join(str(int(cod)) for cod in df[\"cod\"])\n",
+    "    select = (\"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {} AS tab \"\n",
+    "              \"INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod \"\n",
+    "              \"WHERE \" + colonne[type] + \" IN (\" + codici + \")\")\n",
+    "    con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
+    "    try:\n",
+    "        risultati = [pd.read_sql(select.format(table), con) for table in listOcc]\n",
+    "    finally:\n",
+    "        # The connection used to be left open on every call.\n",
+    "        con.close()\n",
+    "    if not risultati:\n",
+    "        return pd.DataFrame()\n",
+    "    return pd.concat(risultati)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#%% funzione interprete\n",
-    "def interpreter (data):\n",
-    "    clean_data= \"'\"+data.replace(\"*\", \"%\").replace(\"?\", \"_\").replace(\" \",\"\").replace(\"'\", \"''\").replace(\"’\", \"''\") +\"'\"\n",
-    "    return combinations(clean_data)"
+    "# %% Step 2: put the information together and retrieve the contexts, 'itxt' version\n",
+    "def findcontexts (textlist, charOffsetConst, path=\"../\"):\n",
+    "    \"\"\"Add a 'contesto' column with the text surrounding each occurrence.\n",
+    "\n",
+    "    Args:\n",
+    "        textlist: DataFrame with the columns \"pitxt\", \"sigla\" and \"elemlen\".\n",
+    "        charOffsetConst: number of characters read before and after the occurrence.\n",
+    "        path: directory that contains db/itxt/; the default keeps the location\n",
+    "            that used to be hard-coded.\n",
+    "\n",
+    "    Returns:\n",
+    "        textlist itself, modified in place with the new 'contesto' column.\n",
+    "    \"\"\"\n",
+    "    import os\n",
+    "\n",
+    "    contexts = []\n",
+    "    for pitxtLocal, sigla, elemlen in zip(textlist[\"pitxt\"], textlist[\"sigla\"], textlist[\"elemlen\"]):\n",
+    "        with open(os.path.join(path, \"db\", \"itxt\", sigla), 'r', encoding=\"utf-32-le\") as file1:\n",
+    "            # utf-32-le stores every character in 4 bytes, hence the byte offset 4 * position;\n",
+    "            # the start is clamped to the beginning of the file.\n",
+    "            file1.seek(max(4 * (int(pitxtLocal) - charOffsetConst), 0), 0)\n",
+    "            contexts.append(file1.read(int(elemlen) + 2 * charOffsetConst))\n",
+    "    textlist['contesto'] = contexts\n",
+    "    return (textlist)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# %% funzione iniziale raddoppiata\n",
-    "def inizialeraddoppiata (data):\n",
-    "    doubleddata=[]\n",
-    "    for el in data:\n",
-    "        if el[1] != \"%\" and \"_\":\n",
-    "            doubleddata = doubleddata + [\"'\"+ el[1] + el[1:]]\n",
-    "    return doubleddata"
+    "def findbib (contexts, path):\n",
+    "    \"\"\"Attach bibliographic data and organic references to the contexts.\n",
+    "\n",
+    "    Args:\n",
+    "        contexts: DataFrame with at least the columns \"sigla\", \"numorg\", \"ntx\" and\n",
+    "            \"pag\", plus the columns selected for the result. It is modified in place:\n",
+    "            the looked-up columns are added and \"pag\" is cast to int.\n",
+    "        path: directory (with trailing separator) that contains\n",
+    "            db/bibliografia/BiblioTLIO.db and db/test1.db.\n",
+    "\n",
+    "    Returns:\n",
+    "        A new DataFrame sorted by 'Anno iniziale', 'Rif_organico' and 'pag', restricted\n",
+    "        to the display columns, with the previous index kept as a column.\n",
+    "    \"\"\"\n",
+    "    from contextlib import closing\n",
+    "\n",
+    "    righe_bib = []\n",
+    "    righe_rif = []\n",
+    "    # Both databases are opened once and always closed; they used to be reopened for\n",
+    "    # every row and never closed.\n",
+    "    with closing(sqlite3.connect(\"file:\" + path + \"db/bibliografia/BiblioTLIO.db\" + \"?mode=ro\", uri=True)) as con, \\\n",
+    "         closing(sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)) as con2:\n",
+    "        for sigla, numorg, ntx in zip(contexts[\"sigla\"], contexts[\"numorg\"], contexts[\"ntx\"]):\n",
+    "            # Bound parameters replace string-built SQL (a sigla containing a quote broke it).\n",
+    "            bib = con.execute(\"SELECT [Anno iniziale], [Titolo Abbreviato], IQ FROM datibib WHERE Sigla = ?\", (sigla,)).fetchone()\n",
+    "            rif = con2.execute(\"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice = ?) AND (ntx = ?)\", (str(numorg), str(ntx))).fetchone()\n",
+    "            # Exactly one entry per context row keeps the new columns aligned even when a\n",
+    "            # lookup finds no row or more than one.\n",
+    "            righe_bib.append(bib if bib is not None else (None, None, None))\n",
+    "            righe_rif.append(rif if rif is not None else (None, None))\n",
+    "    contexts['Anno iniziale'] = [riga[0] for riga in righe_bib]\n",
+    "    contexts['Titolo Abbreviato'] = [riga[1] for riga in righe_bib]\n",
+    "    contexts['IQ'] = [riga[2] for riga in righe_bib]\n",
+    "    contexts['Rif_organico'] = [riga[0] for riga in righe_rif]\n",
+    "    # Column name fixed: it was misspelled 'Rig_completo'.\n",
+    "    contexts['Rif_completo'] = [riga[1] for riga in righe_rif]\n",
+    "    contexts.pag = contexts.pag.astype(int)\n",
+    "    chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag'])\n",
+    "    cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto']\n",
+    "    clean_df = chrono[cols].reset_index()\n",
+    "    return (clean_df)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# %% funzione counter\n",
-    "def counter (results):\n",
-    "    trovati= len(results.index)\n",
-    "    occorrenze= results['occ'].sum()\n",
-    "    return (\"Trovati=\" + str(trovati) + \" Occorrenze=\" + str(occorrenze))"
+    "<h1>Per un nuovo sistema di interrogazione dei dati del Corpus TLIO</h1>"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "attachments": {},
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "#%% Funzione ricerca per forme\n",
-    "def ricercaforme (entries, path, espansa, raddoppiata):\n",
-    "\n",
-    "    if espansa == 0:\n",
-    "\n",
-    "        data=\" OR spec LIKE \".join(entries)\n",
-    "        doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "\n",
-    "        if raddoppiata == 1: \n",
-    "            theSimpleQuery = \"SELECT spec AS forma, nocc AS occ FROM form WHERE spec LIKE \" + data + \" OR spec LIKE \" + doubleddata + \"ORDER BY idfor\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT spec AS forma, nocc AS occ FROM form WHERE spec LIKE \" + data + \" ORDER BY idfor\"\n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        answer_table = pd.read_sql(theSimpleQuery, con)\n",
-    "        return answer_table\n",
-    "\n",
-    "    else:\n",
-    "\n",
-    "        data=\" OR spec LIKE \".join(entries)\n",
-    "        data2=\" OR norm LIKE \".join(entries)\n",
-    "        doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "\n",
-    "        if raddoppiata == 1:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idfor\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idfor\"\n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        cur = con.cursor()\n",
-    "        queryReponse = cur.execute(theSimpleQuery)\n",
-    "        results = queryReponse.fetchall()\n",
-    "        finalresults = pd.DataFrame()\n",
-    "\n",
-    "        for result in results:\n",
-    "            expandedQuery = \"SELECT spec AS forma, nocc AS occ FROM form WHERE norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY idfor\"\n",
-    "            extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
-    "            finalresults = pd.concat([finalresults, extendequeryReponse])\n",
-    "        return finalresults"
+    "<h2>1. Ricerca per forme</h2>\n",
+    "<h3>Lista di esempi di ricerca eseguibili:</h3>"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "#%% Funzione ricerca per lemmi\n",
-    "def ricercalemmi (entries, path, espansa, raddoppiata):\n",
-    "\n",
-    "    if espansa == 0:\n",
-    "\n",
-    "        data=\" OR spec LIKE \".join(entries)\n",
-    "        doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "        \n",
-    "        if raddoppiata == 1:\n",
-    "            theSimpleQuery = \"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ FROM lem WHERE spec LIKE \" + data  + \" OR spec LIKE \" + doubleddata + \"ORDER BY idlem\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ FROM lem WHERE spec LIKE \" + data + \" ORDER BY idlem\"\n",
-    "        \n",
-    "        #print(theSimpleQuery)\n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        answer_table = pd.read_sql(theSimpleQuery, con)\n",
-    "        return answer_table\n",
-    "\n",
-    "    else:\n",
-    "\n",
-    "        data=\" OR spec LIKE \".join(entries)\n",
-    "        data2=\" OR norm LIKE \".join(entries)\n",
-    "        doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "\n",
-    "        if raddoppiata == 1:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idlem\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idlem\"\n",
-    "       \n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        cur = con.cursor()\n",
-    "        queryReponse = cur.execute(theSimpleQuery)\n",
-    "        results = queryReponse.fetchall()\n",
-    "        finalresults = pd.DataFrame()\n",
-    "\n",
-    "        for result in results:\n",
-    "            expandedQuery = \"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ FROM lem WHERE norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY idlem\"\n",
-    "            extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
-    "            finalresults = pd.concat([finalresults, extendequeryReponse])\n",
-    "        return finalresults"
+    "ricerca di: filius"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "attachments": {},
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "#%% Funzione ricerca di forme/lemmi\n",
-    "def ricercaformelemmi (entries, path, espansa, raddoppiata):\n",
-    "\n",
-    "\n",
-    "    if espansa == 0:\n",
-    "\n",
-    "        data=\" OR form.spec LIKE \".join(entries)\n",
-    "        doubleddata=\" OR form.spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "        \n",
-    "        if raddoppiata == 1:\n",
-    "            theSimpleQuery = \"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE \" + data + \" OR form.spec LIKE \" + doubleddata + \" ORDER BY form.idfor\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE \" + data + \" ORDER BY form.idfor\"\n",
-    "        \n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        answer_table = pd.read_sql(theSimpleQuery, con)\n",
-    "        return answer_table\n",
-    "\n",
-    "    else:\n",
-    "        data=\" OR spec LIKE \".join(entries)\n",
-    "        data2=\" OR norm LIKE \".join(entries)\n",
-    "        doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "\n",
-    "        if raddoppiata == 1:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idfor\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idfor\"\n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        cur = con.cursor()\n",
-    "        queryReponse = cur.execute(theSimpleQuery)\n",
-    "        results = queryReponse.fetchall()\n",
-    "        finalresults = pd.DataFrame()\n",
-    "\n",
-    "        for result in results:\n",
-    "            expandedQuery = \"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY idfor\"\n",
-    "            extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
-    "            finalresults = pd.concat([finalresults, extendequeryReponse])\n",
-    "        return finalresults"
+    "![](img/1.png)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <iframe\n",
+       "            width=\"100%\"\n",
+       "            height=\"475\"\n",
+       "            src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/2\"\n",
+       "            frameborder=\"0\"\n",
+       "            allowfullscreen\n",
+       "        ></iframe>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.lib.display.IFrame at 0x7fc7ab8ae820>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "#%% Funzione ricerca lemmi/forme\n",
-    "def ricercalemmiforme (entries, path, espansa, raddoppiata):\n",
-    "\n",
-    "\n",
-    "    if espansa == 0:\n",
-    "\n",
-    "        data=\" OR form.spec LIKE \".join(entries)\n",
-    "        doubleddata=\" OR form.spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "        \n",
-    "        if raddoppiata == 1:\n",
-    "            theSimpleQuery = \"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE \" + data + \" OR form.spec LIKE \" + doubleddata + \" ORDER BY lem.idlem\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma,lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE \" + data + \" ORDER BY lem.idlem\"\n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        answer_table = pd.read_sql(theSimpleQuery, con)\n",
-    "        return answer_table\n",
-    "        \n",
-    "    else:\n",
-    "        data=\" OR spec LIKE \".join(entries)\n",
-    "        data2=\" OR norm LIKE \".join(entries)\n",
-    "        doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
-    "\n",
-    "        if raddoppiata == 1:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idlem\"\n",
-    "        else:\n",
-    "            theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idlem\"\n",
-    "\n",
-    "        con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
-    "        cur = con.cursor()\n",
-    "        queryReponse = cur.execute(theSimpleQuery)\n",
-    "        results = queryReponse.fetchall()\n",
-    "        finalresults = pd.DataFrame()\n",
-    "        for result in results:\n",
-    "            expandedQuery = \"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE lem.norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY lem.idlem\"\n",
-    "            extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
-    "            finalresults = pd.concat([finalresults, extendequeryReponse])\n",
-    "        return finalresults"
+    "df = ricercaforme(interpreter('filius'), \"../\", 0, 0)\n",
+    "dtale.show(df)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "<h1>Per un nuovo sistema di interrogazione dei dati del Corpus TLIO</h1>"
+    "![](img/c1.png)"
    ]
   },
   {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "<h2>1. Ricerca per forme</h2>\n",
-    "<h3>Lista di esempi di ricerca eseguibili:</h3>"
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <iframe\n",
+       "            width=\"100%\"\n",
+       "            height=\"475\"\n",
+       "            src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/3\"\n",
+       "            frameborder=\"0\"\n",
+       "            allowfullscreen\n",
+       "        ></iframe>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.lib.display.IFrame at 0x7fc7ab625be0>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Form search, then findtexts -> findcontexts -> findbib; the result is shown in D-Tale.\n",
+    "parola = \"filius\"\n",
+    "# Named search_type, not type: rebinding the builtin type() would affect every later cell in this kernel.\n",
+    "search_type = \"forme\"\n",
+    "charOffsetConst = 100\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+    "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+    "contexts = findcontexts(textlist, charOffsetConst)\n",
+    "bibliocontexts = findbib(contexts, \"../\")\n",
+    "dtale.show(bibliocontexts)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "ricerca di: filius"
+    "ricerca di: meterò-me"
    ]
   },
   {
@@ -288,18 +239,45 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "![](img/1.png)\n",
-    "\n",
-    "![](img/c1.png)"
+    "![](img/3.png)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <iframe\n",
+       "            width=\"100%\"\n",
+       "            height=\"475\"\n",
+       "            src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/4\"\n",
+       "            frameborder=\"0\"\n",
+       "            allowfullscreen\n",
+       "        ></iframe>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.lib.display.IFrame at 0x7fc7ab95ea60>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "df = ricercaforme(interpreter('filius'), \"../\", 0, 0)\n",
+    "df = ricercaforme(interpreter('meterò-me'), \"../\", 0, 0)\n",
     "dtale.show(df)"
    ]
   },
@@ -307,15 +285,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "ricerca di: meterò-me"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "![](img/3.png)\n",
     "\n",
     "![](img/c2.png)"
    ]
@@ -326,8 +295,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df = ricercaforme(interpreter('meterò-me'), \"../\", 0, 0)\n",
-    "dtale.show(df)"
+    "# Form search, then findtexts -> findcontexts -> findbib; the result is shown in D-Tale.\n",
+    "parola = \"meterò-me\"\n",
+    "# Named search_type, not type: rebinding the builtin type() would affect every later cell in this kernel.\n",
+    "search_type = \"forme\"\n",
+    "charOffsetConst = 100\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+    "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+    "contexts = findcontexts(textlist, charOffsetConst)\n",
+    "bibliocontexts = findbib(contexts, \"../\")\n",
+    "dtale.show(bibliocontexts)"
    ]
   },
   {
@@ -350,9 +326,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "![](img/6.png)\n",
-    "\n",
-    "![](img/c3.png)"
+    "![](img/6.png)"
    ]
   },
   {
@@ -365,6 +339,31 @@
     "dtale.show(df)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "![](img/c3.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Form search (third argument espansa=1), then findtexts -> findcontexts -> findbib; shown in D-Tale.\n",
+    "parola = \"a\"\n",
+    "# Named search_type, not type: rebinding the builtin type() would affect every later cell in this kernel.\n",
+    "search_type = \"forme\"\n",
+    "charOffsetConst = 100\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search = ricercaforme(interpreter(parola), \"../\", 1, 0)\n",
+    "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+    "contexts = findcontexts(textlist, charOffsetConst)\n",
+    "bibliocontexts = findbib(contexts, \"../\")\n",
+    "dtale.show(bibliocontexts)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -377,9 +376,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "![](img/7.png)\n",
-    "\n",
-    "![](img/c4.png)"
+    "![](img/7.png)"
    ]
   },
   {
@@ -392,6 +389,31 @@
     "dtale.show(df)"
    ]
   },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/c4.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Form search (third argument espansa=1), then findtexts -> findcontexts -> findbib; shown in D-Tale.\n",
+    "parola = \"fòra\"\n",
+    "# Named search_type, not type: rebinding the builtin type() would affect every later cell in this kernel.\n",
+    "search_type = \"forme\"\n",
+    "charOffsetConst = 100\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search = ricercaforme(interpreter(parola), \"../\", 1, 0)\n",
+    "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+    "contexts = findcontexts(textlist, charOffsetConst)\n",
+    "bibliocontexts = findbib(contexts, \"../\")\n",
+    "dtale.show(bibliocontexts)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -411,9 +433,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "![](img/10.png)\n",
-    "\n",
-    "![](img/c5.png)"
+    "![](img/10.png)"
    ]
   },
   {
@@ -426,6 +446,31 @@
     "dtale.show(df)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "![](img/c5.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Form search, then findtexts -> findcontexts -> findbib; the result is shown in D-Tale.\n",
+    "parola = \"*·\"\n",
+    "# Named search_type, not type: rebinding the builtin type() would affect every later cell in this kernel.\n",
+    "search_type = \"forme\"\n",
+    "charOffsetConst = 100\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+    "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+    "contexts = findcontexts(textlist, charOffsetConst)\n",
+    "bibliocontexts = findbib(contexts, \"../\")\n",
+    "dtale.show(bibliocontexts)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -438,9 +483,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "![](img/19.png)\n",
-    "\n",
-    "![](img/c10.png)"
+    "![](img/19.png)"
    ]
   },
   {
@@ -453,6 +496,31 @@
     "dtale.show(df)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "![](img/c10.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Form search, then findtexts -> findcontexts -> findbib; the result is shown in D-Tale.\n",
+    "parola = \"alaman*ni\"\n",
+    "# Named search_type, not type: rebinding the builtin type() would affect every later cell in this kernel.\n",
+    "search_type = \"forme\"\n",
+    "charOffsetConst = 100\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+    "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+    "contexts = findcontexts(textlist, charOffsetConst)\n",
+    "bibliocontexts = findbib(contexts, \"../\")\n",
+    "dtale.show(bibliocontexts)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -472,9 +540,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "![](img/21.png)\n",
-    "\n",
-    "![](img/c11.png)"
+    "![](img/21.png)"
    ]
   },
   {
@@ -486,6 +552,60 @@
     "df = ricercalemmi(interpreter('mezzo'), \"../\", 1, 0)\n",
     "dtale.show(df)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "![](img/c11.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <iframe\n",
+       "            width=\"100%\"\n",
+       "            height=\"475\"\n",
+       "            src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/5\"\n",
+       "            frameborder=\"0\"\n",
+       "            allowfullscreen\n",
+       "        ></iframe>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.lib.display.IFrame at 0x7fc7ab95e9a0>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Lemma search (third argument espansa=1), then findtexts -> findcontexts -> findbib; shown in D-Tale.\n",
+    "parola = \"mezzo\"\n",
+    "# Named search_type, not type: rebinding the builtin type() would affect every later cell in this kernel.\n",
+    "search_type = \"lemmi\"\n",
+    "charOffsetConst = 100\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search = ricercalemmi(interpreter(parola), \"../\", 1, 0)\n",
+    "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+    "contexts = findcontexts(textlist, charOffsetConst)\n",
+    "bibliocontexts = findbib(contexts, \"../\")\n",
+    "dtale.show(bibliocontexts)"
+   ]
   }
  ],
  "metadata": {