|
@@ -2,285 +2,236 @@
|
|
|
"cells": [
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 67,
|
|
|
+ "execution_count": 2,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"import sqlite3\n",
|
|
|
"import pandas as pd\n",
|
|
|
- "import dtale"
|
|
|
+ "import dtale\n",
|
|
|
+ "import unicodedata\n",
|
|
|
+ "from simple_query_test_pandas import ricercaforme\n",
|
|
|
+ "from simple_query_test_pandas import ricercalemmi\n",
|
|
|
+ "from simple_query_test_pandas import ricercaformelemmi \n",
|
|
|
+ "from simple_query_test_pandas import ricercalemmiforme\n",
|
|
|
+ "from simple_query_test_pandas import inizialeraddoppiata\n",
|
|
|
+ "from simple_query_test_pandas import interpreter"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 68,
|
|
|
+ "execution_count": 3,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "def combinations(s):\n",
|
|
|
- " result = []\n",
|
|
|
- " start = s.find(\"<\")\n",
|
|
|
- " end = s.find(\">\")\n",
|
|
|
- " if start == -1 or end == -1:\n",
|
|
|
- " return [s]\n",
|
|
|
- " items = s[start + 1:end].split(\",\")\n",
|
|
|
- " for item in items:\n",
|
|
|
- " result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])])\n",
|
|
|
- " return result"
|
|
|
+ "#%% Step 1: trovare, nelle tabelle degli occorrenzari, i riferimenti al testo (versione 'itxt') delle\n",
|
|
|
+ "# forme recuperate al punto 1, recuperare le sigle dei documenti e le loro associazioni agli ntx\n",
|
|
|
+ "def findtexts (type, df, listOcc, path):\n",
|
|
|
+ " textlist = pd.DataFrame()\n",
|
|
|
+ " codlist= list(df[\"cod\"])\n",
|
|
|
+ " strlist= [str(x) for x in codlist]\n",
|
|
|
+ " form_data=\" OR tab.cod= \".join(strlist)\n",
|
|
|
+ " lem_data=\" OR tab.indlem= \".join(strlist)\n",
|
|
|
+ " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
+ " for table in listOcc:\n",
|
|
|
+ " if type == \"forme\":\n",
|
|
|
+ " Query = \"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM \" + table + \" AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.cod=\" + form_data\n",
|
|
|
+ " elif type == \"lemmi\":\n",
|
|
|
+ " Query = \"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM \" + table + \" AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem=\" + lem_data\n",
|
|
|
+ " extendequeryReponse = pd.read_sql(Query, con)\n",
|
|
|
+ " textlist = pd.concat([textlist, extendequeryReponse])\n",
|
|
|
+ " return textlist"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "#%% funzione interprete\n",
|
|
|
- "def interpreter (data):\n",
|
|
|
- " clean_data= \"'\"+data.replace(\"*\", \"%\").replace(\"?\", \"_\").replace(\" \",\"\").replace(\"'\", \"''\").replace(\"’\", \"''\") +\"'\"\n",
|
|
|
- " return combinations(clean_data)"
|
|
|
+ "# %% Step 2: mettere insieme le informazioni recuperare i contesti, versione 'itxt'\n",
|
|
|
+ "def findcontexts (textlist, charOffsetConst):\n",
|
|
|
+ " contexts = []\n",
|
|
|
+ " for ind, row in textlist.iterrows():\n",
|
|
|
+ " pitxtLocal = row[\"pitxt\"]\n",
|
|
|
+ " sigla = row[\"sigla\"]\n",
|
|
|
+ " with open(\"../db/itxt/\" + sigla, 'r', encoding=\"utf-32-le\") as file1:\n",
|
|
|
+ " file1.seek( max( 4*(pitxtLocal-charOffsetConst), 0), 0 )\n",
|
|
|
+ " cont = file1.read(row[\"elemlen\"]+2*charOffsetConst)\n",
|
|
|
+ " contexts.append(cont)\n",
|
|
|
+ " textlist['contesto'] = contexts\n",
|
|
|
+ " #textlist.loc[ind,'contesto'] = cont\n",
|
|
|
+ " return (textlist)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 5,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "# %% funzione iniziale raddoppiata\n",
|
|
|
- "def inizialeraddoppiata (data):\n",
|
|
|
- " doubleddata=[]\n",
|
|
|
- " for el in data:\n",
|
|
|
- " if el[1] != \"%\" and \"_\":\n",
|
|
|
- " doubleddata = doubleddata + [\"'\"+ el[1] + el[1:]]\n",
|
|
|
- " return doubleddata"
|
|
|
+ "def findbib (contexts, path):\n",
|
|
|
+ " infobib = pd.DataFrame()\n",
|
|
|
+ " rif_org = pd.DataFrame()\n",
|
|
|
+ " for ind, row in contexts.iterrows():\n",
|
|
|
+ " con = sqlite3.connect(\"file:\" + path + \"db/bibliografia/BiblioTLIO.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
+ " Query = \"SELECT [Anno iniziale], [Titolo Abbreviato], IQ FROM datibib WHERE Sigla= '\" + row[\"sigla\"] +\"'\"\n",
|
|
|
+ " bib = pd.read_sql(Query, con)\n",
|
|
|
+ " infobib = pd.concat([infobib, bib])\n",
|
|
|
+ " #contexts.loc[ind, 'Titolo Abbreviato'] = bib.iloc[0, 1]\n",
|
|
|
+ " #contexts.loc[ind, 'Anno iniziale'] = bib.iloc[0, 0]\n",
|
|
|
+ " con2 = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
+ " Query2 = \"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice= '\" + str(row[\"numorg\"]) + \"'\" + \") AND (ntx= + '\" + str(row[\"ntx\"]) + \"'\" + \")\"\n",
|
|
|
+ " rif = pd.read_sql(Query2, con2)\n",
|
|
|
+ " rif_org = pd.concat([rif_org, rif])\n",
|
|
|
+ " #contexts.loc[ind, 'Rif_organico'] = rif.iloc[0, 0]\n",
|
|
|
+ " #contexts.loc[ind, 'Rif_completo'] = rif.iloc[0, 1]\n",
|
|
|
+ " anno = list(infobib['Anno iniziale'])\n",
|
|
|
+ " titolo = list(infobib['Titolo Abbreviato'])\n",
|
|
|
+ " iq = list(infobib['IQ'])\n",
|
|
|
+ " rif1 = list(rif_org['Rif_organico'])\n",
|
|
|
+ " rif2 = list(rif_org['Rif_completo'])\n",
|
|
|
+ " contexts['Anno iniziale'] = anno\n",
|
|
|
+ " contexts['Titolo Abbreviato'] = titolo\n",
|
|
|
+ " contexts ['IQ'] = iq\n",
|
|
|
+ " contexts['Rif_organico'] = rif1\n",
|
|
|
+ " contexts['Rig_completo'] = rif2\n",
|
|
|
+ " contexts.pag = contexts.pag.astype(int)\n",
|
|
|
+ " chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag']) \n",
|
|
|
+ " cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto']\n",
|
|
|
+ " clean_df = chrono[cols].reset_index()\n",
|
|
|
+ " return (clean_df)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
"source": [
|
|
|
- "# %% funzione counter\n",
|
|
|
- "def counter (results):\n",
|
|
|
- " trovati= len(results.index)\n",
|
|
|
- " occorrenze= results['occ'].sum()\n",
|
|
|
- " return (\"Trovati=\" + str(trovati) + \" Occorrenze=\" + str(occorrenze))"
|
|
|
+ "<h1>Per un nuovo sistema di interrogazione dei dati del Corpus TLIO</h1>"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "attachments": {},
|
|
|
+ "cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
"source": [
|
|
|
- "#%% Funzione ricerca per forme\n",
|
|
|
- "def ricercaforme (entries, path, espansa, raddoppiata):\n",
|
|
|
- "\n",
|
|
|
- " if espansa == 0:\n",
|
|
|
- "\n",
|
|
|
- " data=\" OR spec LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- "\n",
|
|
|
- " if raddoppiata == 1: \n",
|
|
|
- " theSimpleQuery = \"SELECT spec AS forma, nocc AS occ FROM form WHERE spec LIKE \" + data + \" OR spec LIKE \" + doubleddata + \"ORDER BY idfor\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT spec AS forma, nocc AS occ FROM form WHERE spec LIKE \" + data + \" ORDER BY idfor\"\n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " answer_table = pd.read_sql(theSimpleQuery, con)\n",
|
|
|
- " return answer_table\n",
|
|
|
- "\n",
|
|
|
- " else:\n",
|
|
|
- "\n",
|
|
|
- " data=\" OR spec LIKE \".join(entries)\n",
|
|
|
- " data2=\" OR norm LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- "\n",
|
|
|
- " if raddoppiata == 1:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idfor\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idfor\"\n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " cur = con.cursor()\n",
|
|
|
- " queryReponse = cur.execute(theSimpleQuery)\n",
|
|
|
- " results = queryReponse.fetchall()\n",
|
|
|
- " finalresults = pd.DataFrame()\n",
|
|
|
- "\n",
|
|
|
- " for result in results:\n",
|
|
|
- " expandedQuery = \"SELECT spec AS forma, nocc AS occ FROM form WHERE norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY idfor\"\n",
|
|
|
- " extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
|
|
|
- " finalresults = pd.concat([finalresults, extendequeryReponse])\n",
|
|
|
- " return finalresults"
|
|
|
+ "<h2>1. Ricerca per forme</h2>\n",
|
|
|
+ "<h3>Lista di esempi di ricerca eseguibili:</h3>"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
"source": [
|
|
|
- "#%% Funzione ricerca per lemmi\n",
|
|
|
- "def ricercalemmi (entries, path, espansa, raddoppiata):\n",
|
|
|
- "\n",
|
|
|
- " if espansa == 0:\n",
|
|
|
- "\n",
|
|
|
- " data=\" OR spec LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- " \n",
|
|
|
- " if raddoppiata == 1:\n",
|
|
|
- " theSimpleQuery = \"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ FROM lem WHERE spec LIKE \" + data + \" OR spec LIKE \" + doubleddata + \"ORDER BY idlem\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ FROM lem WHERE spec LIKE \" + data + \" ORDER BY idlem\"\n",
|
|
|
- " \n",
|
|
|
- " #print(theSimpleQuery)\n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " answer_table = pd.read_sql(theSimpleQuery, con)\n",
|
|
|
- " return answer_table\n",
|
|
|
- "\n",
|
|
|
- " else:\n",
|
|
|
- "\n",
|
|
|
- " data=\" OR spec LIKE \".join(entries)\n",
|
|
|
- " data2=\" OR norm LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- "\n",
|
|
|
- " if raddoppiata == 1:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idlem\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idlem\"\n",
|
|
|
- " \n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " cur = con.cursor()\n",
|
|
|
- " queryReponse = cur.execute(theSimpleQuery)\n",
|
|
|
- " results = queryReponse.fetchall()\n",
|
|
|
- " finalresults = pd.DataFrame()\n",
|
|
|
- "\n",
|
|
|
- " for result in results:\n",
|
|
|
- " expandedQuery = \"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ FROM lem WHERE norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY idlem\"\n",
|
|
|
- " extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
|
|
|
- " finalresults = pd.concat([finalresults, extendequeryReponse])\n",
|
|
|
- " return finalresults"
|
|
|
+ "ricerca di: filius"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "attachments": {},
|
|
|
+ "cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
"source": [
|
|
|
- "#%% Funzione ricerca di forme/lemmi\n",
|
|
|
- "def ricercaformelemmi (entries, path, espansa, raddoppiata):\n",
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- " if espansa == 0:\n",
|
|
|
- "\n",
|
|
|
- " data=\" OR form.spec LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR form.spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- " \n",
|
|
|
- " if raddoppiata == 1:\n",
|
|
|
- " theSimpleQuery = \"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE \" + data + \" OR form.spec LIKE \" + doubleddata + \" ORDER BY form.idfor\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE \" + data + \" ORDER BY form.idfor\"\n",
|
|
|
- " \n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " answer_table = pd.read_sql(theSimpleQuery, con)\n",
|
|
|
- " return answer_table\n",
|
|
|
- "\n",
|
|
|
- " else:\n",
|
|
|
- " data=\" OR spec LIKE \".join(entries)\n",
|
|
|
- " data2=\" OR norm LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- "\n",
|
|
|
- " if raddoppiata == 1:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idfor\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM form WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idfor\"\n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " cur = con.cursor()\n",
|
|
|
- " queryReponse = cur.execute(theSimpleQuery)\n",
|
|
|
- " results = queryReponse.fetchall()\n",
|
|
|
- " finalresults = pd.DataFrame()\n",
|
|
|
- "\n",
|
|
|
- " for result in results:\n",
|
|
|
- " expandedQuery = \"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY idfor\"\n",
|
|
|
- " extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
|
|
|
- " finalresults = pd.concat([finalresults, extendequeryReponse])\n",
|
|
|
- " return finalresults"
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "\n",
|
|
|
+ " <iframe\n",
|
|
|
+ " width=\"100%\"\n",
|
|
|
+ " height=\"475\"\n",
|
|
|
+ " src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/2\"\n",
|
|
|
+ " frameborder=\"0\"\n",
|
|
|
+ " allowfullscreen\n",
|
|
|
+ " ></iframe>\n",
|
|
|
+ " "
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ "<IPython.lib.display.IFrame at 0x7fc7ab8ae820>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "display_data"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": []
|
|
|
+ },
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
- "#%% Funzione ricerca lemmi/forme\n",
|
|
|
- "def ricercalemmiforme (entries, path, espansa, raddoppiata):\n",
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- " if espansa == 0:\n",
|
|
|
- "\n",
|
|
|
- " data=\" OR form.spec LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR form.spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- " \n",
|
|
|
- " if raddoppiata == 1:\n",
|
|
|
- " theSimpleQuery = \"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE \" + data + \" OR form.spec LIKE \" + doubleddata + \" ORDER BY lem.idlem\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma,lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE \" + data + \" ORDER BY lem.idlem\"\n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " answer_table = pd.read_sql(theSimpleQuery, con)\n",
|
|
|
- " return answer_table\n",
|
|
|
- " \n",
|
|
|
- " else:\n",
|
|
|
- " data=\" OR spec LIKE \".join(entries)\n",
|
|
|
- " data2=\" OR norm LIKE \".join(entries)\n",
|
|
|
- " doubleddata=\" OR spec LIKE \".join(inizialeraddoppiata(entries))\n",
|
|
|
- "\n",
|
|
|
- " if raddoppiata == 1:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \") OR (spec LIKE \" + doubleddata + \") OR (norm LIKE \" + doubleddata + \")\" + \" ORDER BY idlem\"\n",
|
|
|
- " else:\n",
|
|
|
- " theSimpleQuery = \"SELECT DISTINCT norm FROM lem WHERE (spec LIKE \" + data +\") OR (norm LIKE \" + data2 + \")\" + \" ORDER BY idlem\"\n",
|
|
|
- "\n",
|
|
|
- " con = sqlite3.connect(\"file:\" + path + \"/db/test1.db\" + \"?mode=ro\", uri=True)\n",
|
|
|
- " cur = con.cursor()\n",
|
|
|
- " queryReponse = cur.execute(theSimpleQuery)\n",
|
|
|
- " results = queryReponse.fetchall()\n",
|
|
|
- " finalresults = pd.DataFrame()\n",
|
|
|
- " for result in results:\n",
|
|
|
- " expandedQuery = \"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE lem.norm LIKE \" + \"'\" + result[0] + \"'\" + \" ORDER BY lem.idlem\"\n",
|
|
|
- " extendequeryReponse = pd.read_sql(expandedQuery, con)\n",
|
|
|
- " finalresults = pd.concat([finalresults, extendequeryReponse])\n",
|
|
|
- " return finalresults"
|
|
|
+ "df = ricercaforme(interpreter('filius'), \"../\", 0, 0)\n",
|
|
|
+ "dtale.show(df)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
+ "attachments": {},
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "<h1>Per un nuovo sistema di interrogazione dei dati del Corpus TLIO</h1>"
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
- "attachments": {},
|
|
|
- "cell_type": "markdown",
|
|
|
- "metadata": {},
|
|
|
- "source": [
|
|
|
- "<h2>1. Ricerca per forme</h2>\n",
|
|
|
- "<h3>Lista di esempi di ricerca eseguibili:</h3>"
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "\n",
|
|
|
+ " <iframe\n",
|
|
|
+ " width=\"100%\"\n",
|
|
|
+ " height=\"475\"\n",
|
|
|
+ " src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/3\"\n",
|
|
|
+ " frameborder=\"0\"\n",
|
|
|
+ " allowfullscreen\n",
|
|
|
+ " ></iframe>\n",
|
|
|
+ " "
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ "<IPython.lib.display.IFrame at 0x7fc7ab625be0>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "display_data"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": []
|
|
|
+ },
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Context lookup for the form 'filius': search -> occurrence rows -> text contexts -> bibliography\n",
+ "parola = \"filius\"\n",
+ "search_type = \"forme\"  # not named `type`: that would shadow the builtin for the whole kernel\n",
+ "charOffsetConst = 100  # context characters requested on each side of the match\n",
+ "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+ "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+ "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+ "contexts = findcontexts(textlist, charOffsetConst)\n",
+ "bibliocontexts = findbib(contexts, \"../\")\n",
+ "dtale.show(bibliocontexts)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "ricerca di: filius"
|
|
|
+ "ricerca di: meterò-me"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -288,18 +239,45 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- ""
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
+ "execution_count": 9,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "\n",
|
|
|
+ " <iframe\n",
|
|
|
+ " width=\"100%\"\n",
|
|
|
+ " height=\"475\"\n",
|
|
|
+ " src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/4\"\n",
|
|
|
+ " frameborder=\"0\"\n",
|
|
|
+ " allowfullscreen\n",
|
|
|
+ " ></iframe>\n",
|
|
|
+ " "
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ "<IPython.lib.display.IFrame at 0x7fc7ab95ea60>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "display_data"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": []
|
|
|
+ },
|
|
|
+ "execution_count": 9,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
- "df = ricercaforme(interpreter('filius'), \"../\", 0, 0)\n",
|
|
|
+ "df = ricercaforme(interpreter('meterò-me'), \"../\", 0, 0)\n",
|
|
|
"dtale.show(df)"
|
|
|
]
|
|
|
},
|
|
@@ -307,15 +285,6 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "ricerca di: meterò-me"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "attachments": {},
|
|
|
- "cell_type": "markdown",
|
|
|
- "metadata": {},
|
|
|
- "source": [
|
|
|
- "\n",
|
|
|
"\n",
|
|
|
""
|
|
|
]
|
|
@@ -326,8 +295,15 @@
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "df = ricercaforme(interpreter('meterò-me'), \"../\", 0, 0)\n",
|
|
|
- "dtale.show(df)"
|
|
|
+ "# Context lookup for the form 'meterò-me': search -> occurrence rows -> text contexts -> bibliography\n",
+ "parola = \"meterò-me\"\n",
+ "search_type = \"forme\"  # not named `type`: that would shadow the builtin for the whole kernel\n",
+ "charOffsetConst = 100  # context characters requested on each side of the match\n",
+ "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+ "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+ "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+ "contexts = findcontexts(textlist, charOffsetConst)\n",
+ "bibliocontexts = findbib(contexts, \"../\")\n",
+ "dtale.show(bibliocontexts)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -350,9 +326,7 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- ""
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -365,6 +339,31 @@
|
|
|
"dtale.show(df)"
|
|
|
]
|
|
|
},
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "\n",
|
|
|
+ ""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Context lookup for the form 'a' (third search flag set to 1): search -> occurrence rows -> text contexts -> bibliography\n",
+ "parola = \"a\"\n",
+ "search_type = \"forme\"  # not named `type`: that would shadow the builtin for the whole kernel\n",
+ "charOffsetConst = 100  # context characters requested on each side of the match\n",
+ "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+ "search = ricercaforme(interpreter(parola), \"../\", 1, 0)\n",
+ "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+ "contexts = findcontexts(textlist, charOffsetConst)\n",
+ "bibliocontexts = findbib(contexts, \"../\")\n",
+ "dtale.show(bibliocontexts)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
{
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
@@ -377,9 +376,7 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- ""
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -392,6 +389,31 @@
|
|
|
"dtale.show(df)"
|
|
|
]
|
|
|
},
|
|
|
+ {
|
|
|
+ "attachments": {},
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ ""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Context lookup for the form 'fòra' (third search flag set to 1): search -> occurrence rows -> text contexts -> bibliography\n",
+ "parola = \"fòra\"\n",
+ "search_type = \"forme\"  # not named `type`: that would shadow the builtin for the whole kernel\n",
+ "charOffsetConst = 100  # context characters requested on each side of the match\n",
+ "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+ "search = ricercaforme(interpreter(parola), \"../\", 1, 0)\n",
+ "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+ "contexts = findcontexts(textlist, charOffsetConst)\n",
+ "bibliocontexts = findbib(contexts, \"../\")\n",
+ "dtale.show(bibliocontexts)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
{
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
@@ -411,9 +433,7 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- ""
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -426,6 +446,31 @@
|
|
|
"dtale.show(df)"
|
|
|
]
|
|
|
},
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "\n",
|
|
|
+ ""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Context lookup for the wildcard pattern '*·': search -> occurrence rows -> text contexts -> bibliography\n",
+ "parola = \"*·\"\n",
+ "search_type = \"forme\"  # not named `type`: that would shadow the builtin for the whole kernel\n",
+ "charOffsetConst = 100  # context characters requested on each side of the match\n",
+ "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+ "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+ "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+ "contexts = findcontexts(textlist, charOffsetConst)\n",
+ "bibliocontexts = findbib(contexts, \"../\")\n",
+ "dtale.show(bibliocontexts)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
{
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
@@ -438,9 +483,7 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- ""
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -453,6 +496,31 @@
|
|
|
"dtale.show(df)"
|
|
|
]
|
|
|
},
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "\n",
|
|
|
+ ""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Context lookup for the wildcard pattern 'alaman*ni': search -> occurrence rows -> text contexts -> bibliography\n",
+ "parola = \"alaman*ni\"\n",
+ "search_type = \"forme\"  # not named `type`: that would shadow the builtin for the whole kernel\n",
+ "charOffsetConst = 100  # context characters requested on each side of the match\n",
+ "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+ "search = ricercaforme(interpreter(parola), \"../\", 0, 0)\n",
+ "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+ "contexts = findcontexts(textlist, charOffsetConst)\n",
+ "bibliocontexts = findbib(contexts, \"../\")\n",
+ "dtale.show(bibliocontexts)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
{
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
@@ -472,9 +540,7 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- ""
|
|
|
+ ""
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -486,6 +552,60 @@
|
|
|
"df = ricercalemmi(interpreter('mezzo'), \"../\", 1, 0)\n",
|
|
|
"dtale.show(df)"
|
|
|
]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "\n",
|
|
|
+ ""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "\n",
|
|
|
+ " <iframe\n",
|
|
|
+ " width=\"100%\"\n",
|
|
|
+ " height=\"475\"\n",
|
|
|
+ " src=\"http://MBP-di-Federica.wind3.hub:40000/dtale/iframe/5\"\n",
|
|
|
+ " frameborder=\"0\"\n",
|
|
|
+ " allowfullscreen\n",
|
|
|
+ " ></iframe>\n",
|
|
|
+ " "
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ "<IPython.lib.display.IFrame at 0x7fc7ab95e9a0>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "display_data"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": []
|
|
|
+ },
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Context lookup for the lemma 'mezzo' (third search flag set to 1): search -> occurrence rows -> text contexts -> bibliography\n",
+ "parola = \"mezzo\"\n",
+ "search_type = \"lemmi\"  # not named `type`: that would shadow the builtin for the whole kernel\n",
+ "charOffsetConst = 100  # context characters requested on each side of the match\n",
+ "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+ "search = ricercalemmi(interpreter(parola), \"../\", 1, 0)\n",
+ "textlist = findtexts(search_type, search, listOcc, \"../\")\n",
+ "contexts = findcontexts(textlist, charOffsetConst)\n",
+ "bibliocontexts = findbib(contexts, \"../\")\n",
+ "dtale.show(bibliocontexts)"
|
|
|
+ ]
|
|
|
}
|
|
|
],
|
|
|
"metadata": {
|