123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336 |
- # Questo NON è parte del codice: è un notebook Jupyter (nell'implementazione di VSCode)
- # che ho usato per fare dei test!
- # %%
- # Test code using Jupyter
- # %%
- import sqlite3
- import re
- import pandas as pd
- import dtale
- import unicodedata
- import sys
- #%% funzione combinazioni <> è chiamata da interpreter
- def combinations(s):
- result = []
- start = s.find("<")
- end = s.find(">")
- if start == -1 or end == -1:
- return [s]
- items = s[start + 1:end].split(",")
- for item in items:
- result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])])
- return result
- #%% funzione interprete, sta alla base di ogni ricerca
- def interpreter (data):
- clean_data= "'"+data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''") +"'"
- return combinations(clean_data)
-
- # %% funzione iniziale raddoppiata, è chiamata dalle funzioni di ricerca con iniziale raddoppiata
- def inizialeraddoppiata (data):
- doubleddata=[]
- for el in data:
- if el[1] != "%" and "_":
- doubleddata = doubleddata + ["'"+ el[1] + el[1:]]
- return doubleddata
- # %% funzione normalizza stringa (ricerca espansa), è chiamata dalle funzioni di ricerca espansa
- def normalize(stringa):
- return unicodedata.normalize('NFKD', stringa).encode('ASCII', 'ignore').decode('utf-8')
- def list_normalize(lista):
- return [normalize(stringa) for stringa in lista]
- # %% funzione counter, può essere chiamata sui risultati delle ricerche per visualizzare le forme/lemmi e il numero di occorrenze individuate
- def counter (results):
- if not results.empty:
- trovati= len(results.index)
- occorrenze= results['occ'].sum()
- return ("Trovati=" + str(trovati) + " Occorrenze=" + str(occorrenze))
- #%% Funzione ricerca per forme
- def ricercaforme (entries, path, espansa, raddoppiata):
- if espansa == 0:
- data=" OR spec LIKE ".join(entries)
- doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
-
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idfor"
- else:
- theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} ORDER BY idfor"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- else:
- data=" OR spec LIKE ".join(entries)
- data_norm=" OR norm LIKE ".join(list_normalize(entries))
- doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idfor"
- else:
- theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idfor"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- #deprecated
- """if espansa == 0:
- data=" OR spec LIKE ".join(entries)
- doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = "SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE " + data + " OR spec LIKE " + doubleddata + "ORDER BY idfor"
- else:
- theSimpleQuery = "SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE " + data + " ORDER BY idfor"
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table
- else:
- data=" OR spec LIKE ".join(entries)
- data_norm=" OR norm LIKE ".join(list_normalize(entries))
- doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = "SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ") OR (spec LIKE " + doubleddata + ") OR (norm LIKE " + doubleddata_norm + ")" + " ORDER BY idfor"
- else:
- theSimpleQuery = "SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ")" + " ORDER BY idfor"
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table"""
- #%% Funzione ricerca per lemmi
- def ricercalemmi (entries, path, espansa, raddoppiata):
- if espansa == 0:
- data = " OR spec LIKE ".join(entries)
- doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idlem"
- else:
- theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} ORDER BY idlem"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- else:
- data = " OR spec LIKE ".join(entries)
- data_norm = " OR norm LIKE ".join(list_normalize(entries))
- doubleddata_norm = " OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idlem"
- else:
- theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idlem"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- #deprecated
- """if espansa == 0:
- data=" OR spec LIKE ".join(entries)
- doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
-
- if raddoppiata == 1:
- theSimpleQuery = "SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE " + data + " OR spec LIKE " + doubleddata + "ORDER BY idlem"
- else:
- theSimpleQuery = "SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE " + data + " ORDER BY idlem"
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table
- else:
- data=" OR spec LIKE ".join(entries)
- data_norm=" OR norm LIKE ".join(list_normalize(entries))
- doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = "SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ") OR (spec LIKE " + doubleddata + ") OR (norm LIKE " + doubleddata_norm + ")" + " ORDER BY idlem"
- else:
- theSimpleQuery = "SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ")" + " ORDER BY idlem"
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table"""
- #%% Funzione ricerca di forme con vista lemmi
- def ricercaformelemmi (entries, path, espansa, raddoppiata):
- if espansa == 0:
- data = " OR form.spec LIKE ".join(entries)
- doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY form.idfor"
- else:
- theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} ORDER BY form.idfor"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- else:
- data = " OR form.spec LIKE ".join(entries)
- data_norm = " OR form.norm LIKE ".join(list_normalize(entries))
- doubleddata_norm = " OR form.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) OR (form.spec LIKE {doubleddata}) OR (form.norm LIKE {doubleddata_norm}) ORDER BY form.idfor"
- else:
- theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) ORDER BY form.idfor"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- #deprecated
- """if espansa == 0:
- data=" OR form.spec LIKE ".join(entries)
- doubleddata=" OR form.spec LIKE ".join(inizialeraddoppiata(entries))
-
- if raddoppiata == 1:
- theSimpleQuery = "SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE " + data + " OR form.spec LIKE " + doubleddata + " ORDER BY form.idfor"
- else:
- theSimpleQuery = "SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE " + data + " ORDER BY form.idfor"
-
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table
- else:
-
- data=" OR form.spec LIKE ".join(entries)
- data_norm=" OR form.norm LIKE ".join(list_normalize(entries))
- doubleddata_norm=" OR form.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata=" OR form.spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = "SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE " + data +") OR (form.norm LIKE " + data_norm + ") OR (form.spec LIKE " + doubleddata + ") OR (form.norm LIKE " + doubleddata_norm + ")" + " ORDER BY form.idfor"
- else:
- theSimpleQuery = "SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE " + data +") OR (form.norm LIKE " + data_norm + ")" + " ORDER BY form.idfor"
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table"""
- #%% Funzione ricerca lemmi con vista forme
- def ricercalemmiforme (entries, path, espansa, raddoppiata):
- if espansa == 0:
- data = " OR form.spec LIKE ".join(entries)
- doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY lem.idlem"
- else:
- theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} ORDER BY lem.idlem"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- else:
- data = " OR lem.spec LIKE ".join(entries)
- data_norm = " OR lem.norm LIKE ".join(list_normalize(entries))
- doubleddata_norm = " OR lem.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata = " OR lem.spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) OR (lem.spec LIKE {doubleddata}) OR (lem.norm LIKE {doubleddata_norm}) ORDER BY lem.idlem"
- else:
- theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) ORDER BY lem.idlem"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- #deprecated
- """if espansa == 0:
- data=" OR form.spec LIKE ".join(entries)
- doubleddata=" OR form.spec LIKE ".join(inizialeraddoppiata(entries))
-
- if raddoppiata == 1:
- theSimpleQuery = "SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE " + data + " OR form.spec LIKE " + doubleddata + " ORDER BY lem.idlem"
- else:
- theSimpleQuery = "SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma,lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE " + data + " ORDER BY lem.idlem"
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table
-
- else:
-
- data=" OR lem.spec LIKE ".join(entries)
- data_norm=" OR lem.norm LIKE ".join(list_normalize(entries))
- doubleddata_norm=" OR lem.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
- doubleddata=" OR lem.spec LIKE ".join(inizialeraddoppiata(entries))
- if raddoppiata == 1:
- theSimpleQuery = "SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE " + data +") OR (lem.norm LIKE " + data_norm + ") OR (lem.spec LIKE " + doubleddata + ") OR (lem.norm LIKE " + doubleddata_norm + ")" + " ORDER BY lem.idlem"
- else:
- theSimpleQuery = "SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE " + data +") OR (lem.norm LIKE " + data_norm + ")" + " ORDER BY lem.idlem"
- con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- return answer_table"""
- # %% Ricerca per categorie grammaticali
- def ricercacatgr (entry, path):
- theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE cat = '{entry}' ORDER BY idlem"
- con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
- answer_table = pd.read_sql(theSimpleQuery, con)
- if answer_table.empty:
- print ("Nessun risultato")
- sys.exit(1)
- else:
- return answer_table
- #%%
- path = "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/db/first_db"
- entry = "filius"
- #df=ricercacatgr(entry, path)
- df=ricercaforme(interpreter(entry), path, 0, 0)
- dtale.show(df)
- # %%
-
|