# Questo NON è parte del codice: è un notebook Jupyter (nell'implementazione di VSCode) # che ho usato per fare dei test! # %% # Test code using Jupyter # %% import sqlite3 import pandas as pd import unicodedata import sys #%% funzione combinazioni <> è chiamata da interpreter def combinations(s): result = [] start = s.find("<") end = s.find(">") if start == -1 or end == -1: return [s] items = s[start + 1:end].split(",") for item in items: result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])]) return result #%% funzione interprete, sta alla base di ogni ricerca def interpreter (data): clean_data= "'"+data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''") +"'" return combinations(clean_data) # %% funzione iniziale raddoppiata, è chiamata dalle funzioni di ricerca con iniziale raddoppiata def inizialeraddoppiata (data): doubleddata=[] for el in data: if el[1] != "%" and "_": doubleddata = doubleddata + ["'"+ el[1] + el[1:]] return doubleddata # %% funzione normalizza stringa (ricerca espansa), è chiamata dalle funzioni di ricerca espansa def normalize(stringa): return unicodedata.normalize('NFKD', stringa).encode('ASCII', 'ignore').decode('utf-8') def list_normalize(lista): return [normalize(stringa) for stringa in lista] # %% funzione counter, può essere chiamata sui risultati delle ricerche per visualizzare le forme/lemmi e il numero di occorrenze individuate def counter (results): if not results.empty: trovati= len(results.index) occorrenze= results['occ'].sum() return ("Trovati=" + str(trovati) + " Occorrenze=" + str(occorrenze)) #%% Funzione ricerca per forme def ricercaforme (entries, path, espansa, raddoppiata): if espansa == 0: data=" OR spec LIKE ".join(entries) doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idfor" else: theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} ORDER BY idfor" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table else: data=" OR spec LIKE ".join(entries) data_norm=" OR norm LIKE ".join(list_normalize(entries)) doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries))) doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idfor" else: theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idfor" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table #%% Funzione ricerca per lemmi def ricercalemmi (entries, path, espansa, raddoppiata): if espansa == 0: data = " OR spec LIKE ".join(entries) doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idlem" else: theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} ORDER BY idlem" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table else: data = " OR spec LIKE ".join(entries) data_norm = " OR norm LIKE ".join(list_normalize(entries)) doubleddata_norm = " OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries))) doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idlem" else: theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idlem" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table #%% Funzione ricerca di forme con vista lemmi def ricercaformelemmi (entries, path, espansa, raddoppiata): if espansa == 0: data = " OR form.spec LIKE ".join(entries) doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY form.idfor" else: theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} ORDER BY form.idfor" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table else: data = " OR form.spec LIKE ".join(entries) data_norm = " OR form.norm LIKE ".join(list_normalize(entries)) doubleddata_norm = " OR form.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries))) doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) OR (form.spec LIKE {doubleddata}) OR (form.norm LIKE {doubleddata_norm}) ORDER BY form.idfor" else: theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) ORDER BY form.idfor" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table #%% Funzione ricerca lemmi con vista forme def ricercalemmiforme (entries, path, espansa, raddoppiata): if espansa == 0: data = " OR form.spec LIKE ".join(entries) doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY lem.idlem" else: theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} ORDER BY lem.idlem" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table else: data = " OR lem.spec LIKE ".join(entries) data_norm = " OR lem.norm LIKE ".join(list_normalize(entries)) doubleddata_norm = " OR lem.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries))) doubleddata = " OR lem.spec LIKE ".join(inizialeraddoppiata(entries)) if raddoppiata == 1: theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) OR (lem.spec LIKE {doubleddata}) OR (lem.norm LIKE {doubleddata_norm}) ORDER BY lem.idlem" else: theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) ORDER BY lem.idlem" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table # %% Ricerca per categorie grammaticali def ricercacatgr (entry, path): theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE cat = '{entry}' ORDER BY idlem" con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True) answer_table = pd.read_sql(theSimpleQuery, con) if answer_table.empty: print ("Nessun risultato") sys.exit(1) else: return answer_table