simple_query_test_pandas.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. # Questo NON è parte del codice: è un notebook Jupyter (nell'implementazione di VSCode)
  2. # che ho usato per fare dei test!
  3. # %%
  4. # Test code using Jupyter
  5. # %%
  6. import sqlite3
  7. import pandas as pd
  8. import unicodedata
  9. import sys
  10. #%% funzione combinazioni <> è chiamata da interpreter
  11. def combinations(s):
  12. result = []
  13. start = s.find("<")
  14. end = s.find(">")
  15. if start == -1 or end == -1:
  16. return [s]
  17. items = s[start + 1:end].split(",")
  18. for item in items:
  19. result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])])
  20. return result
  21. #%% funzione interprete, sta alla base di ogni ricerca
  22. def interpreter (data):
  23. clean_data= "'"+data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''") +"'"
  24. return combinations(clean_data)
  25. # %% funzione iniziale raddoppiata, è chiamata dalle funzioni di ricerca con iniziale raddoppiata
  26. def inizialeraddoppiata (data):
  27. doubleddata=[]
  28. for el in data:
  29. if el[1] != "%" and "_":
  30. doubleddata = doubleddata + ["'"+ el[1] + el[1:]]
  31. return doubleddata
  32. # %% funzione normalizza stringa (ricerca espansa), è chiamata dalle funzioni di ricerca espansa
  33. def normalize(stringa):
  34. return unicodedata.normalize('NFKD', stringa).encode('ASCII', 'ignore').decode('utf-8')
  35. def list_normalize(lista):
  36. return [normalize(stringa) for stringa in lista]
  37. # %% funzione counter, può essere chiamata sui risultati delle ricerche per visualizzare le forme/lemmi e il numero di occorrenze individuate
  38. def counter (results):
  39. if not results.empty:
  40. trovati= len(results.index)
  41. occorrenze= results['occ'].sum()
  42. return ("Trovati=" + str(trovati) + " Occorrenze=" + str(occorrenze))
  43. #%% Funzione ricerca per forme
  44. def ricercaforme (entries, path, espansa, raddoppiata):
  45. if espansa == 0:
  46. data=" OR spec LIKE ".join(entries)
  47. doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
  48. if raddoppiata == 1:
  49. theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idfor"
  50. else:
  51. theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} ORDER BY idfor"
  52. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  53. answer_table = pd.read_sql(theSimpleQuery, con)
  54. if answer_table.empty:
  55. print ("Nessun risultato")
  56. sys.exit(1)
  57. else:
  58. return answer_table
  59. else:
  60. data=" OR spec LIKE ".join(entries)
  61. data_norm=" OR norm LIKE ".join(list_normalize(entries))
  62. doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
  63. doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
  64. if raddoppiata == 1:
  65. theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idfor"
  66. else:
  67. theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idfor"
  68. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  69. answer_table = pd.read_sql(theSimpleQuery, con)
  70. if answer_table.empty:
  71. print ("Nessun risultato")
  72. sys.exit(1)
  73. else:
  74. return answer_table
  75. #%% Funzione ricerca per lemmi
  76. def ricercalemmi (entries, path, espansa, raddoppiata):
  77. if espansa == 0:
  78. data = " OR spec LIKE ".join(entries)
  79. doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
  80. if raddoppiata == 1:
  81. theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idlem"
  82. else:
  83. theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} ORDER BY idlem"
  84. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  85. answer_table = pd.read_sql(theSimpleQuery, con)
  86. if answer_table.empty:
  87. print ("Nessun risultato")
  88. sys.exit(1)
  89. else:
  90. return answer_table
  91. else:
  92. data = " OR spec LIKE ".join(entries)
  93. data_norm = " OR norm LIKE ".join(list_normalize(entries))
  94. doubleddata_norm = " OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
  95. doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
  96. if raddoppiata == 1:
  97. theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idlem"
  98. else:
  99. theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idlem"
  100. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  101. answer_table = pd.read_sql(theSimpleQuery, con)
  102. if answer_table.empty:
  103. print ("Nessun risultato")
  104. sys.exit(1)
  105. else:
  106. return answer_table
  107. #%% Funzione ricerca di forme con vista lemmi
  108. def ricercaformelemmi (entries, path, espansa, raddoppiata):
  109. if espansa == 0:
  110. data = " OR form.spec LIKE ".join(entries)
  111. doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
  112. if raddoppiata == 1:
  113. theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY form.idfor"
  114. else:
  115. theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} ORDER BY form.idfor"
  116. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  117. answer_table = pd.read_sql(theSimpleQuery, con)
  118. if answer_table.empty:
  119. print ("Nessun risultato")
  120. sys.exit(1)
  121. else:
  122. return answer_table
  123. else:
  124. data = " OR form.spec LIKE ".join(entries)
  125. data_norm = " OR form.norm LIKE ".join(list_normalize(entries))
  126. doubleddata_norm = " OR form.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
  127. doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
  128. if raddoppiata == 1:
  129. theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) OR (form.spec LIKE {doubleddata}) OR (form.norm LIKE {doubleddata_norm}) ORDER BY form.idfor"
  130. else:
  131. theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) ORDER BY form.idfor"
  132. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  133. answer_table = pd.read_sql(theSimpleQuery, con)
  134. if answer_table.empty:
  135. print ("Nessun risultato")
  136. sys.exit(1)
  137. else:
  138. return answer_table
  139. #%% Funzione ricerca lemmi con vista forme
  140. def ricercalemmiforme (entries, path, espansa, raddoppiata):
  141. if espansa == 0:
  142. data = " OR form.spec LIKE ".join(entries)
  143. doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
  144. if raddoppiata == 1:
  145. theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY lem.idlem"
  146. else:
  147. theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} ORDER BY lem.idlem"
  148. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  149. answer_table = pd.read_sql(theSimpleQuery, con)
  150. if answer_table.empty:
  151. print ("Nessun risultato")
  152. sys.exit(1)
  153. else:
  154. return answer_table
  155. else:
  156. data = " OR lem.spec LIKE ".join(entries)
  157. data_norm = " OR lem.norm LIKE ".join(list_normalize(entries))
  158. doubleddata_norm = " OR lem.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
  159. doubleddata = " OR lem.spec LIKE ".join(inizialeraddoppiata(entries))
  160. if raddoppiata == 1:
  161. theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) OR (lem.spec LIKE {doubleddata}) OR (lem.norm LIKE {doubleddata_norm}) ORDER BY lem.idlem"
  162. else:
  163. theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) ORDER BY lem.idlem"
  164. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  165. answer_table = pd.read_sql(theSimpleQuery, con)
  166. if answer_table.empty:
  167. print ("Nessun risultato")
  168. sys.exit(1)
  169. else:
  170. return answer_table
  171. # %% Ricerca per categorie grammaticali
  172. def ricercacatgr (entry, path):
  173. theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE cat = '{entry}' ORDER BY idlem"
  174. con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
  175. answer_table = pd.read_sql(theSimpleQuery, con)
  176. if answer_table.empty:
  177. print ("Nessun risultato")
  178. sys.exit(1)
  179. else:
  180. return answer_table