# test_decoder.py

# %%
import csv
import sqlite3
import pandas as pd
import dtale
import unicodedata
import sys
import random
# %%
# encoding/decoding functions for db
# %% decoding function
def db_decode(string):
    res = ""
    with open("vettSpec.csv", 'r') as file1:
        reader = csv.DictReader(file1)
        vettSpec = [row for row in reader]
    for c in string:
        char_code = int.from_bytes(c.encode("utf-32-le"), 'little', signed=False)
        found = next((el['unicode'] for el in vettSpec if int(el['intcode']) == char_code), "")
        res += chr(int(found, 16))
    return res
#%% encoding function
def db_encode(string):
    encoded_string = ""
    with open("vettSpec.csv", 'r') as file1:
        reader = csv.DictReader(file1)
        vettSpec = [row for row in reader]
    for char in string:
        post = next((el['intcode'] for el in vettSpec if el['unicode'] == char.encode("utf-32-be").hex().lstrip('0').upper()), None)
        byteini = int.to_bytes(int(post), 4, byteorder='little')
        encoded_string += byteini.decode('utf-32-le')
    return encoded_string
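#%% minimal round-trip sketch (assumption: vettSpec.csv sits next to this script and its
# 'unicode'/'intcode' columns define a one-to-one mapping, as db_encode/db_decode expect;
# the sample string is purely illustrative). If that holds, decoding an encoded string
# should return the original input:
# assert db_decode(db_encode("ca")) == "ca"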
#%% combinations function (handles <>)
def combinations(s):
    result = []
    start = s.find("˴")
    end = s.find("˶")
    if start == -1 or end == -1:
        return [s]
    items = s[start + 1:end].split("Ñ")
    for item in items:
        result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])])
    return result
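#%% small illustration of combinations(): "˴" and "˶" delimit a group of alternatives
# separated by "Ñ", and each alternative is expanded into its own pattern
# (the literal string below is only an example, not a real encoded form)
assert combinations("ca˴sÑn˶e") == ["case", "cane"]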
#%% interpreter function (calls the previous functions and replaces the non-standard wildcard characters)
def interpreter(data):
    encoded_data = db_encode(data)
    clean_data = "'" + encoded_data.replace("˯", "%").replace("Ò", "_") + "'"
    return combinations(clean_data)
#%% doubles the initial letter of the input string (unless it is a wildcard character)
def inizialeraddoppiata(data):
    doubleddata = []
    for el in data:
        if el[1] != "%" and el[1] != "_":
            doubleddata = doubleddata + ["'" + el[1] + el[1:]]
    return doubleddata
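#%% small illustration of inizialeraddoppiata(): the first letter after the opening quote
# is doubled (entries starting with a wildcard are skipped), so a search for 'casa' can
# also match the doubled form 'ccasa'
assert inizialeraddoppiata(["'casa'"]) == ["'ccasa'"]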
#%% normalizes a text string by removing diacritics
def str_normalize(string):
    normalized_string = ""
    with open("atab.csv", 'r') as file1:
        reader = csv.DictReader(file1)
        atab = [row for row in reader]
    for c in string:
        char_code = int.from_bytes(c.encode("utf-32-le"), 'little', signed=False)
        found = next((el['norm'] for el in atab if int(el['spec']) == char_code), None)
        # fall back to the original character code when atab.csv has no normalization entry
        pre = found if found else char_code
        byteini = int.to_bytes(int(pre), 4, byteorder='little')
        normalized_string += byteini.decode('utf-32-le')
    return normalized_string
#%% applies the normalization to a list of strings
def list_normalize(lista):
    return ["'" + str_normalize(stringa.strip("'")) + "'" for stringa in lista]

def db_results_decode(df):
    columns = ['forma', 'lemma', 'cat_gr', 'disambiguatore']
    for col in df.columns:
        if col in columns or col.startswith('highlight'):
            df[col] = df[col].apply(db_decode)
    # df = df.applymap(lambda x: db_decode(x) if type(x) == str else x)
    return df
#%% returns the list of occurrence tables
def get_tables_occ(path):
    conn = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    table_names = cursor.fetchall()
    occ_tables = [table[0] for table in table_names if table[0].startswith('Occ')]
    cursor.close()
    conn.close()
    return occ_tables
# result counter function
def counter(results):
    if not results.empty:
        trovati = len(results.index)
        occorrenze = results['occ'].sum()
        return ("Trovati=" + str(trovati) + " Occorrenze=" + str(occorrenze))
#%% test functions
def ricercaforme(entries, path, espansa, raddoppiata):
    if espansa == 0:
        data = " OR spec LIKE ".join(entries)
        doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
        if raddoppiata == 1:
            theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idfor"
        else:
            theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} ORDER BY idfor"
        con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
        answer_table = pd.read_sql(theSimpleQuery, con)
        if answer_table.empty:
            print("Nessun risultato")
            sys.exit(1)
        else:
            return answer_table
    else:
        data = " OR spec LIKE ".join(entries)
        data_norm = " OR norm LIKE ".join(list_normalize(entries))
        doubleddata_norm = " OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
        doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
        if raddoppiata == 1:
            theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idfor"
        else:
            theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idfor"
        con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
        answer_table = pd.read_sql(theSimpleQuery, con)
        if answer_table.empty:
            print("Nessun risultato")
            sys.exit(1)
        else:
            return answer_table
def findtexts(type, df, listOcc, path, index=None):
    if index is None:
        df = pd.DataFrame(df)
    else:
        if isinstance(index, range):
            index = list(index)
        elif not isinstance(index, list):
            index = [index]
        df = pd.DataFrame(df.loc[index])
    textlist = pd.DataFrame()
    codlist = list(df["cod"])
    con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
    for table in listOcc:
        strlist = ",".join(str(c) for c in codlist)
        if type == 0:
            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.cod IN ({strlist})"
            extendedqueryResponse = pd.read_sql(Query, con)
            textlist = pd.concat([textlist, extendedqueryResponse])
        elif type == 1:
            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist})"
            extendedqueryResponse = pd.read_sql(Query, con)
            textlist = pd.concat([textlist, extendedqueryResponse])
        elif type == 2:
            subquery = f"SELECT DISTINCT lemma, forma FROM pfl WHERE lemma IN ({strlist})"
            subdf = pd.read_sql(subquery, con)
            formcodlist = list(subdf["forma"])
            strform = ",".join(str(c) for c in formcodlist)
            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
            extendedqueryResponse = pd.read_sql(Query, con)
            textlist = pd.concat([textlist, extendedqueryResponse])
    return textlist
def findcontexts(textlist, listOcc, path):
    contexts = []
    ampiezzacontesto = 31
    con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
    for ind, row in textlist.iterrows():
        pitxtLocal = row["pitxt"]
        sigla = row["sigla"]
        periodlocal = row["numperiod"]
        ntxlocal = row["ntx"]
        mappalocal = row["mappa"]
        pointerlist = pd.DataFrame()
        for table in listOcc:
            query = f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(ampiezzacontesto/2)} AND tab.mappa >= {mappalocal-int(ampiezzacontesto/2)}"
            queryresponse = pd.read_sql(query, con)
            pointerlist = pd.concat([pointerlist, queryresponse])
        with open(f"{path}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
            file1.seek(4*pointerlist["pitxt"].min())
            cont = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
        contexts.append(cont)
    textlist['contesto'] = contexts
    return (textlist.reset_index())
def findbib(contexts, path):
    infobib = pd.DataFrame()
    rif_org = pd.DataFrame()
    for ind, row in contexts.iterrows():
        con = sqlite3.connect(f"file:{path}/bibliografia/BiblioTLIO.db", uri=True)
        Query = f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{row['sigla']}'"
        bib = pd.read_sql(Query, con)
        infobib = pd.concat([infobib, bib])
        con2 = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
        Query2 = f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{row['numorg']}' AND ntx='{row['ntx']}')"
        rif = pd.read_sql(Query2, con2)
        rif_org = pd.concat([rif_org, rif])
    annoiniz = list(infobib['Anno iniziale'])
    annofin = list(infobib['Anno finale'])
    datacod = list(infobib['Data codificata'])
    datadesc = list(infobib['Data descrittiva'])
    titoloabb = list(infobib['Titolo Abbreviato'])
    autore = list(infobib['Autore'])
    titolo = list(infobib['Titolo'])
    curatore = list(infobib['Curatore'])
    areagen = list(infobib['Area generica'])
    areaspec = list(infobib['Area specifica'])
    genere = list(infobib['Genere'])
    forma = list(infobib['Forma'])
    tipo = list(infobib['Tipo'])
    iq = list(infobib['IQ'])
    rif1 = list(rif_org['Rif_organico'])
    rif2 = list(rif_org['Rif_completo'])
    contexts['Anno iniziale'] = annoiniz
    contexts['Anno finale'] = annofin
    contexts['Data codificata'] = datacod
    contexts['Data descrittiva'] = datadesc
    contexts['Autore'] = autore
    contexts['Titolo Abbreviato'] = titoloabb
    contexts['Titolo'] = titolo
    contexts['Curatore'] = curatore
    contexts['Area generica'] = areagen
    contexts['Area specifica'] = areaspec
    contexts['Genere'] = genere
    contexts['Forma'] = forma
    contexts['Tipo'] = tipo
    contexts['IQ'] = iq
    contexts['Rif_organico'] = rif1
    contexts['Rif_completo'] = rif2  # fixed typo: was 'Rig_completo'
    contexts.pag = contexts.pag.astype(int)
    chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag'])
    cols = ['links', 'Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'Autore', 'Titolo', 'Anno iniziale', 'Anno finale', 'Data codificata', 'Data descrittiva', 'Area generica', 'Area specifica', 'Genere', 'Forma', 'Tipo', 'Curatore', 'cod', 'ntx', 'pitxt', 'elemlen', 'mappa', 'numperiod', 'numorg', 'sigla', 'vol', 'col', 'numbrano', 'Rif_completo']
    clean_df = chrono[cols].reset_index()
    return clean_df
def highlight(bibliocontexts, path):
    index = 0
    con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
    for col in bibliocontexts.columns:
        forme = []
        if col.startswith('cod'):
            for ind, row in bibliocontexts.iterrows():
                Query = f"SELECT spec FROM form WHERE cod={row[col]}"
                query_answer = pd.read_sql(Query, con)
                forme += list(query_answer['spec'])
            if index == 0:
                bibliocontexts['highlight'] = forme
            else:
                bibliocontexts['highlight'+str(index)] = forme
            index += 1
    return bibliocontexts
def contestimultipli(tipo_ricerca, ricerca, listOcc, path):
    textlist = findtexts(tipo_ricerca, ricerca, listOcc, path)
    contexts = findcontexts(textlist, listOcc, path)
    bibliocontexts = findbib(contexts, path)
    highlights = highlight(bibliocontexts, path)
    return highlights
def singlecontexts(textlist, index, parole, periodi, brani, listOcc, path):
    context = textlist.iloc[index]
    contexts = []
    formats = []
    con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
    sigla = textlist.loc[index, "sigla"]
    periodlocal = textlist.loc[index, "numperiod"]
    ntxlocal = textlist.loc[index, "ntx"]
    mappalocal = textlist.loc[index, "mappa"]
    linkslocal = textlist.loc[index, "links"]
    if parole != 0:
        pointerlist = pd.DataFrame()
        for table in listOcc:
            query = f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
            queryresponse = pd.read_sql(query, con)
            pointerlist = pd.concat([pointerlist, queryresponse])
        with open(f"{path}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
            file1.seek(4*pointerlist["pitxt"].min())
            cont = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
        contexts.append(cont)
        with open(f"{path}/ftxt/{sigla}", 'rb') as file1:
            file1.seek(pointerlist["pitxt"].min()-1)
            formBytes = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
        form = [byte for byte in formBytes]
        formats.append(form)
        context['piniz'] = pointerlist["pitxt"].min()
        context['pfin'] = pointerlist["pitxt"].max()
    elif periodi != 0:
        query = f"SELECT piniz, pfin FROM periodi WHERE ntx = {ntxlocal} AND numperiod <= {periodlocal+int(periodi/2)} AND numperiod >= {periodlocal-int(periodi/2)}"
        queryresponse = pd.read_sql(query, con)
        with open(f"{path}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
            file1.seek(4*queryresponse["piniz"].min())
            cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
        contexts.append(cont)
        context['piniz'] = queryresponse["piniz"].min()
        context['pfin'] = queryresponse["pfin"].max()
    elif brani != 0:
        if linkslocal == 0 or linkslocal == 1:
            return "Nessun brano associato a questo contesto"
        else:
            numbranolocal = textlist.loc[index, "numbrano"]
            query = f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
            queryresponse = pd.read_sql(query, con)
            with open(f"{path}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
                file1.seek(4*queryresponse["piniz"].min())
                cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
            contexts.append(cont)
            context['piniz'] = queryresponse["piniz"].min()
            context['pfin'] = queryresponse["pfin"].max()
    context['contesto'] = contexts[0]
    context['formattazione'] = formats
    return pd.DataFrame(context).T.reset_index(drop=True)
#%% retrieves the passages associated with a context. Takes the output of singlecontexts as input.
def findlinks(context, path):
    con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
    linkslocal = context.loc[0, "links"]
    siglalocal = context.loc[0, "sigla"]
    ntxlocal = context.loc[0, "ntx"]
    pitxtlocal = context.loc[0, "pitxt"]
    pinizlocal = context.loc[0, "piniz"]
    pfinlocal = context.loc[0, "pfin"]
    if linkslocal == 0:
        return context
    if linkslocal == 1:
        query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
        queryresponse = pd.read_sql(query, con)
        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
            file1.seek(4*queryresponse["piniz"].min())
            cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-2)
        context['nota'] = cont
        return context
    if linkslocal == 2:
        query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
        queryresponse = pd.read_sql(query, con)
        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
            file1.seek(4*queryresponse["piniz"].min())
            cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-2)
        context['testo associato'] = cont
    if linkslocal == 3:
        query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
        queryresponse = pd.read_sql(query, con)
        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
            file1.seek(4*queryresponse["piniz"].min())
            cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-2)
        context['nota'] = cont
        query2 = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
        queryresponse2 = pd.read_sql(query2, con)
        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file2:
            file2.seek(4*queryresponse2["piniz"].min())
            cont2 = file2.read(queryresponse2["pfin"].max()-queryresponse2["piniz"].min()-2)
        context['testo associato'] = cont2
    return context
#%% Takes the output of findlinks as input and attaches the bibliographic references to each context.
def singlefindbib(contexts, path):
    infobib = pd.DataFrame()
    rif_org = pd.DataFrame()
    for ind, row in contexts.iterrows():
        con = sqlite3.connect(f"file:{path}/bibliografia/BiblioTLIO.db?mode=ro", uri=True)
        Query = f"SELECT [Anno iniziale], [Titolo Abbreviato], IQ FROM datibib WHERE Sigla='{row['sigla']}'"
        bib = pd.read_sql(Query, con)
        infobib = pd.concat([infobib, bib])
        con2 = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
        Query2 = f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{row['numorg']}' AND ntx='{row['ntx']}')"
        rif = pd.read_sql(Query2, con2)
        rif_org = pd.concat([rif_org, rif])
    anno = list(infobib['Anno iniziale'])
    titolo = list(infobib['Titolo Abbreviato'])
    iq = list(infobib['IQ'])
    rif1 = list(rif_org['Rif_organico'])
    rif2 = list(rif_org['Rif_completo'])
    contexts['Anno iniziale'] = anno
    contexts['Titolo Abbreviato'] = titolo
    contexts['IQ'] = iq
    contexts['Rif_organico'] = rif1
    contexts['Rif_completo'] = rif2  # fixed typo: was 'Rig_completo'
    contexts.pag = contexts.pag.astype(int)
    chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag'])
    if 'nota' in chrono.columns and 'testo associato' in chrono.columns:
        cols = ['links', 'Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato', 'highlight']
    elif 'nota' in chrono.columns:
        cols = ['links', 'Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'highlight']
    elif 'testo associato' in chrono.columns:
        cols = ['links', 'Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'testo associato', 'highlight']
    else:
        cols = ['links', 'Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'highlight']
    chrono = chrono.drop(chrono.columns[0], axis=1)
    clean_df = chrono.reindex(columns=cols + list(chrono.columns.difference(cols)))
    return clean_df
# %% single-context function
def contestosingolo(contestimultipli, indice, parole, periodi, brani, listOcc, path):
    contestosingolo = singlecontexts(contestimultipli, indice, parole, periodi, brani, listOcc, path)
    braniassociati = findlinks(contestosingolo, path)
    contestosingoloclean = singlefindbib(braniassociati, path)
    return contestosingoloclean
#%% context decoding functions
def GeneraKey(sigla=None):
    # Generates the encryption parameters of a text
    valore = [0, 0, 0]
    origine = None
    global LengKey, KeyBody
    con = sqlite3.connect(f"file:{path}/corpus.db?mode=ro", uri=True)
    query = f"SELECT notea,noteb,notec FROM Intbib WHERE lower(sigla)='{sigla}'"
    query_answer = pd.read_sql(query, con)
    valore[0] = int(query_answer['notea'][0])
    valore[1] = int(query_answer['noteb'][0])
    valore[2] = int(query_answer['notec'][0])
    print(valore)
    dato = valore[0] + valore[1]
    LengKey = 50 + (valore[2] % 300)
    if (LengKey // 2) * 2 != LengKey:
        LengKey = LengKey + 1
    KeyBody = [0] * LengKey
    iniziale = dato % (valore[2] + 123)
    random.seed(iniziale)
    for k in range(LengKey):
        KeyBody[k] = int(1000 * random.random())

path = "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/db/ndg2.gat4"
GeneraKey("d17")
#%% encryption/decryption function
def CifraS(df, verso):
    # Encrypts or decrypts a string
    # StartInFile = position, within the encrypted text, of the first character of the string to process (1-based)
    # stringa = string to encrypt or decrypt
    # verso = 1 => encrypt; 2 => decrypt
    # FirstInStringa = index of the first byte considered within the vector to decrypt
    # LastInStringa = index of the last byte considered within the vector to decrypt
    # PosizInKey = index of the first character to use within the fixed key
    global LengKey, KeyBody
    print(KeyBody)
    print(LengKey)
    StartInFile = df['piniz'][0]
    stringa = df['contesto'][0]
    print(StartInFile, stringa)
    NumCharsToConv = 0
    LastInStringa = 0
    LengKey2 = LengKey // 2
    while LastInStringa < len(stringa):
        StartInFile = StartInFile + NumCharsToConv
        NumKeyAppl = 1 + (StartInFile - 1) // LengKey2
        FirstInStringa = LastInStringa + 1
        PosizInKey = StartInFile % LengKey2
        if PosizInKey == 0:
            PosizInKey = LengKey2
        MaxCharsToConv = LengKey - PosizInKey + 1
        if MaxCharsToConv > LengKey2 - PosizInKey + 1:
            MaxCharsToConv = LengKey2 - PosizInKey + 1
        NumCharsToConv = len(stringa) - FirstInStringa + 1
        if NumCharsToConv > MaxCharsToConv:
            NumCharsToConv = MaxCharsToConv
        LastInStringa = FirstInStringa + NumCharsToConv - 1
        for k in range(FirstInStringa - 1, LastInStringa):
            if verso == 1:
                stringa = stringa[:k] + chr(ord(stringa[k]) + KeyBody[PosizInKey-1]) + stringa[k+1:]
            else:
                stringa = stringa[:k] + chr(ord(stringa[k]) - KeyBody[PosizInKey-1]) + stringa[k+1:]
            print("Stringa: "+stringa)
            PosizInKey += 1
    return stringa
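#%% usage sketch (assumption: mirrors the commented-out calls in the test cell below;
# it requires the corpus database, a single-context DataFrame with 'piniz' and 'contesto',
# and a prior GeneraKey() call with the sigla of that text so that LengKey/KeyBody are set)
# GeneraKey("d17")                         # derive the key parameters for the text
# decifrato = CifraS(contesto_singolo, 2)  # verso=2 decrypts the 'contesto' string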
#%% quick tests
testdec = 'ӗЫГҨғϩҘМϩӲѢҨҘ'
testdec2 = 'ӻ'
testenc = "recomandaţion"
testenc2 = "çà"
testnorm = "fòra"
path = "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/db/ndg2.gat4"
listOcc = get_tables_occ(path)
ricerca = ricercaforme(interpreter(testenc), path, 1, 0)
# dtale.show(db_results_decode(ricerca))
contesti_multipli = contestimultipli(0, ricerca, listOcc, path)
contesto_singolo = contestosingolo(contesti_multipli, 0, 31, 0, 0, listOcc, path)
# dtale.show(db_results_decode(contesto_singolo))
# CifraS(contesto_singolo, 2)
# %%