Просмотр исходного кода

refine su base testi associati

Leonardo Canova 2 года назад
Родитель
Коммит
a1dde4f45a

+ 34 - 19
flask_be/engine/test/test_contesti_singoli.py

@@ -14,7 +14,7 @@ from test_occorrenzario_pandas import findcontexts
 from test_occorrenzario_pandas import findbib
 
 #funzione di ricerca dei contesti singoli, con personalizzazione dell'ampiezza per parole o periodi, ha in input findcontexts e deve essere passata a findbib.
-def singlecontexts(textlist, index, parole, periodi, listOcc, path):
+def singlecontexts(textlist, index, parole, periodi, brani, listOcc, path):
     context = textlist.iloc[index]
     contexts = []
     formats = []
@@ -23,17 +23,18 @@ def singlecontexts(textlist, index, parole, periodi, listOcc, path):
     periodlocal = textlist.loc[index, "numperiod"]
     ntxlocal = textlist.loc[index, "ntx"]
     mappalocal = textlist.loc[index, "mappa"]
+    linkslocal = textlist.loc[index, "links"]
     if parole != 0:
         pointerlist = pd.DataFrame()
         for table in listOcc:
             query = f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
             queryresponse = pd.read_sql(query, con)
             pointerlist = pd.concat([pointerlist, queryresponse])
-        with open(f"../db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
+        with open(f"{path}/db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
             file1.seek(4*pointerlist["pitxt"].min())
             cont = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
             contexts.append(cont)
-        with open(f"../db/ftxt/{sigla}", 'rb') as file1:
+        with open(f"{path}/db/ftxt/{sigla}", 'rb') as file1:
             file1.seek(pointerlist["pitxt"].min()-1)
             formBytes = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
             form = [byte for byte in formBytes]
@@ -43,12 +44,25 @@ def singlecontexts(textlist, index, parole, periodi, listOcc, path):
     elif periodi != 0:
         query = f"SELECT piniz, pfin FROM periodi WHERE ntx = {ntxlocal} AND numperiod <= {periodlocal+int(periodi/2)} AND numperiod >= {periodlocal-int(periodi/2)}"
         queryresponse = pd.read_sql(query, con)
-        with open(f"../db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
+        with open(f"{path}/db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
             file1.seek(4*queryresponse["piniz"].min())
             cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
             contexts.append(cont)
             context ['piniz'] = queryresponse["piniz"].min()
             context ['pfin'] = queryresponse["pfin"].max()
+    elif brani != 0:
+        if linkslocal == 0 or linkslocal == 1:
+            return "Nessun brano associato a questo contesto"
+        else:
+            numbranolocal = textlist.loc[index, "numbrano"]
+            query = f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
+            queryresponse = pd.read_sql(query, con)
+            with open(f"{path}/db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
+                file1.seek(4*queryresponse["piniz"].min())
+                cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
+                contexts.append(cont)
+                context ['piniz'] = queryresponse["piniz"].min()
+                context ['pfin'] = queryresponse["pfin"].max() 
     context['contesto'] = contexts
     context['formattazione'] = formats
     return pd.DataFrame(context).T.set_index('index')
@@ -70,7 +84,7 @@ def findlinks (context, path):
         query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
         queryresponse = pd.read_sql(query, con)
         print(queryresponse)
-        with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
+        with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
                 file1.seek(4*queryresponse["piniz"].min())
                 cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-1)
         context['nota'] = cont
@@ -80,7 +94,7 @@ def findlinks (context, path):
         query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
         queryresponse = pd.read_sql(query, con)
         print(queryresponse)
-        with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
+        with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
                 file1.seek(4*queryresponse["piniz"].min())
                 cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-1)
         context['testo associato'] = cont
@@ -88,14 +102,14 @@ def findlinks (context, path):
         query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
         queryresponse = pd.read_sql(query, con)
         print(queryresponse)
-        with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
+        with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
                 file1.seek(4*queryresponse["piniz"].min())
                 cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-1)
         context['nota'] = cont
         query2 = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
         queryresponse2 = pd.read_sql(query2, con)
         print (queryresponse2)
-        with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file2:
+        with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file2:
                 file2.seek(4*queryresponse2["piniz"].min())
                 cont2 = file2.read(queryresponse2["pfin"].max()-queryresponse2["piniz"].min()-1)
         context['testo associato'] = cont2
@@ -106,7 +120,7 @@ def singlefindbib(contexts, path):
     infobib = pd.DataFrame()
     rif_org = pd.DataFrame()
     for ind, row in contexts.iterrows():
-        con = sqlite3.connect(f"file:{path}db/bibliografia/BiblioTLIO.db?mode=ro", uri=True)
+        con = sqlite3.connect(f"file:{path}/db/bibliografia/BiblioTLIO.db?mode=ro", uri=True)
         Query = f"SELECT [Anno iniziale], [Titolo Abbreviato], IQ FROM datibib WHERE Sigla='{row['sigla']}'"
         bib = pd.read_sql(Query, con)
         infobib = pd.concat([infobib, bib])
@@ -126,28 +140,29 @@ def singlefindbib(contexts, path):
     contexts['Rig_completo'] = rif2
     contexts.pag = contexts.pag.astype(int)
     chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag'])   
-    if 'nota' in chrono:
+    if 'nota' and 'testo associato' in chrono:
+        cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato']
+    elif 'nota' in chrono:
         cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota']
     elif 'testo associato' in chrono:
         cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato']
-    elif 'nota' and 'testo associato' in chrono:
-        cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato']
     else:
         cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto']
     clean_df = chrono[cols].reset_index()
     return clean_df
 
 # %%
-parola = "pes*"
+parola = "divelle"
+path = "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/flask_be"
 # tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione "mostra occorrenze non lemmatizzate")
 type= 0
 listOcc = ["occ00001", "occ00002", "occ00003"]
-search=ricercaforme(interpreter(parola), "../", 0, 0)
-textlist=findtexts(type, search, listOcc, "../")
-contexts = findcontexts(textlist, 30, 0, listOcc, "../")
-context = singlecontexts(contexts, 0, 10, 0, listOcc, "../")
-links = findlinks(context,"../")
-bibliocontext = singlefindbib(links,"../")
+search=ricercaforme(interpreter(parola), path, 0, 0)
+textlist=findtexts(type, search, listOcc, path)
+contexts = findcontexts(textlist, 30, 0, listOcc, path)
+context = singlecontexts(contexts, 0, 0, 0, 4, listOcc, path)
+links = findlinks(context, path)
+bibliocontext = singlefindbib(links, path)
 dtale.show(bibliocontext)
 # %%
 #print(context)

+ 3 - 3
flask_be/engine/test/test_occorrenzario_pandas.py

@@ -19,11 +19,11 @@ def findtexts(type, df, listOcc, path):
     for table in listOcc:
         strlist = ",".join(str(c) for c in codlist)
         if type == 0:
-            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.cod IN ({strlist})"
+            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.cod IN ({strlist})"
             extendequeryReponse = pd.read_sql(Query, con)
             textlist = pd.concat([textlist, extendequeryReponse])
         elif type == 1:
-            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist})"
+            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist})"
             extendequeryReponse = pd.read_sql(Query, con)
             textlist = pd.concat([textlist, extendequeryReponse])
         elif type == 2:
@@ -31,7 +31,7 @@ def findtexts(type, df, listOcc, path):
             subdf = pd.read_sql(subquery, con)
             formcodlist = list(subdf["forma"])
             strform = ",".join(str(c) for c in formcodlist)
-            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
+            Query = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
             extendequeryReponse = pd.read_sql(Query, con)
             textlist = pd.concat([textlist, extendequeryReponse])
     return textlist