|
@@ -14,7 +14,7 @@ from test_occorrenzario_pandas import findcontexts
|
|
|
from test_occorrenzario_pandas import findbib
|
|
|
|
|
|
|
|
|
-def singlecontexts(textlist, index, parole, periodi, listOcc, path):
|
|
|
+def singlecontexts(textlist, index, parole, periodi, brani, listOcc, path):
|
|
|
context = textlist.iloc[index]
|
|
|
contexts = []
|
|
|
formats = []
|
|
@@ -23,17 +23,18 @@ def singlecontexts(textlist, index, parole, periodi, listOcc, path):
|
|
|
periodlocal = textlist.loc[index, "numperiod"]
|
|
|
ntxlocal = textlist.loc[index, "ntx"]
|
|
|
mappalocal = textlist.loc[index, "mappa"]
|
|
|
+ linkslocal = textlist.loc[index, "links"]
|
|
|
if parole != 0:
|
|
|
pointerlist = pd.DataFrame()
|
|
|
for table in listOcc:
|
|
|
query = f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
|
|
|
queryresponse = pd.read_sql(query, con)
|
|
|
pointerlist = pd.concat([pointerlist, queryresponse])
|
|
|
- with open(f"../db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
|
|
|
+ with open(f"{path}/db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
|
|
|
file1.seek(4*pointerlist["pitxt"].min())
|
|
|
cont = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
|
|
|
contexts.append(cont)
|
|
|
- with open(f"../db/ftxt/{sigla}", 'rb') as file1:
|
|
|
+ with open(f"{path}/db/ftxt/{sigla}", 'rb') as file1:
|
|
|
file1.seek(pointerlist["pitxt"].min()-1)
|
|
|
formBytes = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
|
|
|
form = [byte for byte in formBytes]
|
|
@@ -43,12 +44,25 @@ def singlecontexts(textlist, index, parole, periodi, listOcc, path):
|
|
|
elif periodi != 0:
|
|
|
query = f"SELECT piniz, pfin FROM periodi WHERE ntx = {ntxlocal} AND numperiod <= {periodlocal+int(periodi/2)} AND numperiod >= {periodlocal-int(periodi/2)}"
|
|
|
queryresponse = pd.read_sql(query, con)
|
|
|
- with open(f"../db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
|
|
|
+ with open(f"{path}/db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
|
|
|
file1.seek(4*queryresponse["piniz"].min())
|
|
|
cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
|
|
|
contexts.append(cont)
|
|
|
context ['piniz'] = queryresponse["piniz"].min()
|
|
|
context ['pfin'] = queryresponse["pfin"].max()
|
|
|
+ elif brani != 0:
|
|
|
+ if linkslocal == 0 or linkslocal == 1:
|
|
|
+ return "Nessun brano associato a questo contesto"
|
|
|
+ else:
|
|
|
+ numbranolocal = textlist.loc[index, "numbrano"]
|
|
|
+ query = f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
|
|
|
+ queryresponse = pd.read_sql(query, con)
|
|
|
+ with open(f"{path}/db/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
|
|
|
+ file1.seek(4*queryresponse["piniz"].min())
|
|
|
+ cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
|
|
|
+ contexts.append(cont)
|
|
|
+ context ['piniz'] = queryresponse["piniz"].min()
|
|
|
+ context ['pfin'] = queryresponse["pfin"].max()
|
|
|
context['contesto'] = contexts
|
|
|
context['formattazione'] = formats
|
|
|
return pd.DataFrame(context).T.set_index('index')
|
|
@@ -70,7 +84,7 @@ def findlinks (context, path):
|
|
|
query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
|
|
|
queryresponse = pd.read_sql(query, con)
|
|
|
print(queryresponse)
|
|
|
- with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
|
|
|
+ with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
|
|
|
file1.seek(4*queryresponse["piniz"].min())
|
|
|
cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-1)
|
|
|
context['nota'] = cont
|
|
@@ -80,7 +94,7 @@ def findlinks (context, path):
|
|
|
query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
|
|
|
queryresponse = pd.read_sql(query, con)
|
|
|
print(queryresponse)
|
|
|
- with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
|
|
|
+ with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
|
|
|
file1.seek(4*queryresponse["piniz"].min())
|
|
|
cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-1)
|
|
|
context['testo associato'] = cont
|
|
@@ -88,14 +102,14 @@ def findlinks (context, path):
|
|
|
query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
|
|
|
queryresponse = pd.read_sql(query, con)
|
|
|
print(queryresponse)
|
|
|
- with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
|
|
|
+ with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
|
|
|
file1.seek(4*queryresponse["piniz"].min())
|
|
|
cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-1)
|
|
|
context['nota'] = cont
|
|
|
query2 = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
|
|
|
queryresponse2 = pd.read_sql(query2, con)
|
|
|
print (queryresponse2)
|
|
|
- with open(f"../db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file2:
|
|
|
+ with open(f"{path}/db/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file2:
|
|
|
file2.seek(4*queryresponse2["piniz"].min())
|
|
|
cont2 = file2.read(queryresponse2["pfin"].max()-queryresponse2["piniz"].min()-1)
|
|
|
context['testo associato'] = cont2
|
|
@@ -106,7 +120,7 @@ def singlefindbib(contexts, path):
|
|
|
infobib = pd.DataFrame()
|
|
|
rif_org = pd.DataFrame()
|
|
|
for ind, row in contexts.iterrows():
|
|
|
- con = sqlite3.connect(f"file:{path}db/bibliografia/BiblioTLIO.db?mode=ro", uri=True)
|
|
|
+ con = sqlite3.connect(f"file:{path}/db/bibliografia/BiblioTLIO.db?mode=ro", uri=True)
|
|
|
Query = f"SELECT [Anno iniziale], [Titolo Abbreviato], IQ FROM datibib WHERE Sigla='{row['sigla']}'"
|
|
|
bib = pd.read_sql(Query, con)
|
|
|
infobib = pd.concat([infobib, bib])
|
|
@@ -126,28 +140,29 @@ def singlefindbib(contexts, path):
|
|
|
contexts['Rig_completo'] = rif2
|
|
|
contexts.pag = contexts.pag.astype(int)
|
|
|
chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag'])
|
|
|
- if 'nota' in chrono:
|
|
|
+ if 'nota' in chrono and 'testo associato' in chrono:  # test both columns: a bare 'nota' string is always truthy
|
|
|
+ cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato']
|
|
|
+ elif 'nota' in chrono:
|
|
|
cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota']
|
|
|
elif 'testo associato' in chrono:
|
|
|
- cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato']
|
|
|
+ cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'testo associato']  # 'nota' is absent in this branch; selecting it would raise KeyError
|
|
|
- elif 'nota' and 'testo associato' in chrono:
|
|
|
- cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato']
|
|
|
else:
|
|
|
cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto']
|
|
|
clean_df = chrono[cols].reset_index()
|
|
|
return clean_df
|
|
|
|
|
|
|
|
|
-parola = "pes*"
|
|
|
+parola = "divelle"
|
|
|
+path = "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/flask_be"
|
|
|
|
|
|
type= 0
|
|
|
listOcc = ["occ00001", "occ00002", "occ00003"]
|
|
|
-search=ricercaforme(interpreter(parola), "../", 0, 0)
|
|
|
-textlist=findtexts(type, search, listOcc, "../")
|
|
|
-contexts = findcontexts(textlist, 30, 0, listOcc, "../")
|
|
|
-context = singlecontexts(contexts, 0, 10, 0, listOcc, "../")
|
|
|
-links = findlinks(context,"../")
|
|
|
-bibliocontext = singlefindbib(links,"../")
|
|
|
+search=ricercaforme(interpreter(parola), path, 0, 0)
|
|
|
+textlist=findtexts(type, search, listOcc, path)
|
|
|
+contexts = findcontexts(textlist, 30, 0, listOcc, path)
|
|
|
+context = singlecontexts(contexts, 0, 0, 0, 4, listOcc, path)
|
|
|
+links = findlinks(context, path)
|
|
|
+bibliocontext = singlefindbib(links, path)
|
|
|
dtale.show(bibliocontext)
|
|
|
|
|
|
|