|
@@ -1,87 +1,111 @@
|
|
|
import json
|
|
|
-import pandas as pd
|
|
|
+import polars as pl
|
|
|
|
|
|
from .basic_queries import basicQueries
|
|
|
from .utilities.format import formatAllContexts, formatContext
|
|
|
|
|
|
|
|
|
# Executes query sequences to recover single and multiple contexts
|
|
|
-# Returns Pandas dataframes
|
|
|
+# Returns data in a dictionary format
|
|
|
class contexts(basicQueries):
|
|
|
-
|
|
|
+
|
|
|
def __init__(self, dataConfig):
    """Initialise the context-query layer.

    ``dataConfig`` is forwarded unchanged to the ``basicQueries``
    constructor; this class adds no state of its own here.
    """
    super().__init__(dataConfig)
|
|
|
|
|
|
- #%% funzione contesti multipli cumulativa
|
|
|
- # Potrebbe essere unita alle cooccorrenze?
|
|
|
- def contestimultipli (self, tipo_ricerca, ricerca, index = None):
|
|
|
- ricercadf = pd.DataFrame(ricerca)
|
|
|
+ # %% funzione contesti multipli cumulativa
|
|
|
def contestimultipli(self, tipo_ricerca, ricerca, index=None):
    """Run the whole multiple-contexts pipeline for one search.

    ``ricerca`` is wrapped in a Polars DataFrame and passed, together with
    ``tipo_ricerca`` and ``index``, to ``findtexts``; the result then goes
    through ``findcontexts``, ``findbib`` and ``formatAllContexts``.

    Returns:
        The result of ``to_dict(as_series=False)`` on the formatted frame
        (presumably a Polars DataFrame, i.e. a ``{column: [values]}``
        mapping -- confirm against ``formatAllContexts``).
    """
    search_frame = pl.DataFrame(ricerca)
    matches = self.findtexts(tipo_ricerca, search_frame, index)
    with_bibliography = self.findbib(self.findcontexts(matches))
    formatted = formatAllContexts(with_bibliography)
    return formatted.to_dict(as_series=False)
|
|
|
+
|
|
|
+ # %% funzione contesti singoli cumulativa
|
|
|
def contestosingolo(self, contestimultipli, indice, parole, periodi, brani):
    """Run the whole single-context pipeline for one selected context.

    Args:
        contestimultipli: Mapping of column name to values, as produced by
            ``contestimultipli``. Keys starting with ``'highlight'`` are
            discarded before the frame is built.
        indice: Row position of the context to expand; forwarded to
            ``singlecontexts``.
        parole, periodi, brani: Forwarded unchanged to ``singlecontexts``.

    Returns:
        The result of ``to_dict(as_series=False)`` on the formatted
        context, or the plain message string returned by
        ``singlecontexts`` when it reports that the context has no
        associated passage.
    """
    without_highlights = {
        key: value
        for key, value in contestimultipli.items()
        if not key.startswith('highlight')
    }
    contestimultiplidf = pl.DataFrame(without_highlights)
    contestosingolo = self.singlecontexts(
        contestimultiplidf, indice, parole, periodi, brani)

    # singlecontexts answers with a message string instead of a frame when
    # no passage is linked to the context. findlinks indexes its argument
    # as a DataFrame, so hand the message back rather than crashing there.
    if isinstance(contestosingolo, str):
        return contestosingolo

    braniassociati = self.findlinks(contestosingolo)
    contestosingoloclean = formatAllContexts(self.findbib(braniassociati))
    return contestosingoloclean.to_dict(as_series=False)
|
|
|
+
|
|
|
+ # %% funzione reperimento e raffinamento contesti singoli
|
|
|
def singlecontexts(self, textlist, index, parole, periodi, brani):
    """Retrieve the text of one context and return it as a one-row frame.

    The row of ``textlist`` at ``index`` is copied into a dict. Every table
    in ``self.listOcc`` is queried through ``self.queryHandler.query``, the
    smallest and largest ``pitxt`` of the combined responses are used to
    fetch the text via ``self.queryHandler.textQuery``, and the dict is
    extended with ``piniz``, ``pfin``, ``contesto`` and
    ``formattazione contesto``.

    Args:
        textlist: Polars DataFrame with at least the columns ``sigla``,
            ``numperiod``, ``ntx``, ``mappa``, ``links`` and ``numbrano``.
        index: Row position of the context inside ``textlist``.
        parole, periodi, brani: Forwarded to the query; the first one that
            is non-zero (checked in this order) selects the mode.
            Presumably window sizes -- confirm against the query handler.

    Returns:
        A one-row Polars DataFrame with one column per key of the context,
        or the string ``"Nessun brano associato a questo contesto"`` when
        ``brani`` is requested and the row's ``links`` value is 0 or 1.

    Raises:
        IndexError: if ``parole``, ``periodi`` and ``brani`` are all zero.
    """
    # named=True makes Polars return the row as a {column: value} dict;
    # by default DataFrame.row() returns a plain tuple.
    context = textlist.row(index, named=True)
    sigla = context["sigla"]
    periodlocal = context["numperiod"]
    ntxlocal = context["ntx"]
    mappalocal = context["mappa"]
    linkslocal = context["links"]
    numbranolocal = context["numbrano"]

    if parole != 0:
        query_subtype, bounds_from_pointers = 'parole', True
    elif periodi != 0:
        # NOTE(review): this mode also sends querySubtype 'parole', as the
        # code always did -- confirm that 'periodi' is not the intended value.
        query_subtype, bounds_from_pointers = 'parole', False
    elif brani != 0:
        if linkslocal == 0 or linkslocal == 1:
            return "Nessun brano associato a questo contesto"
        query_subtype, bounds_from_pointers = 'brani', False
    else:
        # Nothing was requested, so there is no context to build. The
        # exception type matches what the empty-list lookup used to raise.
        raise IndexError(
            "singlecontexts needs a non-zero parole, periodi or brani")

    # Collect every table's response and concatenate once, so we never
    # concatenate onto a column-less placeholder frame.
    responses = []
    for table in self.listOcc:
        queryData = {'queryType': 'singlecontext', 'querySubtype': query_subtype,
                     'parole': parole, 'periodi': periodi, 'brani': brani,
                     'table': table, 'ntxlocal': ntxlocal, 'mappalocal': mappalocal,
                     'periodlocal': periodlocal, 'numbranolocal': numbranolocal}
        responses.append(self.queryHandler.query(queryData, polars=True))
    pointerlist = pl.concat(responses)

    min_char = pointerlist["pitxt"].min()
    max_char = pointerlist["pitxt"].max()
    fileQueryData = {'sigla': sigla, 'minChar': min_char, 'maxChar': max_char}
    cont, form = self.queryHandler.textQuery(fileQueryData, True)

    if bounds_from_pointers:
        context['piniz'] = min_char
        context['pfin'] = max_char
    else:
        # NOTE(review): only the response of the last table is used here,
        # as before -- confirm whether the combined frame should be used.
        last_response = responses[-1]
        context['piniz'] = last_response["piniz"].min()
        context['pfin'] = last_response["pfin"].max()

    context['contesto'] = cont
    context['formattazione contesto'] = json.dumps(form)

    # A list holding one dict yields exactly one row with one column per
    # key, which is the shape the pandas version produced.
    return pl.DataFrame([context])
|
|
|
|
|
|
#%% funzione reperimento note e brani associati
|
|
|
def findlinks (self, context):
|
|
@@ -92,6 +116,7 @@ class contexts(basicQueries):
|
|
|
pinizlocal = context.loc[0, "piniz"]
|
|
|
pfinlocal = context.loc[0, "pfin"]
|
|
|
if linkslocal == 0:
|
|
|
+
|
|
|
return context
|
|
|
if linkslocal == 1:
|
|
|
queryData = {'queryType': 'links', 'querySubtype': 'nota', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
|
|
@@ -101,6 +126,7 @@ class contexts(basicQueries):
|
|
|
context['nota'] = cont
|
|
|
context['formattazione nota'] = json.dumps(form)
|
|
|
context['nota formattata'] = formatContext(json.dumps(form))
|
|
|
+
|
|
|
return context
|
|
|
if linkslocal == 2:
|
|
|
queryData = {'queryType': 'links', 'querySubtype': 'testo_associato', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
|
|
@@ -125,4 +151,5 @@ class contexts(basicQueries):
|
|
|
context['testo associato'] = cont2
|
|
|
context['formattazione testo associato'] = json.dumps(form2)
|
|
|
context['testo associato formattato'] = formatContext(cont2, json.dumps(form2))
|
|
|
+
|
|
|
return context
|