import json

import pandas as pd

from .basic_queries import basicQueries
from .utilities.format import formatAllContexts, formatContext


# Executes query sequences to recover single and multiple contexts
# Returns Pandas dataframes
class contexts(basicQueries):
    """Query sequences that retrieve multiple contexts and a single refined context.

    The helper methods work on pandas DataFrames; the two public entry points
    (`contestimultipli`, `contestosingolo`) return the frame converted with
    `to_dict(orient='records')`.
    """

    # Output columns written for each link subtype:
    # (raw text, JSON-encoded formatting, formatted text).
    _LINK_COLUMNS = {
        'nota': (
            'nota',
            'formattazione nota',
            'nota formattata',
        ),
        'testo_associato': (
            'testo associato',
            'formattazione testo associato',
            'testo associato formattato',
        ),
    }

    # Link subtypes to fetch for each value of the "links" column.
    _LINKS_BY_CODE = {
        1: ('nota',),
        2: ('testo_associato',),
        3: ('nota', 'testo_associato'),
    }

    def __init__(self, dataConfig):
        super().__init__(dataConfig)

    #%% cumulative function for multiple contexts
    # Could it be merged with the co-occurrences?
    def contestimultipli(self, tipo_ricerca, ricerca, index=None):
        """Run the full multiple-contexts pipeline and return its records.

        Chains `findtexts` -> `findcontexts` -> `findbib` -> `formatAllContexts`.

        Args:
            tipo_ricerca: search type, forwarded unchanged to `findtexts`.
            ricerca: search data; wrapped in a `pd.DataFrame` before use.
            index: optional value forwarded unchanged to `findtexts`.

        Returns:
            The formatted result converted with `to_dict(orient='records')`.
        """
        ricercadf = pd.DataFrame(ricerca)
        textlist = self.findtexts(tipo_ricerca, ricercadf, index)
        found_contexts = self.findcontexts(textlist)
        with_bibliography = self.findbib(found_contexts)
        highlights = formatAllContexts(with_bibliography)
        return highlights.to_dict(orient='records')

    #%% cumulative function for single contexts
    def contestosingolo(self, contestimultipli, indice, parole, periodi, brani):
        """Refine one context and enrich it with links and bibliography.

        Args:
            contestimultipli: mapping describing one context record; keys
                starting with ``'highlight'`` are discarded.
            indice: row selector forwarded to `singlecontexts`.
            parole, periodi, brani: window selectors forwarded to
                `singlecontexts`.

        Returns:
            The formatted result converted with `to_dict(orient='records')`.
        """
        # Drop the "highlight" columns, which get in the way of the steps below.
        record = {
            key: value
            for key, value in contestimultipli.items()
            if not key.startswith('highlight')
        }
        contestimultiplidf = pd.DataFrame(record, index=[0])
        # NOTE(review): singlecontexts returns a plain string when no passage is
        # associated with the context; findlinks would then fail on `.loc`.
        # Confirm how callers expect that case to be reported.
        single = self.singlecontexts(contestimultiplidf, indice, parole, periodi, brani)
        with_links = self.findlinks(single)
        cleaned = self.findbib(with_links)
        cleaned = formatAllContexts(cleaned)
        return cleaned.to_dict(orient='records')

    #%% function retrieving and refining single contexts
    def singlecontexts(self, textlist, index, parole, periodi, brani):
        """Fetch the text of one context, widened by words, periods or passages.

        The first non-zero selector among `parole`, `periodi`, `brani` (checked
        in that order) decides which 'singlecontext' query is issued.

        Args:
            textlist: DataFrame holding the contexts; the columns "sigla",
                "numperiod", "ntx", "mappa", "links" and "numbrano" are read.
            index: row to refine (used with both `.iloc` and `.loc`).
            parole, periodi, brani: window selectors, forwarded to the query
                handler inside the query dict.

        Returns:
            A one-row DataFrame with the selected row plus the columns
            'piniz', 'pfin', 'contesto' and 'formattazione contesto'; or the
            string "Nessun brano associato a questo contesto" when `brani` is
            requested and the row's "links" value is 0 or 1.

        Raises:
            IndexError: if `parole`, `periodi` and `brani` are all zero.
        """
        # Copy so the new columns are written to an independent Series rather
        # than to a row extracted from `textlist`.
        context = textlist.iloc[index].copy()
        sigla = textlist.loc[index, "sigla"]
        periodlocal = textlist.loc[index, "numperiod"]
        ntxlocal = textlist.loc[index, "ntx"]
        mappalocal = textlist.loc[index, "mappa"]
        linkslocal = textlist.loc[index, "links"]
        numbranolocal = textlist.loc[index, "numbrano"]

        def build_query(subtype, table):
            # Every 'singlecontext' query carries the same payload; only the
            # subtype and the occurrence table vary.
            return {
                'queryType': 'singlecontext',
                'querySubtype': subtype,
                'parole': parole,
                'periodi': periodi,
                'brani': brani,
                'table': table,
                'ntxlocal': ntxlocal,
                'mappalocal': mappalocal,
                'periodlocal': periodlocal,
                'numbranolocal': numbranolocal,
            }

        if parole != 0:
            # One query per occurrence table, concatenated once at the end.
            responses = [
                self.queryHandler.query(build_query('parole', table), pandas=True)
                for table in self.listOcc
            ]
            pointerlist = pd.concat(responses) if responses else pd.DataFrame()
            start = pointerlist["pitxt"].min()
            end = pointerlist["pitxt"].max()
        elif periodi != 0:
            # NOTE(review): the subtype was 'parole' here, but this branch reads
            # the piniz/pfin columns like the 'brani' branch does, so 'periodi'
            # is presumably the intended subtype - confirm against the query
            # handler. No occurrence table applies in this branch (the name
            # `table` was previously unbound here), hence None.
            queryresponse = self.queryHandler.query(build_query('periodi', None), pandas=True)
            start = queryresponse["piniz"].min()
            end = queryresponse["pfin"].max()
        elif brani != 0:
            if linkslocal in (0, 1):
                return "Nessun brano associato a questo contesto"
            # No occurrence table applies in this branch either.
            queryresponse = self.queryHandler.query(build_query('brani', None), pandas=True)
            start = queryresponse["piniz"].min()
            end = queryresponse["pfin"].max()
        else:
            # Same exception type as the former `contexts[0]` lookup on an
            # empty list, now with an explicit message.
            raise IndexError("singlecontexts requires a non-zero parole, periodi or brani")

        fileQueryData = {'sigla': sigla, 'minChar': start, 'maxChar': end}
        text, formatting = self.queryHandler.textQuery(fileQueryData, True)
        context['piniz'] = start
        context['pfin'] = end
        context['contesto'] = text
        context['formattazione contesto'] = json.dumps(formatting)
        return pd.DataFrame(context).T.reset_index(drop=True)

    def _attach_link(self, context, subtype, sigla, pointers):
        """Query one link subtype and store its text and formatting on `context`."""
        text_col, raw_format_col, formatted_col = self._LINK_COLUMNS[subtype]
        queryData = {'queryType': 'links', 'querySubtype': subtype, **pointers}
        queryresponse = self.queryHandler.query(queryData, pandas=True)
        fileQueryData = {
            'sigla': sigla,
            'minChar': queryresponse["piniz"].min(),
            'maxChar': queryresponse["pfin"].max(),
        }
        text, formatting = self.queryHandler.textQuery(fileQueryData, True)
        formatting_json = json.dumps(formatting)
        context[text_col] = text
        context[raw_format_col] = formatting_json
        # NOTE(review): one call site used to pass only the JSON formatting;
        # the other three pass (text, formatting), which is used everywhere now.
        context[formatted_col] = formatContext(text, formatting_json)

    #%% function retrieving associated notes and passages
    def findlinks(self, context):
        """Add the note and/or associated text linked to a single context.

        The "links" value of row 0 selects what is fetched: 1 fetches the
        note, 2 the associated text, 3 both; any other value (including 0)
        leaves `context` untouched.

        Args:
            context: DataFrame whose row 0 provides the columns "links",
                "sigla", "ntx", "pitxt", "piniz" and "pfin".

        Returns:
            The same `context` object, with the link columns added in place
            where applicable.
        """
        linkslocal = context.loc[0, "links"]
        siglalocal = context.loc[0, "sigla"]
        pointers = {
            'ntxlocal': context.loc[0, "ntx"],
            'pinizlocal': context.loc[0, "piniz"],
            'pitxtlocal': context.loc[0, "pitxt"],
            'pfinlocal': context.loc[0, "pfin"],
        }
        for subtype in self._LINKS_BY_CODE.get(linkslocal, ()):
            self._attach_link(context, subtype, siglalocal, pointers)
        # Every path returns the context, including links == 2.
        return context