Browse Source

Major improvement on "contestimultipli"

Leonardo Canova 1 year ago
parent
commit
33453eb744
2 changed files with 35 additions and 39 deletions
  1. 10 18
      flask_be/engine/basic_queries.py
  2. 25 21
      flask_be/interface_sqlite3/actual_queries.py

+ 10 - 18
flask_be/engine/basic_queries.py

@@ -9,6 +9,7 @@ from .data_interface.data_providers_setup import queryHandlerFactory
 
 import pandas as pd
 import numpy as np
+import math
 
 # Main class for basic queries contains:
 # - a data provider instance
@@ -68,11 +69,6 @@ class basicQueries:
     # %% ha in input findtexts, restituisce i contesti associati agli elementi localizzati.
     # Il range dei contesti è impostato di default a 30 parole e può essere rimodulato nel passaggio al contesto singolo.
     def findcontexts(self, textlist):
-        # Set the number of words to include in contexts
-        # NOTE: this could be changed into number of sentences by taking parts of the code from "singlecontext" function
-        parole = 31
-        # Get the 'listOcc' values
-        listOcc = self.listOcc
         # Initialize lists to store context information
         contexts = []
         formats = []
@@ -80,18 +76,15 @@ class basicQueries:
         maxChar_list = []
         # Iterate over each row in 'textlist'
         for _, row in textlist.iterrows():
-            # Get the 'sigla', 'ntx', and 'mappa' values from the row
             sigla = row["sigla"]
-            ntxlocal = row["ntx"]
-            mappalocal = row["mappa"]
-            # Create a list of query data dictionaries for each table in 'listOcc'
-            queryData_list = [{'table': table, 'queryType': 'contexts', 'ntxlocal': ntxlocal, 'mappalocal': mappalocal, 'parole': parole} for table in listOcc]
-            # Make a query to each table in 'listOcc' and concatenate the results
-            pointerlist = pd.concat([self.queryHandler.query(queryData, pandas=True) for queryData in queryData_list])
-            # Get the minimum and maximum character positions from the 'pointerlist' DataFrame
-            minChar = pointerlist["pitxt"].min()
-            maxChar = pointerlist["pitxt"].max()
-            # Create a dictionary with query data for 'textQuery'
+            if math.isnan(row["piniz"]):
+                minChar = int(row["backup_piniz"])
+            else:
+                minChar = int(row["piniz"])
+            if math.isnan(row["pfin"]):
+                maxChar = int(row["backup_pfin"])
+            else:
+                maxChar = int(row["pfin"])
             fileQueryData = {'sigla': sigla, 'minChar': minChar, 'maxChar': maxChar}
             # Call 'textQuery' to get the context and format information
             cont, form = self.queryHandler.textQuery(fileQueryData, True)
@@ -100,7 +93,6 @@ class basicQueries:
             maxChar_list.append(maxChar)
             contexts.append(cont)
             formats.append(json.dumps(form))
-
         # Add the context information to 'textlist' and reset the index
         textlist['piniz'] = minChar_list
         textlist['pifin'] = maxChar_list
@@ -108,7 +100,7 @@ class basicQueries:
         textlist['format contesto'] = formats
         return textlist.reset_index(drop=True)
 
-    # %% Ha in input findcontexts, associa i riferimenti bibliografici ad ogni contesto.
+    # %% Ha in input findcontexts, associa i riferimenti bibliografici ad ogni contesto dal db BiblioTLIO.
     def findbib(self, contexts):
         infobib = pd.DataFrame()
         rif_org = pd.DataFrame()

+ 25 - 21
flask_be/interface_sqlite3/actual_queries.py

@@ -82,29 +82,33 @@ def prepareQueryString(queryData):
             formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
         except KeyError as err:
             raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        
+        # These values can be changed to adjust multiple contexts width. Default value for Gatto is parole=31 #
+        parole = 31
+        periodi = 0
+        #                                                                                                     #
         strlist = ",".join(str(c) for c in codList)
-        if subtype==0:
-            return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, periodi.piniz, periodi.pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod INNER JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.cod IN ({strlist})"
-        elif subtype==1:
-            return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist})"
-        elif subtype==2:
-            if formCodList is None:
-                return None
-            strform = ",".join(str(c) for c in formCodList)
-            return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
+        if parole != 0:
+            if subtype==0:
+                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN {table} AS prev_tab ON (tab.ntx = prev_tab.ntx AND tab.mappa = prev_tab.mappa+{int(parole/2)}) LEFT JOIN {table} AS next_tab ON (tab.ntx = next_tab.ntx AND tab.mappa = next_tab.mappa-{int(parole/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.cod IN ({strlist})"
+            elif subtype==1:
+                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN {table} AS prev_tab ON (tab.ntx = prev_tab.ntx AND tab.mappa = prev_tab.mappa+{int(parole/2)}) LEFT JOIN {table} AS next_tab ON (tab.ntx = next_tab.ntx AND tab.mappa = next_tab.mappa-{int(parole/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist})"
+            elif subtype==2:
+                if formCodList is None:
+                    return None
+                strform = ",".join(str(c) for c in formCodList)
+                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN {table} AS prev_tab ON (tab.ntx = prev_tab.ntx AND tab.mappa = prev_tab.mappa+{int(parole/2)}) LEFT JOIN {table} AS next_tab ON (tab.ntx = next_tab.ntx AND tab.mappa = next_tab.mappa-{int(parole/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
+        else:
+            if subtype==0:
+                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_periodi.piniz, next_periodi.pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN periodi AS prev_periodi ON (tab.ntx = prev_periodi.ntx AND tab.numperiod = prev_periodi.numperiod+{int(periodi/2)}) LEFT JOIN periodi AS next_periodi ON (tab.ntx = next_periodi.ntx AND tab.numperiod = next_periodi.numperiod-{int(periodi/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.cod IN ({strlist})"
+            elif subtype==1:
+                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_periodi.piniz, next_periodi.pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN periodi AS prev_periodi ON (tab.ntx = prev_periodi.ntx AND tab.numperiod = prev_periodi.numperiod+{int(periodi/2)}) LEFT JOIN periodi AS next_periodi ON (tab.ntx = next_periodi.ntx AND tab.numperiod = next_periodi.numperiod-{int(periodi/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist})"
+            elif subtype==2:
+                if formCodList is None:
+                    return None
+                strform = ",".join(str(c) for c in formCodList)
+                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_periodi.piniz, next_periodi.pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN periodi AS prev_periodi ON (tab.ntx = prev_periodi.ntx AND tab.numperiod = prev_periodi.numperiod+{int(periodi/2)}) LEFT JOIN periodi AS next_periodi ON (tab.ntx = next_periodi.ntx AND tab.numperiod = next_periodi.numperiod-{int(periodi/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
+    
     ######################
-    elif type=='contexts':
-        try:
-            table = queryData['table']
-            ntxlocal = queryData['ntxlocal']
-            mappalocal = queryData['mappalocal']
-            parole = queryData['parole']
-        except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        return f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
-
-    #################
     elif type=='bib':
         try:
             row = queryData['row']