Procházet zdrojové kódy

Polars introduced. Many tests needed.

pinna před 6 měsíci
rodič
revize
df8ea82ba4

+ 19 - 24
flask_be/engine/basic_queries.py

@@ -1,14 +1,10 @@
 #%%
 import json
+import polars as pl
 
 from .utilities.parsing_utilities import interpreter, inizialeraddoppiata, list_normalize
-
-# Basic data provider class; can be instantiated to handle different kinds
-# of data-providing connections or interfaces based on config options
 from .data_interface.data_providers_setup import queryHandlerFactory
 
-import pandas as pd
-import math
 
 # Main class for basic queries contains:
 # - a data provider instance
@@ -20,49 +16,48 @@ class basicQueries:
         self.listOcc = dataConfig.get('listOcc')
 
     # Prepares and sends query OBJECTS which will be processed by the data provider
-    def sendBasicQuery(self, text, queryType, espansa, raddoppiata, pandas=False, dbFile=None):
-        
+    def sendBasicQuery(self, text, queryType, espansa, raddoppiata, polars=False, dbFile=None):
+
         entries = interpreter(text)
-        
+
         data = entries
         dataNorm = []
-        if raddoppiata==1:
+        if raddoppiata == 1:
             data = entries + inizialeraddoppiata(entries)
-        if espansa==1 and raddoppiata==0:
+        if espansa == 1 and raddoppiata == 0:
             dataNorm = list_normalize(entries)
-        elif espansa==1 and raddoppiata==1:
+        elif espansa == 1 and raddoppiata == 1:
             dataNorm = entries + list_normalize(inizialeraddoppiata(entries))
 
-        return self.queryHandler.query({'data': data, 'dataNorm': dataNorm, 'queryType': queryType}, pandas, dbFile)
-    
-    #%% ha in input le funzioni di ricerca, cerca nell'occorrenziario i puntatori ai contesti e altri elementi ad essi associati. 
-    #l'attributo type definisce il tipo di ricerca in input (0 per forme, 1 per lemmi, 2 per lemmi con opzione "mostra occorrenze non lemmatizzate")
+        return self.queryHandler.query({'data': data, 'dataNorm': dataNorm, 'queryType': queryType}, polars, dbFile)
+
+    # Finds texts based on search functions, looks in the occurrence for pointers to contexts and other elements associated with them.
     def findtexts(self, type, df, index=None):
         # Check if the input is a DataFrame, and convert it if it is not
-        if not isinstance(df, pd.DataFrame):
-            df = pd.DataFrame(df)
+        if not isinstance(df, pl.DataFrame):
+            df = pl.DataFrame(df)
         # If an index is provided, select the rows that correspond to the index
         if index is not None:
             if isinstance(index, range):
                 index = list(index)
             elif not isinstance(index, list):
                 index = [index]
-            df = df.loc[index]
+            df = df.filter(pl.col('index').is_in(index))
         # Create a list of 'cod' values from the input DataFrame
-        codList = [row['cod'] for _, row in df.iterrows()]
+        codList = df.select('cod').to_list()
         # Get the 'listOcc' values
         listOcc = self.listOcc
         # Create a dictionary with query information
         queryData = {'queryType': 'texts', 'querySubtype': type, 'codList': codList}
         # If 'type' is 2 (option "non lemmatizzate"), make an additional query to get form codes
-        # NOTE: A this stage of development 'type 2' is default for "lemma" searches (this could be changed).
+        # NOTE: At this stage of development 'type 2' is default for "lemma" searches (this could be changed).
         if type == 2:
             subQueryData = {'queryType': 'pfl', 'codList': codList}
-            subdf = self.queryHandler.query(subQueryData, pandas=True)
-            queryData['formCodList'] = list(subdf['codForma'])
+            subdf = self.queryHandler.query(subQueryData, polars=True)
+            queryData['formCodList'] = subdf.select('codForma').to_list()
         # Make a query to each table in 'listOcc' and concatenate the results
-        queryResponses = [self.queryHandler.query(dict(queryData, table=table), pandas=True) for table in listOcc]
-        textlist = pd.concat(queryResponses)
+        queryResponses = [self.queryHandler.query(dict(queryData, table=table), polars=True) for table in listOcc]
+        textlist = pl.concat(queryResponses)
         return textlist
 
     # %% ha in input findtexts, restituisce i contesti associati agli elementi localizzati.

+ 106 - 88
flask_be/interface_sqlite3/actual_queries.py

@@ -1,20 +1,21 @@
-import pandas as pd
+import polars as pl
+
 
 def prepareQuery(queryData):
 
-    type = queryData.get('queryType') # KeyError protected -- returns None if the key is not defined
+    local_type = queryData.get('queryType')  # KeyError protected -- returns None if the key is not defined
 
     #################
-    if type=='occ_tables':
+    if local_type=='occ_tables':
         return "SELECT name FROM sqlite_master WHERE type='table'"
 
     #################
-    if type=='forma':
+    if local_type=='forma':
         try:
             data = queryData['data']
             dataNorm = queryData['dataNorm']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
         joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {joinedQueryData} ORDER BY idfor"
@@ -23,12 +24,12 @@ def prepareQuery(queryData):
             return f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idfor"
 
     ###################
-    elif type=='lemma':
+    elif local_type=='lemma':
         try:
             data = queryData['data']
             dataNorm = queryData['dataNorm']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
         joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {joinedQueryData} ORDER BY idlem"
@@ -37,12 +38,12 @@ def prepareQuery(queryData):
             return f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idlem"
 
     ########################
-    elif type=='lemmaForma':
+    elif local_type=='lemmaForma':
         try:
             data = queryData['data']
             dataNorm = queryData['dataNorm']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
         joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {joinedQueryData} ORDER BY lem.idlem"
@@ -52,12 +53,12 @@ def prepareQuery(queryData):
             return f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {joinedQueryData}) OR (lem.norm LIKE {joinedQueryDataNorm}) ORDER BY lem.idlem"
 
     ########################
-    elif type=='formaLemma':
+    elif local_type=='formaLemma':
         try:
             data = queryData['data']
             dataNorm = queryData['dataNorm']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
         joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {joinedQueryData} ORDER BY form.idfor"
@@ -66,43 +67,43 @@ def prepareQuery(queryData):
             return f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {joinedQueryData}) OR (form.norm LIKE {joinedQueryDataNorm}) ORDER BY form.idfor"
 
     #################
-    elif type=='pfl':
+    elif local_type=='pfl':
         try:
             codList = queryData['codList']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
 
         strlist = ",".join(str(c) for c in codList)
         return f"SELECT DISTINCT lemma as codLemma, forma as codForma FROM pfl WHERE lemma IN ({strlist})"
 
     ###################
-    elif type=='texts':
+    elif local_type=='texts':
         return complexQueryTexts
     
     ###################
-    elif type=='co-occurrences':
+    elif local_type=='co-occurrences':
         return complexQueryCooccurrences
 
     ######################
-    elif type=='bib':
+    elif local_type=='bib':
         try:
             row = queryData['row']
             sigla = row['sigla']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
         return f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{sigla}'"
     
     #################
-    elif type=='bibAlt':
+    elif local_type=='bibAlt':
         try:
             siglaSet = queryData['siglaSet']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
         siglaStr = "'" + "','".join(siglaSet) + "'"
         return f"SELECT Sigla, [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla IN ({siglaStr})"
 
     #################
-    elif type=='rif':
+    elif local_type=='rif':
         try:
             row = queryData['row']
             numorg = row['numorg']
@@ -112,11 +113,11 @@ def prepareQuery(queryData):
         return f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')"
     
     #################
-    elif type=='rifAlt':
+    elif local_type=='rifAlt':
         try:
             coordsSet = queryData['coordsSet']
         except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+            raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
         
         subQueries = []
         for coords in coordsSet:
@@ -124,13 +125,13 @@ def prepareQuery(queryData):
                 numorg = coords[0]
                 ntx = coords[1]
             except IndexError as err:
-                raise KeyError('Incomplete required data for query type ' + type + ': ' + str(err))
+                raise KeyError('Incomplete required data for query type ' + local_type + ': ' + str(err))
             subQueries.append( f"SELECT indice AS numorg, ntx, head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')" )
                 
         return ' UNION ALL '.join(subQueries)
 
     #################
-    elif type=='highlight':
+    elif local_type=='highlight':
         try:
             row = queryData['row']
             col = queryData['col']
@@ -139,7 +140,7 @@ def prepareQuery(queryData):
         return f"SELECT spec as highlight FROM form WHERE cod={row[col]}"
     
     #################
-    elif type =='singlecontext':
+    elif local_type =='singlecontext':
         try:
             subtype = queryData['querySubtype']
             table = queryData['table']
@@ -159,7 +160,7 @@ def prepareQuery(queryData):
         elif subtype == 'brani':
             return f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
     #################
-    elif type =='links':
+    elif local_type =='links':
         try:
             subtype = queryData['querySubtype']
             ntxlocal = queryData['ntxlocal']
@@ -174,7 +175,7 @@ def prepareQuery(queryData):
             return f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
     #####
     else:
-        raise ValueError('Unrecognized query type: ' + type)
+        raise ValueError('Unrecognized query type: ' + local_type)
 
 
 def complexQueryTexts(connection, queryData):
@@ -182,19 +183,27 @@ def complexQueryTexts(connection, queryData):
         codList = queryData['codList']
         table = queryData['table']
         subtype = queryData['querySubtype']
-        formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
+        formCodList = queryData.get('formCodList')  # KeyError-safe (None if absent)
     except KeyError as err:
-        raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-    
+        raise KeyError(f'Missing required data for query type {queryData.get('queryType')} : {str(err)}')
+
     strCodList = ",".join(str(c) for c in codList)
 
     # Main query, verified to be fast!
-    mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod"
-    if subtype==0:
+    mainQueryString = f"""
+    SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, 
+           intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, 
+           tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore 
+    FROM {table} AS tab 
+    INNER JOIN intbib ON tab.ntx = intbib.ntx 
+    INNER JOIN lem ON tab.indlem = lem.cod
+    """
+
+    if subtype == 0:
         condition = f"WHERE tab.cod IN ({strCodList})"
-    elif subtype==1:
+    elif subtype == 1:
         condition = f"WHERE tab.indlem IN ({strCodList})"
-    elif subtype==2:
+    elif subtype == 2:
         if formCodList is None:
             return None
         strFormCodList = ",".join(str(c) for c in formCodList)
@@ -202,68 +211,71 @@ def complexQueryTexts(connection, queryData):
 
     mainQueryString = f'{mainQueryString} {condition}'
 
-    # This value can be changed to change multiple contexts width. Default value for Gatto is parole=31 #
+    # This value can be changed to change multiple contexts' width. Default value for Gatto is parole=31 #
     parole = 31
     # C'è la possibilità di scegliere periodi invece che parole, ma per il momento è disabilitata
-    createTempTable = f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}' 
-    mainQuery = f'SELECT * from stuff'
-    addQuery1 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole/2)}'
+    createTempTable = f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}'
+    mainQuery = 'SELECT * from stuff'
+    addQuery1 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole / 2)}'
     addQuery2 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
-    addQuery3 = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
-
+    addQuery3 = 'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
 
     # Start communication with DB
-    connection.cursor().execute(createTempTable)
+    cursor = connection.cursor()
+    cursor.execute(createTempTable)
 
-    results = pd.read_sql(mainQuery, connection)
-    results_add1 = pd.read_sql(addQuery1, connection)
-    results_add2 = pd.read_sql(addQuery2, connection)
-    results_add3 = pd.read_sql(addQuery3, connection)
-    results['piniz'] = results_add1['piniz']
-    results['pfin'] = results_add2['pfin']
-    results[['backup_piniz', 'backup_pfin']] = results_add3[['backup_piniz', 'backup_pfin']]
+    results = pl.read_sql(mainQuery, connection)
+    results_add1 = pl.read_sql(addQuery1, connection)
+    results_add2 = pl.read_sql(addQuery2, connection)
+    results_add3 = pl.read_sql(addQuery3, connection)
+
+    results = results.with_columns([
+        results_add1['piniz'],
+        results_add2['pfin'],
+        results_add3['backup_piniz'],
+        results_add3['backup_pfin']
+    ])
 
     return results
 
 
 def complexQueryCooccurrences(connection, queryData):
     try:
-        # the get method for dicts is KeyError-safe (returns None if key is absent)
         occurrences = queryData['occurrences']
         table = queryData['table']
         intervallo = queryData['intervallo']
-        periodo = queryData.get('periodo') # Unused for the moment
-        ordinate = queryData.get('ordinate') # Unused for the moment
-        if periodo is None:
-            periodo = 0
-        if ordinate is None:
-            ordinate = 0
+        periodo = queryData.get('periodo', 0) # Unused for the moment
+        ordinate = queryData.get('ordinate', 0) # Unused for the moment
     except KeyError as err:
         raise KeyError('Missing required data for query: ' + str(err))
 
+    preMainQueryString = f"""
+    SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, 
+           intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, 
+           tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore 
+    FROM {table} AS tab 
+    INNER JOIN intbib ON tab.ntx = intbib.ntx 
+    INNER JOIN lem ON tab.indlem = lem.cod
+    """
 
-    # Main part of main query -- verified to be fast!
-    preMainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod"
-
-    # Main loop on the different occurrences searched by user
     pitxtList = ['pitxt']
     elemlenList = ['elemlen']
-    for index, occ in enumerate(occurrences):
 
+    for index, occ in enumerate(occurrences):
         try:
             subtype = occ['querySubtype']
             codList = occ['codList']
-            formCodList = occ.get('formCodList') 
+            formCodList = occ.get('formCodList')
         except KeyError as err:
             raise KeyError('Missing required data for query: ' + str(err))
-        
+
         strCodList = ",".join(str(c) for c in codList)
 
-        if subtype==0:
+        if subtype == 0:
             condition = f" WHERE tab.cod IN ({strCodList})"
-        elif subtype==1:
+        elif subtype == 1:
             condition = f" WHERE tab.indlem IN ({strCodList})"
-        elif subtype==2:
+        elif subtype == 2:
             if formCodList is None:
                 return None
             strFormCodList = ",".join(str(c) for c in formCodList)
@@ -271,42 +283,48 @@ def complexQueryCooccurrences(connection, queryData):
 
         mainQueryString = f'{preMainQueryString} {condition}'
 
-        # First occurrence:
-        if index==0:
-            # Create a temporary table for results
+        if index == 0:
             resTable = 'tempOcc_' + str(index)
-            connection.cursor().execute(f'CREATE TEMPORARY TABLE {resTable} AS {mainQueryString}')
-            connection.cursor().execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
-            continue
-        
+            cursor = connection.cursor()
+            cursor.execute(f'CREATE TEMPORARY TABLE {resTable} AS {mainQueryString}')
+            cursor.execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
         else:
-            # update results
-            connection.cursor().execute(f'CREATE TEMPORARY TABLE tempOccB AS {mainQueryString}')
-            connection.cursor().execute(f'CREATE INDEX bb ON tempOccB (ntx, mappa)')
+            cursor.execute(f'CREATE TEMPORARY TABLE tempOccB AS {mainQueryString}')
+            cursor.execute(f'CREATE INDEX bb ON tempOccB (ntx, mappa)')
 
             oldTable = resTable
             resTable = 'tempOcc_' + str(index)
-            connection.cursor().execute(f'CREATE TEMPORARY TABLE {resTable} AS SELECT tabA.cod, tabA.ntx, tabA.{" tabA.".join(pitxtList)}, tabA.{" tabA.".join(elemlenList)}, tabA.mappa, tabA.numperiod, tabA.links, tabA.numorg, tabA.sigla, tabA.vol, tabA.pag, tabA.riga, tabA.col, tabA.tipostanza, tabA.stanza, tabA.verso, tabA.numbrano, tabA.lemma, tabA.cat_gr, tabA.disambiguatore, tabB.ntx AS ntx2, tabB.mappa AS mappa2, tabB.pitxt as pitxt_{index}, tabB.elemlen as elemlen_{index} FROM {oldTable} AS tabA, tempOccB AS tabB WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-{intervallo} AND tabB.mappa+{intervallo} AND tabA.mappa != tabB.mappa')
-            connection.cursor().execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
-            connection.cursor().execute(f'DROP TABLE {oldTable}')
+            cursor.execute(f"""
+            CREATE TEMPORARY TABLE {resTable} AS 
+            SELECT tabA.cod, tabA.ntx, tabA.{" tabA.".join(pitxtList)}, tabA.{" tabA.".join(elemlenList)}, 
+                   tabA.mappa, tabA.numperiod, tabA.links, tabA.numorg, tabA.sigla, tabA.vol, tabA.pag, 
+                   tabA.riga, tabA.col, tabA.tipostanza, tabA.stanza, tabA.verso, tabA.numbrano, 
+                   tabA.lemma, tabA.cat_gr, tabA.disambiguatore, tabB.ntx AS ntx2, tabB.mappa AS mappa2, 
+                   tabB.pitxt as pitxt_{index}, tabB.elemlen as elemlen_{index} 
+            FROM {oldTable} AS tabA, tempOccB AS tabB 
+            WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-{intervallo} AND tabB.mappa+{intervallo} AND tabA.mappa != tabB.mappa
+            """)
+            cursor.execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
+            cursor.execute(f'DROP TABLE {oldTable}')
             pitxtList.append(f'pitxt_{index}')
             elemlenList.append(f'elemlen_{index}')
 
+    results = pl.read_sql(f'SELECT * FROM {resTable}', connection)
 
-    results = pd.read_sql(f'SELECT * FROM {resTable}', connection)
-
-    # This value can be changed to change multiple contexts width. Default value for Gatto is parole=31
     parole = 31
-    # C'è la possibilità di scegliere periodi invece che parole, ma per il momento è disabilitata
     queryPiniz = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole/2)}'
     queryPfin = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
     queryPeriodi = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {resTable} AS stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
 
-    resultsPiniz = pd.read_sql(queryPiniz, connection)
-    resultsPfin = pd.read_sql(queryPfin, connection)
-    resultsPeriodi = pd.read_sql(queryPeriodi, connection)
-    results['piniz'] = resultsPiniz['piniz']
-    results['pfin'] = resultsPfin['pfin']
-    results[['backup_piniz', 'backup_pfin']] = resultsPeriodi[['backup_piniz', 'backup_pfin']]
+    resultsPiniz = pl.read_sql(queryPiniz, connection)
+    resultsPfin = pl.read_sql(queryPfin, connection)
+    resultsPeriodi = pl.read_sql(queryPeriodi, connection)
+
+    results = results.with_columns([
+        resultsPiniz['piniz'],
+        resultsPfin['pfin'],
+        resultsPeriodi['backup_piniz'],
+        resultsPeriodi['backup_pfin']
+    ])
 
     return results

+ 4 - 0
flask_be/requirements.txt

@@ -0,0 +1,4 @@
+Flask==3.0.3
+flask_cors==4.0.1
+pandas==2.2.2
+polars==0.20.26