Browse Source

from many minutes to 20-30 s, still much to do

kora 1 year ago
parent
commit
c059ca014b

BIN
db/ndg2.gat4/corpus.db


+ 2 - 2
flask_be/engine/basic_queries.py

@@ -77,11 +77,11 @@ class basicQueries:
         # Iterate over each row in 'textlist'
         for _, row in textlist.iterrows():
             sigla = row["sigla"]
-            if math.isnan(row["piniz"]):
+            if row["piniz"] is None or math.isnan(row["piniz"]):
                 minChar = int(row["backup_piniz"])
             else:
                 minChar = int(row["piniz"])
-            if math.isnan(row["pfin"]):
+            if row["pfin"] is None or math.isnan(row["pfin"]):
                 maxChar = int(row["backup_pfin"])
             else:
                 maxChar = int(row["pfin"])

+ 59 - 33
flask_be/interface_sqlite3/actual_queries.py

@@ -1,4 +1,6 @@
-def prepareQueryString(queryData):
+import pandas as pd
+
+def prepareQuery(queryData):
 
     type = queryData.get('queryType') # KeyError protected -- returns None if the key is not defined
 
@@ -75,38 +77,7 @@ def prepareQueryString(queryData):
 
     ###################
     elif type=='texts':
-        try:
-            codList = queryData['codList']
-            table = queryData['table']
-            subtype = queryData['querySubtype']
-            formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
-        except KeyError as err:
-            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        # These values can be changed to changa multiple contexts widht. Default value for Gatto is parole=31 #
-        parole = 31
-        periodi = 0
-        #                                                                                                     #
-        strlist = ",".join(str(c) for c in codList)
-        if parole != 0:
-            if subtype==0:
-                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN {table} AS prev_tab ON (tab.ntx = prev_tab.ntx AND tab.mappa = prev_tab.mappa+{int(parole/2)}) LEFT JOIN {table} AS next_tab ON (tab.ntx = next_tab.ntx AND tab.mappa = next_tab.mappa-{int(parole/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.cod IN ({strlist})"
-            elif subtype==1:
-                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN {table} AS prev_tab ON (tab.ntx = prev_tab.ntx AND tab.mappa = prev_tab.mappa+{int(parole/2)}) LEFT JOIN {table} AS next_tab ON (tab.ntx = next_tab.ntx AND tab.mappa = next_tab.mappa-{int(parole/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist})"
-            elif subtype==2:
-                if formCodList is None:
-                    return None
-                strform = ",".join(str(c) for c in formCodList)
-                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN {table} AS prev_tab ON (tab.ntx = prev_tab.ntx AND tab.mappa = prev_tab.mappa+{int(parole/2)}) LEFT JOIN {table} AS next_tab ON (tab.ntx = next_tab.ntx AND tab.mappa = next_tab.mappa-{int(parole/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
-        else:
-            if subtype==0:
-                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_periodi.piniz, next_periodi.pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN periodi AS prev_periodi ON (tab.ntx = prev_periodi.ntx AND tab.numperiod = prev_periodi.numperiod+{int(periodi/2)}) LEFT JOIN periodi AS next_periodi ON (tab.ntx = next_periodi.ntx AND tab.numperiod = next_periodi.numperiod-{int(periodi/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.cod IN ({strlist})"
-            elif subtype==1:
-                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_periodi.piniz, next_periodi.pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN periodi AS prev_periodi ON (tab.ntx = prev_periodi.ntx AND tab.numperiod = prev_periodi.numperiod+{int(periodi/2)}) LEFT JOIN periodi AS next_periodi ON (tab.ntx = next_periodi.ntx AND tab.numperiod = next_periodi.numperiod-{int(periodi/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist})"
-            elif subtype==2:
-                if formCodList is None:
-                    return None
-                strform = ",".join(str(c) for c in formCodList)
-                return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, prev_periodi.piniz, next_periodi.pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod LEFT JOIN periodi AS prev_periodi ON (tab.ntx = prev_periodi.ntx AND tab.numperiod = prev_periodi.numperiod+{int(periodi/2)}) LEFT JOIN periodi AS next_periodi ON (tab.ntx = next_periodi.ntx AND tab.numperiod = next_periodi.numperiod-{int(periodi/2)}) LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod) WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
+        return complexQueryTexts
     
     ######################
     elif type=='bib':
@@ -173,3 +144,58 @@ def prepareQueryString(queryData):
     #####
     else:
         raise ValueError('Unrecognized query type: ' + type)
+
+
+def complexQueryTexts(connection, queryData):
+    """Run the multi-step 'texts' query and return the rows as a pandas DataFrame.
+
+    The selected occurrences are first materialised into the temporary table
+    ``stuff``; the context boundaries (``piniz``/``pfin``) and the fallback
+    boundaries (``backup_piniz``/``backup_pfin``) are then fetched with small
+    follow-up joins against ``stuff`` instead of one large multi-join query.
+
+    Parameters:
+        connection: an open sqlite3 connection to the corpus database.
+        queryData: dict with the required keys 'codList', 'table' and
+            'querySubtype', plus the optional key 'formCodList' (only used
+            by subtype 2).
+
+    Returns:
+        A DataFrame with the occurrence columns plus 'piniz', 'pfin',
+        'backup_piniz' and 'backup_pfin'; None when subtype is 2 and
+        'formCodList' is absent.
+
+    Raises:
+        KeyError: a required key is missing from queryData.
+        ValueError: 'querySubtype' is not 0, 1 or 2.
+    """
+    try:
+        codList = queryData['codList']
+        table = queryData['table']
+        subtype = queryData['querySubtype']
+    except KeyError as err:
+        # The query type is always 'texts' here. The name `type` is the Python
+        # builtin in this scope, so it must not be concatenated into the message.
+        raise KeyError('Missing required data for query type texts: ' + str(err)) from err
+    formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
+
+    # These values can be changed to change the width of the contexts. Default value for Gatto is parole=31 #
+    parole = 31
+    periodi = 0
+    #                                                                                                       #
+
+    # NOTE(review): codes and table name are interpolated into the SQL text as-is;
+    # presumably they are generated/validated upstream -- confirm before exposing
+    # this to untrusted input.
+    strlist = ",".join(str(c) for c in codList)
+
+    # The three subtypes only differ in their WHERE clause
+    if subtype == 0:
+        whereClause = f"tab.cod IN ({strlist})"
+    elif subtype == 1:
+        whereClause = f"tab.indlem IN ({strlist})"
+    elif subtype == 2:
+        if formCodList is None:
+            return None
+        strform = ",".join(str(c) for c in formCodList)
+        whereClause = f"tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
+    else:
+        raise ValueError('Unrecognized query subtype: ' + str(subtype))
+
+    mainQueryString = (
+        "SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, "
+        "intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, "
+        "lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore "
+        f"FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod "
+        f"WHERE {whereClause}"
+    )
+
+    # Every follow-up query keeps 'stuff' on the left side of a LEFT JOIN and is
+    # ordered by stuff.rowid: each one then yields the rows of 'stuff' in the same
+    # order, including rows without a match, so the columns can be attached by
+    # position. (Assumes at most one matching context row per occurrence.)
+    if parole != 0:
+        # Context measured in words: half the window before and after the occurrence
+        halfWidth = parole // 2
+        pinizQuery = f'SELECT ctx.pitxt AS piniz FROM stuff LEFT JOIN {table} AS ctx ON ctx.ntx = stuff.ntx AND ctx.mappa = stuff.mappa - {halfWidth} ORDER BY stuff.rowid'
+        pfinQuery = f'SELECT ctx.pitxt AS pfin FROM stuff LEFT JOIN {table} AS ctx ON ctx.ntx = stuff.ntx AND ctx.mappa = stuff.mappa + {halfWidth} ORDER BY stuff.rowid'
+    else:
+        # Context measured in periods
+        halfWidth = periodi // 2
+        pinizQuery = f'SELECT ctx.piniz AS piniz FROM stuff LEFT JOIN periodi AS ctx ON ctx.ntx = stuff.ntx AND ctx.numperiod = stuff.numperiod - {halfWidth} ORDER BY stuff.rowid'
+        pfinQuery = f'SELECT ctx.pfin AS pfin FROM stuff LEFT JOIN periodi AS ctx ON ctx.ntx = stuff.ntx AND ctx.numperiod = stuff.numperiod + {halfWidth} ORDER BY stuff.rowid'
+    backupQuery = 'SELECT periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff LEFT JOIN periodi ON periodi.ntx = stuff.ntx AND periodi.numperiod = stuff.numperiod ORDER BY stuff.rowid'
+
+    # Start communication with DB
+    cursor = connection.cursor()
+    cursor.execute(f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}')
+    try:
+        results = pd.read_sql('SELECT * FROM stuff ORDER BY rowid', connection)
+        results['piniz'] = pd.read_sql(pinizQuery, connection)['piniz']
+        results['pfin'] = pd.read_sql(pfinQuery, connection)['pfin']
+        results[['backup_piniz', 'backup_pfin']] = pd.read_sql(backupQuery, connection)[['backup_piniz', 'backup_pfin']]
+    finally:
+        # Leave the connection reusable: a second call would otherwise fail
+        # because 'stuff' already exists.
+        cursor.execute('DROP TABLE IF EXISTS temp.stuff')
+
+    return results

+ 26 - 16
flask_be/interface_sqlite3/query_handlers.py

@@ -2,7 +2,7 @@ import sqlite3
 import pandas as pd
 import interface_sqlite3.encdec.de_code as dc
 
-from .actual_queries import prepareQueryString
+from interface_sqlite3.actual_queries import prepareQuery
 
 # First version
 class queryHandlerBasicSqlite:
@@ -28,33 +28,43 @@ class queryHandlerBasicSqlite:
     
     def query(self, queryData, pandas=False, dbFile=None):
 
-        # Formerly the query string was pre-generated outside and
-        # sent here _in lieu_ of the query data
-        # Now the method processes a query data OBJECT and creates the query
+        # PREPARE THE QUERY
+        # Formerly, a query string was pre-generated outside and
+        # sent directly
+        # Now the method processes a query data OBJECT
+        # and creates the query (which may be complex)
         # accordingly
         if self.dbEncoded:
             queryData = self.encodeQuery(queryData)
-        queryString = prepareQueryString(queryData)
+        queryToExecute = prepareQuery(queryData)
 
+        # Get the connection to the DB
         dbFileLocal = dbFile if dbFile is not None else self.dbfileDefault
         if dbFileLocal is None:
             raise Exception("No db file specified with no default given -- can't execute query")
-
+        #
         db = self.dbPath + dbFileLocal
-        
         connection = sqlite3.connect(f"file:{db}?mode=ro", uri=True)
-        # PANDAS?
-        if pandas:
-            results = pd.read_sql(queryString, connection)
-            if(self.dbEncoded):
-                results = self.db_results_decode_pandas(results)
+
+
+        # If the query is a simple string, execute it here:
+        if type(queryToExecute)==str:
+            if pandas:
+                results = pd.read_sql(queryToExecute, connection)
+                if(self.dbEncoded):
+                    results = self.db_results_decode_pandas(results)
+            else:
+                connection.row_factory = dict_factory
+                queryReponse = connection.cursor().execute(queryToExecute)
+                results = queryReponse.fetchall()
+                if(self.dbEncoded):
+                    results = self.db_results_decode(results)
         
         else:
-            connection.row_factory = dict_factory
-            queryReponse = connection.cursor().execute(queryString)
-            results = queryReponse.fetchall()
+            # If not a string, 'queryToExecute' should be a method/function reference
+            results = queryToExecute(connection, queryData)
             if(self.dbEncoded):
-                results = self.db_results_decode(results)
+                results = self.db_results_decode_pandas(results)
 
         connection.close()
 

+ 0 - 1
test_suite/tests_kora_misc/Query_speed/queries.py

@@ -164,7 +164,6 @@ with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
     tmpQuery = theQuerySimp2(','.join(codesStr), ','.join(formCodesStr))
     querr = 'CREATE TEMPORARY TABLE stuff AS ' + tmpQuery
     connection.cursor().execute(querr)
-    "cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo"
     riQuery = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-15'
     bisQuery = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+15'
     trisQuery = f'SELECT * from stuff'