Browse Source

an intermediate-good result to check

Francesco 1 year ago
parent
commit
967f9e4e5a
1 changed files with 18 additions and 25 deletions
  1. 18 25
      test_suite/tests_kora_misc/Query_speed/queries.py

+ 18 - 25
test_suite/tests_kora_misc/Query_speed/queries.py

@@ -121,26 +121,32 @@ def theQuerySimp2(LIST1, LIST2):
     return f'SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM Occ00001 AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({LIST1}) OR (tab.indlem = 0 AND tab.cod IN ({LIST2}))'
 
 '''
+NOTE:
+
 prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin
  LEFT JOIN Occ00001 AS prev_tab ON (tab.ntx = prev_tab.ntx AND tab.mappa = prev_tab.mappa+15)
  LEFT JOIN Occ00001 AS next_tab ON (tab.ntx = next_tab.ntx AND tab.mappa = next_tab.mappa-15)
  LEFT JOIN periodi ON (tab.ntx = periodi.ntx AND tab.numperiod = periodi.numperiod)
 '''
+# %%
+# %%
+timestamp0 = time.time()
+with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
+    tmpQuery = theQuerySimp2(','.join(codesStr), ','.join(formCodesStr))
+    results2Simp2_a = pd.read_sql(tmpQuery, connection)
 
-def theQuerySimpBIS(LISTMAPPA1):
-    return f'SELECT indlem, pitxt, ntx, mappa AS piniz FROM Occ00001 WHERE mappa IN ({LISTMAPPA1})'
-
+print(time.time() - timestamp0)
 # %%
 timestamp0 = time.time()
 with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
     tmpQuery = theQuerySimp2(','.join(codesStr), ','.join(formCodesStr))
-    results2Simp2 = pd.read_sql(tmpQuery, connection)
+    results2Simp2_b = pd.read_sql(tmpQuery, connection)
     piniz = []
     pfin = []
     backup_piniz = []
     backup_pfin = []
     cur = connection.cursor()
-    for index, row in results2Simp2.iterrows():
+    for index, row in results2Simp2_b.iterrows():
         ntx = row['ntx']
         prevMappa = row['mappa'] - 15
         cur.execute(f'SELECT prev_tab.pitxt AS piniz FROM Occ00001 AS prev_tab WHERE prev_tab.ntx = {ntx} AND prev_tab.mappa = {prevMappa}')
@@ -149,30 +155,17 @@ with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
 
 print(time.time() - timestamp0)
 # %%
+############################
+# Test with temporary table!
+############################
+
 timestamp0 = time.time()
 with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
     tmpQuery = theQuerySimp2(','.join(codesStr), ','.join(formCodesStr))
     querr = 'CREATE TEMPORARY TABLE stuff AS ' + tmpQuery
-    connection.execute(querr)
-    riQuery = "SELECT stuff.*, prev_tab.mappa, periodi.pfin FROM stuff LEFT JOIN Occ00001 AS prev_tab ON prev_tab.ntx = stuff.ntx AND prev_tab.mappa = stuff.mappa - 15 LEFT JOIN periodi ON stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod"
-    results2Simp2 = pd.read_sql(tmpQuery, connection)
+    connection.cursor().execute(querr)
+    # Join the temporary table with the neighbouring occurrences (mappa - 15 / mappa + 15) and with periodi.
+    # next_tab must be matched on its own mappa: testing prev_tab.mappa here contradicts the first join and leaves pfin always NULL.
+    riQuery = "SELECT stuff.*, prev_tab.pitxt AS piniz, next_tab.pitxt AS pfin, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff LEFT JOIN Occ00001 AS prev_tab ON prev_tab.ntx = stuff.ntx AND prev_tab.mappa = stuff.mappa - 15 LEFT JOIN Occ00001 AS next_tab ON next_tab.ntx = stuff.ntx AND next_tab.mappa = stuff.mappa + 15 LEFT JOIN periodi ON stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod"
+    # Run (and therefore time) the enriched query, not the base query already materialised into the temp table.
+    results2Simp2_c = pd.read_sql(riQuery, connection)
 
 print(time.time() - timestamp0)
 # %%
-
-########################
-# Test temporary tables!
-########################
-
-def theQueryGNU():
-    return f'SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM Occ00001 AS tab INNER JOIN codes ON codes.cod = tab.indlem INNER JOIN lem ON tab.indlem = lem.cod INNER JOIN intbib ON tab.ntx = intbib.ntx'
-
-# %%
-timestamp0 = time.time()
-with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
-    querr = 'CREATE TEMPORARY TABLE codes AS ' + firstQuery
-    connection.execute(querr)
-    results2Simp2GNU = pd.read_sql(theQueryGNU(), connection)
-
-print(time.time() - timestamp0)
-# %%