Bladeren bron

Speeding up BIB too

kora 1 jaar geleden
bovenliggende
commit
01e2e148a8

BIN
test_suite/tests_kora_misc/Query_speed/bibliografia/BiblioTLIO.db


+ 121 - 24
test_suite/tests_kora_misc/Query_speed/queries_2.py

@@ -129,7 +129,7 @@ with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
     ind += 1
     oldTable = resTable
     resTable = 'tempOcc_' + str(ind)
-    connection.cursor().execute(f'CREATE TEMPORARY TABLE {resTable} AS SELECT tabA.ntx as ntx, tabA.mappa as mappa, tabB.ntx as ntx2, tabB.mappa as mappa2 FROM {oldTable} AS tabA, tempOcc AS tabB WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-10 AND tabB.mappa+10 AND tabA.mappa != tabB.mappa')
+    connection.cursor().execute(f'CREATE TEMPORARY TABLE {resTable} AS SELECT tabA.ntx as ntx, tabA.mappa as mappa, tabB.ntx as ntx2, tabB.mappa as mappa2, tabA.sigla, tabA.numorg FROM {oldTable} AS tabA, tempOcc AS tabB WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-10 AND tabB.mappa+10 AND tabA.mappa != tabB.mappa')
     connection.cursor().execute(f'CREATE INDEX aa_{ind} ON {resTable} (ntx, mappa)')
     connection.cursor().execute(f'DROP TABLE {oldTable}')
 
@@ -139,10 +139,6 @@ with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
 timestamp1 = time.time()
 print(timestamp1 - timestamp0)
 # %%
-res
-# %%
-len(res)
-# %%
 timestamp0 = time.time()
 intervallo = 10
 ordinate = 0
@@ -160,20 +156,7 @@ with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
 timestamp1 = time.time()
 print(timestamp1 - timestamp0)
 # %%
-print(len(listatesti), len(textlist))
-# %%
-cod="1"
-listatesti = listatesti.merge(textlist, on='ntx', suffixes=('', f'_{cod}'))
-# %%
-cond1 = listatesti['numperiod'] == listatesti[f'numperiod_{cod}'] if periodo == 1 else True
-cond2 = ((listatesti['mappa'] - listatesti[f'mappa_{cod}']) != 0) & ((listatesti['mappa'] - listatesti[f'mappa_{cod}']).abs() <= intervallo) if ordinate == 0 else ((listatesti[f'mappa_{cod}'] - listatesti['mappa']) > 0) & ((listatesti[f'mappa_{cod}'] - listatesti['mappa']).abs() <= intervallo)
-# %%
-listatesti = listatesti[cond1 & cond2]
-# %%
-# %%
 # Better decoding?
-vettSpec
-# %%
 print('The first one:', chr(int(vettSpec[0]['unicode'], 16)) )
 print('... is a newline')
 
@@ -186,7 +169,6 @@ for index, entry in enumerate(vettSpec):
             break
 
 print('Total:', len(vettSpec))
-
 # %%
 timestamp0 = time.time()
 vettDictDec = {}
@@ -228,11 +210,6 @@ def db_results_decodeB(result):
             row[key] = db_decodeB(row[key])
     return result
 # %%
-lemsNoPandas0[0:1]
-db_results_decodeB(lemsNoPandas0[0:1])
-lemsNoPandas0[0:1]
-# %%
-# %%
 timestamp0 = time.time()
 str0 = 'c%'
 strEnc0 = db_encode(vettSpec, str0)
@@ -250,3 +227,123 @@ timestamp1 = time.time()
 print(timestamp1 - timestamp0)
 # IT WORKZ!
 # %%
+# Finally: the bibliographic ("bib") lookup.
+# Build the SQL strings that fetch datibib and org records for the rows in res.
+def reducedQueryString(queryData):
+
+    type = queryData['queryType']
+
+    if type=='bib':
+        try:
+            row = queryData['row']
+            sigla = row['sigla']
+        except KeyError as err:
+            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+        return f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{sigla}'"
+    
+    #################
+    elif type=='bibAlt':
+        try:
+            siglaSet = queryData['siglaSet']
+        except KeyError as err:
+            raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
+        siglaStr = "'" + "','".join(siglaSet) + "'"
+        return f"SELECT Sigla, [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla IN ({siglaStr})"
+    
+    #################
+    elif type=='rif':
+        try:
+            row = queryData['row']
+            numorg = row['numorg']
+            ntx = row['ntx']
+        except:
+            return None
+        return f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')"
+    
+    return ""
+
+# %%
+timestamp0 = time.time()
+
+infobib = pd.DataFrame()
+rif_org = pd.DataFrame()
+for ind, row in res.iterrows():
+    queryData = {'queryType': 'bib', 'row': row}
+    queryStringBib = reducedQueryString(queryData)
+    dbFileBib='bibliografia/BiblioTLIO.db'
+    with sqlite3.connect(f"file:{dbFileBib}?mode=ro", uri=True) as connection:
+        bib = pd.read_sql(queryStringBib, connection)
+    infobib = pd.concat([infobib, bib])
+
+    queryData = {'queryType': 'rif', 'row': row}
+    queryStringRif = reducedQueryString(queryData)
+    with sqlite3.connect(f"file:{dbFile}?mode=ro", uri=True) as connection:
+        rif = pd.read_sql(queryStringRif, connection)
+    rif_org = pd.concat([rif_org, rif])
+
+
+
+timestamp1 = time.time()
+print(timestamp1 - timestamp0)
+# %%
+timestamp0 = time.time()
+
+infobib = pd.DataFrame()
+siglaList = list(res['sigla'])
+siglaSet = set(siglaList)
+queryData = {'queryType': 'bibAlt', 'siglaSet': siglaList}
+queryStringBib = reducedQueryString(queryData)
+dbFileBib='bibliografia/BiblioTLIO.db'
+with sqlite3.connect(f"file:{dbFileBib}?mode=ro", uri=True) as connection:    
+    bib = pd.read_sql(queryStringBib, connection, index_col='Sigla')
+
+timestamp1 = time.time()
+# %%
+
+
+annoiniz = [bib.loc[sigla, 'Anno iniziale'] for sigla in siglaList]
+annofin = [bib.loc[sigla, 'Anno finale'] for sigla in siglaList]
+datacod = [bib.loc[sigla, 'Data codificata'] for sigla in siglaList]
+datadesc = [bib.loc[sigla, 'Data descrittiva'] for sigla in siglaList]
+titoloabb = [bib.loc[sigla, 'Titolo Abbreviato'] for sigla in siglaList]
+autore = [bib.loc[sigla, 'Autore'] for sigla in siglaList]
+titolo = [bib.loc[sigla, 'Titolo'] for sigla in siglaList]
+curatore = [bib.loc[sigla, 'Curatore'] for sigla in siglaList]
+areagen = [bib.loc[sigla, 'Area generica'] for sigla in siglaList]
+areaspec = [bib.loc[sigla, 'Area specifica'] for sigla in siglaList]
+genere = [bib.loc[sigla, 'Genere'] for sigla in siglaList]
+forma = [bib.loc[sigla, 'Forma'] for sigla in siglaList]
+tipo = [bib.loc[sigla, 'Tipo'] for sigla in siglaList]
+iq = [bib.loc[sigla, 'IQ'] for sigla in siglaList]
+
+timestamp2 = time.time()
+print(timestamp1 - timestamp0)
+print(timestamp2 - timestamp0)
+# %%
+timestamp0 = time.time()
+
+aglia = {sigla: bib.loc[sigla].to_dict() for sigla in siglaSet}
+
+out = [aglia[sigla] for sigla in siglaList]
+
+annoiniz = [el['Anno iniziale'] for el in out]
+annofin = [el['Anno finale'] for el in out]
+datacod = [el['Data codificata'] for el in out]
+datadesc = [el['Data descrittiva'] for el in out]
+titoloabb = [el['Titolo Abbreviato'] for el in out]
+autore = [el['Autore'] for el in out]
+titolo = [el['Titolo'] for el in out]
+curatore = [el['Curatore'] for el in out]
+areagen = [el['Area generica'] for el in out]
+areaspec = [el['Area specifica'] for el in out]
+genere = [el['Genere'] for el in out]
+forma = [el['Forma'] for el in out]
+tipo = [el['Tipo'] for el in out]
+iq = [el['IQ'] for el in out]
+
+
+timestamp1 = time.time()
+print(timestamp1 - timestamp0)
+# %%
+[el['Anno iniziale'] for el in out]
+# %%