|
@@ -79,6 +79,10 @@ def prepareQuery(queryData):
|
|
|
elif type=='texts':
|
|
|
return complexQueryTexts
|
|
|
|
|
|
+
|
|
|
+ elif type=='co-occurrences':
|
|
|
+ return complexQueryCooccurrences
|
|
|
+
|
|
|
|
|
|
elif type=='bib':
|
|
|
try:
|
|
@@ -154,42 +158,36 @@ def complexQueryTexts(connection, queryData):
|
|
|
formCodList = queryData.get('formCodList')
|
|
|
except KeyError as err:
|
|
|
raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
-
|
|
|
+
|
|
|
+ strCodList = ",".join(str(c) for c in codList)
|
|
|
+
|
|
|
+
|
|
|
+ mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod"
|
|
|
+ if subtype==0:
|
|
|
+ condition = f"WHERE tab.cod IN ({strCodList})"
|
|
|
+ elif subtype==1:
|
|
|
+ condition = f"WHERE tab.indlem IN ({strCodList})"
|
|
|
+ elif subtype==2:
|
|
|
+ if formCodList is None:
|
|
|
+ return None
|
|
|
+ strFormCodList = ",".join(str(c) for c in formCodList)
|
|
|
+ condition = f" WHERE tab.indlem IN ({strCodList}) OR (tab.indlem = 0 AND tab.cod IN ({strFormCodList}))"
|
|
|
+
|
|
|
+ mainQueryString = f'{mainQueryString} {condition}'
|
|
|
+
|
|
|
+
|
|
|
parole = 31
|
|
|
- periodi = 0
|
|
|
-
|
|
|
- strlist = ",".join(str(c) for c in codList)
|
|
|
+
|
|
|
+ createTempTable = f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}'
|
|
|
+ mainQuery = f'SELECT * from stuff'
|
|
|
+ addQuery1 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole/2)}'
|
|
|
+ addQuery2 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
|
|
|
+ addQuery3 = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
|
|
|
|
|
|
- mainQueryString = ""
|
|
|
- if parole != 0:
|
|
|
- if subtype==0:
|
|
|
- mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.cod IN ({strlist})"
|
|
|
- elif subtype==1:
|
|
|
- mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist})"
|
|
|
- elif subtype==2:
|
|
|
- if formCodList is None:
|
|
|
- return None
|
|
|
- strform = ",".join(str(c) for c in formCodList)
|
|
|
- mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
|
|
|
- else:
|
|
|
- if subtype==0:
|
|
|
- mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.cod IN ({strlist})"
|
|
|
- elif subtype==1:
|
|
|
- mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist})"
|
|
|
- elif subtype==2:
|
|
|
- if formCodList is None:
|
|
|
- return None
|
|
|
- strform = ",".join(str(c) for c in formCodList)
|
|
|
- mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
|
|
|
|
|
|
|
|
|
- createTempTable = f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}'
|
|
|
connection.cursor().execute(createTempTable)
|
|
|
|
|
|
- mainQuery = f'SELECT * from stuff'
|
|
|
- addQuery1 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-15'
|
|
|
- addQuery2 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+15'
|
|
|
- addQuery3 = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
|
|
|
results = pd.read_sql(mainQuery, connection)
|
|
|
results_add1 = pd.read_sql(addQuery1, connection)
|
|
|
results_add2 = pd.read_sql(addQuery2, connection)
|
|
@@ -199,3 +197,89 @@ def complexQueryTexts(connection, queryData):
|
|
|
results[['backup_piniz', 'backup_pfin']] = results_add3[['backup_piniz', 'backup_pfin']]
|
|
|
|
|
|
return results
|
|
|
+
|
|
|
+
|
|
|
def _cooccurrenceCondition(occ):
    """Build the SQL WHERE clause selecting the tokens of one occurrence spec.

    Args:
        occ: mapping with the required keys 'querySubtype' and 'codList' and
            the optional key 'formCodList'.

    Returns:
        The WHERE clause (with a leading space), or None when the subtype is 2
        and 'formCodList' is missing.

    Raises:
        KeyError: if 'querySubtype' or 'codList' is missing.
        ValueError: if 'querySubtype' is not 0, 1 or 2.
    """
    try:
        subtype = occ['querySubtype']
        codList = occ['codList']
    except KeyError as err:
        raise KeyError('Missing required data for query: ' + str(err)) from err
    formCodList = occ.get('formCodList')

    strCodList = ",".join(str(c) for c in codList)
    if subtype == 0:
        # codList is matched against tab.cod
        return f" WHERE tab.cod IN ({strCodList})"
    if subtype == 1:
        # codList is matched against tab.indlem
        return f" WHERE tab.indlem IN ({strCodList})"
    if subtype == 2:
        # codList against tab.indlem, plus rows with indlem = 0 whose cod is in formCodList
        if formCodList is None:
            return None
        strFormCodList = ",".join(str(c) for c in formCodList)
        return f" WHERE tab.indlem IN ({strCodList}) OR (tab.indlem = 0 AND tab.cod IN ({strFormCodList}))"
    # Without this, an unknown subtype would silently reuse a stale condition.
    raise ValueError(f'Unsupported querySubtype: {subtype!r}')


def complexQueryCooccurrences(connection, queryData):
    """Find tokens co-occurring within a window of positions in the same text.

    The first entry of queryData['occurrences'] selects the base rows; every
    following entry is joined against them on the same `ntx`, keeping base rows
    whose `mappa` lies within +/- `intervallo` of a matching row (and differs
    from it). For the i-th additional occurrence the columns `pitxt_i` and
    `elemlen_i` are appended; `ntx2` / `mappa2` refer to the last occurrence.

    Args:
        connection: DB-API connection, also passed to `pd.read_sql`.
        queryData: mapping with required keys 'occurrences' (list of mappings,
            see `_cooccurrenceCondition`), 'table' (name of the occurrences
            table) and 'intervallo' (window half-width). The optional keys
            'periodo' and 'ordinate' are currently not used.

    Returns:
        A DataFrame with the joined rows plus `piniz`, `pfin`, `backup_piniz`
        and `backup_pfin`, or None if a subtype-2 occurrence has no
        'formCodList'.

    Raises:
        KeyError: if a required key is missing.
        ValueError: if 'occurrences' is empty or a subtype is unsupported.
    """
    try:
        occurrences = queryData['occurrences']
        table = queryData['table']
        intervallo = queryData['intervallo']
    except KeyError as err:
        raise KeyError('Missing required data for query: ' + str(err)) from err

    if not occurrences:
        raise ValueError('At least one occurrence is required for a co-occurrence query')

    # Validate every occurrence before touching the database, so that an early
    # exit never leaves half-built temporary tables behind.
    conditions = []
    for occ in occurrences:
        condition = _cooccurrenceCondition(occ)
        if condition is None:
            return None
        conditions.append(condition)

    def execute(sql):
        connection.cursor().execute(sql)

    # NOTE(review): table names and values are interpolated directly into the
    # SQL text; queryData must come from a trusted/validated source.
    preMainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod"

    # Columns accumulated from previous joins that must be carried forward.
    pitxtList = ['pitxt']
    elemlenList = ['elemlen']
    resTable = 'tempOcc_0'

    for index, condition in enumerate(conditions):
        mainQueryString = f'{preMainQueryString} {condition}'

        if index == 0:
            execute(f'CREATE TEMPORARY TABLE {resTable} AS {mainQueryString}')
            execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
            continue

        execute(f'CREATE TEMPORARY TABLE tempOccB AS {mainQueryString}')
        execute('CREATE INDEX bb ON tempOccB (ntx, mappa)')

        oldTable = resTable
        resTable = 'tempOcc_' + str(index)
        # Comma-separated, table-qualified column lists (a missing comma here
        # produced invalid SQL from the third occurrence on).
        pitxtCols = ", ".join(f'tabA.{name}' for name in pitxtList)
        elemlenCols = ", ".join(f'tabA.{name}' for name in elemlenList)
        execute(
            f'CREATE TEMPORARY TABLE {resTable} AS SELECT tabA.cod, tabA.ntx, {pitxtCols}, {elemlenCols}, '
            f'tabA.mappa, tabA.numperiod, tabA.links, tabA.numorg, tabA.sigla, tabA.vol, tabA.pag, tabA.riga, '
            f'tabA.col, tabA.tipostanza, tabA.stanza, tabA.verso, tabA.numbrano, tabA.lemma, tabA.cat_gr, '
            f'tabA.disambiguatore, tabB.ntx AS ntx2, tabB.mappa AS mappa2, tabB.pitxt as pitxt_{index}, '
            f'tabB.elemlen as elemlen_{index} FROM {oldTable} AS tabA, tempOccB AS tabB '
            f'WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-{intervallo} AND tabB.mappa+{intervallo} '
            f'AND tabA.mappa != tabB.mappa'
        )
        execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
        execute(f'DROP TABLE {oldTable}')
        # The scratch table is recreated on the next iteration, so it must go.
        execute('DROP TABLE tempOccB')
        pitxtList.append(f'pitxt_{index}')
        elemlenList.append(f'elemlen_{index}')

    # NOTE(review): the final temporary table is left in place, as before;
    # a second call on the same connection would collide with it - confirm
    # that connections are not reused across queries.
    results = pd.read_sql(f'SELECT * FROM {resTable}', connection)

    # Size of the context window; half of it is looked up on each side.
    parole = 31
    halfWindow = parole // 2

    queryPiniz = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{halfWindow}'
    queryPfin = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{halfWindow}'
    queryPeriodi = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {resTable} AS stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'

    resultsPiniz = pd.read_sql(queryPiniz, connection)
    resultsPfin = pd.read_sql(queryPfin, connection)
    resultsPeriodi = pd.read_sql(queryPeriodi, connection)

    # NOTE(review): these assignments align rows by position, which assumes
    # each helper query yields exactly one row per result row, in the same
    # order - verify against the schema (uniqueness of ntx/mappa and periodi).
    results['piniz'] = resultsPiniz['piniz']
    results['pfin'] = resultsPfin['pfin']
    results[['backup_piniz', 'backup_pfin']] = resultsPeriodi[['backup_piniz', 'backup_pfin']]

    return results
|