|
@@ -1,20 +1,21 @@
|
|
-import pandas as pd
|
|
|
|
|
|
+import polars as pl
|
|
|
|
+
|
|
|
|
|
|
def prepareQuery(queryData):
|
|
def prepareQuery(queryData):
|
|
|
|
|
|
- type = queryData.get('queryType') # KeyError protected -- returns None if the key is not defined
|
|
|
|
|
|
+ local_type = queryData.get('queryType') # KeyError protected -- returns None if the key is not defined
|
|
|
|
|
|
#################
|
|
#################
|
|
- if type=='occ_tables':
|
|
|
|
|
|
+ if local_type=='occ_tables':
|
|
return "SELECT name FROM sqlite_master WHERE type='table'"
|
|
return "SELECT name FROM sqlite_master WHERE type='table'"
|
|
|
|
|
|
#################
|
|
#################
|
|
- if type=='forma':
|
|
|
|
|
|
+ if local_type=='forma':
|
|
try:
|
|
try:
|
|
data = queryData['data']
|
|
data = queryData['data']
|
|
dataNorm = queryData['dataNorm']
|
|
dataNorm = queryData['dataNorm']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
|
|
joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
|
|
if len(dataNorm)==0:
|
|
if len(dataNorm)==0:
|
|
return f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {joinedQueryData} ORDER BY idfor"
|
|
return f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {joinedQueryData} ORDER BY idfor"
|
|
@@ -23,12 +24,12 @@ def prepareQuery(queryData):
|
|
return f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idfor"
|
|
return f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idfor"
|
|
|
|
|
|
###################
|
|
###################
|
|
- elif type=='lemma':
|
|
|
|
|
|
+ elif local_type=='lemma':
|
|
try:
|
|
try:
|
|
data = queryData['data']
|
|
data = queryData['data']
|
|
dataNorm = queryData['dataNorm']
|
|
dataNorm = queryData['dataNorm']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
|
|
joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
|
|
if len(dataNorm)==0:
|
|
if len(dataNorm)==0:
|
|
return f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {joinedQueryData} ORDER BY idlem"
|
|
return f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {joinedQueryData} ORDER BY idlem"
|
|
@@ -37,12 +38,12 @@ def prepareQuery(queryData):
|
|
return f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idlem"
|
|
return f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idlem"
|
|
|
|
|
|
########################
|
|
########################
|
|
- elif type=='lemmaForma':
|
|
|
|
|
|
+ elif local_type=='lemmaForma':
|
|
try:
|
|
try:
|
|
data = queryData['data']
|
|
data = queryData['data']
|
|
dataNorm = queryData['dataNorm']
|
|
dataNorm = queryData['dataNorm']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
|
|
joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
|
|
if len(dataNorm)==0:
|
|
if len(dataNorm)==0:
|
|
return f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {joinedQueryData} ORDER BY lem.idlem"
|
|
return f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {joinedQueryData} ORDER BY lem.idlem"
|
|
@@ -52,12 +53,12 @@ def prepareQuery(queryData):
|
|
return f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {joinedQueryData}) OR (lem.norm LIKE {joinedQueryDataNorm}) ORDER BY lem.idlem"
|
|
return f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {joinedQueryData}) OR (lem.norm LIKE {joinedQueryDataNorm}) ORDER BY lem.idlem"
|
|
|
|
|
|
########################
|
|
########################
|
|
- elif type=='formaLemma':
|
|
|
|
|
|
+ elif local_type=='formaLemma':
|
|
try:
|
|
try:
|
|
data = queryData['data']
|
|
data = queryData['data']
|
|
dataNorm = queryData['dataNorm']
|
|
dataNorm = queryData['dataNorm']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
|
|
joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
|
|
if len(dataNorm)==0:
|
|
if len(dataNorm)==0:
|
|
return f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {joinedQueryData} ORDER BY form.idfor"
|
|
return f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {joinedQueryData} ORDER BY form.idfor"
|
|
@@ -66,43 +67,43 @@ def prepareQuery(queryData):
|
|
return f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {joinedQueryData}) OR (form.norm LIKE {joinedQueryDataNorm}) ORDER BY form.idfor"
|
|
return f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {joinedQueryData}) OR (form.norm LIKE {joinedQueryDataNorm}) ORDER BY form.idfor"
|
|
|
|
|
|
#################
|
|
#################
|
|
- elif type=='pfl':
|
|
|
|
|
|
+ elif local_type=='pfl':
|
|
try:
|
|
try:
|
|
codList = queryData['codList']
|
|
codList = queryData['codList']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
|
|
|
|
strlist = ",".join(str(c) for c in codList)
|
|
strlist = ",".join(str(c) for c in codList)
|
|
return f"SELECT DISTINCT lemma as codLemma, forma as codForma FROM pfl WHERE lemma IN ({strlist})"
|
|
return f"SELECT DISTINCT lemma as codLemma, forma as codForma FROM pfl WHERE lemma IN ({strlist})"
|
|
|
|
|
|
###################
|
|
###################
|
|
- elif type=='texts':
|
|
|
|
|
|
+ elif local_type=='texts':
|
|
return complexQueryTexts
|
|
return complexQueryTexts
|
|
|
|
|
|
###################
|
|
###################
|
|
- elif type=='co-occurrences':
|
|
|
|
|
|
+ elif local_type=='co-occurrences':
|
|
return complexQueryCooccurrences
|
|
return complexQueryCooccurrences
|
|
|
|
|
|
######################
|
|
######################
|
|
- elif type=='bib':
|
|
|
|
|
|
+ elif local_type=='bib':
|
|
try:
|
|
try:
|
|
row = queryData['row']
|
|
row = queryData['row']
|
|
sigla = row['sigla']
|
|
sigla = row['sigla']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
return f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{sigla}'"
|
|
return f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{sigla}'"
|
|
|
|
|
|
#################
|
|
#################
|
|
- elif type=='bibAlt':
|
|
|
|
|
|
+ elif local_type=='bibAlt':
|
|
try:
|
|
try:
|
|
siglaSet = queryData['siglaSet']
|
|
siglaSet = queryData['siglaSet']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
siglaStr = "'" + "','".join(siglaSet) + "'"
|
|
siglaStr = "'" + "','".join(siglaSet) + "'"
|
|
return f"SELECT Sigla, [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla IN ({siglaStr})"
|
|
return f"SELECT Sigla, [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla IN ({siglaStr})"
|
|
|
|
|
|
#################
|
|
#################
|
|
- elif type=='rif':
|
|
|
|
|
|
+ elif local_type=='rif':
|
|
try:
|
|
try:
|
|
row = queryData['row']
|
|
row = queryData['row']
|
|
numorg = row['numorg']
|
|
numorg = row['numorg']
|
|
@@ -112,11 +113,11 @@ def prepareQuery(queryData):
|
|
return f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')"
|
|
return f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')"
|
|
|
|
|
|
#################
|
|
#################
|
|
- elif type=='rifAlt':
|
|
|
|
|
|
+ elif local_type=='rifAlt':
|
|
try:
|
|
try:
|
|
coordsSet = queryData['coordsSet']
|
|
coordsSet = queryData['coordsSet']
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
|
|
|
|
|
|
subQueries = []
|
|
subQueries = []
|
|
for coords in coordsSet:
|
|
for coords in coordsSet:
|
|
@@ -124,13 +125,13 @@ def prepareQuery(queryData):
|
|
numorg = coords[0]
|
|
numorg = coords[0]
|
|
ntx = coords[1]
|
|
ntx = coords[1]
|
|
except IndexError as err:
|
|
except IndexError as err:
|
|
- raise KeyError('Incomplete required data for query type ' + type + ': ' + str(err))
|
|
|
|
|
|
+ raise KeyError('Incomplete required data for query type ' + local_type + ': ' + str(err))
|
|
subQueries.append( f"SELECT indice AS numorg, ntx, head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')" )
|
|
subQueries.append( f"SELECT indice AS numorg, ntx, head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')" )
|
|
|
|
|
|
return ' UNION ALL '.join(subQueries)
|
|
return ' UNION ALL '.join(subQueries)
|
|
|
|
|
|
#################
|
|
#################
|
|
- elif type=='highlight':
|
|
|
|
|
|
+ elif local_type=='highlight':
|
|
try:
|
|
try:
|
|
row = queryData['row']
|
|
row = queryData['row']
|
|
col = queryData['col']
|
|
col = queryData['col']
|
|
@@ -139,7 +140,7 @@ def prepareQuery(queryData):
|
|
return f"SELECT spec as highlight FROM form WHERE cod={row[col]}"
|
|
return f"SELECT spec as highlight FROM form WHERE cod={row[col]}"
|
|
|
|
|
|
#################
|
|
#################
|
|
- elif type =='singlecontext':
|
|
|
|
|
|
+ elif local_type =='singlecontext':
|
|
try:
|
|
try:
|
|
subtype = queryData['querySubtype']
|
|
subtype = queryData['querySubtype']
|
|
table = queryData['table']
|
|
table = queryData['table']
|
|
@@ -159,7 +160,7 @@ def prepareQuery(queryData):
|
|
elif subtype == 'brani':
|
|
elif subtype == 'brani':
|
|
return f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
|
|
return f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
|
|
#################
|
|
#################
|
|
- elif type =='links':
|
|
|
|
|
|
+ elif local_type =='links':
|
|
try:
|
|
try:
|
|
subtype = queryData['querySubtype']
|
|
subtype = queryData['querySubtype']
|
|
ntxlocal = queryData['ntxlocal']
|
|
ntxlocal = queryData['ntxlocal']
|
|
@@ -174,7 +175,7 @@ def prepareQuery(queryData):
|
|
return f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
|
|
return f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
|
|
#####
|
|
#####
|
|
else:
|
|
else:
|
|
- raise ValueError('Unrecognized query type: ' + type)
|
|
|
|
|
|
+ raise ValueError('Unrecognized query type: ' + local_type)
|
|
|
|
|
|
|
|
|
|
def complexQueryTexts(connection, queryData):
|
|
def complexQueryTexts(connection, queryData):
|
|
@@ -182,19 +183,27 @@ def complexQueryTexts(connection, queryData):
|
|
codList = queryData['codList']
|
|
codList = queryData['codList']
|
|
table = queryData['table']
|
|
table = queryData['table']
|
|
subtype = queryData['querySubtype']
|
|
subtype = queryData['querySubtype']
|
|
- formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
|
|
|
|
|
|
+ formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
- raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
|
|
|
|
-
|
|
|
|
|
|
+ raise KeyError(f'Missing required data for query type {queryData.get('queryType')} : {str(err)}')
|
|
|
|
+
|
|
strCodList = ",".join(str(c) for c in codList)
|
|
strCodList = ",".join(str(c) for c in codList)
|
|
|
|
|
|
# Main query, verified to be fast!
|
|
# Main query, verified to be fast!
|
|
- mainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod"
|
|
|
|
- if subtype==0:
|
|
|
|
|
|
+ mainQueryString = f"""
|
|
|
|
+ SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg,
|
|
|
|
+ intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso,
|
|
|
|
+ tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore
|
|
|
|
+ FROM {table} AS tab
|
|
|
|
+ INNER JOIN intbib ON tab.ntx = intbib.ntx
|
|
|
|
+ INNER JOIN lem ON tab.indlem = lem.cod
|
|
|
|
+ """
|
|
|
|
+
|
|
|
|
+ if subtype == 0:
|
|
condition = f"WHERE tab.cod IN ({strCodList})"
|
|
condition = f"WHERE tab.cod IN ({strCodList})"
|
|
- elif subtype==1:
|
|
|
|
|
|
+ elif subtype == 1:
|
|
condition = f"WHERE tab.indlem IN ({strCodList})"
|
|
condition = f"WHERE tab.indlem IN ({strCodList})"
|
|
- elif subtype==2:
|
|
|
|
|
|
+ elif subtype == 2:
|
|
if formCodList is None:
|
|
if formCodList is None:
|
|
return None
|
|
return None
|
|
strFormCodList = ",".join(str(c) for c in formCodList)
|
|
strFormCodList = ",".join(str(c) for c in formCodList)
|
|
@@ -202,68 +211,71 @@ def complexQueryTexts(connection, queryData):
|
|
|
|
|
|
mainQueryString = f'{mainQueryString} {condition}'
|
|
mainQueryString = f'{mainQueryString} {condition}'
|
|
|
|
|
|
- # This value can be changed to change multiple contexts width. Default value for Gatto is parole=31 #
|
|
|
|
|
|
+ # This value can be changed to change multiple contexts' width. Default value for Gatto is parole=31 #
|
|
parole = 31
|
|
parole = 31
|
|
# C'è la possibilità di scegliere periodi invece che parole, ma per il momento è disabilitata
|
|
# C'è la possibilità di scegliere periodi invece che parole, ma per il momento è disabilitata
|
|
- createTempTable = f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}'
|
|
|
|
- mainQuery = f'SELECT * from stuff'
|
|
|
|
- addQuery1 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole/2)}'
|
|
|
|
|
|
+ createTempTable = f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}'
|
|
|
|
+ mainQuery = 'SELECT * from stuff'
|
|
|
|
+ addQuery1 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole / 2)}'
|
|
addQuery2 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
|
|
addQuery2 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
|
|
- addQuery3 = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
|
|
|
|
-
|
|
|
|
|
|
+ addQuery3 = 'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
|
|
|
|
|
|
# Start communication with DB
|
|
# Start communication with DB
|
|
- connection.cursor().execute(createTempTable)
|
|
|
|
|
|
+ cursor = connection.cursor()
|
|
|
|
+ cursor.execute(createTempTable)
|
|
|
|
|
|
- results = pd.read_sql(mainQuery, connection)
|
|
|
|
- results_add1 = pd.read_sql(addQuery1, connection)
|
|
|
|
- results_add2 = pd.read_sql(addQuery2, connection)
|
|
|
|
- results_add3 = pd.read_sql(addQuery3, connection)
|
|
|
|
- results['piniz'] = results_add1['piniz']
|
|
|
|
- results['pfin'] = results_add2['pfin']
|
|
|
|
- results[['backup_piniz', 'backup_pfin']] = results_add3[['backup_piniz', 'backup_pfin']]
|
|
|
|
|
|
+ results = pl.read_sql(mainQuery, connection)
|
|
|
|
+ results_add1 = pl.read_sql(addQuery1, connection)
|
|
|
|
+ results_add2 = pl.read_sql(addQuery2, connection)
|
|
|
|
+ results_add3 = pl.read_sql(addQuery3, connection)
|
|
|
|
+
|
|
|
|
+ results = results.with_columns([
|
|
|
|
+ results_add1['piniz'],
|
|
|
|
+ results_add2['pfin'],
|
|
|
|
+ results_add3['backup_piniz'],
|
|
|
|
+ results_add3['backup_pfin']
|
|
|
|
+ ])
|
|
|
|
|
|
return results
|
|
return results
|
|
|
|
|
|
|
|
|
|
def complexQueryCooccurrences(connection, queryData):
|
|
def complexQueryCooccurrences(connection, queryData):
|
|
try:
|
|
try:
|
|
- # the get method for dicts is KeyError-safe (returns None if key is absent)
|
|
|
|
occurrences = queryData['occurrences']
|
|
occurrences = queryData['occurrences']
|
|
table = queryData['table']
|
|
table = queryData['table']
|
|
intervallo = queryData['intervallo']
|
|
intervallo = queryData['intervallo']
|
|
- periodo = queryData.get('periodo') # Unused for the moment
|
|
|
|
- ordinate = queryData.get('ordinate') # Unused for the moment
|
|
|
|
- if periodo is None:
|
|
|
|
- periodo = 0
|
|
|
|
- if ordinate is None:
|
|
|
|
- ordinate = 0
|
|
|
|
|
|
+ periodo = queryData.get('periodo', 0) # Unused for the moment
|
|
|
|
+ ordinate = queryData.get('ordinate', 0) # Unused for the moment
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
raise KeyError('Missing required data for query: ' + str(err))
|
|
raise KeyError('Missing required data for query: ' + str(err))
|
|
|
|
|
|
|
|
+ preMainQueryString = f"""
|
|
|
|
+ SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg,
|
|
|
|
+ intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso,
|
|
|
|
+ tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore
|
|
|
|
+ FROM {table} AS tab
|
|
|
|
+ INNER JOIN intbib ON tab.ntx = intbib.ntx
|
|
|
|
+ INNER JOIN lem ON tab.indlem = lem.cod
|
|
|
|
+ """
|
|
|
|
|
|
- # Main part of main query -- verified to be fast!
|
|
|
|
- preMainQueryString = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod"
|
|
|
|
-
|
|
|
|
- # Main loop on the different occurrences searched by user
|
|
|
|
pitxtList = ['pitxt']
|
|
pitxtList = ['pitxt']
|
|
elemlenList = ['elemlen']
|
|
elemlenList = ['elemlen']
|
|
- for index, occ in enumerate(occurrences):
|
|
|
|
|
|
|
|
|
|
+ for index, occ in enumerate(occurrences):
|
|
try:
|
|
try:
|
|
subtype = occ['querySubtype']
|
|
subtype = occ['querySubtype']
|
|
codList = occ['codList']
|
|
codList = occ['codList']
|
|
- formCodList = occ.get('formCodList')
|
|
|
|
|
|
+ formCodList = occ.get('formCodList')
|
|
except KeyError as err:
|
|
except KeyError as err:
|
|
raise KeyError('Missing required data for query: ' + str(err))
|
|
raise KeyError('Missing required data for query: ' + str(err))
|
|
-
|
|
|
|
|
|
+
|
|
strCodList = ",".join(str(c) for c in codList)
|
|
strCodList = ",".join(str(c) for c in codList)
|
|
|
|
|
|
- if subtype==0:
|
|
|
|
|
|
+ if subtype == 0:
|
|
condition = f" WHERE tab.cod IN ({strCodList})"
|
|
condition = f" WHERE tab.cod IN ({strCodList})"
|
|
- elif subtype==1:
|
|
|
|
|
|
+ elif subtype == 1:
|
|
condition = f" WHERE tab.indlem IN ({strCodList})"
|
|
condition = f" WHERE tab.indlem IN ({strCodList})"
|
|
- elif subtype==2:
|
|
|
|
|
|
+ elif subtype == 2:
|
|
if formCodList is None:
|
|
if formCodList is None:
|
|
return None
|
|
return None
|
|
strFormCodList = ",".join(str(c) for c in formCodList)
|
|
strFormCodList = ",".join(str(c) for c in formCodList)
|
|
@@ -271,42 +283,48 @@ def complexQueryCooccurrences(connection, queryData):
|
|
|
|
|
|
mainQueryString = f'{preMainQueryString} {condition}'
|
|
mainQueryString = f'{preMainQueryString} {condition}'
|
|
|
|
|
|
- # First occurrence:
|
|
|
|
- if index==0:
|
|
|
|
- # Create a temporary table for results
|
|
|
|
|
|
+ if index == 0:
|
|
resTable = 'tempOcc_' + str(index)
|
|
resTable = 'tempOcc_' + str(index)
|
|
- connection.cursor().execute(f'CREATE TEMPORARY TABLE {resTable} AS {mainQueryString}')
|
|
|
|
- connection.cursor().execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
|
|
|
|
- continue
|
|
|
|
-
|
|
|
|
|
|
+ cursor = connection.cursor()
|
|
|
|
+ cursor.execute(f'CREATE TEMPORARY TABLE {resTable} AS {mainQueryString}')
|
|
|
|
+ cursor.execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
|
|
else:
|
|
else:
|
|
- # update results
|
|
|
|
- connection.cursor().execute(f'CREATE TEMPORARY TABLE tempOccB AS {mainQueryString}')
|
|
|
|
- connection.cursor().execute(f'CREATE INDEX bb ON tempOccB (ntx, mappa)')
|
|
|
|
|
|
+ cursor.execute(f'CREATE TEMPORARY TABLE tempOccB AS {mainQueryString}')
|
|
|
|
+ cursor.execute(f'CREATE INDEX bb ON tempOccB (ntx, mappa)')
|
|
|
|
|
|
oldTable = resTable
|
|
oldTable = resTable
|
|
resTable = 'tempOcc_' + str(index)
|
|
resTable = 'tempOcc_' + str(index)
|
|
- connection.cursor().execute(f'CREATE TEMPORARY TABLE {resTable} AS SELECT tabA.cod, tabA.ntx, tabA.{" tabA.".join(pitxtList)}, tabA.{" tabA.".join(elemlenList)}, tabA.mappa, tabA.numperiod, tabA.links, tabA.numorg, tabA.sigla, tabA.vol, tabA.pag, tabA.riga, tabA.col, tabA.tipostanza, tabA.stanza, tabA.verso, tabA.numbrano, tabA.lemma, tabA.cat_gr, tabA.disambiguatore, tabB.ntx AS ntx2, tabB.mappa AS mappa2, tabB.pitxt as pitxt_{index}, tabB.elemlen as elemlen_{index} FROM {oldTable} AS tabA, tempOccB AS tabB WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-{intervallo} AND tabB.mappa+{intervallo} AND tabA.mappa != tabB.mappa')
|
|
|
|
- connection.cursor().execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
|
|
|
|
- connection.cursor().execute(f'DROP TABLE {oldTable}')
|
|
|
|
|
|
+ cursor.execute(f"""
|
|
|
|
+ CREATE TEMPORARY TABLE {resTable} AS
|
|
|
|
+ SELECT tabA.cod, tabA.ntx, tabA.{" tabA.".join(pitxtList)}, tabA.{" tabA.".join(elemlenList)},
|
|
|
|
+ tabA.mappa, tabA.numperiod, tabA.links, tabA.numorg, tabA.sigla, tabA.vol, tabA.pag,
|
|
|
|
+ tabA.riga, tabA.col, tabA.tipostanza, tabA.stanza, tabA.verso, tabA.numbrano,
|
|
|
|
+ tabA.lemma, tabA.cat_gr, tabA.disambiguatore, tabB.ntx AS ntx2, tabB.mappa AS mappa2,
|
|
|
|
+ tabB.pitxt as pitxt_{index}, tabB.elemlen as elemlen_{index}
|
|
|
|
+ FROM {oldTable} AS tabA, tempOccB AS tabB
|
|
|
|
+ WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-{intervallo} AND tabB.mappa+{intervallo} AND tabA.mappa != tabB.mappa
|
|
|
|
+ """)
|
|
|
|
+ cursor.execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
|
|
|
|
+ cursor.execute(f'DROP TABLE {oldTable}')
|
|
pitxtList.append(f'pitxt_{index}')
|
|
pitxtList.append(f'pitxt_{index}')
|
|
elemlenList.append(f'elemlen_{index}')
|
|
elemlenList.append(f'elemlen_{index}')
|
|
|
|
|
|
|
|
+ results = pl.read_sql(f'SELECT * FROM {resTable}', connection)
|
|
|
|
|
|
- results = pd.read_sql(f'SELECT * FROM {resTable}', connection)
|
|
|
|
-
|
|
|
|
- # This value can be changed to change multiple contexts width. Default value for Gatto is parole=31
|
|
|
|
parole = 31
|
|
parole = 31
|
|
- # C'è la possibilità di scegliere periodi invece che parole, ma per il momento è disabilitata
|
|
|
|
queryPiniz = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole/2)}'
|
|
queryPiniz = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole/2)}'
|
|
queryPfin = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
|
|
queryPfin = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
|
|
queryPeriodi = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {resTable} AS stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
|
|
queryPeriodi = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {resTable} AS stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
|
|
|
|
|
|
- resultsPiniz = pd.read_sql(queryPiniz, connection)
|
|
|
|
- resultsPfin = pd.read_sql(queryPfin, connection)
|
|
|
|
- resultsPeriodi = pd.read_sql(queryPeriodi, connection)
|
|
|
|
- results['piniz'] = resultsPiniz['piniz']
|
|
|
|
- results['pfin'] = resultsPfin['pfin']
|
|
|
|
- results[['backup_piniz', 'backup_pfin']] = resultsPeriodi[['backup_piniz', 'backup_pfin']]
|
|
|
|
|
|
+ resultsPiniz = pl.read_sql(queryPiniz, connection)
|
|
|
|
+ resultsPfin = pl.read_sql(queryPfin, connection)
|
|
|
|
+ resultsPeriodi = pl.read_sql(queryPeriodi, connection)
|
|
|
|
+
|
|
|
|
+ results = results.with_columns([
|
|
|
|
+ resultsPiniz['piniz'],
|
|
|
|
+ resultsPfin['pfin'],
|
|
|
|
+ resultsPeriodi['backup_piniz'],
|
|
|
|
+ resultsPeriodi['backup_pfin']
|
|
|
|
+ ])
|
|
|
|
|
|
return results
|
|
return results
|