actual_queries.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. import polars as pl
  2. def prepareQuery(queryData):
  3. local_type = queryData.get('queryType') # KeyError protected -- returns None if the key is not defined
  4. #################
  5. if local_type=='occ_tables':
  6. return "SELECT name FROM sqlite_master WHERE type='table'"
  7. #################
  8. if local_type=='forma':
  9. try:
  10. data = queryData['data']
  11. dataNorm = queryData['dataNorm']
  12. except KeyError as err:
  13. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  14. joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
  15. if len(dataNorm)==0:
  16. return f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {joinedQueryData} ORDER BY idfor"
  17. else:
  18. joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
  19. return f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idfor"
  20. ###################
  21. elif local_type=='lemma':
  22. try:
  23. data = queryData['data']
  24. dataNorm = queryData['dataNorm']
  25. except KeyError as err:
  26. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  27. joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
  28. if len(dataNorm)==0:
  29. return f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {joinedQueryData} ORDER BY idlem"
  30. else:
  31. joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
  32. return f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idlem"
  33. ########################
  34. elif local_type=='lemmaForma':
  35. try:
  36. data = queryData['data']
  37. dataNorm = queryData['dataNorm']
  38. except KeyError as err:
  39. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  40. joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
  41. if len(dataNorm)==0:
  42. return f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {joinedQueryData} ORDER BY lem.idlem"
  43. else:
  44. joinedQueryData = "'" + "' OR lem.spec LIKE '".join(data) + "'"
  45. joinedQueryDataNorm = "'" + "' OR lem.norm LIKE '".join(dataNorm) + "'"
  46. return f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {joinedQueryData}) OR (lem.norm LIKE {joinedQueryDataNorm}) ORDER BY lem.idlem"
  47. ########################
  48. elif local_type=='formaLemma':
  49. try:
  50. data = queryData['data']
  51. dataNorm = queryData['dataNorm']
  52. except KeyError as err:
  53. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  54. joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
  55. if len(dataNorm)==0:
  56. return f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {joinedQueryData} ORDER BY form.idfor"
  57. else:
  58. joinedQueryDataNorm = "'" + "' OR form.norm LIKE '".join(dataNorm) + "'"
  59. return f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {joinedQueryData}) OR (form.norm LIKE {joinedQueryDataNorm}) ORDER BY form.idfor"
  60. #################
  61. elif local_type=='pfl':
  62. try:
  63. codList = queryData['codList']
  64. except KeyError as err:
  65. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  66. strlist = ",".join(str(c) for c in codList)
  67. return f"SELECT DISTINCT lemma as codLemma, forma as codForma FROM pfl WHERE lemma IN ({strlist})"
  68. ###################
  69. elif local_type=='texts':
  70. return complexQueryTexts
  71. ###################
  72. elif local_type=='co-occurrences':
  73. return complexQueryCooccurrences
  74. ######################
  75. elif local_type=='bib':
  76. try:
  77. row = queryData['row']
  78. sigla = row['sigla']
  79. except KeyError as err:
  80. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  81. return f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{sigla}'"
  82. #################
  83. elif local_type=='bibAlt':
  84. try:
  85. siglaSet = queryData['siglaSet']
  86. except KeyError as err:
  87. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  88. siglaStr = "'" + "','".join(siglaSet) + "'"
  89. return f"SELECT Sigla, [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla IN ({siglaStr})"
  90. #################
  91. elif local_type=='rif':
  92. try:
  93. row = queryData['row']
  94. numorg = row['numorg']
  95. ntx = row['ntx']
  96. except:
  97. return None
  98. return f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')"
  99. #################
  100. elif local_type=='rifAlt':
  101. try:
  102. coordsSet = queryData['coordsSet']
  103. except KeyError as err:
  104. raise KeyError('Missing required data for query type ' + local_type + ': ' + str(err))
  105. subQueries = []
  106. for coords in coordsSet:
  107. try:
  108. numorg = coords[0]
  109. ntx = coords[1]
  110. except IndexError as err:
  111. raise KeyError('Incomplete required data for query type ' + local_type + ': ' + str(err))
  112. subQueries.append( f"SELECT indice AS numorg, ntx, head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')" )
  113. return ' UNION ALL '.join(subQueries)
  114. #################
  115. elif local_type=='highlight':
  116. try:
  117. row = queryData['row']
  118. col = queryData['col']
  119. except:
  120. return None
  121. return f"SELECT spec as highlight FROM form WHERE cod={row[col]}"
  122. #################
  123. elif local_type =='singlecontext':
  124. try:
  125. subtype = queryData['querySubtype']
  126. table = queryData['table']
  127. parole = queryData['parole']
  128. periodi = queryData['periodi']
  129. brani = queryData['brani']
  130. ntxlocal = queryData['ntxlocal']
  131. mappalocal = queryData['mappalocal']
  132. periodlocal = queryData['periodlocal']
  133. numbranolocal = queryData['numbranolocal']
  134. except:
  135. return None
  136. if subtype == 'parole':
  137. return f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
  138. elif subtype == 'periodi':
  139. return f"SELECT piniz, pfin FROM periodi WHERE ntx = {ntxlocal} AND numperiod <= {periodlocal+int(periodi/2)} AND numperiod >= {periodlocal-int(periodi/2)}"
  140. elif subtype == 'brani':
  141. return f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
  142. #################
  143. elif local_type =='links':
  144. try:
  145. subtype = queryData['querySubtype']
  146. ntxlocal = queryData['ntxlocal']
  147. pinizlocal = queryData['pinizlocal']
  148. pfinlocal = queryData['pfinlocal']
  149. pitxtlocal = queryData['pitxtlocal']
  150. except:
  151. return None
  152. if subtype == 'nota':
  153. return f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
  154. if subtype == 'testo_associato':
  155. return f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
  156. #####
  157. else:
  158. raise ValueError('Unrecognized query type: ' + local_type)
  159. def complexQueryTexts(connection, queryData):
  160. try:
  161. codList = queryData['codList']
  162. table = queryData['table']
  163. subtype = queryData['querySubtype']
  164. formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
  165. except KeyError as err:
  166. raise KeyError(f'Missing required data for query type {queryData.get('queryType')} : {str(err)}')
  167. strCodList = ",".join(str(c) for c in codList)
  168. # Main query, verified to be fast!
  169. mainQueryString = f"""
  170. SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg,
  171. intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso,
  172. tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore
  173. FROM {table} AS tab
  174. INNER JOIN intbib ON tab.ntx = intbib.ntx
  175. INNER JOIN lem ON tab.indlem = lem.cod
  176. """
  177. if subtype == 0:
  178. condition = f"WHERE tab.cod IN ({strCodList})"
  179. elif subtype == 1:
  180. condition = f"WHERE tab.indlem IN ({strCodList})"
  181. elif subtype == 2:
  182. if formCodList is None:
  183. return None
  184. strFormCodList = ",".join(str(c) for c in formCodList)
  185. condition = f" WHERE tab.indlem IN ({strCodList}) OR (tab.indlem = 0 AND tab.cod IN ({strFormCodList}))"
  186. mainQueryString = f'{mainQueryString} {condition}'
  187. # This value can be changed to change multiple contexts' width. Default value for Gatto is parole=31 #
  188. parole = 31
  189. # C'è la possibilità di scegliere periodi invece che parole, ma per il momento è disabilitata
  190. createTempTable = f'CREATE TEMPORARY TABLE stuff AS {mainQueryString}'
  191. mainQuery = 'SELECT * from stuff'
  192. addQuery1 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole / 2)}'
  193. addQuery2 = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM stuff LEFT JOIN Occ00001 AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
  194. addQuery3 = 'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
  195. # Start communication with DB
  196. cursor = connection.cursor()
  197. cursor.execute(createTempTable)
  198. results = pl.read_sql(mainQuery, connection)
  199. results_add1 = pl.read_sql(addQuery1, connection)
  200. results_add2 = pl.read_sql(addQuery2, connection)
  201. results_add3 = pl.read_sql(addQuery3, connection)
  202. results = results.with_columns([
  203. results_add1['piniz'],
  204. results_add2['pfin'],
  205. results_add3['backup_piniz'],
  206. results_add3['backup_pfin']
  207. ])
  208. return results
  209. def complexQueryCooccurrences(connection, queryData):
  210. try:
  211. occurrences = queryData['occurrences']
  212. table = queryData['table']
  213. intervallo = queryData['intervallo']
  214. periodo = queryData.get('periodo', 0) # Unused for the moment
  215. ordinate = queryData.get('ordinate', 0) # Unused for the moment
  216. except KeyError as err:
  217. raise KeyError('Missing required data for query: ' + str(err))
  218. preMainQueryString = f"""
  219. SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg,
  220. intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso,
  221. tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore
  222. FROM {table} AS tab
  223. INNER JOIN intbib ON tab.ntx = intbib.ntx
  224. INNER JOIN lem ON tab.indlem = lem.cod
  225. """
  226. pitxtList = ['pitxt']
  227. elemlenList = ['elemlen']
  228. for index, occ in enumerate(occurrences):
  229. try:
  230. subtype = occ['querySubtype']
  231. codList = occ['codList']
  232. formCodList = occ.get('formCodList')
  233. except KeyError as err:
  234. raise KeyError('Missing required data for query: ' + str(err))
  235. strCodList = ",".join(str(c) for c in codList)
  236. if subtype == 0:
  237. condition = f" WHERE tab.cod IN ({strCodList})"
  238. elif subtype == 1:
  239. condition = f" WHERE tab.indlem IN ({strCodList})"
  240. elif subtype == 2:
  241. if formCodList is None:
  242. return None
  243. strFormCodList = ",".join(str(c) for c in formCodList)
  244. condition = f" WHERE tab.indlem IN ({strCodList}) OR (tab.indlem = 0 AND tab.cod IN ({strFormCodList}))"
  245. mainQueryString = f'{preMainQueryString} {condition}'
  246. if index == 0:
  247. resTable = 'tempOcc_' + str(index)
  248. cursor = connection.cursor()
  249. cursor.execute(f'CREATE TEMPORARY TABLE {resTable} AS {mainQueryString}')
  250. cursor.execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
  251. else:
  252. cursor.execute(f'CREATE TEMPORARY TABLE tempOccB AS {mainQueryString}')
  253. cursor.execute(f'CREATE INDEX bb ON tempOccB (ntx, mappa)')
  254. oldTable = resTable
  255. resTable = 'tempOcc_' + str(index)
  256. cursor.execute(f"""
  257. CREATE TEMPORARY TABLE {resTable} AS
  258. SELECT tabA.cod, tabA.ntx, tabA.{" tabA.".join(pitxtList)}, tabA.{" tabA.".join(elemlenList)},
  259. tabA.mappa, tabA.numperiod, tabA.links, tabA.numorg, tabA.sigla, tabA.vol, tabA.pag,
  260. tabA.riga, tabA.col, tabA.tipostanza, tabA.stanza, tabA.verso, tabA.numbrano,
  261. tabA.lemma, tabA.cat_gr, tabA.disambiguatore, tabB.ntx AS ntx2, tabB.mappa AS mappa2,
  262. tabB.pitxt as pitxt_{index}, tabB.elemlen as elemlen_{index}
  263. FROM {oldTable} AS tabA, tempOccB AS tabB
  264. WHERE tabA.ntx=tabB.ntx AND tabA.mappa BETWEEN tabB.mappa-{intervallo} AND tabB.mappa+{intervallo} AND tabA.mappa != tabB.mappa
  265. """)
  266. cursor.execute(f'CREATE INDEX aa_{index} ON {resTable} (ntx, mappa)')
  267. cursor.execute(f'DROP TABLE {oldTable}')
  268. pitxtList.append(f'pitxt_{index}')
  269. elemlenList.append(f'elemlen_{index}')
  270. results = pl.read_sql(f'SELECT * FROM {resTable}', connection)
  271. parole = 31
  272. queryPiniz = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS piniz FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa-{int(parole/2)}'
  273. queryPfin = f'SELECT stuff.ntx, stuff.mappa, tab.pitxt AS pfin FROM {resTable} AS stuff LEFT JOIN {table} AS tab ON tab.ntx=stuff.ntx AND tab.mappa=stuff.mappa+{int(parole/2)}'
  274. queryPeriodi = f'SELECT stuff.ntx, stuff.numperiod, periodi.piniz AS backup_piniz, periodi.pfin AS backup_pfin FROM {resTable} AS stuff, periodi WHERE stuff.ntx = periodi.ntx AND stuff.numperiod = periodi.numperiod'
  275. resultsPiniz = pl.read_sql(queryPiniz, connection)
  276. resultsPfin = pl.read_sql(queryPfin, connection)
  277. resultsPeriodi = pl.read_sql(queryPeriodi, connection)
  278. results = results.with_columns([
  279. resultsPiniz['piniz'],
  280. resultsPfin['pfin'],
  281. resultsPeriodi['backup_piniz'],
  282. resultsPeriodi['backup_pfin']
  283. ])
  284. return results