query_handlers.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. import sqlite3
  2. import pandas as pd
  3. import interface_sqlite3.encdec.de_code as dc
  4. from engine.data_interface.QueryHandlerAbstract import QueryHandlerAbstract
  5. # First version
  6. class queryHandlerBasicSqlite(QueryHandlerAbstract):
  7. def __init__(self, dataConfig):
  8. try:
  9. dbPath = dataConfig['dbPath']
  10. dbfileDefault = dataConfig['dbfile_default']
  11. except:
  12. raise Exception('Missing required input in Data Provider Configuration')
  13. self.dbPath = dbPath
  14. self.dbfileDefault = dbfileDefault
  15. # Encoding
  16. self.dbEncoded = True if dataConfig.get("db_encoded") is True else False
  17. self.textsEncoded = True if dataConfig.get("texts_encoded") is True else False
  18. self.keyRing = None
  19. if self.dbEncoded or self.textsEncoded:
  20. keyPath = self.dbPath + 'keys/'
  21. self.keyRing = dc.keyRing(keyPath, self.dbEncoded, self.textsEncoded)
  22. def query(self, queryData, pandas=False, dbFile=None):
  23. # Formerly the query string was pre-generated outside and
  24. # sent here _in lieu_ of the query data
  25. # Now the method processes a query data OBJECT and creates the query
  26. # accordingly
  27. if self.dbEncoded:
  28. queryData = self.encodeQuery(queryData)
  29. queryString = prepareQueryString(queryData)
  30. dbfileLocal = dbFile if dbFile is not None else self.dbfileDefault
  31. db = self.dbPath + dbfileLocal
  32. connection = sqlite3.connect(f"file:{db}?mode=ro", uri=True)
  33. # PANDAS?
  34. if pandas:
  35. results = pd.read_sql(queryString, connection)
  36. if(self.dbEncoded):
  37. results = self.db_results_decode_pandas(results)
  38. else:
  39. connection.row_factory = dict_factory
  40. queryReponse = connection.cursor().execute(queryString)
  41. results = queryReponse.fetchall()
  42. if(self.dbEncoded):
  43. results = self.db_results_decode(results)
  44. connection.close()
  45. return results
  46. def textQuery(self, queryData):
  47. try:
  48. sigla = queryData['sigla']
  49. minChar = queryData['minChar']
  50. maxChar = queryData['maxChar']
  51. except:
  52. return None
  53. with open(f"{self.dbPath}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
  54. file1.seek(4*minChar)
  55. cont = file1.read(maxChar-minChar)
  56. if self.textsEncoded and self.keyRing.textKeys.get(sigla) is not None:
  57. key = self.keyRing.textKeys.get(sigla)
  58. cont = dc.decodeTextByKey(cont, key, minChar-1)
  59. return cont
  60. def formatQuery(self, queryData):
  61. try:
  62. sigla = queryData['sigla']
  63. minChar = queryData['minChar']
  64. maxChar = queryData['maxChar']
  65. except:
  66. return None
  67. with open(f"{self.dbPath}/ftxt/{sigla}", 'r', encoding="utf-32-le") as file1:
  68. file1.seek(4*minChar)
  69. formbytes = file1.read(maxChar-minChar)
  70. form = [byte for byte in formbytes]
  71. return form
  72. def encodeQuery(self, queryData):
  73. type = queryData.get('queryType')
  74. if type in ["forma", "lemma", "formaLemma", "lemmaForma"]:
  75. try:
  76. data = queryData['data']
  77. dataNorm = queryData['dataNorm']
  78. data = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in data]
  79. dataNorm = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in dataNorm]
  80. queryData['data'] = data
  81. queryData['dataNorm'] = dataNorm
  82. except KeyError as err:
  83. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  84. return queryData
  85. def db_results_decode(self, result):
  86. for row in result:
  87. for key, value in row.items():
  88. if isColumnToDecode(key):
  89. row[key] = dc.db_decode(self.keyRing.vettSpec, value)
  90. return result
  91. def db_results_decode_pandas(self, df):
  92. for col in df.columns:
  93. if isColumnToDecode(col):
  94. df[col] = df[col].apply( lambda el: dc.db_decode(self.keyRing.vettSpec, el) )
  95. return df
  96. # Utilities
  97. def prepareQueryString(queryData):
  98. type = queryData.get('queryType') # KeyError protected -- returns None if the key is not defined
  99. #################
  100. if type=='occ_tables':
  101. return "SELECT name FROM sqlite_master WHERE type='table'"
  102. #################
  103. if type=='forma':
  104. try:
  105. data = queryData['data']
  106. dataNorm = queryData['dataNorm']
  107. except KeyError as err:
  108. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  109. joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
  110. if len(dataNorm)==0:
  111. return f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {joinedQueryData} ORDER BY idfor"
  112. else:
  113. joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
  114. return f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idfor"
  115. ###################
  116. elif type=='lemma':
  117. try:
  118. data = queryData['data']
  119. dataNorm = queryData['dataNorm']
  120. except KeyError as err:
  121. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  122. joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
  123. if len(dataNorm)==0:
  124. return f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {joinedQueryData} ORDER BY idlem"
  125. else:
  126. joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
  127. return f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idlem"
  128. ########################
  129. elif type=='lemmaForma':
  130. try:
  131. data = queryData['data']
  132. dataNorm = queryData['dataNorm']
  133. except KeyError as err:
  134. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  135. joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
  136. if len(dataNorm)==0:
  137. return f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {joinedQueryData} ORDER BY lem.idlem"
  138. else:
  139. joinedQueryData = "'" + "' OR lem.spec LIKE '".join(data) + "'"
  140. joinedQueryDataNorm = "'" + "' OR lem.norm LIKE '".join(dataNorm) + "'"
  141. return f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {joinedQueryData}) OR (lem.norm LIKE {joinedQueryDataNorm}) ORDER BY lem.idlem"
  142. ########################
  143. elif type=='formaLemma':
  144. try:
  145. data = queryData['data']
  146. dataNorm = queryData['dataNorm']
  147. except KeyError as err:
  148. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  149. joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
  150. if len(dataNorm)==0:
  151. return f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {joinedQueryData} ORDER BY form.idfor"
  152. else:
  153. joinedQueryDataNorm = "'" + "' OR form.norm LIKE '".join(dataNorm) + "'"
  154. return f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {joinedQueryData}) OR (form.norm LIKE {joinedQueryDataNorm}) ORDER BY form.idfor"
  155. #################
  156. elif type=='pfl':
  157. try:
  158. codList = queryData['codList']
  159. except KeyError as err:
  160. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  161. strlist = ",".join(str(c) for c in codList)
  162. return f"SELECT DISTINCT lemma as codLemma, forma as codForma FROM pfl WHERE lemma IN ({strlist})"
  163. ###################
  164. elif type=='texts':
  165. try:
  166. codList = queryData['codList']
  167. table = queryData['table']
  168. subtype = queryData['querySubtype']
  169. formCodList = queryData.get('formCodList') # KeyError-safe (None if absent)
  170. except KeyError as err:
  171. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  172. strlist = ",".join(str(c) for c in codList)
  173. if subtype==0:
  174. return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.cod IN ({strlist})"
  175. elif subtype==1:
  176. return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist})"
  177. elif subtype==2:
  178. if formCodList is None:
  179. return None
  180. strform = ",".join(str(c) for c in formCodList)
  181. return f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
  182. ######################
  183. elif type=='contexts':
  184. try:
  185. table = queryData['table']
  186. ntxlocal = queryData['ntxlocal']
  187. mappalocal = queryData['mappalocal']
  188. parole = queryData['parole']
  189. except KeyError as err:
  190. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  191. return f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
  192. #################
  193. elif type=='bib':
  194. try:
  195. row = queryData['row']
  196. sigla = row['sigla']
  197. except KeyError as err:
  198. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  199. return f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{sigla}'"
  200. #################
  201. elif type=='rif':
  202. try:
  203. row = queryData['row']
  204. numorg = row['numorg']
  205. ntx = row['ntx']
  206. except:
  207. return None
  208. return f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')"
  209. #################
  210. elif type=='highlight':
  211. try:
  212. row = queryData['row']
  213. col = queryData['col']
  214. except:
  215. return None
  216. return f"SELECT spec as highlight FROM form WHERE cod={row[col]}"
  217. #################
  218. elif type =='singlecontext':
  219. try:
  220. subtype = queryData['querySubtype']
  221. table = queryData['table']
  222. parole = queryData['parole']
  223. periodi = queryData['periodi']
  224. brani = queryData['brani']
  225. ntxlocal = queryData['ntxlocal']
  226. mappalocal = queryData['mappalocal']
  227. periodlocal = queryData['periodlocal']
  228. numbranolocal = queryData['numbranolocal']
  229. except:
  230. return None
  231. if subtype == 'parole':
  232. return f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
  233. elif subtype == 'periodi':
  234. return f"SELECT piniz, pfin FROM periodi WHERE ntx = {ntxlocal} AND numperiod <= {periodlocal+int(periodi/2)} AND numperiod >= {periodlocal-int(periodi/2)}"
  235. elif subtype == 'brani':
  236. return f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
  237. #################
  238. elif type =='links':
  239. try:
  240. subtype = queryData['querySubtype']
  241. ntxlocal = queryData['ntxlocal']
  242. pinizlocal = queryData['pinizlocal']
  243. pfinlocal = queryData['pfinlocal']
  244. pitxtlocal = queryData['pitxtlocal']
  245. except:
  246. return None
  247. if subtype == 'nota':
  248. return f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
  249. if subtype == 'testo_associato':
  250. return f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
  251. #####
  252. else:
  253. raise ValueError('Unrecognized query type: ' + type)
  254. # Dict factory non-Pandas queries
  255. def dict_factory(cursor, row):
  256. fields = [column[0] for column in cursor.description]
  257. return {key: value for key, value in zip(fields, row)}
  258. # Does the column data (in returned results) need decoding?
  259. def isColumnToDecode(col):
  260. columns = ['forma', 'lemma', 'cat_gr', 'disambiguatore']
  261. if col in columns or col.startswith('highlight'):
  262. return True
  263. return False