# query_handlers.py
import copy
import sqlite3

import pandas as pd

import interface_sqlite3.encdec.de_code as dc
from interface_sqlite3.actual_queries import prepareQuery
# First version
  6. class queryHandlerBasicSqlite:
  7. def __init__(self, dataConfig):
  8. try:
  9. dbPath = dataConfig['dbPath']
  10. dbfileDefault = dataConfig.get('dbfile_default')
  11. except:
  12. raise Exception('Missing required input in Data Provider Configuration')
  13. self.dbPath = dbPath
  14. self.dbfileDefault = dbfileDefault
  15. # Encoding
  16. self.dbEncoded = True if dataConfig.get("db_encoded") is True else False
  17. self.textsEncoded = True if dataConfig.get("texts_encoded") is True else False
  18. self.keyRing = None
  19. if self.dbEncoded or self.textsEncoded:
  20. keyPath = self.dbPath + 'keys/'
  21. self.keyRing = dc.keyRing(keyPath, self.dbEncoded, self.textsEncoded)
  22. def query(self, queryData, pandas=False, dbFile=None):
  23. # PREPARE THE QUERY
  24. # Formerly, a query string was pre-generated outside and
  25. # sent directly
  26. # Now the method processes a query data OBJECT
  27. # and creates the query (which may be complex)
  28. # accordingly
  29. if self.dbEncoded:
  30. queryData = self.encodeQuery(queryData)
  31. queryToExecute = prepareQuery(queryData)
  32. # Get the connection to the DB
  33. dbFileLocal = dbFile if dbFile is not None else self.dbfileDefault
  34. if dbFileLocal is None:
  35. raise Exception("No db file specified with no default given -- can't execute query")
  36. #
  37. db = self.dbPath + dbFileLocal
  38. connection = sqlite3.connect(f"file:{db}?mode=ro", uri=True)
  39. # If the query is a simple string, execute it here:
  40. if type(queryToExecute)==str:
  41. if pandas:
  42. results = pd.read_sql(queryToExecute, connection)
  43. if(self.dbEncoded):
  44. results = self.db_results_decode_pandas(results)
  45. else:
  46. connection.row_factory = dict_factory
  47. queryReponse = connection.cursor().execute(queryToExecute)
  48. results = queryReponse.fetchall()
  49. if(self.dbEncoded):
  50. results = self.db_results_decode(results)
  51. else:
  52. # If not a string, 'queryToExecute' should be a method/function reference
  53. results = queryToExecute(connection, queryData)
  54. if(self.dbEncoded):
  55. results = self.db_results_decode_pandas(results)
  56. connection.close()
  57. return results
  58. def textQuery(self, queryData, getFormatting=False):
  59. try:
  60. sigla = queryData['sigla']
  61. minChar = queryData['minChar']
  62. maxChar = queryData['maxChar']
  63. except:
  64. return None
  65. with open(f"{self.dbPath}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
  66. file1.seek(4*minChar)
  67. cont = file1.read(maxChar-minChar)
  68. if self.textsEncoded and self.keyRing.textKeys.get(sigla) is not None:
  69. key = self.keyRing.textKeys.get(sigla)
  70. cont = dc.decodeTextByKey(cont, key, minChar-1)
  71. if not getFormatting:
  72. return cont
  73. else:
  74. return cont, self.getTextFormatting(sigla, minChar, maxChar)
  75. def getTextFormatting(self, sigla, minChar, maxChar):
  76. with open(f"{self.dbPath}/ftxt/{sigla}", 'rb') as file1:
  77. file1.seek(minChar-1)
  78. formatCodes = [char for char in file1.read(maxChar-minChar)]
  79. return formatCodes
  80. def encodeQuery(self, queryData):
  81. type = queryData.get('queryType')
  82. if type in ["forma", "lemma", "formaLemma", "lemmaForma"]:
  83. try:
  84. data = queryData['data']
  85. dataNorm = queryData['dataNorm']
  86. data = [dc.db_encode(self.keyRing.vettDictEnc, datum) for datum in data]
  87. dataNorm = [dc.db_encode(self.keyRing.vettDictEnc, datum) for datum in dataNorm]
  88. queryData['data'] = data
  89. queryData['dataNorm'] = dataNorm
  90. except KeyError as err:
  91. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  92. return queryData
  93. def db_results_decode(self, result):
  94. for row in result:
  95. for key, value in row.items():
  96. if isColumnToDecode(key):
  97. row[key] = dc.db_decode(self.keyRing.vettDictDec, value)
  98. return result
  99. def db_results_decode_pandas(self, df):
  100. for col in df.columns:
  101. if isColumnToDecode(col):
  102. df[col] = df[col].apply( lambda el: dc.db_decode(self.keyRing.vettDictDec, el) )
  103. return df
# Utilities
# Dict factory for non-Pandas queries
  106. def dict_factory(cursor, row):
  107. fields = [column[0] for column in cursor.description]
  108. return {key: value for key, value in zip(fields, row)}
# Does the column data (in returned results) need decoding?
  110. def isColumnToDecode(col):
  111. columns = ['forma', 'lemma', 'cat_gr', 'disambiguatore']
  112. if col in columns or col.startswith('highlight'):
  113. return True
  114. return False