query_handlers.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. import sqlite3
  2. import polars as pl
  3. import interface_sqlite3.encdec.de_code as dc
  4. from interface_sqlite3.actual_queries import prepareQuery
  5. # First version
  6. class queryHandlerBasicSqlite:
  7. def __init__(self, dataConfig):
  8. try:
  9. dbPath = dataConfig['dbPath']
  10. dbfileDefault = dataConfig.get('dbfile_default')
  11. except:
  12. raise Exception('Missing required input in Data Provider Configuration')
  13. self.dbPath = dbPath
  14. self.dbfileDefault = dbfileDefault
  15. # Encoding
  16. self.dbEncoded = True if dataConfig.get("db_encoded") is True else False
  17. self.textsEncoded = True if dataConfig.get("texts_encoded") is True else False
  18. self.keyRing = None
  19. if self.dbEncoded or self.textsEncoded:
  20. keyPath = self.dbPath + 'keys/'
  21. self.keyRing = dc.keyRing(keyPath, self.dbEncoded, self.textsEncoded)
  22. def query(self, queryData, polars=False, dbFile=None):
  23. # PREPARE THE QUERY
  24. # Formerly, a query string was pre-generated outside and
  25. # sent directly
  26. # Now the method processes a query data OBJECT
  27. # and creates the query (which may be complex)
  28. # accordingly
  29. if self.dbEncoded:
  30. queryData = self.encodeQuery(queryData)
  31. queryToExecute = prepareQuery(queryData)
  32. # Get the connection to the DB
  33. dbFileLocal = dbFile if dbFile is not None else self.dbfileDefault
  34. if dbFileLocal is None:
  35. raise Exception("No db file specified with no default given -- can't execute query")
  36. db = self.dbPath + dbFileLocal
  37. connection = sqlite3.connect(f"file:{db}?mode=ro", uri=True)
  38. # If the query is a simple string, execute it here:
  39. if type(queryToExecute) == str:
  40. if polars:
  41. results = pl.read_sql(queryToExecute, connection)
  42. if self.dbEncoded:
  43. results = self.db_results_decode_polars(results)
  44. else:
  45. connection.row_factory = dict_factory
  46. queryReponse = connection.cursor().execute(queryToExecute)
  47. results = queryReponse.fetchall()
  48. if self.dbEncoded:
  49. results = self.db_results_decode(results)
  50. else:
  51. # If not a string, 'queryToExecute' should be a method/function reference
  52. results = queryToExecute(connection, queryData)
  53. if self.dbEncoded:
  54. results = self.db_results_decode_polars(results)
  55. connection.close()
  56. return results
  57. def textQuery(self, queryData, getFormatting=False):
  58. try:
  59. sigla = queryData['sigla']
  60. minChar = queryData['minChar']
  61. maxChar = queryData['maxChar']
  62. except:
  63. return None
  64. with open(f"{self.dbPath}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
  65. file1.seek(4*minChar)
  66. cont = file1.read(maxChar-minChar)
  67. if self.textsEncoded and self.keyRing.textKeys.get(sigla) is not None:
  68. key = self.keyRing.textKeys.get(sigla)
  69. cont = dc.decodeTextByKey(cont, key, minChar-1)
  70. if not getFormatting:
  71. return cont
  72. else:
  73. return cont, self.getTextFormatting(sigla, minChar, maxChar)
  74. def getTextFormatting(self, sigla, minChar, maxChar):
  75. with open(f"{self.dbPath}/ftxt/{sigla}", 'rb') as file1:
  76. file1.seek(minChar-1)
  77. formatCodes = [char for char in file1.read(maxChar-minChar)]
  78. return formatCodes
  79. def encodeQuery(self, queryData):
  80. type = queryData.get('queryType')
  81. if type in ["forma", "lemma", "formaLemma", "lemmaForma"]:
  82. try:
  83. data = queryData['data']
  84. dataNorm = queryData['dataNorm']
  85. data = [dc.db_encode(self.keyRing.vettDictEnc, datum) for datum in data]
  86. dataNorm = [dc.db_encode(self.keyRing.vettDictEnc, datum) for datum in dataNorm]
  87. queryData['data'] = data
  88. queryData['dataNorm'] = dataNorm
  89. except KeyError as err:
  90. raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
  91. return queryData
  92. def db_results_decode(self, result):
  93. for row in result:
  94. for key, value in row.items():
  95. if isColumnToDecode(key):
  96. row[key] = dc.db_decode(self.keyRing.vettDictDec, value)
  97. return result
  98. def db_results_decode_polars(self, df):
  99. for col in df.columns:
  100. if isColumnToDecode(col):
  101. df = df.with_column(pl.col(col).apply(lambda el: dc.db_decode(self.keyRing.vettDictDec, el)))
  102. return df
  103. # Utilities
# Dict factory for non-polars queries (rows returned as plain dicts)
  105. def dict_factory(cursor, row):
  106. fields = [column[0] for column in cursor.description]
  107. return {key: value for key, value in zip(fields, row)}
  108. # Does the column data (in returned results) need decoding?
  109. def isColumnToDecode(col):
  110. columns = ['forma', 'lemma', 'cat_gr', 'disambiguatore']
  111. if col in columns or col.startswith('highlight'):
  112. return True
  113. return False