|
@@ -1,21 +1,39 @@
|
|
|
import sqlite3
|
|
|
import pandas as pd
|
|
|
+import interface_sqlite3.encdec.de_code as dc
|
|
|
|
|
|
from engine.data_interface.QueryHandlerAbstract import QueryHandlerAbstract
|
|
|
|
|
|
|
|
|
class queryHandlerBasicSqlite(QueryHandlerAbstract):
|
|
|
|
|
|
- def __init__(self, dbPath, dbfileDefault):
|
|
|
def __init__(self, dataConfig):
    """Configure the handler from a data-provider configuration mapping.

    Args:
        dataConfig: Mapping with the required keys ``'dbPath'`` and
            ``'dbfile_default'``. The optional flags ``'db_encoded'`` and
            ``'texts_encoded'`` are honoured only when their value is
            exactly ``True``.

    Raises:
        Exception: If a required key cannot be read from ``dataConfig``.
    """
    try:
        dbPath = dataConfig['dbPath']
        dbfileDefault = dataConfig['dbfile_default']
    except (KeyError, TypeError) as err:
        # Keep the generic Exception type callers already see, but preserve
        # the underlying cause for debugging.
        raise Exception('Missing required input in Data Provider Configuration') from err

    self.dbPath = dbPath
    self.dbfileDefault = dbfileDefault

    self.dbEncoded = dataConfig.get("db_encoded") is True
    self.textsEncoded = dataConfig.get("texts_encoded") is True

    # The attribute read everywhere else is ``keyRing``; always define it so
    # it exists even when no encoding is configured.
    self.keyRing = None
    if self.dbEncoded or self.textsEncoded:
        # NOTE(review): plain concatenation assumes dbPath ends with a path
        # separator -- confirm against the configuration files.
        keyPath = self.dbPath + 'keys/'
        self.keyRing = dc.keyRing(keyPath, self.dbEncoded, self.textsEncoded)
|
|
|
+
|
|
|
|
|
|
def query(self, queryData, pandas=False, dbFile=None):
|
|
|
|
|
|
|
|
|
-
|
|
|
+
|
|
|
|
|
|
|
|
|
+ if self.dbEncoded:
|
|
|
+ queryData = self.encodeQuery(queryData)
|
|
|
queryString = prepareQueryString(queryData)
|
|
|
|
|
|
dbfileLocal = dbFile if dbFile is not None else self.dbfileDefault
|
|
@@ -26,13 +44,18 @@ class queryHandlerBasicSqlite(QueryHandlerAbstract):
|
|
|
|
|
|
if pandas:
|
|
|
results = pd.read_sql(queryString, connection)
|
|
|
+ if(self.dbEncoded):
|
|
|
+ results = self.db_results_decode_pandas(results)
|
|
|
|
|
|
else:
|
|
|
connection.row_factory = dict_factory
|
|
|
queryReponse = connection.cursor().execute(queryString)
|
|
|
results = queryReponse.fetchall()
|
|
|
+ if(self.dbEncoded):
|
|
|
+ results = self.db_results_decode(results)
|
|
|
|
|
|
connection.close()
|
|
|
+
|
|
|
return results
|
|
|
|
|
|
def textQuery(self, queryData):
|
|
@@ -46,7 +69,38 @@ class queryHandlerBasicSqlite(QueryHandlerAbstract):
|
|
|
with open(f"{self.dbPath}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
|
|
|
file1.seek(4*minChar)
|
|
|
cont = file1.read(maxChar-minChar)
|
|
|
- return cont
|
|
|
+
|
|
|
+ if self.textsEncoded and self.keyRing.get(sigla) is not None:
|
|
|
+ key = self.keyRing.get(sigla)
|
|
|
+ cont = dc.decodeTextByKey(cont, key)
|
|
|
+
|
|
|
+ return cont
|
|
|
+
|
|
|
def encodeQuery(self, queryData):
    """Encode the search terms of a query and return the query data.

    For the query types ``forma``, ``lemma``, ``formaLemma`` and
    ``lemmaForma`` the lists stored under ``'data'`` and ``'dataNorm'`` are
    replaced (in place) by their encoded counterparts. Any other query type
    is passed through untouched.

    Args:
        queryData: Mapping describing the query; ``'queryType'`` selects the
            kind of query.

    Returns:
        The same ``queryData`` object, so the result can be assigned back by
        the caller.

    Raises:
        KeyError: If ``'data'`` or ``'dataNorm'`` is missing for a query
            type that requires them.
    """
    queryType = queryData.get('queryType')
    if queryType in ["forma", "lemma", "formaLemma", "lemmaForma"]:
        try:
            data = queryData['data']
            dataNorm = queryData['dataNorm']
        except KeyError as err:
            raise KeyError(
                'Missing required data for query type ' + queryType + ': ' + str(err)
            ) from err
        vettSpec = self.keyRing.vettSpec
        queryData['data'] = [dc.db_encode(vettSpec, datum) for datum in data]
        queryData['dataNorm'] = [dc.db_encode(vettSpec, datum) for datum in dataNorm]
    # Callers assign the result back to their query variable, so the
    # (possibly updated) mapping must always be returned.
    return queryData
|
|
|
+
|
|
|
def db_results_decode(self, result):
    """Decode the encoded columns of a list of row dictionaries.

    Args:
        result: List of rows, each a dict mapping column name to value.

    Returns:
        The same list, with the value of every column selected by
        ``isColumnToDecode`` replaced in place by its decoded form.
    """
    for row in result:
        # Iterate over items (not bare keys) and write the decoded value back
        # into the row; rebinding a loop variable would leave the row as-is.
        # Replacing values of existing keys does not resize the dict, so it
        # is safe while iterating.
        for key, value in row.items():
            if isColumnToDecode(key):
                row[key] = dc.db_decode(self.keyRing.vettSpec, value)
    return result
|
|
|
+
|
|
|
def db_results_decode_pandas(self, df):
    """Decode the encoded columns of a query result held in a DataFrame.

    Every column whose label is selected by ``isColumnToDecode`` is replaced
    by the element-wise decoded column; the frame is modified in place and
    returned.
    """
    def _decode_cell(cell):
        # Resolved lazily so the key ring is only touched when a cell is
        # actually decoded.
        return dc.db_decode(self.keyRing.vettSpec, cell)

    for label in df.columns:
        if not isColumnToDecode(label):
            continue
        df[label] = df[label].apply(_decode_cell)
    return df
|
|
|
|
|
|
|
|
|
|
|
@@ -189,14 +243,17 @@ def prepareQueryString(queryData):
|
|
|
|
|
|
else:
|
|
|
raise ValueError('Unrecognized query type: ' + type)
|
|
|
-
|
|
|
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
def dict_factory(cursor, row):
    """Row factory that turns a result row into a ``{column_name: value}`` dict.

    The column names are taken from the first element of each entry in
    ``cursor.description`` and paired positionally with the values in ``row``.
    """
    column_names = (entry[0] for entry in cursor.description)
    return dict(zip(column_names, row))
|
|
|
|
|
|
+
|
|
|
+
|
|
|
def isColumnToDecode(col):
    """Tell whether a result column holds encoded text.

    A column qualifies when its name is one of ``forma``, ``lemma``,
    ``cat_gr`` or ``disambiguatore``, or when the name starts with
    ``highlight``.
    """
    encoded_names = ('forma', 'lemma', 'cat_gr', 'disambiguatore')
    if col in encoded_names:
        return True
    return bool(col.startswith('highlight'))
|