Browse Source

DB+text decoding after first run of testing -- text decoding NOT WORKING yet

kora 1 year ago
parent
commit
8442c99441

+ 4 - 4
flask_be/Config/basic_config.json

@@ -1,11 +1,11 @@
 {
     "DATA_CONFIG": {
-        "dbPath": "db/first_db/",
-        "dbfile_default": "test1.db",
+        "dbPath": "db/ndg2.gat4/",
+        "dbfile_default": "corpus.db",
         "data_interface": "sqlite3",
         "dynamic_occ_tables": true,
-        "db_encoded": false,
-        "texts_encoded": false
+        "db_encoded": true,
+        "texts_encoded": true
     },
     "LOGGER_CONFIG": {
         "filename": "Progetto2023_BE.log",

+ 2 - 95
flask_be/engine/cooccorrenze.py

@@ -51,8 +51,8 @@ class cooccorrenze(basicQueries):
                     cond3 = ((row1['mappa'] - row2['mappa']) != 0) and ((row1['mappa'] - row2['mappa']) in range(-intervallo, intervallo)) if ordinate == 0 else ((row2['mappa'] - row1['mappa']) > 0) and ((row2['mappa'] - row1['mappa']) <= intervallo)
 
                     if cond1 and cond2 and cond3:
-                        row1[f'cod{cod}'] = textlist.loc[index2, 'cod'].iloc[1]
-                        print (type(textlist.loc[index2, 'cod'].iloc[1]))
+                        row1[f'cod{cod}'] = textlist['cod'].iloc[index2]
+#                        print (type(textlist.loc[index2, 'cod'].iloc[1]))
                         cod_cols.append(f'cod{cod}')
                         df_new = pd.concat([df_new, row1.to_frame().T])
 
@@ -70,96 +70,3 @@ class cooccorrenze(basicQueries):
         return  highlights.to_dict(orient='records') # is this slow? CHECK!
         #return clean.to_json(orient='records') # possibilità alternativa -- molte opzioni possibili!
 
-'''
-        for ricerca, tipo, espansa, raddoppiata in listaricerche[1:]:
-            if tipo == 0:
-                search = self.sendBasicQuery(ricerca, 'forma', espansa, raddoppiata, pandas=True)
-                textlist = contesti_multipli.findtexts(0, search)
-                print('vediamo un po')
-                print(listatesti)
-                print(textlist)
-                print(search)
-                df_new = pd.DataFrame(columns=list(listatesti.columns))
-                if periodo == 0:
-                    for index1, row1 in listatesti.iterrows():
-                        for index2, row2 in textlist.iterrows():
-                            if ordinate == 0:
-                                if row1['ntx'] == row2['ntx'] and (row1['mappa'] - row2['mappa']) in range(-intervallo,intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                            elif ordinate == 1:
-                                if row1['ntx'] == row2['ntx'] and (row1['mappa'] - row2['mappa']) in range(intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                elif periodo == 1: 
-                    for index1, row1 in listatesti.iterrows():
-                        for index2, row2 in textlist.iterrows():
-                            if ordinate == 0:
-                                if row1['ntx'] == row2['ntx'] and row1['numperiod'] == row2['numperiod'] and (row1['mappa'] - row2['mappa']) in range(-intervallo,intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                            elif ordinate == 1:
-                                if row1['ntx'] == row2['ntx'] and row1['numperiod'] == row2['numperiod'] and (row1['mappa'] - row2['mappa']) in range(intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                listatesti = df_new
-            elif tipo == 1:
-                search = self.sendBasicQuery(ricerca, 'lemma', espansa, raddoppiata, pandas=True)
-                textlist = self.findtexts(1, search)
-                print('vediamo un po')
-                print(listatesti)
-                print(textlist)
-                print(search)
-                df_new = pd.DataFrame(columns=list(listatesti.columns))
-                if periodo == 0:
-                    for index1, row1 in listatesti.iterrows():
-                        for index2, row2 in textlist.iterrows():
-                            if ordinate == 0:
-                                if row1['ntx'] == row2['ntx'] and (row1['mappa'] - row2['mappa']) in range(-intervallo,intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                            if ordinate == 1:
-                                if row1['ntx'] == row2['ntx'] and (row1['mappa'] - row2['mappa']) in range(intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                elif periodo == 1: 
-                    for index1, row1 in listatesti.iterrows():
-                        for index2, row2 in textlist.iterrows():
-                            if ordinate == 0:
-                                if row1['ntx'] == row2['ntx'] and row1['numperiod'] == row2['numperiod'] and (row1['mappa'] - row2['mappa']) in range(-intervallo,intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                            if ordinate == 1:
-                                if row1['ntx'] == row2['ntx'] and row1['numperiod'] == row2['numperiod'] and (row1['mappa'] - row2['mappa']) in range(intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                listatesti = df_new
-            elif tipo == 2:
-                search = self.sendBasicQuery(ricerca, 'lemma', espansa, raddoppiata, pandas=True)
-                textlist = self.findtexts(2, search)
-                print('vediamo un po')
-                print(listatesti)
-                print(textlist)
-                print(search)
-                df_new = pd.DataFrame(columns=list(listatesti.columns))
-                if periodo == 0:
-                    for index1, row1 in listatesti.iterrows():
-                        for index2, row2 in textlist.iterrows():
-                            if ordinate == 0:
-                                if row1['ntx'] == row2['ntx'] and (row1['mappa'] - row2['mappa']) in range(-intervallo,intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                            if ordinate == 1:
-                                if row1['ntx'] == row2['ntx'] and (row1['mappa'] - row2['mappa']) in range(intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                elif periodo == 1: 
-                    for index1, row1 in listatesti.iterrows():
-                        for index2, row2 in textlist.iterrows():
-                            if ordinate == 0:
-                                if row1['ntx'] == row2['ntx'] and row1['numperiod'] == row2['numperiod'] and (row1['mappa'] - row2['mappa']) in range(-intervallo,intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                            if ordinate == 1:
-                                if row1['ntx'] == row2['ntx'] and row1['numperiod'] == row2['numperiod'] and (row1['mappa'] - row2['mappa']) in range(intervallo):
-                                    df_new = pd.concat([df_new, row1.to_frame().T])
-                listatesti = df_new
-
-        if listatesti.empty:
-            return []
-
-        contexts = self.findcontexts(listatesti, 30)
-        bibliocontexts = self.findbib(contexts)
-        clean = bibliocontexts.drop_duplicates()
-        #return clean
-        return clean.to_dict(orient='records') # is this slow? CHECK!
-        #return clean.to_json(orient='records') # possibilità alternativa -- molte opzioni possibili!'''

+ 1 - 1
flask_be/engine/parsing_utilities.py

@@ -16,7 +16,7 @@ def combinations(s):
 #%% funzione interprete, sta alla base di ogni ricerca
 ## DA MODIFICARE PER DB CIFRATO
 def interpreter (data):
-    clean_data= "'"+data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''") +"'"
+    clean_data= data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''")
     return combinations(clean_data)    
 
 # %% funzione iniziale raddoppiata, è chiamata dalle funzioni di ricerca con iniziale raddoppiata

+ 2 - 0
flask_be/interface_sqlite3/encdec/de_code.py

@@ -31,6 +31,8 @@ class keyRing:
             except:
                 pass
 
+        return keys
+
     def getKeyByCode(self, keyFile):
         with open(self.keyPath + keyFile, 'r') as file1:
             reader = csv.reader(file1)

+ 21 - 19
flask_be/interface_sqlite3/query_handlers.py

@@ -20,7 +20,7 @@ class queryHandlerBasicSqlite(QueryHandlerAbstract):
         # Encoding
         self.dbEncoded = True if dataConfig.get("db_encoded") is True else False
         self.textsEncoded = True if dataConfig.get("texts_encoded") is True else False
-        self.keyring = None
+        self.keyRing = None
         if self.dbEncoded or self.textsEncoded:
             keyPath = self.dbPath + 'keys/'
             self.keyRing = dc.keyRing(keyPath, self.dbEncoded, self.textsEncoded)
@@ -70,9 +70,9 @@ class queryHandlerBasicSqlite(QueryHandlerAbstract):
             file1.seek(4*minChar)
             cont = file1.read(maxChar-minChar)
         
-        if self.textsEncoded and self.keyRing.get(sigla) is not None:
-            key = self.keyRing.get(sigla)
-            cont = dc.decodeTextByKey(cont, key)
+        if self.textsEncoded and self.keyRing.textKeys.get(sigla) is not None:
+            key = self.keyRing.textKeys.get(sigla)
+            cont = dc.decodeTextByKey(cont, key, minChar)
         
         return cont
     
@@ -82,18 +82,20 @@ class queryHandlerBasicSqlite(QueryHandlerAbstract):
             try:
                 data = queryData['data']
                 dataNorm = queryData['dataNorm']
+                data = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in data]
+                dataNorm = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in dataNorm]
+                queryData['data'] = data
+                queryData['dataNorm'] = dataNorm
             except KeyError as err:
                 raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        data = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in data]
-        dataNorm = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in dataNorm]
-        queryData['data'] = data
-        queryData['dataNorm'] = dataNorm
+        
+        return queryData
 
     def db_results_decode(self, result):
         for row in result:
-            for key, value in row:
+            for key, value in row.items():
                 if isColumnToDecode(key):
-                    value = dc.db_decode(self.keyRing.vettSpec, value)
+                    row[key] = dc.db_decode(self.keyRing.vettSpec, value)
         return result
 
     def db_results_decode_pandas(self, df):
@@ -121,11 +123,11 @@ def prepareQueryString(queryData):
             dataNorm = queryData['dataNorm']
         except KeyError as err:
             raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        joinedQueryData = " OR spec LIKE ".join(data)
+        joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {joinedQueryData} ORDER BY idfor"
         else:
-            joinedQueryDataNorm = " OR norm LIKE ".join(dataNorm)
+            joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
             return f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idfor"
 
     ###################
@@ -135,11 +137,11 @@ def prepareQueryString(queryData):
             dataNorm = queryData['dataNorm']
         except KeyError as err:
             raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        joinedQueryData = " OR spec LIKE ".join(data)
+        joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {joinedQueryData} ORDER BY idlem"
         else:
-            joinedQueryDataNorm = " OR norm LIKE ".join(dataNorm)
+            joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
             return f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idlem"
 
     ########################
@@ -149,12 +151,12 @@ def prepareQueryString(queryData):
             dataNorm = queryData['dataNorm']
         except KeyError as err:
             raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        joinedQueryData = " OR form.spec LIKE ".join(data)
+        joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {joinedQueryData} ORDER BY lem.idlem"
         else:
-            joinedQueryData = " OR lem.spec LIKE ".join(data)
-            joinedQueryDataNorm = " OR lem.norm LIKE ".join(dataNorm)
+            joinedQueryData = "'" + "' OR lem.spec LIKE '".join(data) + "'"
+            joinedQueryDataNorm = "'" + "' OR lem.norm LIKE '".join(dataNorm) + "'"
             return f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {joinedQueryData}) OR (lem.norm LIKE {joinedQueryDataNorm}) ORDER BY lem.idlem"
 
     ########################
@@ -164,11 +166,11 @@ def prepareQueryString(queryData):
             dataNorm = queryData['dataNorm']
         except KeyError as err:
             raise KeyError('Missing required data for query type ' + type + ': ' + str(err))
-        joinedQueryData = " OR form.spec LIKE ".join(data)
+        joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
         if len(dataNorm)==0:
             return f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {joinedQueryData} ORDER BY form.idfor"
         else:
-            joinedQueryDataNorm = " OR form.norm LIKE ".join(dataNorm)
+            joinedQueryDataNorm = "'" + "' OR form.norm LIKE '".join(dataNorm) + "'"
             return f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {joinedQueryData}) OR (form.norm LIKE {joinedQueryDataNorm}) ORDER BY form.idfor"
 
     #################