Browse Source

Introduce highlight function based on indexes (each highlight is a [pitxt - minChar, elemlen] pair)

Leonardo Canova 1 year ago
parent
commit
a8a2063730
2 changed files with 24 additions and 14 deletions
  1. 24 4
      flask_be/engine/basic_queries.py
  2. 0 10
      test_suite/test/test_occorrenzario_pandas.py

+ 24 - 4
flask_be/engine/basic_queries.py

@@ -69,6 +69,8 @@ class basicQueries:
         parole = 31
         listOcc = self.listOcc
         contexts = []
+        minChar_list = []
+        maxChar_list = []
         for ind, row in textlist.iterrows():
             sigla = row["sigla"]
             queryData = {'queryType': 'contexts', 'ntxlocal': row["ntx"], 'mappalocal': row['mappa'], 'parole': parole}
@@ -77,9 +79,15 @@ class basicQueries:
                 queryData['table'] = table
                 queryresponse = self.queryHandler.query(queryData, pandas=True)
                 pointerlist = pd.concat([pointerlist, queryresponse])
-            fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(), 'maxChar': pointerlist["pitxt"].max()}
+                minChar = pointerlist["pitxt"].min()
+                maxChar = pointerlist["pitxt"].max()
+            fileQueryData = {'sigla': sigla, 'minChar': minChar, 'maxChar': maxChar}
+            minChar_list.append(minChar)
+            maxChar_list.append(maxChar)
             cont = self.queryHandler.textQuery(fileQueryData)
             contexts.append(cont)
+        textlist['minChar'] = minChar_list
+        textlist['maxChar'] = maxChar_list
         textlist['contesto'] = contexts
         return (textlist.reset_index(drop=True))
 
@@ -133,9 +141,8 @@ class basicQueries:
         clean_df = chrono.reindex(columns=cols + list(chrono.columns.difference(cols)))
         return (clean_df.reset_index(drop=True))
     
-    #%% funzione stringa/stringhe da evidenziare
-
-    def highlight (self, bibliocontexts):
+    #%% function for the string(s) to highlight ####### OLD #######
+    '''def highlight (self, bibliocontexts):
         index = 0
         for col in bibliocontexts.columns:
             forme = []
@@ -152,6 +159,19 @@ class basicQueries:
         highlight_cols = bibliocontexts.filter(regex='^highlight')
         create_array = lambda row: highlight_cols.loc[row.name].values.tolist()
         bibliocontexts['highlights_combined'] = highlight_cols.apply(create_array, axis=1)
+        return bibliocontexts'''
+    
+    #%% function computing the indexes to highlight in the text
+
+    def highlight (self, bibliocontexts):
+        index = 0
+        for col in bibliocontexts.columns:
+            if col.startswith('pitxt'):
+                if index == 0:
+                    bibliocontexts['highlight'] = bibliocontexts.apply (lambda row: [row['pitxt'] - row['minChar'], row['elemlen']], axis=1)
+                else:
+                    bibliocontexts['highlight_'+str(index)] = bibliocontexts.apply (lambda row: [row['pitxt_'+str(index)] - row['minChar'], row['elemlen_'+str(index)]], axis=1)
+                index += 1
         return bibliocontexts
 
     #%% funzione contesti multipli cumulativa

+ 0 - 10
test_suite/test/test_occorrenzario_pandas.py

@@ -59,10 +59,6 @@ def findcontexts(textlist, listOcc, path):
     ampiezzacontesto = 31
     con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
     for ind, row in textlist.iterrows():
-        columns_starting_with_pitxt = row.filter(regex='^pitxt')
-        pitxtLocal = columns_starting_with_pitxt.values.tolist()
-        columns_starting_with_elemlen = row.filter(regex='^elemlen')
-        elemlenLocal = columns_starting_with_elemlen.values.tolist()
         sigla = row["sigla"]
         periodlocal = row["numperiod"]
         ntxlocal = row["ntx"]
@@ -77,12 +73,6 @@ def findcontexts(textlist, listOcc, path):
             file_contents = file1.read()
             cont = file_contents[pointerlist["pitxt"].min():pointerlist["pitxt"].max()]
             print (cont)
-            for i in range(len(pitxtLocal)):
-                start_index = pitxtLocal[i]
-                end_index = start_index + elemlenLocal[i]
-                highlighted_substring = "\033[1m" + file_contents[start_index:end_index] + "\033[0m"
-                cont = cont[:start_index-pointerlist["pitxt"].min()] + highlighted_substring + cont[end_index-pointerlist["pitxt"].min():]
-                #print (cont)
             contexts.append(cont)
     textlist['contesto'] = contexts
     return (textlist.reset_index(drop=True))