1 year ago · 0da627530e
--- a/flask_be/engine/basic_queries.py
+++ b/flask_be/engine/basic_queries.py
@@ -37,82 +37,75 @@ class basicQueries:
 
				     
			
 
				     #%% ha in input le funzioni di ricerca, cerca nell'occorrenziario i puntatori ai contesti e altri elementi ad essi associati. 
			
 
				     #l'attributo type definisce il tipo di ricerca in input (0 per forme, 1 per lemmi, 2 per lemmi con opzione "mostra occorrenze non lemmatizzate")
			
 
				-    def findtexts(self, type, df, index = None):
			
 
				-        if index is None:
			
 
				+    def findtexts(self, type, df, index=None):
			
 
				+        # Check if the input is a DataFrame, and convert it if it is not
			
 
				+        if not isinstance(df, pd.DataFrame):
			
 
				             df = pd.DataFrame(df)
			
 
				-        else:
			
 
				+        # If an index is provided, select the rows that correspond to the index
			
 
				+        if index is not None:
			
 
				             if isinstance(index, range):
			
 
				                 index = list(index)
			
 
				             elif not isinstance(index, list):
			
 
				                 index = [index]
			
 
				-            df = pd.DataFrame(df.loc[index])
			
 
				-        textlist = pd.DataFrame()
			
 
				-        codList = list(df["cod"])
			
 
				+            df = df.loc[index]
			
 
				+        # Create a list of 'cod' values from the input DataFrame
			
 
				+        codList = [row['cod'] for _, row in df.iterrows()]
			
 
				+        # Get the 'listOcc' values
			
 
				         listOcc = self.listOcc
			
 
				-        
			
 
				+        # Create a dictionary with query information
			
 
				         queryData = {'queryType': 'texts', 'querySubtype': type, 'codList': codList}
			
 
				-
			
 
				-        for table in listOcc:
			
 
				-            queryData['table'] = table
			
 
				-            if type==2:
			
 
				-                subQueryData = {'queryType': 'pfl', 'codList': codList}
			
 
				-                subdf = self.queryHandler.query(subQueryData, pandas=True)
			
 
				-                queryData['formCodList'] = list(subdf['codForma'])
			
 
				-
			
 
				-            extendequeryReponse = self.queryHandler.query(queryData, pandas=True)
			
 
				-
			
 
				-            textlist = pd.concat([textlist, extendequeryReponse])
			
 
				-
			
 
				+        # If 'type' is 2 (option "non lemmatizzate"), make an additional query to get form codes
			
 
				+        # NOTE: At this stage of development 'type 2' is the default for "lemma" searches (this could be changed).
			
 
				+        if type == 2:
			
 
				+            subQueryData = {'queryType': 'pfl', 'codList': codList}
			
 
				+            subdf = self.queryHandler.query(subQueryData, pandas=True)
			
 
				+            queryData['formCodList'] = list(subdf['codForma'])
			
 
				+        # Make a query to each table in 'listOcc' and concatenate the results
			
 
				+        queryResponses = [self.queryHandler.query(dict(queryData, table=table), pandas=True) for table in listOcc]
			
 
				+        textlist = pd.concat(queryResponses)
			
 
				         return textlist
			
 
				 
			
 
				     # %% ha in input findtexts, restituisce i contesti associati agli elementi localizzati.
			
 
				     # Il range dei contesti è impostato di default a 30 parole e può essere rimodulato nel passaggio al contesto singolo.
			
 
				     def findcontexts(self, textlist):
			
 
				+        # Set the number of words to include in contexts
			
 
				+        # NOTE: this could be changed to a number of sentences by reusing parts of the code from the "singlecontext" function
			
 
				         parole = 31
			
 
				+        # Get the 'listOcc' values
			
 
				         listOcc = self.listOcc
			
 
				+        # Initialize lists to store context information
			
 
				         contexts = []
			
 
				         formats = []
			
 
				         minChar_list = []
			
 
				         maxChar_list = []
			
 
				+        # Iterate over each row in 'textlist'
			
 
				         for _, row in textlist.iterrows():
			
 
				+            # Get the 'sigla', 'ntx', and 'mappa' values from the row
			
 
				             sigla = row["sigla"]
			
 
				             ntxlocal = row["ntx"]
			
 
				             mappalocal = row["mappa"]
			
 
				+            # Create a list of query data dictionaries for each table in 'listOcc'
			
 
				             queryData_list = [{'table': table, 'queryType': 'contexts', 'ntxlocal': ntxlocal, 'mappalocal': mappalocal, 'parole': parole} for table in listOcc]
			
 
				+            # Make a query to each table in 'listOcc' and concatenate the results
			
 
				             pointerlist = pd.concat([self.queryHandler.query(queryData, pandas=True) for queryData in queryData_list])
			
 
				+            # Get the minimum and maximum character positions from the 'pointerlist' DataFrame
			
 
				             minChar = pointerlist["pitxt"].min()
			
 
				             maxChar = pointerlist["pitxt"].max()
			
 
				+            # Create a dictionary with query data for 'textQuery'
			
 
				             fileQueryData = {'sigla': sigla, 'minChar': minChar, 'maxChar': maxChar}
			
 
				-            minChar_list.append(minChar)
			
 
				-            maxChar_list.append(maxChar)
			
 
				+            # Call 'textQuery' to get the context and format information
			
 
				             cont, form = self.queryHandler.textQuery(fileQueryData, True)
			
 
				-            contexts.append(cont)
			
 
				-            formats.append(json.dumps(form))
			
 
				-        ####################################
			
 
				-        #   SHOULD BE FASTER               #
			
 
				-        ####################################
			
 
				-        '''for ind, row in textlist.iterrows():
			
 
				-            sigla = row["sigla"]
			
 
				-            queryData = {'queryType': 'contexts', 'ntxlocal': row["ntx"], 'mappalocal': row['mappa'], 'parole': parole}
			
 
				-            pointerlist = pd.DataFrame()
			
 
				-            for table in listOcc:
			
 
				-                queryData['table'] = table
			
 
				-                queryresponse = self.queryHandler.query(queryData, pandas=True)
			
 
				-                pointerlist = pd.concat([pointerlist, queryresponse])
			
 
				-                minChar = pointerlist["pitxt"].min()
			
 
				-                maxChar = pointerlist["pitxt"].max()
			
 
				-            fileQueryData = {'sigla': sigla, 'minChar': minChar, 'maxChar': maxChar}
			
 
				+            # Append the results to the respective lists
			
 
				             minChar_list.append(minChar)
			
 
				             maxChar_list.append(maxChar)
			
 
				-            cont, form = self.queryHandler.textQuery(fileQueryData, True)
			
 
				             contexts.append(cont)
			
 
				-            formats.append(json.dumps(form))'''
			
 
				-        #####################################
			
 
				+            formats.append(json.dumps(form))
			
 
				+        # Add the context information to 'textlist' and reset the index
			
 
				         textlist['piniz'] = minChar_list
			
 
				         textlist['pifin'] = maxChar_list
			
 
				         textlist['contesto'] = contexts
			
 
				         textlist['format contesto'] = formats
			
 
				-        return (textlist.reset_index(drop=True))
			
 
				+        return textlist.reset_index(drop=True)
			
 
				 
			
 
				     # %% Ha in input findcontexts, associa i riferimenti bibliografici ad ogni contesto.
			
 
				     def findbib(self, contexts):