Browse Source

comments on code

Leonardo Canova 1 year ago
parent
commit
0da627530e
1 changed files with 34 additions and 41 deletions
  1. 34 41
      flask_be/engine/basic_queries.py

+ 34 - 41
flask_be/engine/basic_queries.py

@@ -37,82 +37,75 @@ class basicQueries:
     
     #%% ha in input le funzioni di ricerca, cerca nell'occorrenziario i puntatori ai contesti e altri elementi ad essi associati. 
     #l'attributo type definisce il tipo di ricerca in input (0 per forme, 1 per lemmi, 2 per lemmi con opzione "mostra occorrenze non lemmatizzate")
-    def findtexts(self, type, df, index = None):
-        if index is None:
+    def findtexts(self, type, df, index=None):
+        # Check if the input is a DataFrame, and convert it if it is not
+        if not isinstance(df, pd.DataFrame):
             df = pd.DataFrame(df)
-        else:
+        # If an index is provided, select the rows that correspond to the index
+        if index is not None:
             if isinstance(index, range):
                 index = list(index)
             elif not isinstance(index, list):
                 index = [index]
-            df = pd.DataFrame(df.loc[index])
-        textlist = pd.DataFrame()
-        codList = list(df["cod"])
+            df = df.loc[index]
+        # Create a list of 'cod' values from the input DataFrame
+        codList = [row['cod'] for _, row in df.iterrows()]
+        # Get the 'listOcc' values
         listOcc = self.listOcc
-        
+        # Create a dictionary with query information
         queryData = {'queryType': 'texts', 'querySubtype': type, 'codList': codList}
-
-        for table in listOcc:
-            queryData['table'] = table
-            if type==2:
-                subQueryData = {'queryType': 'pfl', 'codList': codList}
-                subdf = self.queryHandler.query(subQueryData, pandas=True)
-                queryData['formCodList'] = list(subdf['codForma'])
-
-            extendequeryReponse = self.queryHandler.query(queryData, pandas=True)
-
-            textlist = pd.concat([textlist, extendequeryReponse])
-
+        # If 'type' is 2 (option "non lemmatizzate"), make an additional query to get form codes
+        # NOTE: At this stage of development, 'type 2' is the default for "lemma" searches (this could be changed).
+        if type == 2:
+            subQueryData = {'queryType': 'pfl', 'codList': codList}
+            subdf = self.queryHandler.query(subQueryData, pandas=True)
+            queryData['formCodList'] = list(subdf['codForma'])
+        # Make a query to each table in 'listOcc' and concatenate the results
+        queryResponses = [self.queryHandler.query(dict(queryData, table=table), pandas=True) for table in listOcc]
+        textlist = pd.concat(queryResponses)
         return textlist
 
     # %% ha in input findtexts, restituisce i contesti associati agli elementi localizzati.
     # Il range dei contesti è impostato di default a 30 parole e può essere rimodulato nel passaggio al contesto singolo.
     def findcontexts(self, textlist):
+        # Set the number of words to include in contexts
+        # NOTE: this could be changed to a number of sentences by reusing parts of the code from the "singlecontext" function
         parole = 31
+        # Get the 'listOcc' values
         listOcc = self.listOcc
+        # Initialize lists to store context information
         contexts = []
         formats = []
         minChar_list = []
         maxChar_list = []
+        # Iterate over each row in 'textlist'
         for _, row in textlist.iterrows():
+            # Get the 'sigla', 'ntx', and 'mappa' values from the row
             sigla = row["sigla"]
             ntxlocal = row["ntx"]
             mappalocal = row["mappa"]
+            # Create a list of query data dictionaries for each table in 'listOcc'
             queryData_list = [{'table': table, 'queryType': 'contexts', 'ntxlocal': ntxlocal, 'mappalocal': mappalocal, 'parole': parole} for table in listOcc]
+            # Make a query to each table in 'listOcc' and concatenate the results
             pointerlist = pd.concat([self.queryHandler.query(queryData, pandas=True) for queryData in queryData_list])
+            # Get the minimum and maximum character positions from the 'pointerlist' DataFrame
             minChar = pointerlist["pitxt"].min()
             maxChar = pointerlist["pitxt"].max()
+            # Create a dictionary with query data for 'textQuery'
             fileQueryData = {'sigla': sigla, 'minChar': minChar, 'maxChar': maxChar}
-            minChar_list.append(minChar)
-            maxChar_list.append(maxChar)
+            # Call 'textQuery' to get the context and format information
             cont, form = self.queryHandler.textQuery(fileQueryData, True)
-            contexts.append(cont)
-            formats.append(json.dumps(form))
-        ####################################
-        #   SHOULD BE FASTER               #
-        ####################################
-        '''for ind, row in textlist.iterrows():
-            sigla = row["sigla"]
-            queryData = {'queryType': 'contexts', 'ntxlocal': row["ntx"], 'mappalocal': row['mappa'], 'parole': parole}
-            pointerlist = pd.DataFrame()
-            for table in listOcc:
-                queryData['table'] = table
-                queryresponse = self.queryHandler.query(queryData, pandas=True)
-                pointerlist = pd.concat([pointerlist, queryresponse])
-                minChar = pointerlist["pitxt"].min()
-                maxChar = pointerlist["pitxt"].max()
-            fileQueryData = {'sigla': sigla, 'minChar': minChar, 'maxChar': maxChar}
+            # Append the results to the respective lists
             minChar_list.append(minChar)
             maxChar_list.append(maxChar)
-            cont, form = self.queryHandler.textQuery(fileQueryData, True)
             contexts.append(cont)
-            formats.append(json.dumps(form))'''
-        #####################################
+            formats.append(json.dumps(form))
+        # Add the context information to 'textlist' and reset the index
         textlist['piniz'] = minChar_list
         textlist['pifin'] = maxChar_list
         textlist['contesto'] = contexts
         textlist['format contesto'] = formats
-        return (textlist.reset_index(drop=True))
+        return textlist.reset_index(drop=True)
 
     # %% Ha in input findcontexts, associa i riferimenti bibliografici ad ogni contesto.
     def findbib(self, contexts):