Browse Source

Make the co-occurrences function slightly more efficient -- there is still room for improvement

francesco 1 year ago
parent
commit
25ef9b08fd
1 changed file with 19 additions and 15 deletions
  1. 19 15
      flask_be/engine/cooccorrenze.py

+ 19 - 15
flask_be/engine/cooccorrenze.py

@@ -46,18 +46,17 @@ class cooccorrenze(basicQueries):
             ##########################################
             # KORA: questo blocco sembra troppo lento!
             ##########################################
-            df_new = pd.DataFrame(columns=list(listatesti.columns))
-            for index1, row1 in listatesti.iterrows():
-                for index2, row2 in textlist.iterrows():
-                    cond1 = row1['ntx'] == row2['ntx']
-                    cond2 = row1['numperiod'] == row2['numperiod'] if periodo == 1 else True
-                    cond3 = ((row1['mappa'] - row2['mappa']) != 0) and ((row1['mappa'] - row2['mappa']) in range(-intervallo, intervallo)) if ordinate == 0 else ((row2['mappa'] - row1['mappa']) > 0) and ((row2['mappa'] - row1['mappa']) <= intervallo)
-
-                    if cond1 and cond2 and cond3:
-                        row1[f'cod{cod}'] = textlist['cod'].iloc[index2]
-#                        print (type(textlist.loc[index2, 'cod'].iloc[1]))
-                        cod_cols.append(f'cod{cod}')
-                        df_new = pd.concat([df_new, row1.to_frame().T])
+#             df_new = pd.DataFrame(columns=list(listatesti.columns))
+#             for index1, row1 in listatesti.iterrows():
+#                 for index2, row2 in textlist.iterrows():
+#                     cond1 = row1['ntx'] == row2['ntx']
+#                     cond2 = row1['numperiod'] == row2['numperiod'] if periodo == 1 else True
+#                     cond3 = ((row1['mappa'] - row2['mappa']) != 0) and ((row1['mappa'] - row2['mappa']) in range(-intervallo, intervallo)) if ordinate == 0 else ((row2['mappa'] - row1['mappa']) > 0) and ((row2['mappa'] - row1['mappa']) <= intervallo)
+
+#                     if cond1 and cond2 and cond3:
+#                         row1[f'cod{cod}'] = textlist['cod'].iloc[index2]
+#                         cod_cols.append(f'cod{cod}')
+#                         df_new = pd.concat([df_new, row1.to_frame().T])
 
             ##########################################
             # KORA: fino a qui
@@ -70,11 +69,16 @@ class cooccorrenze(basicQueries):
             for index1, row1 in listatesti.iterrows():
                 ntx1 = row1['ntx']
                 mappa1 = row1['mappa']
+                nperiodo1 = row1['numperiod']
 
-                df_temp = textlist[(textlist['ntx'].astype(int) == ntx1) & (textlist['mappa'].astype(int) > mappa1-intervallo)  & (textlist['mappa'].astype(int) < mappa1+intervallo)  & (textlist['mappa'].astype(int) != mappa1)]
+                if ordinate == 0: 
+                    test = (textlist['ntx'].astype(int) == ntx1) & (textlist['mappa'].astype(int) > mappa1-intervallo) & (textlist['mappa'].astype(int) < mappa1+intervallo) & (textlist['mappa'].astype(int) != mappa1)
+                else:
+                    test = (textlist['ntx'].astype(int) == ntx1) & (textlist['mappa'].astype(int) > mappa1-intervallo)
+                if periodo == 1:
+                    test = test & (textlist['numperiod'] == nperiodo1)
 
-                #(textlist['ntx'].astype(int) == ntx1) & (textlist['mappa'].astype(int) in range(mappa1-intervallo, mappa1+intervallo)) & 
-                #& (textlist['mappa'] in range(mappa1-intervallo, mappa1+intervallo)) & (textlist['mappa'] != mappa1)]
+                df_temp = textlist[test]
                 df_new = pd.concat([df_new, df_temp])
 
             ##########################################