Browse Source

ricerca dei contesti linkata a quella degli elementi

Leonardo Canova 2 years ago
parent
commit
7cc31ceb86

+ 2 - 1
.gitignore

@@ -1 +1,2 @@
-.DS_Store
+.DS_Store
+TIgrO

+ 4 - 5
.vscode/launch.json

@@ -10,16 +10,15 @@
             "request": "launch",
             "module": "flask",
             "env": {
-                "FLASK_APP": "app.py",
+                "FLASK_APP": "flask_be/app.py",
                 "FLASK_DEBUG": "1"
             },
             "args": [
-                "run",
-                "--no-debugger",
-                "--no-reload"
+                "run"
             ],
             "jinja": true,
-            "justMyCode": true
+            "justMyCode": true,
+            "python": "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/TIgrO/bin/python3"
         }
     ]
 }

BIN
flask_be/engine/__pycache__/__init__.cpython-310.pyc


BIN
flask_be/engine/__pycache__/query_generator.cpython-310.pyc


BIN
flask_be/engine/__pycache__/simple_query_test_pandas.cpython-310.pyc


BIN
flask_be/engine/data_interface_sqlite3/__pycache__/__init__.cpython-310.pyc


BIN
flask_be/engine/data_interface_sqlite3/__pycache__/query_handlers.cpython-310.pyc


+ 3 - 4
flask_be/engine/simple_query_test_pandas.py

@@ -27,7 +27,6 @@ def combinations(s):
 #%% funzione interprete
 def interpreter (data):
     clean_data= "'"+data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''") +"'"
-    print (combinations(clean_data))
     return combinations(clean_data)    
       
 # %% funzione iniziale raddoppiata
@@ -62,9 +61,9 @@ def ricercaforme (entries, path, espansa, raddoppiata):
         doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
 
         if raddoppiata == 1: 
-            theSimpleQuery = "SELECT spec AS forma, nocc AS occ FROM form WHERE spec LIKE " + data + " OR spec LIKE " + doubleddata + "ORDER BY idfor"
+            theSimpleQuery = "SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE " + data + " OR spec LIKE " + doubleddata + "ORDER BY idfor"
         else:
-            theSimpleQuery = "SELECT spec AS forma, nocc AS occ FROM form WHERE spec LIKE " + data + " ORDER BY idfor"
+            theSimpleQuery = "SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE " + data + " ORDER BY idfor"
 
         con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
         answer_table = pd.read_sql(theSimpleQuery, con)
@@ -276,7 +275,7 @@ def ricercalemmiforme (entries, path, espansa, raddoppiata):
 #%% ricercaforme(interpreter(entry), "../")
 entry = "proferire*"
 print ("Ricerca di: " + entry)
-df=ricercalemmiforme(interpreter(entry), "../", 1, 0)
+df=ricercaforme(interpreter(entry), "../", 1, 0)
 print (counter(df))
 dtale.show(df)
 # %%

+ 85 - 0
flask_be/engine/test_occorrenzario_pandas.py

@@ -0,0 +1,85 @@
+# %%
+import sqlite3
+import pandas as pd
+import dtale
+import unicodedata
+from simple_query_test_pandas import ricercaforme
+from simple_query_test_pandas import ricercalemmi
+from simple_query_test_pandas import ricercaformelemmi
+from simple_query_test_pandas import ricercalemmiforme
+from simple_query_test_pandas import inizialeraddoppiata
+from simple_query_test_pandas import interpreter
+#
+#
+#
+# %%
parola = "dare"

# %% Step 1: find the word's forms in the form index (table `form`)
# (for simplicity this test uses the extended search without '<,>',
# and the replacement of special characters is incomplete)

entries = interpreter(parola)
search = ricercaforme(entries, "../", 0, 0)
print(search)
#

#%% Step 2: in the occurrence tables, find the text references ('itxt' version) of the
# forms retrieved in step 1, and retrieve the document sigla and its association to ntx
listOcc = [
    "occ00001",
    "occ00002",
    "occ00003",
]
+
def findtexts (df, listOcc, path):
    """Collect the text references of the given forms from the occurrence tables.

    For every table named in ``listOcc`` and every value in ``df['cod']``, the
    table is joined to ``intbib`` on ``ntx`` and the rows whose ``cod`` equals
    that value are fetched from ``<path>/db/test1.db`` (opened read-only).

    Parameters:
        df: DataFrame with a ``cod`` column.
        listOcc: iterable of occurrence-table names.
        path: directory that contains ``db/test1.db``.

    Returns:
        DataFrame with columns ``ntx``, ``pitxt``, ``elemlen`` and ``sigla`` and
        a fresh 0..n-1 index; empty (same columns) when nothing matches.

    Raises:
        KeyError: if ``df`` has no ``cod`` column.
    """
    columns = ["ntx", "pitxt", "elemlen", "sigla"]
    # Iterating a DataFrame yields its column labels, not its rows, so the codes
    # are taken from the column itself. tolist() also converts numpy scalars to
    # plain Python values, which sqlite3 can bind as parameters.
    codes = df["cod"].tolist()
    frames = []
    con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
    try:
        for table in listOcc:
            # Table names cannot be bound as parameters; quote them as identifiers.
            quoted_table = '"' + table.replace('"', '""') + '"'
            query = (
                "SELECT tab.ntx, tab.pitxt, tab.elemlen, intbib.sigla FROM "
                + quoted_table
                + " AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx WHERE cod = ?"
            )
            for code in codes:
                matches = pd.read_sql(query, con, params=(code,))
                if not matches.empty:
                    frames.append(matches)
    finally:
        # Release the database handle even when a query fails.
        con.close()
    if not frames:
        return pd.DataFrame(columns=columns)
    # ignore_index gives every row a unique label; per-query results all start at 0.
    return pd.concat(frames, ignore_index=True)
+
# Run step 2 for the forms found in step 1, reading the database under "../".
textlist = findtexts(df=search, listOcc=listOcc, path="../")
print(textlist)
+
+# %% Step 3: mettere insieme le informazioni recuperare i contesti, versione 'itxt'
+
+
def findcontexts (textlist, charOffsetConst, itxt_dir="../db/itxt/"):
    """Read the text surrounding each occurrence listed in ``textlist``.

    For every row, the file named by ``sigla`` inside ``itxt_dir`` is read as
    UTF-32-LE, starting ``charOffsetConst`` characters before ``pitxt`` (clamped
    to the start of the file) and spanning ``elemlen + 2 * charOffsetConst``
    characters. Progress information is printed for every row.

    Parameters:
        textlist: DataFrame with ``ntx``, ``pitxt``, ``elemlen`` and ``sigla``
            columns.
        charOffsetConst: number of context characters on each side.
        itxt_dir: directory holding the text files; defaults to the location
            that used to be hard-coded.

    Returns:
        List of context strings, one per row, in row order.
    """
    import os

    bytes_per_char = 4  # UTF-32 stores every code point in exactly four bytes
    print(textlist)
    contexts = []
    for _, row in textlist.iterrows():
        sigla = row["sigla"]
        pitxt = int(row["pitxt"])
        print (row["ntx"], sigla)
        print('Indice: ' + str(pitxt))
        print('Sigla: ' + sigla)
        # Values come from the row itself: a label lookup on the frame returns a
        # Series (not a scalar) whenever index labels are duplicated.
        first_char = max(pitxt - charOffsetConst, 0)
        char_count = int(row["elemlen"]) + 2 * charOffsetConst
        # Binary mode makes the byte offset well defined; text-mode seek() only
        # accepts positions previously returned by tell().
        # NOTE: unlike text mode, no newline translation is applied to the result.
        with open(os.path.join(itxt_dir, sigla), "rb") as itxt_file:
            itxt_file.seek(bytes_per_char * first_char)
            raw = itxt_file.read(bytes_per_char * char_count)
        contexts.append(raw.decode("utf-32-le"))
    return contexts
+
# Number of context characters requested on each side of an occurrence.
charOffsetConst = 100
contexts = findcontexts(textlist, charOffsetConst)
print(contexts)
# %% Print results
print()
# One numbered entry (starting at 1) per context, each followed by a blank line.
for ii, cont in enumerate(contexts):
    print(f"{ii + 1}:")
    print(cont)
    print()
+
# Close the connection
# NOTE: nothing is closed here. The former `closeConn(cur1)` call used two names
# (`closeConn`, `cur1`) that this file neither defines nor imports, so it could
# only raise NameError. The only sqlite3 connection in this file is the local
# `con` inside findtexts; no connection or cursor exists at module level.
# %%
# OUTDATED!!!
# Experimentally: 'Gatto' (exact spelling) is in 'p07'
with open("../db/itxt/p07", 'r', encoding="utf-32-le") as file:
    pyppa = file.read()

# NOTE(review): `res2` is not defined or imported anywhere in this file, so the
# two prints below raise NameError unless it already exists in the interactive
# session. Presumably it held rows whose second field was a character offset
# into the text; TODO confirm, then remove or rewire this cell.
print( pyppa[int(res2[0][1])-20:int(res2[0][1])+20] )
print()
print( pyppa[int(res2[1][1])-20:int(res2[1][1])+20] )
# %%