Browse Source

Folder Restructuring

francesco 1 year ago
parent
commit
035bb01b72
100 changed files with 1955 additions and 1 deletions
  1. 1 1
      test_suite/Tests_New/results_output_JSON.py
  2. 0 0
      test_suite/Tests_New/simple_query_test_pandas.py
  3. 0 0
      test_suite/Tests_New/test_data/results.json
  4. 0 0
      test_suite/Tests_New/test_data/searches.json
  5. 0 0
      test_suite/Tests_New/tester_prova.py
  6. BIN
      test_suite/__pycache__/simple_query_test_pandas.cpython-311.pyc
  7. BIN
      test_suite/test/__pycache__/simple_query_test_pandas.cpython-310.pyc
  8. BIN
      test_suite/test/__pycache__/simple_query_test_pandas.cpython-311.pyc
  9. 35 0
      test_suite/test/format.py
  10. BIN
      test_suite/test/img/1.png
  11. BIN
      test_suite/test/img/10.png
  12. BIN
      test_suite/test/img/11.png
  13. BIN
      test_suite/test/img/12.png
  14. BIN
      test_suite/test/img/13.png
  15. BIN
      test_suite/test/img/15.png
  16. BIN
      test_suite/test/img/16.png
  17. BIN
      test_suite/test/img/17.png
  18. BIN
      test_suite/test/img/18.png
  19. BIN
      test_suite/test/img/19.png
  20. BIN
      test_suite/test/img/2.png
  21. BIN
      test_suite/test/img/20.png
  22. BIN
      test_suite/test/img/21.png
  23. BIN
      test_suite/test/img/22.png
  24. BIN
      test_suite/test/img/23.png
  25. BIN
      test_suite/test/img/24.png
  26. BIN
      test_suite/test/img/25.png
  27. BIN
      test_suite/test/img/26.png
  28. BIN
      test_suite/test/img/27.png
  29. BIN
      test_suite/test/img/28.png
  30. BIN
      test_suite/test/img/29.png
  31. BIN
      test_suite/test/img/3.png
  32. BIN
      test_suite/test/img/30.png
  33. BIN
      test_suite/test/img/31.png
  34. BIN
      test_suite/test/img/32.png
  35. BIN
      test_suite/test/img/33.png
  36. BIN
      test_suite/test/img/4.png
  37. BIN
      test_suite/test/img/5.png
  38. BIN
      test_suite/test/img/6.png
  39. BIN
      test_suite/test/img/7.png
  40. BIN
      test_suite/test/img/8.png
  41. BIN
      test_suite/test/img/9.png
  42. BIN
      test_suite/test/img/c1.png
  43. BIN
      test_suite/test/img/c10.png
  44. BIN
      test_suite/test/img/c11.png
  45. BIN
      test_suite/test/img/c2.png
  46. BIN
      test_suite/test/img/c3.png
  47. BIN
      test_suite/test/img/c4.png
  48. BIN
      test_suite/test/img/c5.png
  49. BIN
      test_suite/test/img/c6.png
  50. BIN
      test_suite/test/img/c7.png
  51. BIN
      test_suite/test/img/c8.png
  52. BIN
      test_suite/test/img/c9.png
  53. BIN
      test_suite/test/img/coocco1.1.png
  54. BIN
      test_suite/test/img/coocco1.png
  55. BIN
      test_suite/test/img/coocco2.0.png
  56. BIN
      test_suite/test/img/coocco2.1.png
  57. BIN
      test_suite/test/img/coocco2.2.png
  58. BIN
      test_suite/test/img/coocco2.png
  59. BIN
      test_suite/test/img/coocco3.0.png
  60. BIN
      test_suite/test/img/coocco3.1.png
  61. BIN
      test_suite/test/img/coocco4.0.png
  62. BIN
      test_suite/test/img/coocco4.1.png
  63. BIN
      test_suite/test/img/coocco5.0.png
  64. BIN
      test_suite/test/img/coocco5.1.png
  65. BIN
      test_suite/test/img/coocco6.0.png
  66. BIN
      test_suite/test/img/coocco6.1.png
  67. BIN
      test_suite/test/img/coocco7.0.png
  68. BIN
      test_suite/test/img/coocco7.1.png
  69. BIN
      test_suite/test/img/cs1filius.png
  70. BIN
      test_suite/test/img/cs2tarda.png
  71. BIN
      test_suite/test/img/cs3amistade.png
  72. BIN
      test_suite/test/img/cs4orgoglia.png
  73. BIN
      test_suite/test/img/cs5intradetta.png
  74. BIN
      test_suite/test/img/cs5intradetta1.png
  75. BIN
      test_suite/test/img/cs6pennace.png
  76. BIN
      test_suite/test/img/cs7invitava.png
  77. BIN
      test_suite/test/img/cs7invitava1.png
  78. BIN
      test_suite/test/img/cta1altresi.png
  79. BIN
      test_suite/test/img/cta1altresi1.png
  80. BIN
      test_suite/test/img/cta1altresi2.png
  81. BIN
      test_suite/test/img/ctaaltresi.png
  82. BIN
      test_suite/test/img/occorrenze.png
  83. 0 0
      test_suite/test/more_tests_kora/engine/__init__.py
  84. 162 0
      test_suite/test/more_tests_kora/engine/basic_queries.py
  85. 97 0
      test_suite/test/more_tests_kora/engine/cooccorrenze.py
  86. 12 0
      test_suite/test/more_tests_kora/engine/data_interface/QueryHandlerAbstract.py
  87. 0 0
      test_suite/test/more_tests_kora/engine/data_interface/__init__.py
  88. 13 0
      test_suite/test/more_tests_kora/engine/data_interface/data_providers_setup.py
  89. 36 0
      test_suite/test/more_tests_kora/engine/parsing_utilities.py
  90. 0 0
      test_suite/test/more_tests_kora/interface_sqlite3/__init__.py
  91. 0 0
      test_suite/test/more_tests_kora/interface_sqlite3/encdec/__init__.py
  92. 97 0
      test_suite/test/more_tests_kora/interface_sqlite3/encdec/de_code.py
  93. 261 0
      test_suite/test/more_tests_kora/interface_sqlite3/query_handlers.py
  94. 114 0
      test_suite/test/more_tests_kora/pippo.py
  95. 33 0
      test_suite/test/query_generator.py
  96. 36 0
      test_suite/test/results_output.py
  97. 106 0
      test_suite/test/results_output_JSON.py
  98. 336 0
      test_suite/test/simple_query_test_pandas.py
  99. 164 0
      test_suite/test/test_contesti_singoli.py
  100. 452 0
      test_suite/test/test_contesti_singoli_notebook.ipynb

+ 1 - 1
test_suite/results_output_JSON.py → test_suite/Tests_New/results_output_JSON.py

@@ -42,7 +42,7 @@ all_results = []
 # Creo una lista vuota per le ricerche
 # Creo una lista vuota per le ricerche
 all_searches = []
 all_searches = []
 # Definisco gli esempi di ricerca da fare
 # Definisco gli esempi di ricerca da fare
-path_db_0 = "../db/first_db"
+path_db_0 = "../../db/first_db"
 search_entries = [
 search_entries = [
     {"tipo_ricerca": "forme e lemmi", "parola_cercata": "pro*", "path_db": path_db_0, "ricerca_espansa": 1, "iniziali_raddoppiate": 0},
     {"tipo_ricerca": "forme e lemmi", "parola_cercata": "pro*", "path_db": path_db_0, "ricerca_espansa": 1, "iniziali_raddoppiate": 0},
     {"tipo_ricerca": "forme", "parola_cercata": "prov*", "path_db": path_db_0, "ricerca_espansa": 1, "iniziali_raddoppiate": 0},
     {"tipo_ricerca": "forme", "parola_cercata": "prov*", "path_db": path_db_0, "ricerca_espansa": 1, "iniziali_raddoppiate": 0},

+ 0 - 0
test_suite/simple_query_test_pandas.py → test_suite/Tests_New/simple_query_test_pandas.py


+ 0 - 0
test_suite/test_data/results.json → test_suite/Tests_New/test_data/results.json


+ 0 - 0
test_suite/test_data/searches.json → test_suite/Tests_New/test_data/searches.json


+ 0 - 0
test_suite/tester_prova.py → test_suite/Tests_New/tester_prova.py


BIN
test_suite/__pycache__/simple_query_test_pandas.cpython-311.pyc


BIN
test_suite/test/__pycache__/simple_query_test_pandas.cpython-310.pyc


BIN
test_suite/test/__pycache__/simple_query_test_pandas.cpython-311.pyc


+ 35 - 0
test_suite/test/format.py

@@ -0,0 +1,35 @@
+
+# %%
+# Exploratory notebook-style script (cells delimited by "# %%") that inspects
+# the raw files "../db/ftxt/aa1" and "../db/itxt/aa1".
+import sqlite3
+import pandas as pd
+import dtale
+import unicodedata
+
+# Read the whole file in binary mode and turn it into a list of integer byte values.
+with open("../db/ftxt/aa1", 'rb') as file1:
+    formByte = file1.read()
+    form = [byte for byte in formByte]
+# %%
+# Display the byte list (cell output).
+form
+# %%
+# NOTE(review): `appa` is never defined in this file; presumably it refers to a
+# byte list such as `form` left over from an interactive session -- confirm.
+# Keeps only the strictly positive values.
+list( filter(lambda el: el>0, appa) ) 
+# %%
+# Print position and value of every strictly positive entry of `appa`.
+for index, byte in enumerate(appa):
+    if byte > 0:
+        print(index, byte)
+# %%
+# Open the file as UTF-32-LE text, move to byte offset 4*179 (4 bytes per
+# code unit) and read 188-179+1 = 10 characters.
+with open("../db/itxt/aa1", 'r', encoding="utf-32-le") as file1:
+    file1.seek(4*179)
+    text = file1.read(188-179 + 1)
+    print(text)
+# %%
+# Read the same file in full as UTF-32-LE text.
+with open("../db/itxt/aa1", 'r', encoding="utf-32-le") as file1:
+    textfull = file1.read()
+# %%
+# NOTE(review): `text` holds at most 10 characters (see the read above), so this
+# slice is empty; possibly `textfull` was intended -- confirm.
+text[179:188]
+# %%
+len(appa)
+# %%
+len(text)
+# %%
+# Characters 180..188 of the full text (the 179..187 window shifted by one).
+textfull[179+1:188+1]
+# %%

BIN
test_suite/test/img/1.png


BIN
test_suite/test/img/10.png


BIN
test_suite/test/img/11.png


BIN
test_suite/test/img/12.png


BIN
test_suite/test/img/13.png


BIN
test_suite/test/img/15.png


BIN
test_suite/test/img/16.png


BIN
test_suite/test/img/17.png


BIN
test_suite/test/img/18.png


BIN
test_suite/test/img/19.png


BIN
test_suite/test/img/2.png


BIN
test_suite/test/img/20.png


BIN
test_suite/test/img/21.png


BIN
test_suite/test/img/22.png


BIN
test_suite/test/img/23.png


BIN
test_suite/test/img/24.png


BIN
test_suite/test/img/25.png


BIN
test_suite/test/img/26.png


BIN
test_suite/test/img/27.png


BIN
test_suite/test/img/28.png


BIN
test_suite/test/img/29.png


BIN
test_suite/test/img/3.png


BIN
test_suite/test/img/30.png


BIN
test_suite/test/img/31.png


BIN
test_suite/test/img/32.png


BIN
test_suite/test/img/33.png


BIN
test_suite/test/img/4.png


BIN
test_suite/test/img/5.png


BIN
test_suite/test/img/6.png


BIN
test_suite/test/img/7.png


BIN
test_suite/test/img/8.png


BIN
test_suite/test/img/9.png


BIN
test_suite/test/img/c1.png


BIN
test_suite/test/img/c10.png


BIN
test_suite/test/img/c11.png


BIN
test_suite/test/img/c2.png


BIN
test_suite/test/img/c3.png


BIN
test_suite/test/img/c4.png


BIN
test_suite/test/img/c5.png


BIN
test_suite/test/img/c6.png


BIN
test_suite/test/img/c7.png


BIN
test_suite/test/img/c8.png


BIN
test_suite/test/img/c9.png


BIN
test_suite/test/img/coocco1.1.png


BIN
test_suite/test/img/coocco1.png


BIN
test_suite/test/img/coocco2.0.png


BIN
test_suite/test/img/coocco2.1.png


BIN
test_suite/test/img/coocco2.2.png


BIN
test_suite/test/img/coocco2.png


BIN
test_suite/test/img/coocco3.0.png


BIN
test_suite/test/img/coocco3.1.png


BIN
test_suite/test/img/coocco4.0.png


BIN
test_suite/test/img/coocco4.1.png


BIN
test_suite/test/img/coocco5.0.png


BIN
test_suite/test/img/coocco5.1.png


BIN
test_suite/test/img/coocco6.0.png


BIN
test_suite/test/img/coocco6.1.png


BIN
test_suite/test/img/coocco7.0.png


BIN
test_suite/test/img/coocco7.1.png


BIN
test_suite/test/img/cs1filius.png


BIN
test_suite/test/img/cs2tarda.png


BIN
test_suite/test/img/cs3amistade.png


BIN
test_suite/test/img/cs4orgoglia.png


BIN
test_suite/test/img/cs5intradetta.png


BIN
test_suite/test/img/cs5intradetta1.png


BIN
test_suite/test/img/cs6pennace.png


BIN
test_suite/test/img/cs7invitava.png


BIN
test_suite/test/img/cs7invitava1.png


BIN
test_suite/test/img/cta1altresi.png


BIN
test_suite/test/img/cta1altresi1.png


BIN
test_suite/test/img/cta1altresi2.png


BIN
test_suite/test/img/ctaaltresi.png


BIN
test_suite/test/img/occorrenze.png


+ 0 - 0
test_suite/test/more_tests_kora/engine/__init__.py


+ 162 - 0
test_suite/test/more_tests_kora/engine/basic_queries.py

@@ -0,0 +1,162 @@
+#%%
+from .parsing_utilities import interpreter, inizialeraddoppiata, list_normalize
+
+# Basic data provider class; can be instantiated to handle different kinds
+# of data-providing connections or interfaces based on config options
+from .data_interface.data_providers_setup import queryHandlerFactory
+
+import pandas as pd
+
+# Main class for basic queries contains:
+# - a data provider instance
+# - methods to submit queries to the data provider instance
+class basicQueries:
+    """Entry point for basic searches.
+
+    Holds a query handler built by ``queryHandlerFactory(dataConfig)`` and the
+    ``listOcc`` value read from ``dataConfig``; every method below builds query
+    objects (plain dicts) and submits them to that handler.
+    """
+
+    def __init__(self, dataConfig):
+        """Create the query handler and store ``dataConfig.get('listOcc')``."""
+        self.queryHandler = queryHandlerFactory(dataConfig)
+        self.listOcc = dataConfig.get('listOcc')
+
+    # Prepares and sends query OBJECTS which will be processed by the data provider
+    def sendBasicQuery(self, text, queryType, espansa, raddoppiata, pandas=False, dbFile=None):
+        """Parse ``text`` and submit it to the query handler.
+
+        ``text`` is expanded by ``interpreter`` into a list of entries.
+        - ``raddoppiata == 1``: the ``inizialeraddoppiata`` variants are appended
+          to the entries sent as ``data``.
+        - ``espansa == 1``: ``dataNorm`` is also filled, using ``list_normalize``.
+
+        Returns whatever ``self.queryHandler.query`` returns for the query object
+        ``{'data', 'dataNorm', 'queryType'}``; ``pandas`` and ``dbFile`` are
+        forwarded unchanged.
+        """
+        
+        entries = interpreter(text)
+        
+        data = entries
+        dataNorm = []
+        if raddoppiata==1:
+            data = entries + inizialeraddoppiata(entries)
+        if espansa==1 and raddoppiata==0:
+            dataNorm = list_normalize(entries)
+        elif espansa==1 and raddoppiata==1:
+            # NOTE(review): here the plain entries are NOT normalized, unlike the
+            # branch above -- confirm this asymmetry is intended.
+            dataNorm = entries + list_normalize(inizialeraddoppiata(entries))
+
+        return self.queryHandler.query({'data': data, 'dataNorm': dataNorm, 'queryType': queryType}, pandas, dbFile)
+    
+    #%% Takes the result of a search as input and looks up, in the occurrence tables, the pointers to the contexts and the other elements associated with them.
+    # The `type` argument defines the kind of input search (0 for forms, 1 for lemmas, 2 for lemmas with the "show non-lemmatized occurrences" option)
+    def findtexts(self, type, df, index = None):
+        """Collect the occurrence rows for the codes found in ``df``.
+
+        ``df`` must expose a ``"cod"`` column. When ``index`` is given (a single
+        label, a list or a ``range``) only those rows of ``df`` are used.
+        One ``'texts'`` query is sent per table in ``self.listOcc`` and the
+        responses are concatenated into the returned DataFrame.
+        """
+        if index is None:
+            df = pd.DataFrame(df)
+        else:
+            # Normalize `index` to a list so that df.loc always yields a frame-like selection
+            if isinstance(index, range):
+                index = list(index)
+            elif not isinstance(index, list):
+                index = [index]
+            df = pd.DataFrame(df.loc[index])
+        textlist = pd.DataFrame()
+        codList = list(df["cod"])
+        listOcc = self.listOcc
+        
+        queryData = {'queryType': 'texts', 'querySubtype': type, 'codList': codList}
+
+        for table in listOcc:
+            # The same query dict is reused; only the target table changes per iteration
+            queryData['table'] = table
+            if type==2:
+                # For type 2 a preliminary 'pfl' query provides the "forma" values
+                # that are passed along as 'formCodList'
+                subQueryData = {'queryType': 'pfl', 'codList': codList}
+                subdf = self.queryHandler.query(subQueryData, pandas=True)
+                queryData['formCodList'] = list(subdf["forma"])
+
+            extendequeryReponse = self.queryHandler.query(queryData, pandas=True)
+
+            textlist = pd.concat([textlist, extendequeryReponse])
+
+        return textlist
+
+    # %% Takes the output of findtexts as input and returns the contexts associated with the located elements.
+    # The context range defaults to 30 words and can be adjusted when moving to the single-context view.
+
+    def findcontexts(self, textlist):
+        """Attach a ``'contesto'`` column to ``textlist``.
+
+        For every row, a ``'contexts'`` query is sent per table in
+        ``self.listOcc``; the min and max of the returned ``"pitxt"`` values are
+        passed, with the row's ``"sigla"``, to ``self.queryHandler.textQuery``.
+
+        Note: ``textlist`` itself is modified in place (the column is added to
+        it); the returned value is that frame with a fresh 0..n-1 index.
+        """
+        parole = 31
+        listOcc = self.listOcc
+        contexts = []
+        for ind, row in textlist.iterrows():
+            sigla = row["sigla"]
+            queryData = {'queryType': 'contexts', 'ntxlocal': row["ntx"], 'mappalocal': row['mappa'], 'parole': parole}
+            pointerlist = pd.DataFrame()
+            for table in listOcc:
+                queryData['table'] = table
+                queryresponse = self.queryHandler.query(queryData, pandas=True)
+                pointerlist = pd.concat([pointerlist, queryresponse])
+            
+            fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(), 'maxChar': pointerlist["pitxt"].max()}
+            cont = self.queryHandler.textQuery(fileQueryData)
+            contexts.append(cont)
+        textlist['contesto'] = contexts
+        return (textlist.reset_index(drop=True))
+
+    # %% Takes the output of findcontexts as input and attaches the bibliographic references to each context.
+
+    def findbib(self, contexts):
+        """Add bibliographic columns to ``contexts`` and return it sorted.
+
+        For every row a ``'bib'`` query (against ``'bibliografia/BiblioTLIO.db'``)
+        and a ``'rif'`` query are sent; the resulting columns are copied onto
+        ``contexts`` (modified in place). The result is sorted by
+        ``'Anno iniziale'``, ``'Rif_organico'`` and ``'pag'``, its columns are
+        reordered (the ``cols`` list first, then any remaining ones) and the
+        index is reset.
+        """
+        infobib = pd.DataFrame()
+        rif_org = pd.DataFrame()
+        for ind, row in contexts.iterrows():
+            queryData = {'queryType': 'bib', 'row': row}
+            bib = self.queryHandler.query(queryData, pandas=True, dbFile='bibliografia/BiblioTLIO.db')
+            infobib = pd.concat([infobib, bib])
+            queryData = {'queryType': 'rif', 'row': row}
+            rif = self.queryHandler.query(queryData, pandas=True)
+            rif_org = pd.concat([rif_org, rif])
+        # The column assignments below are positional: they assume each query
+        # returned exactly one row per context row -- TODO confirm.
+        annoiniz = list(infobib['Anno iniziale'])
+        annofin = list(infobib['Anno finale'])
+        datacod = list(infobib['Data codificata'])
+        datadesc = list(infobib['Data descrittiva'])
+        titoloabb = list(infobib['Titolo Abbreviato'])
+        autore = list(infobib['Autore'])
+        titolo = list(infobib['Titolo'])
+        curatore = list(infobib['Curatore'])
+        areagen = list(infobib['Area generica'])
+        areaspec = list(infobib['Area specifica'])
+        genere = list(infobib['Genere'])
+        forma = list(infobib['Forma'])
+        tipo = list(infobib['Tipo'])
+        iq = list(infobib['IQ'])
+        rif1 = list(rif_org['Rif_organico'])
+        rif2 = list(rif_org['Rif_completo'])
+        contexts['Anno iniziale'] = annoiniz
+        contexts['Anno finale'] = annofin
+        contexts['Data codificata'] = datacod
+        contexts['Data descrittiva'] = datadesc
+        contexts['Autore'] = autore
+        contexts['Titolo Abbreviato'] = titoloabb
+        contexts['Titolo'] = titolo
+        contexts['Curatore'] = curatore
+        contexts['Area generica'] = areagen
+        contexts['Area specifica'] = areaspec
+        contexts['Genere'] = genere
+        contexts['Forma'] = forma
+        contexts['Tipo'] = tipo
+        contexts ['IQ'] = iq
+        contexts['Rif_organico'] = rif1
+        # NOTE(review): the output column is spelled 'Rig_completo' (source column
+        # is 'Rif_completo'); the same spelling is used in `cols` below, so it is
+        # consistent here -- check consumers before renaming.
+        contexts['Rig_completo'] = rif2
+        contexts.pag = contexts.pag.astype(int)
+        chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag'])   
+        cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'Autore', 'Titolo',  'Anno iniziale', 'Anno finale', 'Data codificata', 'Data descrittiva', 'Area generica', 'Area specifica', 'Genere', 'Forma', 'Tipo', 'Curatore', 'ntx', 'pitxt', 'elemlen', 'mappa', 'numperiod', 'numorg', 'sigla', 'vol', 'col', 'numbrano', 'Rig_completo', 'cod']
+        clean_df = chrono.reindex(columns=cols + list(chrono.columns.difference(cols)))
+        return (clean_df.reset_index(drop=True))
+    
+    #%% Function computing the string(s) to highlight
+
+    def highlight (self, bibliocontexts):
+        """Add one highlight column per ``cod*`` column of ``bibliocontexts``.
+
+        For every column whose name starts with ``'cod'`` a ``'highlight'`` query
+        is sent per row and the returned ``'spec'`` values are collected. The
+        first such column produces ``'highlight'``, the following ones
+        ``'highlight1'``, ``'highlight2'``, ... The frame is modified in place
+        and returned.
+        """
+        index = 0
+        for col in bibliocontexts.columns:
+            forme = []
+            if col.startswith('cod'):
+                for ind, row in bibliocontexts.iterrows():
+                    queryData = {'queryType': 'highlight', 'row': row, 'col': col}
+                    forma = self.queryHandler.query(queryData, pandas=True)
+                    forme += list(forma['spec'])        
+                if index == 0:
+                    bibliocontexts['highlight'] = forme
+                else:
+                    bibliocontexts['highlight'+str(index)] = forme
+                index += 1
+        return bibliocontexts
+
+    #%% Cumulative function for multiple contexts
+
+    def contestimultipli (self, tipo_ricerca, ricerca, index = None):
+        """Run findtexts -> findcontexts -> findbib -> highlight and return the result."""
+        textlist = self.findtexts(tipo_ricerca, ricerca, index)
+        contexts = self.findcontexts (textlist)
+        bibliocontexts = self.findbib (contexts)
+        highlights = self.highlight(bibliocontexts)
+        return highlights

+ 97 - 0
test_suite/test/more_tests_kora/engine/cooccorrenze.py

@@ -0,0 +1,97 @@
+# %%
+import pandas as pd
+import time
+
+from .basic_queries import basicQueries
+
+
+# Executes query sequences to recover contexts with co-occurrences according to user input
+# Returns Pandas dataframes
+class cooccorrenze(basicQueries):
+    
+    def __init__(self, dataConfig):
+        super().__init__(dataConfig)
+
+#%% funzione ricerca per cooccorrenze. 
+    # Ha in input un array del tipo [forma/lemma_cercati, tipo_ricerca, ricerca_espansa, iniziale_raddoppiata].
+    # l'attributo tipo_ricerca definisce il tipo di ricerca in input (0 per forme, 1 per lemmi, 2 per lemmi con opzione "mostra occorrenze non lemmatizzate").
+    # Permette di definire l'intervallo di ricerca (in numero di parole), la possibilità di cercare soltanto all'interno dello stesso periodo (0/1) e/o di cercare le occorrenze in modo ordinato (0/1)
+    
+    def ricerca_cooccorrenze (self, listaricerche, intervallo, periodo, ordinate):
+        listatesti = pd.DataFrame()
+        cod = 1
+        if listaricerche[0][1] == 0:
+            ricerca = self.sendBasicQuery(listaricerche[0][0], 'forma', listaricerche[0][2], listaricerche[0][3], pandas=True)
+            listatesti = self.findtexts(0, ricerca)
+        elif listaricerche[0][1] == 1:
+            ricerca = self.sendBasicQuery(listaricerche[0][0], 'lemma', listaricerche[0][2], listaricerche[0][3], pandas=True)
+            listatesti = self.findtexts(1, ricerca)
+        elif listaricerche[0][1] == 2:
+            ricerca = self.sendBasicQuery(listaricerche[0][0], 'lemma', listaricerche[0][2], listaricerche[0][3], pandas=True)
+            listatesti = self.findtexts(2, ricerca)
+        
+        if listatesti.empty:
+            return []
+
+        for ricerca, tipo, espansa, raddoppiata in listaricerche[1:]:
+            if tipo == 0:
+                search = self.sendBasicQuery(ricerca, 'forma', espansa, raddoppiata, pandas=True)
+            elif tipo == 1:
+                search = self.sendBasicQuery(ricerca, 'lemma', espansa, raddoppiata, pandas=True)
+            elif tipo == 2:
+                search = self.sendBasicQuery(ricerca, 'lemma', espansa, raddoppiata, pandas=True)
+     
+            textlist = self.findtexts(tipo, search)
+            df_new = pd.DataFrame(columns=list(listatesti.columns))
+            cod_cols = []
+
+            t1 = time.time()
+            ##########################################
+            # KORA: questo blocco sembra troppo lento!
+            ##########################################
+#             for index1, row1 in listatesti.iterrows():
+#                 for index2, row2 in textlist.iterrows():
+#                     cond1 = row1['ntx'] == row2['ntx']
+#                     cond2 = row1['numperiod'] == row2['numperiod'] if periodo == 1 else True
+#                     cond3 = ((row1['mappa'] - row2['mappa']) != 0) and ((row1['mappa'] - row2['mappa']) in range(-intervallo, intervallo)) if ordinate == 0 else ((row2['mappa'] - row1['mappa']) > 0) and ((row2['mappa'] - row1['mappa']) <= intervallo)
+
+#                     if cond1 and cond2 and cond3:
+#                         row1[f'cod{cod}'] = textlist['cod'].iloc[index2]
+# #                        print (type(textlist.loc[index2, 'cod'].iloc[1]))
+#                         cod_cols.append(f'cod{cod}')
+#                         df_new = pd.concat([df_new, row1.to_frame().T])
+
+            ##########################################
+            # KORA: fino a qui
+            ##########################################
+            t2 = time.time()
+
+            df_new = pd.DataFrame(columns=list(listatesti.columns))
+            for index1, row1 in listatesti.iterrows():
+                ntx1 = row1['ntx']
+                mappa1 = row1['mappa']
+
+                df_temp = textlist[(textlist['ntx'].astype(int) == ntx1) & (textlist['mappa'].astype(int) > mappa1-intervallo)  & (textlist['mappa'].astype(int) < mappa1+intervallo)  & (textlist['mappa'].astype(int) != mappa1)]
+
+                #(textlist['ntx'].astype(int) == ntx1) & (textlist['mappa'].astype(int) in range(mappa1-intervallo, mappa1+intervallo)) & 
+                #& (textlist['mappa'] in range(mappa1-intervallo, mappa1+intervallo)) & (textlist['mappa'] != mappa1)]
+                df_new = pd.concat([df_new, df_temp])
+
+            t3 = time.time()
+            print('Muglio', round(t2 - t1, 1), round(t3 - t2, 1))
+
+
+            listatesti = df_new
+
+            cod += 1
+
+            if listatesti.empty:
+                return []
+            else:
+                contexts = self.findcontexts(listatesti)
+                bibliocontexts = self.findbib(contexts)
+                clean = bibliocontexts.drop_duplicates(subset="contesto")
+                highlights = self.highlight (clean)
+        return  highlights.to_dict(orient='records') # is this slow? CHECK!
+        #return clean.to_json(orient='records') # possibilità alternativa -- molte opzioni possibili!
+

+ 12 - 0
test_suite/test/more_tests_kora/engine/data_interface/QueryHandlerAbstract.py

@@ -0,0 +1,12 @@
+# An abstract class template for query handler factory return object
+# try a 'naive' implementation without using abc
+class QueryHandlerAbstract():
+    def __init__(self):
+        if type(self) is QueryHandlerAbstract:
+            raise Exception("QueryHandlerAbstract is an abstract class and can't be instantiated")
+    
+    def query(self, queryData, pandas=False, dbFile=None):
+        raise Exception("Method not implemented")
+    
+    def textQuery(self, queryData):
+        raise Exception("Method not implemented")

+ 0 - 0
test_suite/test/more_tests_kora/engine/data_interface/__init__.py


+ 13 - 0
test_suite/test/more_tests_kora/engine/data_interface/data_providers_setup.py

@@ -0,0 +1,13 @@
+from interface_sqlite3.query_handlers import queryHandlerBasicSqlite
+
+# Creates the 'query handler' object (that handles communications with the DB) according to configuration specifics
+def queryHandlerFactory(dataConfig):
+
+    interface = dataConfig.get('data_interface')
+
+    if interface=='sqlite3':
+        return queryHandlerBasicSqlite(dataConfig)
+
+    else:
+        raise Exception('Unrecognized data interface in data configuration')
+

+ 36 - 0
test_suite/test/more_tests_kora/engine/parsing_utilities.py

@@ -0,0 +1,36 @@
+import unicodedata
+
+#%% funzione combinazioni <> è chiamata da interpreter
+## DA MODIFICARE PER DB CIFRATO
+def combinations(s):
+  result = []
+  start = s.find("<")
+  end = s.find(">")
+  if start == -1 or end == -1:
+    return [s]
+  items = s[start + 1:end].split(",")
+  for item in items:
+    result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])])
+  return result
+
+#%% funzione interprete, sta alla base di ogni ricerca
+## DA MODIFICARE PER DB CIFRATO
+def interpreter (data):
+    clean_data= data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''")
+    return combinations(clean_data)    
+
+# %% funzione iniziale raddoppiata, è chiamata dalle funzioni di ricerca con iniziale raddoppiata
+def inizialeraddoppiata (data):
+    doubleddata=[]
+    for el in data:
+        if el[1] != "%" and "_":
+            doubleddata = doubleddata + ["'"+ el[1] + el[1:]]
+    return doubleddata
+
+# %% funzione normalizza stringa (ricerca espansa), è chiamata dalle funzioni di ricerca espansa
+## DA MODIFICARE PER DB CIFRATO
+def normalize(stringa):
+    return unicodedata.normalize('NFKD', stringa).encode('ASCII', 'ignore').decode('utf-8')
+
+def list_normalize(lista):
+    return [normalize(stringa) for stringa in lista]

+ 0 - 0
test_suite/test/more_tests_kora/interface_sqlite3/__init__.py


+ 0 - 0
test_suite/test/more_tests_kora/interface_sqlite3/encdec/__init__.py


+ 97 - 0
test_suite/test/more_tests_kora/interface_sqlite3/encdec/de_code.py

@@ -0,0 +1,97 @@
+#%%
+import csv
+from os import listdir
+
+class keyRing:
+
+    def __init__(self, keyPath, dbEncoded, textsEncoded):
+        self.keyPath = keyPath
+        self.vettSpec = self.getVettSpec(dbEncoded)
+        self.textKeys = self.getKeys(textsEncoded)
+
+    def getVettSpec(self, dbEncoded):
+        if not dbEncoded:
+            return None
+        with open(self.keyPath + "vettSpec.csv", 'r') as file1:
+            reader = csv.DictReader(file1)
+            vettSpec = [row for row in reader]
+            return vettSpec
+
+    def getKeys(self, textsEncoded):
+        if not textsEncoded:
+            return None
+        files = listdir(self.keyPath)
+        keyFiles = [file for file in files if (file.startswith('key_') and file.endswith('.csv'))]
+
+        keys = {}
+        for keyFile in keyFiles:
+            code = keyFile.replace('key_', '').replace('.csv', '')
+            try:
+                keys[code] = self.getKeyByCode(keyFile)
+            except:
+                pass
+
+        return keys
+
+    def getKeyByCode(self, keyFile):
+        with open(self.keyPath + keyFile, 'r') as file1:
+            reader = csv.reader(file1)
+            key = [int(row[0]) for index, row in enumerate(reader) if index>1]
+            halfKeyLen = len(key)//2
+            key=key[:halfKeyLen]
+            return key
+
+
+# Encoder/Decoders
+
+# DB field encoder/decoder
+# DB Columns that need this:
+# FORM -> norm, spec, invnorm, invspec
+# LEM  -> norm, spec, invnorm, invspec, cat, omo
+def db_decode(vettSpec, string0):
+
+    res = ""
+    for char0 in string0:
+        #1
+        char0Dec = ord(char0) # Dal carattere al codice Unicode DECIMALE corrispondente
+        #2
+        char0ConvDec = next((el['unicode'] for el in vettSpec if el['intcode'] == str(char0Dec)), None) # Il codice DECIMALE (passato come stringa) viene ricercato in vettSpec, ritornando l'Unicode ESADECIMALE del carattere decriptato o None se non c'è riscontro -- il che non DOVREBBE succedere.
+        #3
+        res += chr(int(char0ConvDec, 16)) # Si converte il codice esadecimale a decimale e si usa la built-in chr per recuperare il carattere
+    return res
+#
+def db_encode(vettSpec, string0):
+    res = ""
+    for char0 in string0:
+        #1
+        char0Hex = hex(ord(char0)) # Dal carattere al codice Unicode ESADECIMALE corrispondente
+        #2
+        char0ConvDec = next((el['intcode'] for el in vettSpec if el['unicode'] == char0Hex[2:].upper()), None) # Il codice ESADECIMALE, senza il prefisso '0x' (rimosso tramite [2:]) e convertito in maiuscole per rispettare il formato di vettSpec, viene ricercato in vettSpec, ritornando l'Unicode DECIMALE del carattere criptato o None se non c'è riscontro -- il che non DOVREBBE succedere.
+        #3
+        res += chr(int(char0ConvDec)) # Si usa la built-in chr per recuperare il carattere
+    return res
+
+# Text encoder/decoder
+def decodeTextByKey(text, key, startInFile):
+    initialOffset = startInFile % len(key)
+    res = ""
+    for k, char0 in enumerate(text):
+        offset = k + initialOffset
+        if offset >= len(key):
+            offset = offset % len(key)
+        res += shiftchar(char0, -key[offset])
+    return res
+#
+def codeTextByKey(text, key, startInFile):
+
+    initialOffset = startInFile % len(key)
+    res = ""
+    for k, char0 in enumerate(text):
+        offset = k + initialOffset
+        if offset >= len(key):
+            offset = offset % len(key)
+        res += shiftchar(char0, +key[offset])
+    return res
+#
+def shiftchar(char0, shift):
+    return chr(ord(char0) + shift)

+ 261 - 0
test_suite/test/more_tests_kora/interface_sqlite3/query_handlers.py

@@ -0,0 +1,261 @@
+import sqlite3
+import pandas as pd
+import interface_sqlite3.encdec.de_code as dc
+
+from engine.data_interface.QueryHandlerAbstract import QueryHandlerAbstract
+
+# First version
+class queryHandlerBasicSqlite(QueryHandlerAbstract):
+    """First-version query handler backed by read-only SQLite database files.
+
+    SQL is generated from query-data dicts by prepareQueryString and executed
+    against a database file located under dbPath. When the configuration
+    flags say so, query values are encoded before the query and selected
+    result columns are decoded afterwards, using a key ring loaded from
+    dbPath + 'keys/'.
+    """
+
+    def __init__(self, dataConfig):
+        """Read the data-provider configuration.
+
+        Args:
+            dataConfig: mapping with the required keys 'dbPath' and
+                'dbfile_default', and the optional flags 'db_encoded' and
+                'texts_encoded' (honoured only when they are exactly True).
+
+        Raises:
+            Exception: if a required key is missing.
+        """
+        try:
+            dbPath = dataConfig['dbPath']
+            dbfileDefault = dataConfig['dbfile_default']
+        except (KeyError, TypeError) as err:
+            raise Exception('Missing required input in Data Provider Configuration') from err
+        self.dbPath = dbPath
+        self.dbfileDefault = dbfileDefault
+
+        # Encoding
+        self.dbEncoded = dataConfig.get("db_encoded") is True
+        self.textsEncoded = dataConfig.get("texts_encoded") is True
+        self.keyRing = None
+        if self.dbEncoded or self.textsEncoded:
+            keyPath = self.dbPath + 'keys/'
+            self.keyRing = dc.keyRing(keyPath, self.dbEncoded, self.textsEncoded)
+
+    def query(self, queryData, pandas=False, dbFile=None):
+        """Run the query described by queryData and return its results.
+
+        Formerly the query string was generated outside and passed in lieu of
+        the query data; now this method receives a query data OBJECT and
+        builds the query string itself.
+
+        Args:
+            queryData: dict describing the query (see prepareQueryString).
+            pandas: if True return a pandas DataFrame, otherwise a list of
+                dicts (one per row).
+            dbFile: database file name under dbPath; defaults to the
+                configured 'dbfile_default'.
+
+        Returns:
+            The (possibly decoded) results, as a DataFrame or a list of dicts.
+        """
+        if self.dbEncoded:
+            queryData = self.encodeQuery(queryData)
+        queryString = prepareQueryString(queryData)
+
+        dbfileLocal = dbFile if dbFile is not None else self.dbfileDefault
+        db = self.dbPath + dbfileLocal
+
+        # mode=ro: the database is opened read-only through a URI
+        connection = sqlite3.connect(f"file:{db}?mode=ro", uri=True)
+        try:
+            if pandas:
+                results = pd.read_sql(queryString, connection)
+                if self.dbEncoded:
+                    results = self.db_results_decode_pandas(results)
+            else:
+                connection.row_factory = dict_factory
+                queryReponse = connection.cursor().execute(queryString)
+                results = queryReponse.fetchall()
+                if self.dbEncoded:
+                    results = self.db_results_decode(results)
+        finally:
+            # Close the connection even when the query or the decoding fails
+            connection.close()
+
+        return results
+
+    def textQuery(self, queryData):
+        """Read a slice of a text file stored under '<dbPath>/itxt/'.
+
+        Args:
+            queryData: dict with 'sigla' (file name), 'minChar' and 'maxChar'
+                (character offsets delimiting the slice).
+
+        Returns:
+            The text slice, decoded with the text key registered for sigla
+            when texts are encoded and such a key exists; None if a required
+            field is missing.
+        """
+        try:
+            sigla = queryData['sigla']
+            minChar = queryData['minChar']
+            maxChar = queryData['maxChar']
+        except (KeyError, TypeError):
+            return None
+
+        with open(f"{self.dbPath}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
+            # UTF-32 stores every character on 4 bytes, hence the 4x offset
+            file1.seek(4*minChar)
+            cont = file1.read(maxChar-minChar)
+
+        if self.textsEncoded and self.keyRing.textKeys.get(sigla) is not None:
+            key = self.keyRing.textKeys.get(sigla)
+            # NOTE(review): the key is aligned on minChar-1, not minChar --
+            # presumably file offsets are shifted by one w.r.t. the key; confirm.
+            cont = dc.decodeTextByKey(cont, key, minChar-1)
+
+        return cont
+
+    def encodeQuery(self, queryData):
+        """Encode the search values of form/lemma queries for an encoded database.
+
+        For the query types 'forma', 'lemma', 'formaLemma' and 'lemmaForma'
+        the 'data' and 'dataNorm' lists are replaced, in place, by their
+        encoded versions. Other query types are returned untouched.
+
+        Raises:
+            KeyError: if 'data' or 'dataNorm' is missing for those types.
+        """
+        queryType = queryData.get('queryType')
+        if queryType in ["forma", "lemma", "formaLemma", "lemmaForma"]:
+            try:
+                data = queryData['data']
+                dataNorm = queryData['dataNorm']
+                data = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in data]
+                dataNorm = [dc.db_encode(self.keyRing.vettSpec, datum) for datum in dataNorm]
+                queryData['data'] = data
+                queryData['dataNorm'] = dataNorm
+            except KeyError as err:
+                raise KeyError('Missing required data for query type ' + queryType + ': ' + str(err))
+
+        return queryData
+
+    def db_results_decode(self, result):
+        """Decode, in place, the encoded columns of a list of row dicts and return it."""
+        for row in result:
+            for key, value in row.items():
+                if isColumnToDecode(key):
+                    row[key] = dc.db_decode(self.keyRing.vettSpec, value)
+        return result
+
+    def db_results_decode_pandas(self, df):
+        """Decode the encoded columns of a DataFrame and return it."""
+        for col in df.columns:
+            if isColumnToDecode(col):
+                df[col] = df[col].apply( lambda el: dc.db_decode(self.keyRing.vettSpec, el) )
+        return df
+
+
+
+
+# Utilities
+def _requireQueryFields(queryData, queryType, *names):
+    """Return the values of the required keys, in order, or raise a descriptive KeyError."""
+    try:
+        return [queryData[name] for name in names]
+    except KeyError as err:
+        raise KeyError('Missing required data for query type ' + queryType + ': ' + str(err)) from err
+
+
+def prepareQueryString(queryData):
+    """Build the SQL string for the query described by queryData.
+
+    Supported values of queryData['queryType']: 'occ_tables', 'forma',
+    'lemma', 'lemmaForma', 'formaLemma', 'pfl', 'texts', 'contexts', 'bib',
+    'rif' and 'highlight'. Each type reads its own additional keys.
+
+    NOTE(security): values are interpolated directly into the SQL text.
+    Callers must only pass trusted or pre-escaped values; moving to
+    parameterized queries would require changing the return type.
+
+    Args:
+        queryData: dict with 'queryType' plus the fields that type requires.
+
+    Returns:
+        The SQL string, or None when a 'rif'/'highlight' query lacks its
+        fields, when a 'texts' query of subtype 2 has no 'formCodList', or
+        when the 'texts' subtype is not 0, 1 or 2.
+
+    Raises:
+        KeyError: if a required field is missing (all other query types).
+        ValueError: if the query type is missing or not recognized.
+    """
+    queryType = queryData.get('queryType')  # None if the key is not defined
+
+    #################
+    if queryType == 'occ_tables':
+        return "SELECT name FROM sqlite_master WHERE type='table'"
+
+    #################
+    if queryType == 'forma':
+        data, dataNorm = _requireQueryFields(queryData, queryType, 'data', 'dataNorm')
+        joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
+        if len(dataNorm) == 0:
+            return f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {joinedQueryData} ORDER BY idfor"
+        joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
+        return f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idfor"
+
+    ###################
+    elif queryType == 'lemma':
+        data, dataNorm = _requireQueryFields(queryData, queryType, 'data', 'dataNorm')
+        joinedQueryData = "'" + "' OR spec LIKE '".join(data) + "'"
+        if len(dataNorm) == 0:
+            return f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {joinedQueryData} ORDER BY idlem"
+        joinedQueryDataNorm = "'" + "' OR norm LIKE '".join(dataNorm) + "'"
+        return f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {joinedQueryData}) OR (norm LIKE {joinedQueryDataNorm}) ORDER BY idlem"
+
+    ########################
+    elif queryType == 'lemmaForma':
+        data, dataNorm = _requireQueryFields(queryData, queryType, 'data', 'dataNorm')
+        # The search runs on the lemma, so every alternative must target
+        # lem.spec (the no-norm branch used to join extra terms on form.spec).
+        joinedQueryData = "'" + "' OR lem.spec LIKE '".join(data) + "'"
+        if len(dataNorm) == 0:
+            return f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {joinedQueryData} ORDER BY lem.idlem"
+        joinedQueryDataNorm = "'" + "' OR lem.norm LIKE '".join(dataNorm) + "'"
+        return f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {joinedQueryData}) OR (lem.norm LIKE {joinedQueryDataNorm}) ORDER BY lem.idlem"
+
+    ########################
+    elif queryType == 'formaLemma':
+        data, dataNorm = _requireQueryFields(queryData, queryType, 'data', 'dataNorm')
+        joinedQueryData = "'" + "' OR form.spec LIKE '".join(data) + "'"
+        if len(dataNorm) == 0:
+            return f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {joinedQueryData} ORDER BY form.idfor"
+        joinedQueryDataNorm = "'" + "' OR form.norm LIKE '".join(dataNorm) + "'"
+        return f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {joinedQueryData}) OR (form.norm LIKE {joinedQueryDataNorm}) ORDER BY form.idfor"
+
+    #################
+    elif queryType == 'pfl':
+        codList, = _requireQueryFields(queryData, queryType, 'codList')
+        strlist = ",".join(str(c) for c in codList)
+        return f"SELECT DISTINCT lemma, forma FROM pfl WHERE lemma IN ({strlist})"
+
+    ###################
+    elif queryType == 'texts':
+        codList, table, subtype = _requireQueryFields(queryData, queryType, 'codList', 'table', 'querySubtype')
+        formCodList = queryData.get('formCodList')  # optional: None if absent
+
+        strlist = ",".join(str(c) for c in codList)
+        # The three subtypes share the same SELECT/JOIN part and differ only in the WHERE clause
+        baseSelect = f"SELECT tab.cod, tab.ntx, tab.pitxt, tab.elemlen, tab.mappa, tab.numperiod, tab.links, tab.numorg, intbib.sigla, tab.vol, tab.pag, tab.riga, tab.col, tab.tipostanza, tab.stanza, tab.verso, tab.numbrano, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore FROM {table} AS tab INNER JOIN intbib ON tab.ntx = intbib.ntx INNER JOIN lem ON tab.indlem = lem.cod"
+        if subtype == 0:
+            return baseSelect + f" WHERE tab.cod IN ({strlist})"
+        elif subtype == 1:
+            return baseSelect + f" WHERE tab.indlem IN ({strlist})"
+        elif subtype == 2:
+            if formCodList is None:
+                return None
+            strform = ",".join(str(c) for c in formCodList)
+            return baseSelect + f" WHERE tab.indlem IN ({strlist}) OR (tab.indlem = 0 AND tab.cod IN ({strform}))"
+        # Any other subtype yields no query
+        return None
+
+    ######################
+    elif queryType == 'contexts':
+        table, ntxlocal, mappalocal, parole = _requireQueryFields(queryData, queryType, 'table', 'ntxlocal', 'mappalocal', 'parole')
+        return f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
+
+    #################
+    elif queryType == 'bib':
+        row, = _requireQueryFields(queryData, queryType, 'row')
+        sigla, = _requireQueryFields(row, queryType, 'sigla')
+        return f"SELECT [Anno iniziale], [Anno finale], [Data codificata], [Titolo Abbreviato], [Autore], [Titolo], [Curatore], [Data descrittiva], [Area generica], [Area specifica], [Genere], [Forma], [Tipo], IQ FROM datibib WHERE Sigla='{sigla}'"
+
+    #################
+    elif queryType == 'rif':
+        try:
+            row = queryData['row']
+            numorg = row['numorg']
+            ntx = row['ntx']
+        except (LookupError, TypeError):
+            return None
+        return f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{numorg}' AND ntx='{ntx}')"
+
+    #################
+    elif queryType == 'highlight':
+        try:
+            row = queryData['row']
+            col = queryData['col']
+        except (LookupError, TypeError):
+            return None
+        return f"SELECT spec FROM form WHERE cod={row[col]}"
+
+    #####
+    else:
+        # str() keeps this a ValueError even when the query type is None
+        raise ValueError('Unrecognized query type: ' + str(queryType))
+
+
+# Dict factory non-Pandas queries
+def dict_factory(cursor, row):
+    """sqlite3 row factory returning each row as a {column name: value} dict."""
+    columnNames = (column[0] for column in cursor.description)
+    return dict(zip(columnNames, row))
+
+
+# Does the column data (in returned results) need decoding?
+def isColumnToDecode(col):
+    """Tell whether a result column holds encoded text that needs decoding.
+
+    True for the columns 'forma', 'lemma', 'cat_gr' and 'disambiguatore',
+    and for any column whose name starts with 'highlight'.
+    """
+    encodedColumns = ['forma', 'lemma', 'cat_gr', 'disambiguatore']
+    return col in encodedColumns or col.startswith('highlight')

+ 114 - 0
test_suite/test/more_tests_kora/pippo.py

@@ -0,0 +1,114 @@
+# %%
+from engine.cooccorrenze import cooccorrenze
+
+# Data-provider configuration handed to cooccorrenze below.
+# 'db_encoded' / 'texts_encoded' switch on the encoded-database handling.
+dataConfig = {
+            'data_interface': 'sqlite3',
+            # Alternative configuration (small test database), kept for reference:
+#            'dbPath': '../../../../db/first_db/',
+#            'dbfile_default': 'test1.db',
+#            'listOcc': ['Occ00001', 'Occ00002', 'Occ00003']
+            'dbPath': '../../../../db/ndg2.gat4/',
+            'dbfile_default': 'corpus.db',
+            "db_encoded": True,
+            "texts_encoded": True,
+            'listOcc': ['Occ00001', 'Occ00002', 'Occ00003', 'Occ00004', 'Occ00005', 'Occ00006', 'Occ00007', 'Occ00008', 'Occ00009', 'Occ00010', 'Occ00011', 'Occ00012', 'Occ00013', 'Occ00014']
+            }
+
+# Object under test
+prova = cooccorrenze(dataConfig)
+
+# Two search entries, one per word; the meaning of the three trailing zeros
+# is defined by ricerca_cooccorrenze -- TODO confirm against its signature.
+ricerca_prova=[['cavaliere', 0, 0, 0], ['corte', 0, 0, 0]]
+
+# Argument-order reminder for the call below:
+#prova.ricerca_cooccorrenze (ricerca_prova, intervallo, periodo, ordinate)
+prova.ricerca_cooccorrenze (ricerca_prova, 10, 0, 0)
+# %%
+
+######
+# KORA
+'''
+Suppose I have two different lists of Python dicts, all of which contain an integer field, for instance named "position". What is the fastest algorithm to find all pairs of objects satisfying the following conditions:
+
+- Each pair contains a dict from the first list and a dict from the second list
+- The absolute difference between the position field in the first member of the pair and the second member of the pair is less than a given integer value
+?
+'''
+
+
+##########
+# Chat GPT
+'''
+One possible approach to solve this problem is to use a nested loop to compare each element from the first list with each element from the second list, and check if the absolute difference between their "position" field is less than the given integer value. However, this approach has a time complexity of O(n^2), which can be slow for large datasets.
+
+A more efficient approach is to use a hash table to store the elements from the second list, and then iterate over the elements from the first list and check if there are any elements in the hash table that satisfy the condition. This approach has a time complexity of O(n), which is much faster than the nested loop approach.
+
+Here's an example implementation of this approach:
+'''
+def find_pairs(list1, list2, max_diff):
+    """Find all cross-list pairs whose 'position' fields differ by at most max_diff.
+
+    Hash-based variant: list2 is grouped by position, then every position
+    within max_diff of each list1 element is probed.
+
+    Args:
+        list1: list of dicts with an integer 'position' field.
+        list2: list of dicts with an integer 'position' field.
+        max_diff: maximum allowed absolute difference (inclusive).
+
+    Returns:
+        A list of (obj1, obj2) tuples, obj1 from list1 and obj2 from list2.
+    """
+    # Group the elements of the second list by position. A list per key is
+    # required: several elements may share a position, and a plain
+    # position -> element mapping would silently keep only the last one.
+    dict2 = {}
+    for obj in list2:
+        dict2.setdefault(obj['position'], []).append(obj)
+
+    # Probe every admissible position around each element of the first list
+    pairs = []
+    for obj1 in list1:
+        for pos in range(obj1['position'] - max_diff, obj1['position'] + max_diff + 1):
+            # Every position in this range already satisfies the distance bound
+            for obj2 in dict2.get(pos, ()):
+                pairs.append((obj1, obj2))
+
+    return pairs
+
+'''
+In this implementation, we first create a dictionary dict2 to store the elements from the second list, where the keys are the "position" field of each element. We then iterate over the elements from the first list, and for each element we check the range of possible positions that could match with it based on the given maximum difference. We then look up the corresponding elements from the second list in the dict2 dictionary, and check if they satisfy the condition. If so, we add the pair to the pairs list.
+
+This implementation performs O(n * max_diff) dictionary lookups, where n is the total number of elements in the two lists (plus the time needed to emit the matching pairs); for a small max_diff this is faster than the nested loop approach with a time complexity of O(n^2).
+'''
+
+
+######
+# KORA
+'''
+Is there any approach which is faster than O(n)?
+'''
+
+
+##########
+# Chat GPT
+'''
+No, there is no algorithm that can solve this problem in less than O(n) time complexity in the worst case. This is because we need to examine every element in both lists at least once in order to find all pairs that satisfy the condition, and there could be up to n^2 possible pairs to examine.
+
+However, it is possible to optimize the algorithm to reduce the constant factor and make it faster in practice. One way to do this is to first sort the two lists on the "position" field, which can be done in O(n log n) time complexity using a sorting algorithm such as merge sort. We can then use a modified merge algorithm to compare the two sorted lists and find all pairs that satisfy the condition. This approach has a time complexity of O(n log n), which is faster than the O(n^2) approach but still not faster than O(n) in the worst case.
+
+Here's an example implementation of this approach:
+'''
+
+def find_pairs(list1, list2, max_diff):
+    """Find all cross-list pairs whose 'position' fields differ by at most max_diff.
+
+    Sort-based variant: both lists are sorted by position and a sliding
+    window over the second list is matched against each element of the first.
+
+    Args:
+        list1: list of dicts with an integer 'position' field.
+        list2: list of dicts with an integer 'position' field.
+        max_diff: maximum allowed absolute difference (inclusive).
+
+    Returns:
+        A list of (obj1, obj2) tuples, obj1 from list1 and obj2 from list2.
+    """
+    # Sort both lists on the "position" field (the inputs are not modified)
+    list1 = sorted(list1, key=lambda x: x['position'])
+    list2 = sorted(list2, key=lambda x: x['position'])
+
+    pairs = []
+    # Index of the first list2 element that can still match the current
+    # list1 element; it only moves forward because list1 is sorted.
+    window_start = 0
+    for obj1 in list1:
+        pos1 = obj1['position']
+        while window_start < len(list2) and list2[window_start]['position'] < pos1 - max_diff:
+            window_start += 1
+        # Emit EVERY list2 element inside the window. Advancing past obj1
+        # after its first match would drop the remaining pairs.
+        j = window_start
+        while j < len(list2) and list2[j]['position'] <= pos1 + max_diff:
+            pairs.append((obj1, list2[j]))
+            j += 1
+
+    return pairs
+
+##########
+# Chat GPT
+'''
+In this implementation, we first sort both lists on the "position" field using the sorted function and a lambda function to extract the "position" field. We then initialize two pointers i and j for both lists, and iterate over the lists using a while loop. At each iteration, we compare the "position" fields of the current elements from both lists, and if the absolute difference is less than or equal to the given maximum difference, we add the pair to the pairs list and increment the pointer for the first list. If the difference is greater than the maximum, we increment the pointer for the list with the smaller current "position" field.
+
+This implementation has a time complexity of O(n log n), which is faster than the O(n^2) approach for large datasets. However, it cannot beat the O(n) lower bound discussed above, and it may not provide a significant speedup for small datasets.
+'''
+# %%

+ 33 - 0
test_suite/test/query_generator.py

@@ -0,0 +1,33 @@
+from engine.data_interface_sqlite3.query_handlers import simpleQueryHandler
+import sqlite3
+#from engine.simple_query_test import ricercaformefull
+#from engine.simple_query_test import ricercalemmifull
+#from engine.simple_query_test import ricercaformelemmi
+#from engine.simple_query_test import ricercalemmiforme
+
+def doSimpleQuery(data, tipo, espansa, raddoppiata, formeLemmi, path):
+    """Dispatch a simple query according to its string parameters.
+
+    Every recognised combination currently ends up in the same call,
+    simpleQueryHandler(data, path); the dedicated legacy search functions
+    (ricercaformefull, ricercalemmi, ricercaformelemmi, ricercalemmiforme)
+    are no longer used.
+
+    Args:
+        data: the search data.
+        tipo: "forma" or "lemma".
+        espansa: expanded-search flag, must be convertible to int.
+        raddoppiata: doubled-initial flag, must be convertible to int.
+        formeLemmi: "0" or "1".
+        path: forwarded to simpleQueryHandler.
+
+    Returns:
+        The handler's result; "" when formeLemmi is valid but tipo is not;
+        the concatenation of the inputs when formeLemmi is neither "0" nor
+        "1" (this signals a problem: check the parameters and supply
+        whatever is missing).
+    """
+    # The converted values are unused, but the conversions still reject
+    # non-numeric flags exactly as before.
+    int(espansa)
+    int(raddoppiata)
+
+    if formeLemmi == "0" or formeLemmi == "1":
+        if tipo == "forma" or tipo == "lemma":
+            return simpleQueryHandler(data, path)
+        return ""
+
+    return data + tipo + espansa + raddoppiata + formeLemmi

+ 36 - 0
test_suite/test/results_output.py

@@ -0,0 +1,36 @@
+# Questo NON è parte del codice: è un notebook Jupyter (nell'implementazione di VSCode)
+# che ho usato per fare dei test!
+
+# %%
+# Test code using Jupyter 
+
+# %%
+
+import sqlite3
+import pandas as pd
+import dtale
+import unicodedata
+from simple_query_test_pandas import ricercaforme
+from simple_query_test_pandas import ricercalemmi
+from simple_query_test_pandas import ricercaformelemmi
+from simple_query_test_pandas import ricercalemmiforme
+from simple_query_test_pandas import inizialeraddoppiata
+from simple_query_test_pandas import interpreter
+from simple_query_test_pandas import counter
+from test_occorrenzario_pandas import findbib, findcontexts, findtexts
+import json
+#%%
+def df_to_json (df):
+    """Convert a DataFrame to a list of JSON-compatible row dicts.
+
+    The frame is serialized with orient="index" and parsed back, so the
+    values are plain JSON types; the index labels themselves are dropped.
+    """
+    serialized = df.to_json(orient="index")
+    rows_by_index = json.loads(serialized)
+    return list(rows_by_index.values())
+
+#%%
+
+# Search pattern; interpreter() turns '*' into the SQL wildcard '%'
+entry = "re*"
+print ("Ricerca di: " + entry)
+# Arguments after the pattern list: database path, then two integer flags
+# (presumably espansa and raddoppiata, as in ricercaforme -- TODO confirm)
+df=ricercaformelemmi(interpreter(entry), "../", 1, 0)
+print (counter(df))
+# Bare expression: its value is only displayed when run as a notebook cell
+df_to_json(df)
+
+# %%

+ 106 - 0
test_suite/test/results_output_JSON.py

@@ -0,0 +1,106 @@
+# Notebook Jupyter per la produzione semi-automatica dei test
+# definire le ricerche da effettuare e il nome del file dove visualizzare i risultati
+
+# %%
+# Test code using Jupyter 
+
+# %%
+import sqlite3
+import pandas as pd
+import dtale
+import unicodedata
+from simple_query_test_pandas import ricercaforme
+from simple_query_test_pandas import ricercalemmi
+from simple_query_test_pandas import ricercaformelemmi
+from simple_query_test_pandas import ricercalemmiforme
+from simple_query_test_pandas import inizialeraddoppiata
+from simple_query_test_pandas import interpreter
+from simple_query_test_pandas import counter
+from test_occorrenzario_pandas import findbib, findcontexts, findtexts
+import json
+import os
+
+# Definisco la funzione per convertire il DataFrame in formato JSON
+def df_to_json_records(df):
+    """Return the rows of a DataFrame as a list of {column: value} dicts."""
+    records = df.to_dict(orient='records')
+    return records
+
+# Definisco la funzione per scrivere i risultati in un file JSON
+def write_to_json_results(data, filename):
+    """Write the search results to filename as JSON, overwriting the file."""
+    with open(filename, 'w') as results_file:
+        # 4-space indentation keeps the file readable
+        json.dump(data, results_file, indent=4)
+
+# Definisco la funzione per scrivere le ricerche in un file JSON
+def write_to_json_searches(data, filename):
+    """Write the executed searches to filename as JSON, overwriting the file."""
+    with open(filename, 'w') as searches_file:
+        # 4-space indentation keeps the file readable
+        json.dump(data, searches_file, indent=4)
+      
+# Definisco la funzione per leggere i risultati da un file JSON
+def read_from_json(filename):
+    """Load previously saved results from a JSON file.
+
+    Returns the parsed content, or an empty list when the file does not
+    exist or is empty.
+    """
+    if os.path.isfile(filename):
+        with open(filename, 'r') as stored_file:
+            raw = stored_file.read()
+        if raw:
+            return json.loads(raw)
+    return []
+
+# Accumulator for the result records of all searches
+all_results = []
+# Accumulator for the descriptions of the searches performed
+all_searches = []
+# Example searches to run: search type, searched word, database path and the
+# two integer flags forwarded to the search functions
+search_entries = [
+    {"tipo_ricerca": "forme e lemmi", "parola_cercata": "pro*", "path_db": "/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db", "ricerca_espansa": 1, "iniziali_raddoppiate": 0},
+    {"tipo_ricerca": "forme", "parola_cercata": "prov*", "path_db": "/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db", "ricerca_espansa": 1, "iniziali_raddoppiate": 0},
+    {"tipo_ricerca": "lemmi", "parola_cercata": "prova", "path_db": "/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db", "ricerca_espansa": 1, "iniziali_raddoppiate": 0}
+]
+# Name of the results file
+filename_results = 'results.json'
+# Name of the searches file
+filename_searches = 'searches.json'
+
+# Run the searches
+for entry_info in search_entries:
+    print ("Ricerca di: " + entry_info["parola_cercata"])
+    # Unpack the search type, the searched word and the other required settings
+    tipo_ricerca = entry_info["tipo_ricerca"]
+    parola_cercata = entry_info["parola_cercata"]
+    path_db = entry_info["path_db"]
+    ricerca_espansa = entry_info["ricerca_espansa"]
+    iniziali_raddoppiate = entry_info["iniziali_raddoppiate"]
+    # Search description attached to every result row (the database path is left out)
+    search_info = {"tipo_ricerca": tipo_ricerca, "parola_cercata": parola_cercata, "ricerca_espansa": ricerca_espansa, "iniziali_raddoppiate": iniziali_raddoppiate}
+    # Run the search; any type other than the first two is treated as a lemma search
+    if tipo_ricerca == "forme e lemmi":
+        df = ricercaformelemmi(interpreter(parola_cercata), path_db, ricerca_espansa, iniziali_raddoppiate)
+    elif tipo_ricerca == "forme":
+        df = ricercaforme(interpreter(parola_cercata), path_db, ricerca_espansa, iniziali_raddoppiate)
+    else:
+        df = ricercalemmi(interpreter(parola_cercata), path_db, ricerca_espansa, iniziali_raddoppiate)
+    results = df_to_json_records(df)
+    # Tag each row with its search and add it to the global result list
+    for r in results:
+        r.update(search_info)
+    all_results.extend(results)
+    all_searches.append(search_info)
+
+# Read the previous results from the JSON file
+path = "test_data/"
+previous_results = read_from_json(os.path.join(path, filename_results))
+
+# Append the new results to the previous ones
+previous_results.extend(all_results)
+
+# Write all search results to the JSON file (only if every item is a dict)
+if all(isinstance(item, dict) for item in previous_results):
+    write_to_json_results(previous_results, os.path.join(path, filename_results))
+    print("Risultati della ricerca salvati nel file JSON "+ filename_results)
+else:
+    print("La lista non è una lista di dizionari e non può essere convertita in JSON")
+
+# Write all executed searches to the JSON file; unlike the results, the
+# searches file is overwritten rather than extended
+if all(isinstance(item, dict) for item in all_searches):
+    write_to_json_searches(all_searches, os.path.join(path, filename_searches))
+    print("Ricerche salvate nel file JSON " + filename_searches)
+else:
+    print("La lista non è una lista di dizionari e non può essere convertita in JSON")

+ 336 - 0
test_suite/test/simple_query_test_pandas.py

@@ -0,0 +1,336 @@
+# Questo NON è parte del codice: è un notebook Jupyter (nell'implementazione di VSCode)
+# che ho usato per fare dei test!
+
+# %%
+# Test code using Jupyter 
+
+# %%
+import sqlite3
+import re
+import pandas as pd
+import dtale
+import unicodedata
+import sys
+
+
+#%% funzione combinazioni <> è chiamata da interpreter
+def combinations(s):
+  result = []
+  start = s.find("<")
+  end = s.find(">")
+  if start == -1 or end == -1:
+    return [s]
+  items = s[start + 1:end].split(",")
+  for item in items:
+    result.extend([s[:start] + item + rest for rest in combinations(s[end + 1:])])
+  return result
+
+#%% funzione interprete, sta alla base di ogni ricerca
+def interpreter (data):
+    clean_data= "'"+data.replace("*", "%").replace("?", "_").replace(" ","").replace("'", "''").replace("’", "''") +"'"
+    return combinations(clean_data)    
+      
+# %% funzione iniziale raddoppiata, è chiamata dalle funzioni di ricerca con iniziale raddoppiata
+def inizialeraddoppiata (data):
+    doubleddata=[]
+    for el in data:
+        if el[1] != "%" and "_":
+            doubleddata = doubleddata + ["'"+ el[1] + el[1:]]
+    return doubleddata
+
+# %% funzione normalizza stringa (ricerca espansa), è chiamata dalle funzioni di ricerca espansa
+def normalize(stringa):
+    return unicodedata.normalize('NFKD', stringa).encode('ASCII', 'ignore').decode('utf-8')
+
+def list_normalize(lista):
+    return [normalize(stringa) for stringa in lista]
+
+
+# %% funzione counter, può essere chiamata sui risultati delle ricerche per visualizzare le forme/lemmi e il numero di occorrenze individuate
+def counter (results):
+    if not results.empty:
+        trovati= len(results.index)
+        occorrenze= results['occ'].sum()
+        return ("Trovati=" + str(trovati) + " Occorrenze=" + str(occorrenze))   
+
+
+#%% Funzione ricerca per forme
+def ricercaforme (entries, path, espansa, raddoppiata):
+    if espansa == 0: 
+        data=" OR spec LIKE ".join(entries)
+        doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
+        
+        if raddoppiata == 1: 
+            theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idfor"
+        else:
+            theSimpleQuery = f"SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE {data} ORDER BY idfor"
+
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+
+    else:
+
+        data=" OR spec LIKE ".join(entries)
+        data_norm=" OR norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1:
+            theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idfor"
+        else:
+            theSimpleQuery = f"SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idfor"
+
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+    #deprecated
+    """if espansa == 0:
+
+        data=" OR spec LIKE ".join(entries)
+        doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1: 
+            theSimpleQuery = "SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE " + data + " OR spec LIKE " + doubleddata + "ORDER BY idfor"
+        else:
+            theSimpleQuery = "SELECT spec AS forma, nocc AS occ, cod FROM form WHERE spec LIKE " + data + " ORDER BY idfor"
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table
+
+    else:
+
+        data=" OR spec LIKE ".join(entries)
+        data_norm=" OR norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1:
+            theSimpleQuery = "SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ") OR (spec LIKE " + doubleddata + ") OR (norm LIKE " + doubleddata_norm + ")" + " ORDER BY idfor"
+        else:
+            theSimpleQuery = "SELECT DISTINCT spec AS forma, nocc AS occ, cod FROM form WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ")" + " ORDER BY idfor"
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table"""
+#%% Funzione ricerca per lemmi
+def ricercalemmi (entries, path, espansa, raddoppiata):
+    if espansa == 0:
+        data = " OR spec LIKE ".join(entries)
+        doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1:
+            theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} OR spec LIKE {doubleddata} ORDER BY idlem"
+        else:
+            theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE {data} ORDER BY idlem"
+
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+
+    else:
+        data = " OR spec LIKE ".join(entries)
+        data_norm = " OR norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm = " OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata = " OR spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1:
+            theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) OR (spec LIKE {doubleddata}) OR (norm LIKE {doubleddata_norm}) ORDER BY idlem"
+        else:
+            theSimpleQuery = f"SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE {data}) OR (norm LIKE {data_norm}) ORDER BY idlem"
+
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+    #deprecated
+    """if espansa == 0:
+
+        data=" OR spec LIKE ".join(entries)
+        doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
+        
+        if raddoppiata == 1:
+            theSimpleQuery = "SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE " + data  + " OR spec LIKE " + doubleddata + "ORDER BY idlem"
+        else:
+            theSimpleQuery = "SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE spec LIKE " + data + " ORDER BY idlem"
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table
+
+    else:
+
+        data=" OR spec LIKE ".join(entries)
+        data_norm=" OR norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm=" OR norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata=" OR spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1:
+            theSimpleQuery = "SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ") OR (spec LIKE " + doubleddata + ") OR (norm LIKE " + doubleddata_norm + ")" + " ORDER BY idlem"
+        else:
+            theSimpleQuery = "SELECT DISTINCT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE (spec LIKE " + data +") OR (norm LIKE " + data_norm + ")" + " ORDER BY idlem"
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table"""
+#%% Funzione ricerca di forme con vista lemmi
+def ricercaformelemmi (entries, path, espansa, raddoppiata):
+    if espansa == 0:
+        data = " OR form.spec LIKE ".join(entries)
+        doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
+        if raddoppiata == 1:
+            theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY form.idfor"
+        else:
+            theSimpleQuery = f"SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE {data} ORDER BY form.idfor"
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+    else:
+        data = " OR form.spec LIKE ".join(entries)
+        data_norm = " OR form.norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm = " OR form.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
+        if raddoppiata == 1:
+            theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) OR (form.spec LIKE {doubleddata}) OR (form.norm LIKE {doubleddata_norm}) ORDER BY form.idfor"
+        else:
+            theSimpleQuery = f"SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE {data}) OR (form.norm LIKE {data_norm}) ORDER BY form.idfor"
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+    #deprecated
+    """if espansa == 0:
+
+        data=" OR form.spec LIKE ".join(entries)
+        doubleddata=" OR form.spec LIKE ".join(inizialeraddoppiata(entries))
+        
+        if raddoppiata == 1:
+            theSimpleQuery = "SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE " + data + " OR form.spec LIKE " + doubleddata + " ORDER BY form.idfor"
+        else:
+            theSimpleQuery = "SELECT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE form.spec LIKE " + data + " ORDER BY form.idfor"
+        
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table
+
+    else:
+        
+        data=" OR form.spec LIKE ".join(entries)
+        data_norm=" OR form.norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm=" OR form.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata=" OR form.spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1:
+            theSimpleQuery = "SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE " + data +") OR (form.norm LIKE " + data_norm + ") OR (form.spec LIKE " + doubleddata + ") OR (form.norm LIKE " + doubleddata_norm + ")" + " ORDER BY form.idfor"
+        else:
+            theSimpleQuery = "SELECT DISTINCT form.spec AS forma, lem.spec AS lemma, lem.cat AS cat_gr, lem.omo AS disambiguatore, pfl.nocc AS occ, form.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (form.spec LIKE " + data +") OR (form.norm LIKE " + data_norm + ")" + " ORDER BY form.idfor"
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table"""
+#%% Funzione ricerca lemmi con vista forme
+def ricercalemmiforme (entries, path, espansa, raddoppiata):
+    if espansa == 0:
+        data = " OR form.spec LIKE ".join(entries)
+        doubleddata = " OR form.spec LIKE ".join(inizialeraddoppiata(entries))
+        if raddoppiata == 1:
+            theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} OR form.spec LIKE {doubleddata} ORDER BY lem.idlem"
+        else:
+            theSimpleQuery = f"SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE {data} ORDER BY lem.idlem"
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+    else:
+        data = " OR lem.spec LIKE ".join(entries)
+        data_norm = " OR lem.norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm = " OR lem.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata = " OR lem.spec LIKE ".join(inizialeraddoppiata(entries))
+        if raddoppiata == 1:
+            theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) OR (lem.spec LIKE {doubleddata}) OR (lem.norm LIKE {doubleddata_norm}) ORDER BY lem.idlem"
+        else:
+            theSimpleQuery = f"SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE {data}) OR (lem.norm LIKE {data_norm}) ORDER BY lem.idlem"
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+    #deprecated
+    """if espansa == 0:
+
+        data=" OR form.spec LIKE ".join(entries)
+        doubleddata=" OR form.spec LIKE ".join(inizialeraddoppiata(entries))
+        
+        if raddoppiata == 1:
+            theSimpleQuery = "SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE " + data + " OR form.spec LIKE " + doubleddata + " ORDER BY lem.idlem"
+        else:
+            theSimpleQuery = "SELECT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma,lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod != 0 AND lem.cod = pfl.lemma WHERE lem.spec LIKE " + data + " ORDER BY lem.idlem"
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table
+        
+    else:
+        
+        data=" OR lem.spec LIKE ".join(entries)
+        data_norm=" OR lem.norm LIKE ".join(list_normalize(entries))
+        doubleddata_norm=" OR lem.norm LIKE ".join(list_normalize(inizialeraddoppiata(entries)))
+        doubleddata=" OR lem.spec LIKE ".join(inizialeraddoppiata(entries))
+
+        if raddoppiata == 1:
+            theSimpleQuery = "SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE " + data +") OR (lem.norm LIKE " + data_norm + ") OR (lem.spec LIKE " + doubleddata + ") OR (lem.norm LIKE " + doubleddata_norm + ")" + " ORDER BY lem.idlem"
+        else:
+            theSimpleQuery = "SELECT DISTINCT lem.spec AS lemma, lem.cat AS cat_gr, form.spec AS forma, lem.omo AS disambiguatore, pfl.nocc AS occ, lem.cod FROM pfl INNER JOIN form ON form.cod = pfl.forma INNER JOIN lem ON lem.cod = pfl.lemma WHERE (lem.spec LIKE " + data +") OR (lem.norm LIKE " + data_norm + ")" + " ORDER BY lem.idlem"
+
+        con = sqlite3.connect("file:" + path + "/db/test1.db" + "?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        return answer_table"""
+
+# %% Ricerca per categorie grammaticali
+def ricercacatgr (entry, path):
+        theSimpleQuery = f"SELECT spec AS lemma, cat AS cat_gr, omo AS disambiguatore, nocc AS occ, cod FROM lem WHERE cat = '{entry}' ORDER BY idlem"
+        con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        answer_table = pd.read_sql(theSimpleQuery, con)
+        if answer_table.empty:
+            print ("Nessun risultato")
+            sys.exit(1)
+        else:
+            return answer_table
+
+#%% Manual check: run a form search for "filius" and open the result in D-Tale
+# NOTE(review): these statements sit at module top level, so they also run when
+# another script imports this module (the search is then executed against the
+# hard-coded local path below).
+path = "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/db/first_db"
+entry = "filius"
+#df=ricercacatgr(entry, path)
+df=ricercaforme(interpreter(entry), path, 0, 0)
+dtale.show(df)
+# %%
+ 

+ 164 - 0
test_suite/test/test_contesti_singoli.py

@@ -0,0 +1,164 @@
+# %%
+import sqlite3
+import pandas as pd
+import dtale
+import unicodedata
+from simple_query_test_pandas import ricercaforme, ricercalemmi, ricercaformelemmi, ricercalemmiforme, interpreter
+from test_occorrenzario_pandas import findtexts, findcontexts, findbib, contestimultipli, get_tables_occ
+
+# Single-context search with a configurable width expressed in words, periods or passages;
+# it takes the output of findcontexts as input and its result is meant to be passed on to findbib.
+def singlecontexts(textlist, index, parole, periodi, brani, listOcc, path):
+    """Extract one context from *textlist* with a custom width.
+
+    Exactly one width is used, chosen in this order: *parole* (words), then
+    *periodi* (periods), then *brani* (passages); a width of 0 disables it.
+
+    Args:
+        textlist: DataFrame of contexts; the columns ``sigla``, ``numperiod``,
+            ``ntx``, ``mappa`` and ``links`` are read (``numbrano`` too in the
+            passage branch).
+        index: row to extract. It is used both positionally (``iloc``) and as
+            a label (``loc``), so this assumes the index labels equal the row
+            positions - TODO confirm.
+        parole: total number of words to include around the occurrence.
+        periodi: total number of periods to include around the occurrence.
+        brani: total number of passages to include around the occurrence.
+        listOcc: names of the occurrence tables queried in the word branch.
+        path: directory containing ``test1.db`` and the ``itxt``/``ftxt`` folders.
+
+    Returns:
+        A one-row DataFrame with the selected row plus ``piniz``, ``pfin``,
+        ``contesto`` and ``formattazione``; or the string
+        "Nessun brano associato a questo contesto" when the passage branch is
+        taken and ``links`` is 0 or 1.
+
+    Raises:
+        IndexError: when *parole*, *periodi* and *brani* are all 0, because no
+            context text is collected before ``contexts [0]`` is read.
+    """
+    context = textlist.iloc[index]
+    contexts = []
+    formats = []
+    # NOTE(review): this connection is never closed explicitly
+    con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+    sigla = textlist.loc[index, "sigla"]
+    periodlocal = textlist.loc[index, "numperiod"]
+    ntxlocal = textlist.loc[index, "ntx"]
+    mappalocal = textlist.loc[index, "mappa"]
+    linkslocal = textlist.loc[index, "links"]
+    if parole != 0:
+        # Width in words: collect the text pointers of the tokens whose "mappa"
+        # lies within parole/2 positions on each side of the occurrence
+        pointerlist = pd.DataFrame()
+        for table in listOcc:
+            query = f"SELECT tab.pitxt, tab.elemlen FROM {table} AS tab WHERE tab.ntx = {ntxlocal} AND tab.mappa <= {mappalocal+int(parole/2)} AND tab.mappa >= {mappalocal-int(parole/2)}"
+            queryresponse = pd.read_sql(query, con)
+            pointerlist = pd.concat([pointerlist, queryresponse])
+        with open(f"{path}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
+            # presumably pitxt is a character index and UTF-32 uses 4 bytes per character - TODO confirm
+            file1.seek(4*pointerlist["pitxt"].min())
+            cont = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
+            contexts.append(cont)
+        with open(f"{path}/ftxt/{sigla}", 'rb') as file1:
+            # The ftxt file is read as raw bytes, one integer per byte, over the same span
+            file1.seek(pointerlist["pitxt"].min()-1)
+            formBytes = file1.read(pointerlist["pitxt"].max()-pointerlist["pitxt"].min())
+            form = [byte for byte in formBytes]
+            formats.append(form)
+            context ['piniz'] = pointerlist["pitxt"].min()
+            context ['pfin'] = pointerlist["pitxt"].max()
+    elif periodi != 0:
+        # Width in periods: take the periods numbered within periodi/2 on each side
+        query = f"SELECT piniz, pfin FROM periodi WHERE ntx = {ntxlocal} AND numperiod <= {periodlocal+int(periodi/2)} AND numperiod >= {periodlocal-int(periodi/2)}"
+        queryresponse = pd.read_sql(query, con)
+        with open(f"{path}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
+            file1.seek(4*queryresponse["piniz"].min())
+            cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
+            contexts.append(cont)
+            context ['piniz'] = queryresponse["piniz"].min()
+            context ['pfin'] = queryresponse["pfin"].max()
+    elif brani != 0:
+        # Width in passages: only possible when links is neither 0 nor 1
+        if linkslocal == 0 or linkslocal == 1:
+            # NOTE: this path returns a plain string, not a DataFrame
+            return "Nessun brano associato a questo contesto"
+        else:
+            numbranolocal = textlist.loc[index, "numbrano"]
+            query = f"SELECT piniz, pfin FROM linkbase WHERE {ntxlocal} = ntx AND tipo = 2 AND id BETWEEN {numbranolocal-int(brani/2)} AND {numbranolocal+int(brani/2)}"
+            queryresponse = pd.read_sql(query, con)
+            with open(f"{path}/itxt/{sigla}", 'r', encoding="utf-32-le") as file1:
+                file1.seek(4*queryresponse["piniz"].min())
+                cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min())
+                contexts.append(cont)
+                context ['piniz'] = queryresponse["piniz"].min()
+                context ['pfin'] = queryresponse["pfin"].max() 
+    context['contesto'] = contexts [0]
+    # formats is filled only by the word branch; it stays an empty list otherwise
+    context['formattazione'] = formats
+    # Turn the row (a Series) back into a one-row DataFrame with a fresh 0-based index
+    return pd.DataFrame(context).T.reset_index(drop=True)
+
+
+#%% funzione di ricerca dei brani associati. Ha in input singlecontexts.
+def findlinks (context, path):
+    con = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+    linkslocal = context.loc[0, "links"]
+    siglalocal = context.loc[0, "sigla"]
+    ntxlocal = context.loc[0, "ntx"]
+    pitxtlocal = context.loc[0, "pitxt"]
+    pinizlocal = context.loc[0, "piniz"]
+    pfinlocal = context.loc[0, "pfin"]
+    if linkslocal == 0:
+        return context
+    if linkslocal == 1:
+        query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
+        queryresponse = pd.read_sql(query, con)
+        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
+                file1.seek(4*queryresponse["piniz"].min())
+                cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-2)
+        context['nota'] = cont
+        return context
+    if linkslocal == 2:
+        query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
+        queryresponse = pd.read_sql(query, con)
+        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
+                file1.seek(4*queryresponse["piniz"].min())
+                cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-2)
+        context['testo associato'] = cont
+    if linkslocal == 3:
+        query = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 1) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
+        queryresponse = pd.read_sql(query, con)
+        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file1:
+                file1.seek(4*queryresponse["piniz"].min())
+                cont = file1.read(queryresponse["pfin"].max()-queryresponse["piniz"].min()-2)
+        context['nota'] = cont
+        query2 = f"SELECT ta.ntx, ta.id, ta.piniz, ta.pfin, tb.piniz AS iniz, tb.pfin AS fin, tb.mappain, tb.mappafin FROM linkbase AS tb INNER JOIN linknoteass AS ta ON tb.ntx = ta.ntx AND tb.id = ta.id WHERE (((tb.tipo= 2) AND (tb.ntx = {ntxlocal})) AND ((tb.piniz BETWEEN {pinizlocal} AND {pfinlocal}) OR ({pitxtlocal} BETWEEN tb.piniz AND tb.pfin)))"
+        queryresponse2 = pd.read_sql(query2, con)
+        with open(f"{path}/itxt/{siglalocal}", 'r', encoding="utf-32-le") as file2:
+                file2.seek(4*queryresponse2["piniz"].min())
+                cont2 = file2.read(queryresponse2["pfin"].max()-queryresponse2["piniz"].min()-2)
+        context['testo associato'] = cont2
+    return context
+
+#%% Ha in input links, associa i riferimenti bibliografici ad ogni contesto.
+def singlefindbib(contexts, path):
+    infobib = pd.DataFrame()
+    rif_org = pd.DataFrame()
+    for ind, row in contexts.iterrows():
+        con = sqlite3.connect(f"file:{path}/bibliografia/BiblioTLIO.db?mode=ro", uri=True)
+        Query = f"SELECT [Anno iniziale], [Titolo Abbreviato], IQ FROM datibib WHERE Sigla='{row['sigla']}'"
+        bib = pd.read_sql(Query, con)
+        infobib = pd.concat([infobib, bib])
+        con2 = sqlite3.connect(f"file:{path}/test1.db?mode=ro", uri=True)
+        Query2 = f"SELECT head AS Rif_organico, full AS Rif_completo FROM org WHERE (indice='{row['numorg']}' AND ntx='{row['ntx']}')"
+        rif = pd.read_sql(Query2, con2)
+        rif_org = pd.concat([rif_org, rif])
+    anno = list(infobib['Anno iniziale'])
+    titolo = list(infobib['Titolo Abbreviato'])
+    iq = list(infobib['IQ'])
+    rif1 = list(rif_org['Rif_organico'])
+    rif2 = list(rif_org['Rif_completo'])
+    contexts['Anno iniziale'] = anno
+    contexts['Titolo Abbreviato'] = titolo
+    contexts ['IQ'] = iq
+    contexts['Rif_organico'] = rif1
+    contexts['Rig_completo'] = rif2
+    contexts.pag = contexts.pag.astype(int)
+    chrono = contexts.sort_values(by=['Anno iniziale', 'Rif_organico', 'pag'])   
+    if 'nota' in chrono.columns and 'testo associato' in chrono.columns:
+        cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'testo associato', 'highlight']
+    elif 'nota' in chrono.columns:
+        cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'nota', 'highlight']
+    elif 'testo associato' in chrono.columns:
+        cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'testo associato', 'highlight']
+    else:
+        cols = ['links','Titolo Abbreviato', 'Rif_organico', 'tipostanza', 'stanza', 'verso', 'pag', 'riga', 'IQ', 'lemma', 'cat_gr', 'disambiguatore', 'contesto', 'highlight']
+    chrono = chrono.drop(chrono.columns[0], axis=1)
+    clean_df = chrono.reindex(columns=cols + list(chrono.columns.difference(cols)))
+    return clean_df
+
+# %% funzione contesti singoli
+
+def contestosingolo (contestimultipli, indice, parole, periodi, brani, listOcc, path):
+     contestosingolo = singlecontexts(contestimultipli, indice, parole, periodi, brani, listOcc, path)
+     braniassociati = findlinks(contestosingolo, path)
+     contestosingoloclean = singlefindbib (braniassociati, path)
+     return contestosingoloclean
+
+# %%
+# Manual check: extract one context for the form "amistade" and show it in D-Tale
+path = "/Users/leonardocanova/Library/CloudStorage/OneDrive-ConsiglioNazionaledelleRicerche/TIGRO/Ricerche/db/first_db"
+listOcc = get_tables_occ(path)
+entry = "amistade"
+tiporicerca= 0
+# search type (0 for forms, 1 for lemmas, 2 for lemmas with the "show non-lemmatized occurrences" option)
+indice = 28  # row of the multiple-context table to expand
+numeroparole = 30  # passed as `parole`; being non-zero it takes precedence over the two widths below
+numeroperiodi = 0
+numerobrani = 0 
+ricerca=ricercaforme(interpreter(entry), path, 0, 0)
+contesti = contestimultipli(tiporicerca, ricerca, listOcc, path)
+contesto = contestosingolo(contesti, indice, numeroparole, numeroperiodi, numerobrani, listOcc, path)
+dtale.show(contesto)
+# %%

+ 452 - 0
test_suite/test/test_contesti_singoli_notebook.ipynb

@@ -0,0 +1,452 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sqlite3\n",
+    "import pandas as pd\n",
+    "import dtale\n",
+    "import unicodedata\n",
+    "from simple_query_test_pandas import ricercaforme\n",
+    "from simple_query_test_pandas import ricercalemmi\n",
+    "from simple_query_test_pandas import ricercaformelemmi \n",
+    "from simple_query_test_pandas import ricercalemmiforme\n",
+    "from simple_query_test_pandas import inizialeraddoppiata\n",
+    "from simple_query_test_pandas import interpreter\n",
+    "from test_occorrenzario_pandas import findtexts\n",
+    "from test_occorrenzario_pandas import findcontexts\n",
+    "from test_occorrenzario_pandas import findbib\n",
+    "from test_cooccorrenze import ricerca_cooccorrenze\n",
+    "from test_contesti_singoli import findbib, findcontexts, findlinks, findtexts, singlecontexts, singlefindbib\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<h1>Funzionamento della funzione ricerca di contesti singoli</h1>\n",
+    "\n",
+    "- visualizzazione dei contesti singoli a partire dalla lista dei contesti multipli\n",
+    "\n",
+    "- visualizzazione delle note e dei testi associati\n",
+    "\n",
+    "- possibilità di personalizzazione dell'ampiezza dei contesti singoli per numero di parole, periodi e brani associati\n",
+    "\n",
+    "- aggiornamento automatico dell'ampiezza dei brani associati, quando si raffina il contesto per parole e periodi"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<h2>Ricerca di contesti singoli</h2>"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- ricerca per forme di: filius (primo contesto utile)\n"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs1filius.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"filius\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type = 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# funzione di ricerca dei contesti singoli, con personalizzazione dell'ampiezza per parole o periodi\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 0, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ricerca per forme di: tarda (primo contesto utile)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs2tarda.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"tarda\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type= 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 0, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ricerca per forme di: amistade (contesto n. 29)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs3amistade.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"amistade\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type= 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 28, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ricerca per forme di: orgoglia"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs4orgoglia.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"orgoglia\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type= 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 0, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ricerca per forme di: intradetta"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs5intradetta.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"intradetta\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type= 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 0, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ricerca per forme di: intradetta, a contesto singolo (visualizzazione di 1 periodo)\n",
+    "![](img/cs5intradetta1.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"intradetta\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type= 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# visualizzazione di 1 periodo\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 0, 0, 1, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "ricerca per forme di: pennace"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs6pennace.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"pennace\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type= 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 0, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ricerca per forme di: invitava (interessa solo il primo contesto di Buccio di Ranallo)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs7invitava.png)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cs7invitava1.png)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Attenzione: riferimenti organici corretti nella prima immagine; controllare inoltre se c’è un doppio spazio o meno prima di “Quando…” (nel filgat c’è, e andrebbe bene con lo spazio - anche se lo spazio non ci dovrebbe essere)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"invitava\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type= 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 0, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<h2>Esempi di ricerca di contesti con testi associati (note e trad)</h2>"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<h3>ricerca per lemmi di: “altresì”</h3>"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/ctaaltresi.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parola = \"altres*\"\n",
+    "path = \"/Users/federicaspinelli/TEAMOVI/Ricerche/db/first_db\"\n",
+    "# tipo di ricerca (0 per forme, 1 per lemmi, 2 per lemmi con opzione \"mostra occorrenze non lemmatizzate\")\n",
+    "type = 0\n",
+    "listOcc = [\"occ00001\", \"occ00002\", \"occ00003\"]\n",
+    "search=ricercaforme(interpreter(parola), path, 0, 0)\n",
+    "textlist=findtexts(type, search, listOcc, path)\n",
+    "contexts = findcontexts(textlist, 30, 0, listOcc, path)\n",
+    "# (textlist, index, parole, periodi, brani, listOcc, path)\n",
+    "context = singlecontexts(contexts, 2, 30, 0, 0, listOcc, path)\n",
+    "links = findlinks(context, path)\n",
+    "bibliocontext = singlefindbib(links, path)\n",
+    "dtale.show(bibliocontext)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Visualizzazione del secondo brano secondo le stesse modalità. \n",
+    "\n",
+    "Attenzione: in questo caso al contesto sono associati un file note e un file trad e devono essere visualizzati entrambi."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![](img/cta1altresi1.png)\n",
+    "\n",
+    "![](img/cta1altresi2.png)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Some files were not shown because too many files changed in this diff