Explorar o código

Formatting + highlighting

kora hai 1 ano
pai
achega
12841b36d1

+ 2 - 2
.gitignore

@@ -1,7 +1,7 @@
 .DS_Store
 TIgrO
 */.DS_Store
-.vscode/*
+**/.vscode/*
 Progetto2023_BE.log
 .idea/*
-**/__pycache__/
+**/__pycache__/

+ 1 - 15
flask_be/engine/basic_queries.py

@@ -97,7 +97,7 @@ class basicQueries:
         textlist['piniz'] = minChar_list
         textlist['pifin'] = maxChar_list
         textlist['contesto'] = contexts
-        textlist['format contesto'] = formats
+        textlist['formattazione contesto'] = formats
         return textlist.reset_index(drop=True)
 
     # %% Ha in input findcontexts, associa i riferimenti bibliografici ad ogni contesto dal db BiblioTLIO.
@@ -146,18 +146,4 @@ class basicQueries:
         chrono = contexts.sort_values(by=['Data codificata', 'Rif_organico'])   
         return (chrono.reset_index(drop=True))
     
-    #%% funzione indici da evidenziare nel testo
-    def highlight (self, bibliocontexts):
-        index = 0
-        for col in bibliocontexts.columns:
-            if col.startswith('pitxt'):
-                if index == 0:
-                    bibliocontexts['highlight'] = bibliocontexts.apply (lambda row: [int(row['pitxt'] - row['piniz']), int(row['elemlen'])], axis=1)
-                else:
-                    bibliocontexts['highlight'+str(index)] = bibliocontexts.apply (lambda row: [int(row['pitxt_'+str(index)] - row['piniz']), int(row['elemlen_'+str(index)])], axis=1)
-                index += 1
-        highlight_cols = bibliocontexts.filter(regex='^highlight')
-        create_array = lambda row: highlight_cols.loc[row.name].values.tolist()
-        bibliocontexts['highlights_combined'] = list(highlight_cols.apply(create_array, axis=1))
-        return bibliocontexts
     

+ 9 - 7
flask_be/engine/contexts.py

@@ -2,6 +2,7 @@ import json
 import pandas as pd
 
 from .basic_queries import basicQueries
+from .format import highlightContext
 
 
 # Executes query sequences to recover single and multiple contexts
@@ -18,7 +19,8 @@ class contexts(basicQueries):
         textlist = self.findtexts(tipo_ricerca, ricercadf, index)
         contexts = self.findcontexts (textlist)
         bibliocontexts = self.findbib (contexts)
-        highlights = self.highlight(bibliocontexts)
+        highlights = highlightContext
+        (bibliocontexts)
         return highlights.to_dict(orient='records')
 
     #%% funzione contesti singoli cumulativa
@@ -30,7 +32,7 @@ class contexts(basicQueries):
         contestosingolo = self.singlecontexts(contestimultiplidf, indice, parole, periodi, brani)
         braniassociati = self.findlinks(contestosingolo)
         contestosingoloclean = self.findbib (braniassociati)
-        contestosingoloclean = self.highlight(contestosingoloclean)
+        contestosingoloclean = highlightContext(contestosingoloclean)
         return contestosingoloclean.to_dict(orient='records')
         
     #%% funzione reperimento e raffinamento contesti singoli
@@ -79,7 +81,7 @@ class contexts(basicQueries):
                 context ['piniz'] = queryresponse["piniz"].min()
                 context ['pfin'] = queryresponse["pfin"].max() 
         context['contesto'] = contexts[0]
-        context['format contesto'] = formats[0]
+        context['formattazione contesto'] = formats[0]
         return pd.DataFrame(context).T.reset_index(drop=True)
     
     #%% funzione reperimento note e brani associati
@@ -98,7 +100,7 @@ class contexts(basicQueries):
             fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
             cont, form = self.queryHandler.textQuery(fileQueryData, True)
             context['nota'] = cont
-            context['format nota'] = json.dumps(form)
+            context['formattazione nota'] = json.dumps(form)
             return context
         if linkslocal == 2:
             queryData = {'queryType': 'links', 'querySubtype': 'testo_associato', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
@@ -106,18 +108,18 @@ class contexts(basicQueries):
             fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
             cont, form = self.queryHandler.textQuery(fileQueryData, True)
             context['testo associato'] = cont
-            context['format testo associato'] = json.dumps(form)
+            context['formattazione testo associato'] = json.dumps(form)
         if linkslocal == 3:
             queryData = {'queryType': 'links', 'querySubtype': 'nota', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
             queryresponse = self.queryHandler.query(queryData, pandas=True)
             fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
             cont, form = self.queryHandler.textQuery(fileQueryData, True)
             context['nota'] = cont
-            context['format nota'] = json.dumps(form)
+            context['formattazione nota'] = json.dumps(form)
             queryData2 = {'queryType': 'links', 'querySubtype': 'testo_associato', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
             queryresponse2 = self.queryHandler.query(queryData2, pandas=True)
             fileQueryData2 = {'sigla': siglalocal, 'minChar': queryresponse2["piniz"].min(), 'maxChar': queryresponse2["pfin"].max()}
             cont2, form2 = self.queryHandler.textQuery(fileQueryData2, True)
             context['testo associato'] = cont2
-            context['format testo associato'] = json.dumps(form2)
+            context['formattazione testo associato'] = json.dumps(form2)
         return context

+ 8 - 7
flask_be/engine/cooccorrenze.py

@@ -2,6 +2,7 @@
 import pandas as pd
 
 from .basic_queries import basicQueries
+from .format import highlightContext
 
 
 # Executes query sequences to recover contexts with co-occurrences according to user input
@@ -47,12 +48,12 @@ class cooccorrenze(basicQueries):
             cod += 1
             listatesti = listatesti[cond1 & cond2]
 
-            if listatesti.empty:
-                return []
-            else:
-                contexts = self.findcontexts(listatesti)
-                bibliocontexts = self.findbib(contexts)
-                clean = bibliocontexts.drop_duplicates(subset="contesto")
-                highlights = self.highlight(clean)
+        if listatesti.empty:
+            return []
+        else:
+            contexts = self.findcontexts(listatesti)
+            bibliocontexts = self.findbib(contexts)
+            clean = bibliocontexts.drop_duplicates(subset="contesto")
+            highlights = highlightContext(clean)
 
         return highlights.to_dict(orient='records')

+ 63 - 0
flask_be/engine/format.py

@@ -0,0 +1,63 @@
+import json
+import pandas as pd
+
+def formatContext(context, formatCodesJson):
+    """Split `context` into runs of characters that share the same format code.
+
+    Args:
+        context: The context text to split.
+        formatCodesJson: JSON-encoded list of integer format codes; the
+            position of a code in the list is used as a character index
+            into `context`.
+
+    Returns:
+        A JSON string encoding a list of
+        ``{'formatting': [...], 'stringPart': '...'}`` objects, one per run of
+        consecutive equal codes, in text order.
+    """
+    formatCodes = json.loads(formatCodesJson)
+    if not formatCodes:
+        # No codes at all: emit the whole text as a single unformatted part
+        # instead of failing on formatCodes[0].
+        return json.dumps([{'formatting': [], 'stringPart': context}])
+
+    # A run starts at index 0 and wherever the code differs from the previous one.
+    runStarts = [0] + [ind for ind in range(1, len(formatCodes)) if formatCodes[ind] != formatCodes[ind-1]]
+    # Each run ends where the next one starts. The last run must reach the end
+    # of the text: an open-ended slice (None) is used because a stop of -1
+    # would silently drop the final character.
+    runEnds = runStarts[1:] + [None]
+
+    formattedContext = []
+    for start, end in zip(runStarts, runEnds):
+        code = formatCodes[start]
+        # Codes that are not positive carry no formatting flags.
+        formatting = bitToFormat(getBits(code)) if code > 0 else []
+        formattedContext.append({'formatting': formatting, 'stringPart': context[start:end]})
+    return json.dumps(formattedContext)
+
+#%% Add match highlighting to each context and build its formatted version
+def highlightContext(bibliocontexts: pd.DataFrame):
+    """Flag the matched spans of every row as highlighted and format the contexts.
+
+    For each column whose name starts with 'pitxt', the row's
+    'formattazione contesto' codes are rewritten through
+    addHighlightToFormatting, using the offset of that position from 'piniz'
+    and the corresponding 'elemlen' value as length. A 'contesto formattato'
+    column is then computed with formatContext.
+
+    Args:
+        bibliocontexts: DataFrame providing 'contesto',
+            'formattazione contesto', 'piniz' and the 'pitxt*' / 'elemlen*'
+            columns. It is modified in place.
+
+    Returns:
+        The same DataFrame object, with the columns described above updated.
+    """
+    matchNumber = 0
+    for columnName in bibliocontexts.columns:
+        # The column name is only used to count matches; the columns actually
+        # read are derived from the running counter below.
+        if not columnName.startswith('pitxt'):
+            continue
+        # The first match is read from the un-suffixed columns, every
+        # following one from the columns suffixed with '_<n>'.
+        if matchNumber == 0:
+            positionCol, lengthCol = 'pitxt', 'elemlen'
+        else:
+            positionCol, lengthCol = 'pitxt_'+str(matchNumber), 'elemlen_'+str(matchNumber)
+
+        def withHighlight(row, positionCol=positionCol, lengthCol=lengthCol):
+            codes = row['formattazione contesto']
+            offset = int(row[positionCol] - row['piniz'])
+            length = int(row[lengthCol])
+            return addHighlightToFormatting(codes, offset, length)
+
+        bibliocontexts['formattazione contesto'] = bibliocontexts.apply(withHighlight, axis=1)
+        matchNumber += 1
+
+    def buildFormatted(row):
+        return formatContext(row['contesto'], row['formattazione contesto'])
+
+    bibliocontexts['contesto formattato'] = bibliocontexts.apply(buildFormatted, axis=1)
+    return bibliocontexts
+
+
+# Utility: modify a single format codes string to add highlighting
+def addHighlightToFormatting(formatCodesJson, highlightStart, highlightLength):
+    """Set the highlight flag on a span of a JSON-encoded list of format codes.
+
+    Args:
+        formatCodesJson: JSON-encoded list of integer format codes.
+        highlightStart: Index of the first code to flag.
+        highlightLength: Number of consecutive codes to flag.
+
+    Returns:
+        The updated list, JSON-encoded. The part of the span that falls
+        outside the list (before index 0 or past the end) is ignored.
+    """
+    HIGHLIGHT_BIT = 16
+    formatCodes = json.loads(formatCodesJson)
+    # Clamp the span to the list: a negative index must not wrap around to the
+    # end of the list, and positions past the end are simply skipped.
+    first = max(highlightStart, 0)
+    last = min(highlightStart + highlightLength, len(formatCodes))
+    for position in range(first, last):
+        # OR rather than add: adding 16 to an already highlighted code would
+        # carry into the next bit and clear the highlight flag.
+        formatCodes[position] |= HIGHLIGHT_BIT
+    return json.dumps(formatCodes)
+
+
+# Formatting helper functions
+def getBits(num):
+    """Return the binary digits of `num` as a string, zero-padded on the left to at least 5 characters."""
+    minWidth = 5
+    # bin() yields e.g. '0b101'; slice off the two-character '0b' prefix.
+    digits = bin(num)[2:]
+    return digits.zfill(minWidth)
+
+def bitToFormat(bitString):
+    """Translate a bit string into the list of formatting names whose bit is "1".
+
+    The string is read from its last character backwards: the last character
+    stands for 'grassetto', the one before it for 'corsivo', and so on up to
+    the fifth-from-last for 'evidenziato'. Names are returned in that order.
+    """
+    flagNames = ('grassetto', 'corsivo', 'sottolineato', 'barrato', 'evidenziato')
+    return [
+        name
+        for distanceFromEnd, name in enumerate(flagNames, start=1)
+        if bitString[-distanceFromEnd] == "1"
+    ]

+ 2 - 23
flask_be/interface_sqlite3/query_handlers.py

@@ -84,19 +84,9 @@ class queryHandlerBasicSqlite:
     def getTextFormatting(self, sigla, minChar, maxChar):
         with open(f"{self.dbPath}/ftxt/{sigla}", 'rb') as file1:
             file1.seek(minChar-1)
-            preFormats = [char for char in file1.read(maxChar-minChar)]
+            formatCodes = [char for char in file1.read(maxChar-minChar)]
 
-        # Get format CHANGES + first format; only return non-zero format sequences
-        formatChanges = [(0, preFormats[0])] + [(ind, preFormats[ind]) for ind in range(1, len(preFormats)) if preFormats[ind]!=preFormats[ind-1]]
-        formats = []
-        for ind, form in enumerate(formatChanges):
-            if form[1]>0:
-                format0 = bitToFormat(fourBits(form[1]))
-                start = form[0]
-                end = formatChanges[ind+1][0] if ind < len(formatChanges)-1 else -1
-                formats.append( {'format': format0, 'coordinates': [start, end]} )
-
-        return formats
+        return formatCodes
 
     
     def encodeQuery(self, queryData):
@@ -128,8 +118,6 @@ class queryHandlerBasicSqlite:
         return df
 
 
-
-
 # Utilities
 
 # Dict factory non-Pandas queries
@@ -144,12 +132,3 @@ def isColumnToDecode(col):
     if col in columns or col.startswith('highlight'):
         return True
     return False
-
-
-# Formatting helper functions
-def fourBits(num):
-    aa = bin(num) # Bin converts the input to a bit string (with prefix \b)
-    return aa[2:].rjust(4, "0")
-
-def bitToFormat(str4):
-    return {'grassetto': str4[-1], 'corsivo': str4[-2], 'sottolineato': str4[-3], 'barrato': str4[-4]}

+ 23 - 0
site2/css/format.css

@@ -0,0 +1,23 @@
+/* Da aggiungere all'altro CSS, direi */
+
+.italic {
+    font-style: italic;
+}
+
+.bold {
+    font-weight: bold;
+}
+
+.underline {
+    text-decoration-line: underline;
+}
+
+.linethrough {
+    text-decoration-line: line-through;
+}
+
+/* Ugly workaround */
+.underlinethrough {
+    text-decoration-line: underline line-through;
+}
+

+ 48 - 0
site2/js/format.js

@@ -0,0 +1,48 @@
+export function getFormattedContext(formattedContext){
+    // Parse the JSON list of {formatting, stringPart} parts, attach to each
+    // part the CSS class names matching its formatting flags (as `cssStyles`),
+    // and return the HTML string built by combineStringPartsWithStyles.
+    const parts = JSON.parse(formattedContext);
+
+    for (const part of parts) {
+        const classes = [];
+        part.cssStyles = classes;
+        const has = flag => part.formatting.includes(flag);
+
+        if (has('grassetto')) {
+            classes.push('bold');
+        }
+        if (has('corsivo')) {
+            classes.push("italic");
+        }
+
+        // 'sottolineato' and 'barrato' are resolved together: when both are
+        // present a single combined class is emitted instead of two.
+        const underlined = has('sottolineato');
+        const struck = has('barrato');
+        if (underlined && struck) {
+            classes.push("underlinethrough");
+        } else if (underlined) {
+            classes.push("underline");
+        } else if (struck) {
+            classes.push("linethrough");
+        }
+
+        if (has('evidenziato')) {
+            classes.push("highlight");
+        }
+    }
+
+    return combineStringPartsWithStyles(parts);
+}
+
+export function combineStringPartsWithStyles(stringPartArray){
+    // Build one HTML string from an array of {cssStyles, stringPart} objects:
+    // each part becomes a <span> whose class attribute lists its cssStyles,
+    // the spans are concatenated in order and newlines become <br>.
+
+    // The text is escaped before being wrapped, so that characters such as
+    // '<' or '&' in the source text are displayed literally instead of being
+    // parsed as markup when the result is assigned to innerHTML.
+    const escapeHtml = text => String(text)
+        .replaceAll('&', '&amp;')
+        .replaceAll('<', '&lt;')
+        .replaceAll('>', '&gt;')
+        .replaceAll('"', '&quot;');
+
+    let formattedStringParts = stringPartArray.map(stringPartWithstyle => {
+        let classString = 'class="' + stringPartWithstyle.cssStyles.join(" ") + '"';
+        return '<span ' + classString + '>' + escapeHtml(stringPartWithstyle.stringPart) + '</span>'
+    })
+
+    // Newlines are converted after escaping, so the inserted <br> tags stay real markup.
+    let outString = formattedStringParts.join('');
+    return outString.replaceAll('\n', '<br>');
+}

+ 25 - 31
test_suite/tests_kora_misc/format_FE_modules/format.js

@@ -1,54 +1,48 @@
-export function splitStringByStyle(strIn, formatData){
+export function getFormattedContext(formattedContext){
 
-    let formatArray = JSON.parse(formatData);
-    let strOutArray = [];
-    let strIndex = 0;
-    
+    let formatArray = JSON.parse(formattedContext);
+
+    // Correct formatting style where needed
     formatArray.forEach(
         formatObj => {
 
-            let start = formatObj.coordinates[0];
-            let end = formatObj.coordinates[1];
-            let format = formatObj.format;
-
-            if(start > strIndex){
-                strOutArray.push( {stringPart: strIn.substring(strIndex, start), styles: []} );
-            }
-
-            strOutArray.push( {stringPart: strIn.substring(start, end), styles: []} )
+            formatObj.cssStyles = [];
+            let formatting = formatObj.formatting;
 
-            if (format.grassetto=="1"){
-                strOutArray[strOutArray.length-1].styles.push("bold");
+            if (formatting.includes('grassetto')){
+                formatObj.cssStyles.push('bold');
             }
-            if (format.corsivo=="1") {
-                strOutArray[strOutArray.length-1].styles.push("italic");
+            if (formatting.includes('corsivo')) {
+                formatObj.cssStyles.push("italic");
             }
 
             // Handle 'barrato' and 'sottolineato' together!
-            if (format.barrato=="1" && format.sottolineato=="0") {
-                strOutArray[strOutArray.length-1].styles.push("linethrough");
+            if (formatting.includes('sottolineato') && formatting.includes('barrato')) {
+                formatObj.cssStyles.push("underlinethrough");
+            }
+            else if (formatting.includes('sottolineato')) {
+                formatObj.cssStyles.push("underline");
             }
-            else if (format.barrato=="0" && format.sottolineato=="1") {
-                strOutArray[strOutArray.length-1].styles.push("underline");
+            else if (formatting.includes('barrato')) {
+                formatObj.cssStyles.push("linethrough");
             }
-            else if (format.barrato=="1" && format.sottolineato=="1") {
-                strOutArray[strOutArray.length-1].styles.push("underlinethrough");
+
+            if (formatting.includes('evidenziato')) {
+                formatObj.cssStyles.push("highlight")
             }
             
-            strIndex = end;
         }
     );
     
-    if(strIndex<strIn.length){
-        strOutArray.push( { stringPart: strIn.substring(strIndex, strIn.length), styles: []} );
-    }
-    return strOutArray;
+    return combineStringPartsWithStyles(formatArray);
 }
 
 export function combineStringPartsWithStyles(stringPartArray){
     let formattedStringParts = stringPartArray.map(stringPartWithstyle => {
-        let classString = 'class="' + stringPartWithstyle.styles.join(" ") + '"';
+        let classString = 'class="' + stringPartWithstyle.cssStyles.join(" ") + '"';
         return '<span ' + classString + '>' + stringPartWithstyle.stringPart + '</span>'
     })
-    return formattedStringParts.join('');
+
+    let outString = formattedStringParts.join('');
+    return outString.replaceAll('\n', '<br>');
 }

+ 0 - 0
test_suite/tests_kora_misc/format_FE_modules/provàci.css → test_suite/tests_kora_misc/format_FE_modules/provaci.css


+ 14 - 0
test_suite/tests_kora_misc/format_FE_modules/provaci.html

@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+<link rel="stylesheet" href="provaci.css">
+</head>
+
+<body>
+    <p id="test"></p>
+
+    <script type="module" src="provaci.js"></script>
+
+</body>
+</html>

+ 14 - 0
test_suite/tests_kora_misc/format_FE_modules/provaci.js

@@ -0,0 +1,14 @@
+// Manual browser check for format.js: renders a sample formatted context
+// into the element with id "test".
+import { getFormattedContext } from "./format.js"
+
+
+// Sample payload: a JSON array of {formatting, stringPart} objects.
+let testFormatDTO = `[{"formatting": [], "stringPart": "rispuose e disse: - De[h] cagiu' ti foss'ello! -\\n"}, {"formatting": ["corsivo"], "stringPart": "Di messere Beriuolo, cavaliere di "}, {"formatting": ["corsivo", "evidenziato"], "stringPart": "corte"}, {"formatting": [], "stringPart": ".\\n[1] Uno "}, {"formatting": ["evidenziato"], "stringPart": "cavaliere"}, {"formatting": [], "stringPart": " di corte ch'"}, {"formatting": ["evidenziato"], "stringPart": "ebbe"}, {"formatting": [], "stringPart": " nome messere Beriuolo\\nera in Genova. [2] Venne a rampogne con"}]`;
+
+
+// Log the parsed payload to check that the sample is valid JSON.
+let jsonVai = JSON.parse(testFormatDTO);
+console.log(jsonVai);
+
+// Convert the payload to an HTML string and log it.
+let vai = getFormattedContext(testFormatDTO);
+console.log(vai);
+
+// Show the result in the page.
+let el = document.getElementById("test");
+el.innerHTML = vai;

+ 0 - 14
test_suite/tests_kora_misc/format_FE_modules/provàci.html

@@ -1,14 +0,0 @@
-<!DOCTYPE html>
-<html>
-
-<head>
-<link rel="stylesheet" href="provàci.css">
-</head>
-
-<body>
-    <p id="test"><p>
-
-    <script type="module" src="provàci.js"></script>
-
-</body>
-</html>

+ 0 - 19
test_suite/tests_kora_misc/format_FE_modules/provàci.js

@@ -1,19 +0,0 @@
-import { splitStringByStyle, combineStringPartsWithStyles } from "./format.js"
-
-
-let testFormatDTO = '[\
-        {"format": {"grassetto": "0", "corsivo": "1", "sottolineato": "0", "barrato": "0"}, "coordinates": [0, 5]},\
-        {"format": {"grassetto": "1", "corsivo": "1", "sottolineato": "0", "barrato": "0"}, "coordinates": [14, 17]},\
-        {"format": {"grassetto": "0", "corsivo": "0", "sottolineato": "1", "barrato": "1"}, "coordinates": [21, 28]}\
-    ]';
-let testString = "Pippo è stato qui in vecanza";
-
-console.log('Lunghezza string test:', testString.length);
-
-let vai = splitStringByStyle(testString, testFormatDTO);
-console.log(vai);
-let su = combineStringPartsWithStyles(vai);
-console.log(su);
-
-let el = document.getElementById("test");
-el.innerHTML = su;