1 年之前 · 9452776b83
--- a/FORMS/app.py
+++ b/FORMS/app.py
@@ -9,10 +9,14 @@ app = Flask(__name__)
 
				 def main():
			
 
				 
			
 
				     configurationFolder = 'parsers/configuration_files/'
			
 
				-    configurationFileName = 'descrizione_minimal.json'   
			
 
				+    configurationFileName = 'configuration.json'   
			
 
				     confFilePath = configurationFolder + configurationFileName
			
 
				 
			
 
				-    formFields = getFormFields(confFilePath)
			
 
				+    try:
			
 
				+        formFields = getFormFields(confFilePath)
			
 
				+    except:
			
 
				+        return redirect('/error/')
			
 
				+
			
 
				 
			
 
				     if request.method == 'POST':
			
 
				 
			
@@ -26,19 +30,18 @@ def main():
 
				             outFilePath = outputFolder+outFileName
			
 
				             inFile = fileFromRequest.read()
			
 
				             # try to create list of dictionaries keyed by header row
			
 
				-            parsefromfile(confFilePath, inFile, outFilePath)
			
 
				+            parsefromfile(confFilePath, formFields, inFile, outFilePath)
			
 
				 
			
 
				         else:
			
 
				             # Da completare
			
 
				             data =  {}
			
 
				-            data['AUTN'] = request.form['AUTN']
			
 
				-            data['AUTL'] = request.form['AUTL']
			
 
				-            data['AUTD'] = request.form['AUTD']
			
 
				+            for field in formFields:
			
 
				+                data[field] = request.form[field]
			
 
				 
			
 
				             check = [val for val in data.values() if val!='']
			
 
				 
			
 
				             if len(check)>0:
			
 
				-                parse(confFilePath, [data], outputFolder + 'form_output.ttl')
			
 
				+                parse(confFilePath, formFields,[data], outputFolder + 'form_output.ttl')
			
 
				             else:
			
 
				                 return redirect('/error/')
			
 
				 
			
--- a/FORMS/parsers/CSV_to_RDF_generico.py
+++ b/FORMS/parsers/CSV_to_RDF_generico.py
@@ -2,6 +2,7 @@
 
				 
			
 
				 # Utilities to read/write csv files
			
 
				 import csv, json
			
 
				+from operator import truediv
			
 
				 
			
 
				 
			
 
				 # Custom class to store URIs + related infos for the ontologies/repositories
			
@@ -61,86 +62,99 @@ def writeTTLHeader(output):
 
				 
			
 
				 max_entries = None
			
 
				 
			
 
				-def parsefromfile(mapfilename, infile, outfilename):
			
 
				+def parsefromfile(mapfilename, formFields, infile, outfilename):
			
 
				     inputFile = infile.decode()
			
 
				     csv_dicts = [{k: v for k, v in row.items()} for row in csv.DictReader(inputFile.splitlines(), skipinitialspace=True)]
			
 
				-    parse(mapfilename, csv_dicts, outfilename)
			
 
				+    parse(mapfilename, formFields, csv_dicts, outfilename)
			
 
				 
			
 
				 
			
 
				-def parse(mapfilename, csv_dicts, outfilename):
			
 
				+def parse(mapfilename, formFields, csv_dicts, outfilename):
			
 
				 
			
 
				     with open (mapfilename) as mapfile:
			
 
				-        json_dicts = json.load(mapfile)
			
 
				+        triple_blocks = json.load(mapfile)
			
 
				+    
			
 
				+    allRefs = getRefs(triple_blocks)
			
 
				+    doReplace = True
			
 
				+    while doReplace:
			
 
				+        doReplace = False
			
 
				+        for ref in allRefs['subjects_with_refs']:
			
 
				+            oldVal = ref['value']
			
 
				+            newVal = replaceRefs(allRefs, oldVal)
			
 
				+            if(oldVal != newVal):
			
 
				+                ref['value'] = newVal
			
 
				+                doReplace = True
			
 
				+        for ref in allRefs['objects_with_refs']:
			
 
				+            oldVal = ref['value']
			
 
				+            newVal = replaceRefs(allRefs, oldVal)
			
 
				+            if(oldVal != newVal):
			
 
				+                ref['value'] = newVal
			
 
				+                doReplace = True
			
 
				+
			
 
				 
			
 
				     with open(outfilename, 'w') as outputfile:
			
 
				 
			
 
				         writeTTLHeader(outputfile)
			
 
				 
			
 
				-        for ii, csvrow in enumerate(csv_dicts):
			
 
				+        for csvrow in csv_dicts:
			
 
				+            for entry in triple_blocks:
			
 
				 
			
 
				-            # The index ii is mainly used to limit the number of entries to process, for testing purposes
			
 
				-            for node in json_dicts:
			
 
				+                subject = entry['subject']['value'] if type(entry['subject']) is dict else entry['subject']
			
 
				+                subject = replaceRefs(allRefs, subject)
			
 
				+                subject = replace_csv_values(formFields, csvrow, subject)
			
 
				+                for content in entry['content']:
			
 
				+                    attribute = content['predicate']
			
 
				+                    object1 = content['object']['value'] if type(content['object']) is dict else content['object']
			
 
				+                    object1 = replaceRefs(allRefs, object1)
			
 
				+                    object1 = replace_csv_values(formFields, csvrow, object1)
			
 
				 
			
 
				-                uri = node['uri']
			
 
				+                    toWrite = triple(subject, attribute, object1)
			
 
				 
			
 
				-                mainCols = node.get('colonna')
			
 
				-                if mainCols is None:
			
 
				-                    mainCols = node.get('principale')
			
 
				-                if mainCols is None:
			
 
				-                    continue
			
 
				+                    outputfile.write(toWrite)
			
 
				+                    outputfile.write(closeLine)
			
 
				+                
			
 
				+            outputfile.write('\n')
			
 
				+                
			
 
				 
			
 
				-                if type(mainCols) is list:
			
 
				-                    main = [csvrow[col] for col in mainCols]
			
 
				-                else:
			
 
				-                    if '#' in mainCols:
			
 
				-                        continue
			
 
				-                    main = csvrow[mainCols]
			
 
				 
			
 
				-                
			
 
				-                rdfType = node["tipo"]
			
 
				-
			
 
				-                if type(rdfType) is list:
			
 
				-                    for type1 in rdfType:
			
 
				-                        line = triple(settripleuri(main, uri), nsCoords.prefix + 'type', type1) + closeLine
			
 
				-                        outputfile.write(line)
			
 
				-          
			
 
				-                sottoelemento = ''
			
 
				-                try:
			
 
				-                    sottoelemento = node["sottoelementodi"]
			
 
				-                except:
			
 
				-                    pass
			
 
				-                if sottoelemento != '':
			
 
				-                    parent = next (filter(lambda el: el.get("identificatore")==node["sottoelementodi"], json_dicts), None)
			
 
				-                    if parent is not None:
			
 
				-                        if type(parent["colonna"]) is list:
			
 
				-                            parent_main = [csvrow[col] for col in parent["colonna"]]
			
 
				-                        else:
			
 
				-                            parent_main = csvrow[parent["colonna"]]
			
 
				-                        subject = settripleuri(parent_main, parent["uri"])
			
 
				-                        property = node["relazione"]
			
 
				-                        object = settripleuri(main, node["uri"])
			
 
				-                        line = triple(subject, property,
			
 
				-                        object) + closeLine
			
 
				-                        outputfile.write(line)
			
 
				-
			
 
				-                outputfile.write('\n')
			
 
				-            #
			
 
				-            #
			
 
				-            # To limit number of entries processed (if desired for testing purposes)
			
 
				-            if (max_entries is not None and ii > max_entries): 
			
 
				-                break
			
 
				-
			
 
				-def settripleuri (csvvalue, nodeuri):
			
 
				-    output = "\""+nodeuri+"\""
			
 
				-    if type(csvvalue) is list:
			
 
				-        for ii, value in enumerate(csvvalue):
			
 
				-            if value=='':
			
 
				-                output = output.replace('$VALORE_CSV_'+ str(ii)+'$', 'N/A')
			
 
				-            else:
			
 
				-                output = output.replace('$VALORE_CSV_'+ str(ii)+'$', value)
			
 
				-
			
 
				-    else:
			
 
				-        output = output.replace('$VALORE_CSV$', csvvalue)
			
 
				-
			
 
				-    return output
			
 
				+def getRefs(triple_blocks: dict):
			
 
				+    subjects_with_refs = []
			
 
				+    for block in triple_blocks:
			
 
				+        try:
			
 
				+            subject_ref = block['subject']['ref']
			
 
				+        except:
			
 
				+            subject_ref = None
			
 
				+        if subject_ref is not None:
			
 
				+            subjects_with_refs.append(block['subject'])
			
 
				+    
			
 
				+    objects_with_refs = []
			
 
				+    for block in triple_blocks:
			
 
				+        for content in block['content']:
			
 
				+            try:
			
 
				+                object_ref = content['object']['ref']
			
 
				+            except:
			
 
				+                object_ref = None
			
 
				+            if object_ref is not None:
			
 
				+                objects_with_refs.append(content['object'])
			
 
				+    
			
 
				+    return {'subjects_with_refs': subjects_with_refs, 'objects_with_refs': objects_with_refs}
			
 
				+
			
 
				+
			
 
				+def replace_csv_values(formFields: list, csvrow: dict, val: str):
			
 
				+    
			
 
				+    outStr = val
			
 
				+    for field in formFields:
			
 
				+        outStr = outStr.replace('#csv:'+field+'#', csvrow[field])
			
 
				+
			
 
				+    return outStr
			
 
				+
			
 
				+
			
 
				+def replaceRefs(allRefs, val):
			
 
				+
			
 
				+    outStr = val
			
 
				+    for ref in allRefs['subjects_with_refs']:
			
 
				+        outStr = outStr.replace('#ref:'+ref['ref']+'#', ref['value'])
			
 
				+    for ref in allRefs['objects_with_refs']:
			
 
				+        outStr = outStr.replace('#obj_ref:'+ref['ref']+'#', ref['value'])
			
 
				+    
			
 
				+    return outStr
			
 
				 
			
--- a/FORMS/parsers/configuration_files/configuration.json
+++ b/FORMS/parsers/configuration_files/configuration.json
@@ -1,7 +1,7 @@
 
				 [
			
 
				     {
			
 
				         "subject": {
			
 
				-          "value": "aut:#(csv:URL)#",
			
 
				+          "value": "aut:#csv:URL#",
			
 
				           "ref": "MAIN"
			
 
				         },
			
 
				         "comment": "OPZIONALE - ci si può scrivere icché si vòle.",
			
@@ -16,59 +16,59 @@
 
				             },
			
 
				             {
			
 
				               "predicate": "foaf:name",
			
 
				-              "object": "\"#(csv:AUTN)#\""
			
 
				+              "object": "\"#csv:AUTN#\""
			
 
				             },
			
 
				             {
			
 
				               "predicate": "foaf:givenName",
			
 
				-              "object": "\"#(csv:AUTO)#\""
			
 
				+              "object": "\"#csv:AUTO#\""
			
 
				             },
			
 
				             {
			
 
				               "predicate": "foaf:gender",
			
 
				-              "object": "\"#(csv:AUTZ)#\""
			
 
				+              "object": "\"#csv:AUTZ#\""
			
 
				             },
			
 
				             {
			
 
				               "predicate": "rdfs:label",
			
 
				-              "object": "\"#(csv:AUTN)#, #(csv:AUTA)#\""
			
 
				+              "object": "\"#csv:AUTN#, #csv:AUTA#\""
			
 
				             },
			
 
				             {
			
 
				               "predicate": "crm:P3_has_note",
			
 
				               "object": {
			
 
				-                "value": "#(ref:MAIN)#_E62",
			
 
				+                "value": "#ref:MAIN#_E62",
			
 
				                 "ref": "E62"
			
 
				               }
			
 
				             },
			
 
				             {
			
 
				               "predicate": "crm:P98i_was_born",
			
 
				               "object": {
			
 
				-                "value": "#(ref:MAIN)#_E67",
			
 
				+                "value": "#ref:MAIN#_E67",
			
 
				                 "ref": "E67"
			
 
				               }
			
 
				             },
			
 
				             {
			
 
				               "predicate": "crm:P100i_died_in",
			
 
				               "object": {
			
 
				-                "value": "#(ref:MAIN)#_E69",
			
 
				+                "value": "#ref:MAIN#_E69",
			
 
				                 "ref": "E69"
			
 
				               }
			
 
				             },
			
 
				             {
			
 
				               "predicate": "crm:P1_is_identified_by",
			
 
				               "object": {
			
 
				-                "value": "aut:#(csv:AUTH)#",
			
 
				+                "value": "aut:#csv:AUTH#",
			
 
				                 "ref": "E42"
			
 
				               }
			
 
				             },
			
 
				             {
			
 
				               "predicate": "schema:hasOccupation",
			
 
				               "object": {
			
 
				-                "value": "mpp:#(csv:AUTQ)#",
			
 
				+                "value": "mpp:#csv:AUTQ#",
			
 
				                 "ref": "OCCUPATION"
			
 
				               }
			
 
				             }
			
 
				         ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:E62)#",
			
 
				+      "subject": "#obj_ref:E62#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -81,7 +81,7 @@
 
				       ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:E42)#",
			
 
				+      "subject": "#obj_ref:E42#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -89,12 +89,12 @@
 
				           },
			
 
				           {
			
 
				             "predicate": "rdfs:label",
			
 
				-            "object": "\"#(csv:AUTH)#\""
			
 
				+            "object": "\"#csv:AUTH#\""
			
 
				           }
			
 
				       ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:E67)#",
			
 
				+      "subject": "#obj_ref:E67#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -102,26 +102,26 @@
 
				           },
			
 
				           {
			
 
				             "predicate": "rdfs:label",
			
 
				-            "object": "\"Nascita di #(csv:AUTN)#\""
			
 
				+            "object": "\"Nascita di #csv:AUTN#\""
			
 
				           },
			
 
				           {
			
 
				             "predicate": "crm:P7_took_place_at",
			
 
				             "object": {
			
 
				-              "value": "mpp:#(csv:AUTL)#",
			
 
				+              "value": "mpp:#csv:AUTL#",
			
 
				               "ref": "LOC-BIRTH"
			
 
				             }
			
 
				           },
			
 
				           {
			
 
				             "predicate": "crm:P4_has_time-span",
			
 
				             "object": {
			
 
				-              "value": "mpp:#(csv:AUTD)#",
			
 
				+              "value": "mpp:#csv:AUTD#",
			
 
				               "ref": "TIME-BIRTH"
			
 
				             }
			
 
				           }
			
 
				       ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:E69)#",
			
 
				+      "subject": "#obj_ref:E69#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -129,26 +129,26 @@
 
				           },
			
 
				           {
			
 
				             "predicate": "rdfs:label",
			
 
				-            "object": "\"Morte di #(csv:AUTN)#\""
			
 
				+            "object": "\"Morte di #csv:AUTN#\""
			
 
				           },
			
 
				           {
			
 
				             "predicate": "crm:P7_took_place_at",
			
 
				             "object": {
			
 
				-              "value": "mpp:#(csv:AUTX)#",
			
 
				+              "value": "mpp:#csv:AUTX#",
			
 
				               "ref": "LOC-DEATH"
			
 
				             }
			
 
				           },
			
 
				           {
			
 
				             "predicate": "crm:P4_has_time-span",
			
 
				             "object": {
			
 
				-              "value": "mpp:#(csv:AUTT)#",
			
 
				+              "value": "mpp:#csv:AUTT#",
			
 
				               "ref": "TIME-DEATH"
			
 
				             }
			
 
				           }
			
 
				       ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:TIME-BIRTH)#",
			
 
				+      "subject": "#obj_ref:TIME-BIRTH#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -156,12 +156,12 @@
 
				           },
			
 
				           {
			
 
				             "predicate": "rdfs:label",
			
 
				-            "object": "\"#(csv:AUTD)#\""
			
 
				+            "object": "\"#csv:AUTD#\""
			
 
				           }
			
 
				       ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:LOC-BIRTH)#",
			
 
				+      "subject": "#obj_ref:LOC-BIRTH#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -169,12 +169,12 @@
 
				           },
			
 
				           {
			
 
				             "predicate": "rdfs:label",
			
 
				-            "object": "\"#(csv:AUTL)#\""
			
 
				+            "object": "\"#csv:AUTL#\""
			
 
				           }
			
 
				       ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:TIME-DEATH)#",
			
 
				+      "subject": "#obj_ref:TIME-DEATH#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -182,12 +182,12 @@
 
				           },
			
 
				           {
			
 
				             "predicate": "rdfs:label",
			
 
				-            "object": "\"#(csv:AUTT)#\""
			
 
				+            "object": "\"#csv:AUTT#\""
			
 
				           }
			
 
				       ]
			
 
				     },
			
 
				     {
			
 
				-      "subject": "#(obj_ref:LOC-DEATH)#",
			
 
				+      "subject": "#obj_ref:LOC-DEATH#",
			
 
				       "content": [
			
 
				           {
			
 
				             "predicate": "rdf:type",
			
@@ -195,7 +195,7 @@
 
				           },
			
 
				           {
			
 
				             "predicate": "rdfs:label",
			
 
				-            "object": "\"#(csv:AUTX)#\""
			
 
				+            "object": "\"#csv:AUTX#\""
			
 
				           }
			
 
				       ]
			
 
				     }
			
--- a/FORMS/parsers/get_form_fields.py
+++ b/FORMS/parsers/get_form_fields.py
@@ -1,24 +1,45 @@
 
				 ## IMPORTS
			
 
				 
			
 
				 # Utilities to read/write csv files
			
 
				-import csv, json
			
 
				+import json
			
 
				+import re
			
 
				 
			
 
				 def getFormFields(mapfilename):
			
 
				 
			
 
				     with open (mapfilename) as mapfile:
			
 
				-        json_dicts = json.load(mapfile)
			
 
				+        triple_blocks = json.load(mapfile)
			
 
				 
			
 
				-        form_fields_iter = map(extractFields, json_dicts)
			
 
				+        all_csvs = []
			
 
				+        for block in triple_blocks:
			
 
				+            all_csvs = all_csvs + extractFields(block)
			
 
				 
			
 
				-        return list(form_fields_iter)
			
 
				+        all_csvs_filtered = []
			
 
				+        for csv in all_csvs:
			
 
				+            if csv in all_csvs_filtered:
			
 
				+                continue
			
 
				+            all_csvs_filtered.append(csv)
			
 
				+
			
 
				+        return all_csvs_filtered
			
 
				 
			
 
				 
			
 
				 def extractFields(entry: dict):
			
 
				     try:
			
 
				-        return entry['colonna']
			
 
				-    except:
			
 
				-        pass
			
 
				-    try:
			
 
				-        return entry['principale']
			
 
				+        subject_val = entry['subject']['value'] if type(entry['subject']) is dict else entry['subject']
			
 
				+        all_csvs = list( csvsFromVals(subject_val) )
			
 
				+
			
 
				+        objs = map(lambda el: el['object'], entry['content'])
			
 
				+        for obj in objs:
			
 
				+            val = obj['value'] if type(obj) is dict else obj
			
 
				+            obj_csvs = csvsFromVals(val)
			
 
				+            all_csvs = all_csvs + list(obj_csvs)
			
 
				+
			
 
				+        return all_csvs        
			
 
				+            
			
 
				     except:
			
 
				-        return None
			
 
				+        raise Exception('Malformed Configuration File')
			
 
				+
			
 
				+def csvsFromVals(value: str):
			
 
				+    val_parts = re.split('#', value)
			
 
				+    val_csvs = list( filter(lambda str1: str1.startswith('csv:'), val_parts) )
			
 
				+    val_csvs = map(lambda str1: str1.replace('csv:', ''), val_csvs)
			
 
				+    return val_csvs
			
--- a/FORMS/samples/RDF/form_output.ttl
+++ b/FORMS/samples/RDF/form_output.ttl
@@ -0,0 +1,41 @@
 
				+@prefix mpp: <https://palazzopretorio.prato.it/it/le-opere/alcuni-capolavori/> .
			
 
				+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
			
 
				+@prefix aut: <https://palazzopretorio.prato.it/it/opere/autori/> .
			
 
				+@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
			
 
				+@prefix aat: <http://vocab.getty.edu/aat/> .
			
 
				+@prefix schema: <http://www.schema.org/> .
			
 
				+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
			
 
				+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
			
 
				+
			
 
				+aut:Tyu rdf:type crm:E21_Person .
			
 
				+aut:Tyu rdf:type foaf:person .
			
 
				+aut:Tyu foaf:name "Pippo Jiollo" .
			
 
				+aut:Tyu foaf:givenName "" .
			
 
				+aut:Tyu foaf:gender "" .
			
 
				+aut:Tyu rdfs:label "Pippo Jiollo, " .
			
 
				+aut:Tyu crm:P3_has_note aut:Tyu_E62 .
			
 
				+aut:Tyu crm:P98i_was_born aut:Tyu_E67 .
			
 
				+aut:Tyu crm:P100i_died_in aut:Tyu_E69 .
			
 
				+aut:Tyu crm:P1_is_identified_by aut: .
			
 
				+aut:Tyu schema:hasOccupation mpp: .
			
 
				+aut:Tyu_E62 rdf:type crm:E62_String .
			
 
				+aut:Tyu_E62 rdfs:label "Fonte: Museo di Palazzo Pretorio - Collezione Martini" .
			
 
				+aut: rdf:type crm:E42_Identifier .
			
 
				+aut: rdfs:label "" .
			
 
				+aut:Tyu_E67 rdf:type crm:E67_Birth .
			
 
				+aut:Tyu_E67 rdfs:label "Nascita di Pippo Jiollo" .
			
 
				+aut:Tyu_E67 crm:P7_took_place_at mpp: .
			
 
				+aut:Tyu_E67 crm:P4_has_time-span mpp: .
			
 
				+aut:Tyu_E69 rdf:type crm:E69_Death .
			
 
				+aut:Tyu_E69 rdfs:label "Morte di Pippo Jiollo" .
			
 
				+aut:Tyu_E69 crm:P7_took_place_at mpp: .
			
 
				+aut:Tyu_E69 crm:P4_has_time-span mpp: .
			
 
				+mpp: rdf:type crm:E52_Time-Span .
			
 
				+mpp: rdfs:label "" .
			
 
				+mpp: rdf:type crm:E53_Place .
			
 
				+mpp: rdfs:label "" .
			
 
				+mpp: rdf:type crm:E52_Time-Span .
			
 
				+mpp: rdfs:label "" .
			
 
				+mpp: rdf:type crm:E53_Place .
			
 
				+mpp: rdfs:label "" .
			
 
				+