Francesco 1 年之前
父節點
當前提交
9452776b83

+ 10 - 7
FORMS/app.py

@@ -9,10 +9,14 @@ app = Flask(__name__)
 def main():
 
     configurationFolder = 'parsers/configuration_files/'
-    configurationFileName = 'descrizione_minimal.json'   
+    configurationFileName = 'configuration.json'   
     confFilePath = configurationFolder + configurationFileName
 
-    formFields = getFormFields(confFilePath)
+    try:
+        formFields = getFormFields(confFilePath)
+    except:
+        return redirect('/error/')
+
 
     if request.method == 'POST':
 
@@ -26,19 +30,18 @@ def main():
             outFilePath = outputFolder+outFileName
             inFile = fileFromRequest.read()
             # try to create list of dictionaries keyed by header row
-            parsefromfile(confFilePath, inFile, outFilePath)
+            parsefromfile(confFilePath, formFields, inFile, outFilePath)
 
         else:
             # Da completare
             data =  {}
-            data['AUTN'] = request.form['AUTN']
-            data['AUTL'] = request.form['AUTL']
-            data['AUTD'] = request.form['AUTD']
+            for field in formFields:
+                data[field] = request.form[field]
 
             check = [val for val in data.values() if val!='']
 
             if len(check)>0:
-                parse(confFilePath, [data], outputFolder + 'form_output.ttl')
+                parse(confFilePath, formFields,[data], outputFolder + 'form_output.ttl')
             else:
                 return redirect('/error/')
 

+ 80 - 66
FORMS/parsers/CSV_to_RDF_generico.py

@@ -2,6 +2,7 @@
 
 # Utilities to read/write csv files
 import csv, json
+from operator import truediv
 
 
 # Custom class to store URIs + related infos for the ontologies/repositories
@@ -61,86 +62,99 @@ def writeTTLHeader(output):
 
 max_entries = None
 
-def parsefromfile(mapfilename, infile, outfilename):
+def parsefromfile(mapfilename, formFields, infile, outfilename):
     inputFile = infile.decode()
     csv_dicts = [{k: v for k, v in row.items()} for row in csv.DictReader(inputFile.splitlines(), skipinitialspace=True)]
-    parse(mapfilename, csv_dicts, outfilename)
+    parse(mapfilename, formFields, csv_dicts, outfilename)
 
 
-def parse(mapfilename, csv_dicts, outfilename):
+def parse(mapfilename, formFields, csv_dicts, outfilename):
 
     with open (mapfilename) as mapfile:
-        json_dicts = json.load(mapfile)
+        triple_blocks = json.load(mapfile)
+    
+    allRefs = getRefs(triple_blocks)
+    doReplace = True
+    while doReplace:
+        doReplace = False
+        for ref in allRefs['subjects_with_refs']:
+            oldVal = ref['value']
+            newVal = replaceRefs(allRefs, oldVal)
+            if(oldVal != newVal):
+                ref['value'] = newVal
+                doReplace = True
+        for ref in allRefs['objects_with_refs']:
+            oldVal = ref['value']
+            newVal = replaceRefs(allRefs, oldVal)
+            if(oldVal != newVal):
+                ref['value'] = newVal
+                doReplace = True
+
 
     with open(outfilename, 'w') as outputfile:
 
         writeTTLHeader(outputfile)
 
-        for ii, csvrow in enumerate(csv_dicts):
+        for csvrow in csv_dicts:
+            for entry in triple_blocks:
 
-            # The index ii is mainly used to limit the number of entries to process, for testing purposes
-            for node in json_dicts:
+                subject = entry['subject']['value'] if type(entry['subject']) is dict else entry['subject']
+                subject = replaceRefs(allRefs, subject)
+                subject = replace_csv_values(formFields, csvrow, subject)
+                for content in entry['content']:
+                    attribute = content['predicate']
+                    object1 = content['object']['value'] if type(content['object']) is dict else content['object']
+                    object1 = replaceRefs(allRefs, object1)
+                    object1 = replace_csv_values(formFields, csvrow, object1)
 
-                uri = node['uri']
+                    toWrite = triple(subject, attribute, object1)
 
-                mainCols = node.get('colonna')
-                if mainCols is None:
-                    mainCols = node.get('principale')
-                if mainCols is None:
-                    continue
+                    outputfile.write(toWrite)
+                    outputfile.write(closeLine)
+                
+            outputfile.write('\n')
+                
 
-                if type(mainCols) is list:
-                    main = [csvrow[col] for col in mainCols]
-                else:
-                    if '#' in mainCols:
-                        continue
-                    main = csvrow[mainCols]
 
-                
-                rdfType = node["tipo"]
-
-                if type(rdfType) is list:
-                    for type1 in rdfType:
-                        line = triple(settripleuri(main, uri), nsCoords.prefix + 'type', type1) + closeLine
-                        outputfile.write(line)
-          
-                sottoelemento = ''
-                try:
-                    sottoelemento = node["sottoelementodi"]
-                except:
-                    pass
-                if sottoelemento != '':
-                    parent = next (filter(lambda el: el.get("identificatore")==node["sottoelementodi"], json_dicts), None)
-                    if parent is not None:
-                        if type(parent["colonna"]) is list:
-                            parent_main = [csvrow[col] for col in parent["colonna"]]
-                        else:
-                            parent_main = csvrow[parent["colonna"]]
-                        subject = settripleuri(parent_main, parent["uri"])
-                        property = node["relazione"]
-                        object = settripleuri(main, node["uri"])
-                        line = triple(subject, property,
-                        object) + closeLine
-                        outputfile.write(line)
-
-                outputfile.write('\n')
-            #
-            #
-            # To limit number of entries processed (if desired for testing purposes)
-            if (max_entries is not None and ii > max_entries): 
-                break
-
-def settripleuri (csvvalue, nodeuri):
-    output = "\""+nodeuri+"\""
-    if type(csvvalue) is list:
-        for ii, value in enumerate(csvvalue):
-            if value=='':
-                output = output.replace('$VALORE_CSV_'+ str(ii)+'$', 'N/A')
-            else:
-                output = output.replace('$VALORE_CSV_'+ str(ii)+'$', value)
-
-    else:
-        output = output.replace('$VALORE_CSV$', csvvalue)
-
-    return output
+def _node_with_ref(holder, key):
+    """Return holder[key] when it is a dict carrying a non-None 'ref', else None."""
+    node = holder.get(key) if isinstance(holder, dict) else None
+    if isinstance(node, dict) and node.get('ref') is not None:
+        return node
+    return None
+
+
+def getRefs(triple_blocks: list):
+    """Collect every subject/object node of the mapping that declares a 'ref'.
+
+    Args:
+        triple_blocks: The parsed configuration, a list of blocks. Each block
+            has a 'subject' and a 'content' list whose items carry an
+            'object'. A subject/object is either a plain string or a dict
+            with a 'ref' key.
+
+    Returns:
+        A dict with two lists, 'subjects_with_refs' and 'objects_with_refs'.
+        The lists hold the node dicts themselves (not copies), so a caller
+        that rewrites a node's 'value' also changes the loaded configuration.
+
+    Raises:
+        KeyError: If a block has no 'content' entry.
+    """
+    subjects_with_refs = []
+    objects_with_refs = []
+
+    for block in triple_blocks:
+        # Plain-string subjects/objects and dicts without 'ref' are skipped
+        # through explicit checks rather than a catch-all except clause.
+        subject = _node_with_ref(block, 'subject')
+        if subject is not None:
+            subjects_with_refs.append(subject)
+
+        for content in block['content']:
+            obj = _node_with_ref(content, 'object')
+            if obj is not None:
+                objects_with_refs.append(obj)
+
+    return {'subjects_with_refs': subjects_with_refs, 'objects_with_refs': objects_with_refs}
+
+
+def replace_csv_values(formFields: list, csvrow: dict, val: str):
+    """Fill the '#csv:<field>#' placeholders of val with values from one row.
+
+    Args:
+        formFields: Field names whose placeholders should be substituted.
+        csvrow: Mapping from field name to the value for the current row.
+        val: Template string that may contain '#csv:<field>#' placeholders.
+
+    Returns:
+        val with each known placeholder replaced. A field that is missing
+        from csvrow, or whose value is None, is replaced by an empty string.
+    """
+    outStr = val
+    for field in formFields:
+        # An uploaded CSV may lack a configured column, and csv.DictReader
+        # pads short rows with None; neither case should abort the export.
+        cell = csvrow.get(field)
+        replacement = '' if cell is None else str(cell)
+        outStr = outStr.replace('#csv:' + field + '#', replacement)
+
+    return outStr
+
+
+def replaceRefs(allRefs, val):
+    """Expand reference placeholders in val using the collected ref nodes.
+
+    Subject nodes are addressed as '#ref:<name>#' and object nodes as
+    '#obj_ref:<name>#'; each placeholder is replaced by the node's current
+    'value'. Subject references are expanded before object references.
+    """
+    # (key inside allRefs, placeholder prefix), in substitution order.
+    placeholder_kinds = (
+        ('subjects_with_refs', '#ref:'),
+        ('objects_with_refs', '#obj_ref:'),
+    )
+
+    expanded = val
+    for group, prefix in placeholder_kinds:
+        for node in allRefs[group]:
+            expanded = expanded.replace(prefix + node['ref'] + '#', node['value'])
+
+    return expanded
 

+ 29 - 29
FORMS/parsers/configuration_files/configuration.json

@@ -1,7 +1,7 @@
 [
     {
         "subject": {
-          "value": "aut:#(csv:URL)#",
+          "value": "aut:#csv:URL#",
           "ref": "MAIN"
         },
         "comment": "OPZIONALE - ci si può scrivere icché si vòle.",
@@ -16,59 +16,59 @@
             },
             {
               "predicate": "foaf:name",
-              "object": "\"#(csv:AUTN)#\""
+              "object": "\"#csv:AUTN#\""
             },
             {
               "predicate": "foaf:givenName",
-              "object": "\"#(csv:AUTO)#\""
+              "object": "\"#csv:AUTO#\""
             },
             {
               "predicate": "foaf:gender",
-              "object": "\"#(csv:AUTZ)#\""
+              "object": "\"#csv:AUTZ#\""
             },
             {
               "predicate": "rdfs:label",
-              "object": "\"#(csv:AUTN)#, #(csv:AUTA)#\""
+              "object": "\"#csv:AUTN#, #csv:AUTA#\""
             },
             {
               "predicate": "crm:P3_has_note",
               "object": {
-                "value": "#(ref:MAIN)#_E62",
+                "value": "#ref:MAIN#_E62",
                 "ref": "E62"
               }
             },
             {
               "predicate": "crm:P98i_was_born",
               "object": {
-                "value": "#(ref:MAIN)#_E67",
+                "value": "#ref:MAIN#_E67",
                 "ref": "E67"
               }
             },
             {
               "predicate": "crm:P100i_died_in",
               "object": {
-                "value": "#(ref:MAIN)#_E69",
+                "value": "#ref:MAIN#_E69",
                 "ref": "E69"
               }
             },
             {
               "predicate": "crm:P1_is_identified_by",
               "object": {
-                "value": "aut:#(csv:AUTH)#",
+                "value": "aut:#csv:AUTH#",
                 "ref": "E42"
               }
             },
             {
               "predicate": "schema:hasOccupation",
               "object": {
-                "value": "mpp:#(csv:AUTQ)#",
+                "value": "mpp:#csv:AUTQ#",
                 "ref": "OCCUPATION"
               }
             }
         ]
     },
     {
-      "subject": "#(obj_ref:E62)#",
+      "subject": "#obj_ref:E62#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -81,7 +81,7 @@
       ]
     },
     {
-      "subject": "#(obj_ref:E42)#",
+      "subject": "#obj_ref:E42#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -89,12 +89,12 @@
           },
           {
             "predicate": "rdfs:label",
-            "object": "\"#(csv:AUTH)#\""
+            "object": "\"#csv:AUTH#\""
           }
       ]
     },
     {
-      "subject": "#(obj_ref:E67)#",
+      "subject": "#obj_ref:E67#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -102,26 +102,26 @@
           },
           {
             "predicate": "rdfs:label",
-            "object": "\"Nascita di #(csv:AUTN)#\""
+            "object": "\"Nascita di #csv:AUTN#\""
           },
           {
             "predicate": "crm:P7_took_place_at",
             "object": {
-              "value": "mpp:#(csv:AUTL)#",
+              "value": "mpp:#csv:AUTL#",
               "ref": "LOC-BIRTH"
             }
           },
           {
             "predicate": "crm:P4_has_time-span",
             "object": {
-              "value": "mpp:#(csv:AUTD)#",
+              "value": "mpp:#csv:AUTD#",
               "ref": "TIME-BIRTH"
             }
           }
       ]
     },
     {
-      "subject": "#(obj_ref:E69)#",
+      "subject": "#obj_ref:E69#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -129,26 +129,26 @@
           },
           {
             "predicate": "rdfs:label",
-            "object": "\"Morte di #(csv:AUTN)#\""
+            "object": "\"Morte di #csv:AUTN#\""
           },
           {
             "predicate": "crm:P7_took_place_at",
             "object": {
-              "value": "mpp:#(csv:AUTX)#",
+              "value": "mpp:#csv:AUTX#",
               "ref": "LOC-DEATH"
             }
           },
           {
             "predicate": "crm:P4_has_time-span",
             "object": {
-              "value": "mpp:#(csv:AUTT)#",
+              "value": "mpp:#csv:AUTT#",
               "ref": "TIME-DEATH"
             }
           }
       ]
     },
     {
-      "subject": "#(obj_ref:TIME-BIRTH)#",
+      "subject": "#obj_ref:TIME-BIRTH#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -156,12 +156,12 @@
           },
           {
             "predicate": "rdfs:label",
-            "object": "\"#(csv:AUTD)#\""
+            "object": "\"#csv:AUTD#\""
           }
       ]
     },
     {
-      "subject": "#(obj_ref:LOC-BIRTH)#",
+      "subject": "#obj_ref:LOC-BIRTH#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -169,12 +169,12 @@
           },
           {
             "predicate": "rdfs:label",
-            "object": "\"#(csv:AUTL)#\""
+            "object": "\"#csv:AUTL#\""
           }
       ]
     },
     {
-      "subject": "#(obj_ref:TIME-DEATH)#",
+      "subject": "#obj_ref:TIME-DEATH#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -182,12 +182,12 @@
           },
           {
             "predicate": "rdfs:label",
-            "object": "\"#(csv:AUTT)#\""
+            "object": "\"#csv:AUTT#\""
           }
       ]
     },
     {
-      "subject": "#(obj_ref:LOC-DEATH)#",
+      "subject": "#obj_ref:LOC-DEATH#",
       "content": [
           {
             "predicate": "rdf:type",
@@ -195,7 +195,7 @@
           },
           {
             "predicate": "rdfs:label",
-            "object": "\"#(csv:AUTX)#\""
+            "object": "\"#csv:AUTX#\""
           }
       ]
     }

+ 31 - 10
FORMS/parsers/get_form_fields.py

@@ -1,24 +1,45 @@
 ## IMPORTS
 
 # Utilities to read/write csv files
-import csv, json
+import json
+import re
 
 def getFormFields(mapfilename):
 
     with open (mapfilename) as mapfile:
-        json_dicts = json.load(mapfile)
+        triple_blocks = json.load(mapfile)
 
-        form_fields_iter = map(extractFields, json_dicts)
+        all_csvs = []
+        for block in triple_blocks:
+            all_csvs = all_csvs + extractFields(block)
 
-        return list(form_fields_iter)
+        all_csvs_filtered = []
+        for csv in all_csvs:
+            if csv in all_csvs_filtered:
+                continue
+            all_csvs_filtered.append(csv)
+
+        return all_csvs_filtered
 
 
 def extractFields(entry: dict):
     try:
-        return entry['colonna']
-    except:
-        pass
-    try:
-        return entry['principale']
+        subject_val = entry['subject']['value'] if type(entry['subject']) is dict else entry['subject']
+        all_csvs = list( csvsFromVals(subject_val) )
+
+        objs = map(lambda el: el['object'], entry['content'])
+        for obj in objs:
+            val = obj['value'] if type(obj) is dict else obj
+            obj_csvs = csvsFromVals(val)
+            all_csvs = all_csvs + list(obj_csvs)
+
+        return all_csvs        
+            
     except:
-        return None
+        raise Exception('Malformed Configuration File')
+
+def csvsFromVals(value: str):
+    """Yield the field names referenced by '#csv:<field>#' placeholders in value.
+
+    The string is split on '#'; every segment starting with 'csv:' is treated
+    as a placeholder and returned without that leading marker. Only the
+    leading 'csv:' is stripped, so a name that itself contains 'csv:' comes
+    back intact and still matches its '#csv:<field>#' placeholder.
+
+    Returns:
+        A lazy iterator (map object) over the field names, in order of
+        appearance; duplicates are preserved.
+    """
+    marker = 'csv:'
+    val_parts = re.split('#', value)
+    val_csvs = [part for part in val_parts if part.startswith(marker)]
+    return map(lambda part: part[len(marker):], val_csvs)

+ 41 - 0
FORMS/samples/RDF/form_output.ttl

@@ -0,0 +1,41 @@
+@prefix mpp: <https://palazzopretorio.prato.it/it/le-opere/alcuni-capolavori/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix aut: <https://palazzopretorio.prato.it/it/opere/autori/> .
+@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
+@prefix aat: <http://vocab.getty.edu/aat/> .
+@prefix schema: <http://www.schema.org/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+aut:Tyu rdf:type crm:E21_Person .
+aut:Tyu rdf:type foaf:person .
+aut:Tyu foaf:name "Pippo Jiollo" .
+aut:Tyu foaf:givenName "" .
+aut:Tyu foaf:gender "" .
+aut:Tyu rdfs:label "Pippo Jiollo, " .
+aut:Tyu crm:P3_has_note aut:Tyu_E62 .
+aut:Tyu crm:P98i_was_born aut:Tyu_E67 .
+aut:Tyu crm:P100i_died_in aut:Tyu_E69 .
+aut:Tyu crm:P1_is_identified_by aut: .
+aut:Tyu schema:hasOccupation mpp: .
+aut:Tyu_E62 rdf:type crm:E62_String .
+aut:Tyu_E62 rdfs:label "Fonte: Museo di Palazzo Pretorio - Collezione Martini" .
+aut: rdf:type crm:E42_Identifier .
+aut: rdfs:label "" .
+aut:Tyu_E67 rdf:type crm:E67_Birth .
+aut:Tyu_E67 rdfs:label "Nascita di Pippo Jiollo" .
+aut:Tyu_E67 crm:P7_took_place_at mpp: .
+aut:Tyu_E67 crm:P4_has_time-span mpp: .
+aut:Tyu_E69 rdf:type crm:E69_Death .
+aut:Tyu_E69 rdfs:label "Morte di Pippo Jiollo" .
+aut:Tyu_E69 crm:P7_took_place_at mpp: .
+aut:Tyu_E69 crm:P4_has_time-span mpp: .
+mpp: rdf:type crm:E52_Time-Span .
+mpp: rdfs:label "" .
+mpp: rdf:type crm:E53_Place .
+mpp: rdfs:label "" .
+mpp: rdf:type crm:E52_Time-Span .
+mpp: rdfs:label "" .
+mpp: rdf:type crm:E53_Place .
+mpp: rdfs:label "" .
+