|
@@ -2,6 +2,7 @@
|
|
|
|
|
|
# Utilities to read/write csv files
|
|
|
import csv, json
|
|
|
+from operator import truediv
|
|
|
|
|
|
|
|
|
# Custom class to store URIs and related information for the ontologies/repositories
|
|
@@ -61,86 +62,99 @@ def writeTTLHeader(output):
|
|
|
|
|
|
# NOTE(review): parse() no longer reads this limit in the revision shown here;
# presumably a leftover testing knob - confirm nothing else uses it before removing.
max_entries = None
|
|
|
|
|
|
def parsefromfile(mapfilename, formFields, infile, outfilename):
    """Parse raw CSV content and hand the rows to parse().

    Args:
        mapfilename: Path of the JSON mapping file (forwarded to parse()).
        formFields: CSV column names that may appear as '#csv:<name>#'
            placeholders (forwarded to parse()).
        infile: The CSV content, either as encoded bytes (anything exposing
            ``decode()``) or as an already decoded ``str``.
        outfilename: Path of the output file (forwarded to parse()).
    """
    # Local import so this function does not depend on the file's import block.
    import io

    text = infile if isinstance(infile, str) else infile.decode()
    # A UTF-8 byte-order mark would otherwise become part of the first column
    # name and break every lookup of that column.
    if text.startswith('\ufeff'):
        text = text[1:]

    # The csv module expects a stream opened with newline='' so that line breaks
    # inside quoted fields survive; str.splitlines() drops them and also splits
    # on separators (e.g. form feed, U+2028) that are not CSV row terminators.
    reader = csv.DictReader(io.StringIO(text, newline=''), skipinitialspace=True)
    csv_dicts = [dict(row) for row in reader]

    parse(mapfilename, formFields, csv_dicts, outfilename)
|
|
|
|
|
|
|
|
|
def _node_text(node):
    """Return the template string of a subject/object node (dict with 'value' or plain string)."""
    return node['value'] if isinstance(node, dict) else node


def _resolve_nested_refs(allRefs):
    """Expand references nested inside referenced values, in place, until stable.

    Raises:
        ValueError: if the values still change after more passes than there are
            references, which can only happen with self-referencing or circular
            definitions (the unbounded loop would never terminate on those).
    """
    refs = allRefs['subjects_with_refs'] + allRefs['objects_with_refs']
    # An acyclic chain of N references is fully expanded after at most N passes;
    # one extra pass confirms that nothing changes any more.
    for _ in range(len(refs) + 1):
        changed = False
        for ref in refs:
            expanded = replaceRefs(allRefs, ref['value'])
            if expanded != ref['value']:
                ref['value'] = expanded
                changed = True
        if not changed:
            return
    raise ValueError('circular or self-referencing #ref:/#obj_ref: definitions in mapping')


def parse(mapfilename, formFields, csv_dicts, outfilename):
    """Write one set of triples per CSV row, following a JSON mapping file.

    The mapping is read as a list of blocks. Each block has a 'subject' and a
    'content' list whose items carry a 'predicate' and an 'object'. Subjects
    and objects are either plain strings or dicts with a 'value' (and
    optionally a 'ref' name that other values can point to through
    '#ref:<name>#' / '#obj_ref:<name>#'). '#csv:<column>#' placeholders are
    filled from the current CSV row.

    Args:
        mapfilename: Path of the JSON mapping file.
        formFields: CSV column names that may be used in '#csv:<name>#' placeholders.
        csv_dicts: Iterable of rows, each a dict mapping column name to value.
        outfilename: Path of the file to (over)write.

    Raises:
        ValueError: if the mapping contains circular reference definitions.
        KeyError: if a block lacks one of the keys described above.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(mapfilename, encoding='utf-8') as mapfile:
        triple_blocks = json.load(mapfile)

    allRefs = getRefs(triple_blocks)
    _resolve_nested_refs(allRefs)

    # Reference expansion does not depend on the CSV row, so it is done once
    # here instead of once per row and block.
    templates = []
    for entry in triple_blocks:
        subject_template = replaceRefs(allRefs, _node_text(entry['subject']))
        pairs = [
            (content['predicate'], replaceRefs(allRefs, _node_text(content['object'])))
            for content in entry['content']
        ]
        templates.append((subject_template, pairs))

    # Written explicitly as UTF-8 rather than with the platform default encoding.
    with open(outfilename, 'w', encoding='utf-8') as outputfile:
        writeTTLHeader(outputfile)

        for csvrow in csv_dicts:
            for subject_template, pairs in templates:
                subject = replace_csv_values(formFields, csvrow, subject_template)
                for predicate, object_template in pairs:
                    obj = replace_csv_values(formFields, csvrow, object_template)
                    outputfile.write(triple(subject, predicate, obj))
                    outputfile.write(closeLine)

                # NOTE(review): a blank line is emitted after each mapping block;
                # the original indentation of this write was not recoverable - confirm.
                outputfile.write('\n')
|
|
|
-def settripleuri (csvvalue, nodeuri):
|
|
|
- output = "\""+nodeuri+"\""
|
|
|
- if type(csvvalue) is list:
|
|
|
- for ii, value in enumerate(csvvalue):
|
|
|
- if value=='':
|
|
|
- output = output.replace('$VALORE_CSV_'+ str(ii)+'$', 'N/A')
|
|
|
- else:
|
|
|
- output = output.replace('$VALORE_CSV_'+ str(ii)+'$', value)
|
|
|
-
|
|
|
- else:
|
|
|
- output = output.replace('$VALORE_CSV$', csvvalue)
|
|
|
-
|
|
|
- return output
|
|
|
def _ref_node(container, key):
    """Return container[key] if it is a dict carrying a non-None 'ref', else None."""
    if not isinstance(container, dict):
        return None
    node = container.get(key)
    if isinstance(node, dict) and node.get('ref') is not None:
        return node
    return None


def getRefs(triple_blocks: list):
    """Collect the subject and object nodes of the mapping that declare a 'ref'.

    Args:
        triple_blocks: List of mapping blocks; each block is a dict with a
            'subject' and a 'content' list whose items may hold an 'object'.

    Returns:
        A dict with two lists, 'subjects_with_refs' and 'objects_with_refs'.
        The lists hold the very same dict objects found in ``triple_blocks``
        (not copies), so updating a node's 'value' through these lists also
        updates the mapping.

    Raises:
        KeyError: if a block has no 'content' entry.
    """
    subjects_with_refs = []
    objects_with_refs = []

    for block in triple_blocks:
        # Plain-string subjects/objects and dicts without 'ref' are skipped by
        # an explicit type check rather than by a catch-all ``except``.
        subject = _ref_node(block, 'subject')
        if subject is not None:
            subjects_with_refs.append(subject)

        for content in block['content']:
            obj = _ref_node(content, 'object')
            if obj is not None:
                objects_with_refs.append(obj)

    return {'subjects_with_refs': subjects_with_refs, 'objects_with_refs': objects_with_refs}
|
|
|
+
|
|
|
+
|
|
|
def replace_csv_values(formFields: list, csvrow: dict, val: str):
    """Fill the '#csv:<field>#' placeholders of ``val`` with values from one CSV row.

    Args:
        formFields: Column names that may be used as placeholders.
        csvrow: One CSV row, mapping column name to cell value.
        val: Template string possibly containing '#csv:<field>#' placeholders.

    Returns:
        ``val`` with every placeholder of a listed field replaced by that
        field's cell value. A ``None`` cell is written as an empty string.

    Raises:
        KeyError: if a placeholder present in ``val`` names a column that is
            missing from ``csvrow``.
    """
    outStr = val
    for field in formFields:
        placeholder = '#csv:' + field + '#'
        # Only read the row when the placeholder is actually used, so fields
        # that are irrelevant for this template cannot raise KeyError.
        if placeholder not in outStr:
            continue
        cell = csvrow[field]
        # csv.DictReader yields None for cells missing from a short row;
        # str.replace() would raise TypeError on it.
        outStr = outStr.replace(placeholder, '' if cell is None else str(cell))

    return outStr
|
|
|
+
|
|
|
+
|
|
|
def replaceRefs(allRefs, val):
    """Substitute reference placeholders in ``val`` with the referenced values.

    '#ref:<name>#' is replaced using the entries of
    ``allRefs['subjects_with_refs']`` and '#obj_ref:<name>#' using those of
    ``allRefs['objects_with_refs']``; each entry provides 'ref' (the name) and
    'value' (the replacement text). Subject references are applied first.
    """
    # (placeholder prefix, key of the list holding the matching entries)
    sources = (
        ('#ref:', 'subjects_with_refs'),
        ('#obj_ref:', 'objects_with_refs'),
    )

    result = val
    for prefix, bucket in sources:
        for entry in allRefs[bucket]:
            placeholder = prefix + entry['ref'] + '#'
            result = result.replace(placeholder, entry['value'])
    return result
|
|
|
|