## IMPORTS
# Utilities to read/write csv files
import csv, json
from operator import truediv
# Custom class to store URIs + related infos for the ontologies/repositories
class RDFcoords:
    """Coordinates of an ontology/repository used when emitting TTL.

    Attributes:
        uri: Namespace URI, stored exactly as passed in.
        prefix: Prefix label, stored exactly as passed in (the module-level
            instances include the trailing colon, e.g. 'foaf:').
        code: Optional extra identifier; None when not supplied.
    """

    def __init__(self, uri, prefix, code=None):
        self.uri = uri
        self.prefix = prefix
        self.code = code

    def __repr__(self):
        # Readable representation for logs and debugging sessions
        return '{}(uri={!r}, prefix={!r}, code={!r})'.format(
            type(self).__name__, self.uri, self.prefix, self.code
        )
# Repositories / ontologies whose prefixes are declared in the TTL header
# NOTE(review): every URI below is an empty string, so the '@prefix' lines
# built from these coordinates carry no IRI -- confirm the intended namespaces.
museoCoords = RDFcoords(uri='', prefix='mpp:')
aspoCoords = RDFcoords(uri='', prefix='aspo:')
autCoords = RDFcoords(uri='', prefix='aut:')
foafCoords = RDFcoords(uri='', prefix='foaf:')
cidocCoords = RDFcoords(uri='', prefix='crm:')
aatCoords = RDFcoords(uri='', prefix='aat:')
nsCoords = RDFcoords(uri='', prefix='rdf:')
schemaCoords = RDFcoords(uri='', prefix='schema:')
rdfsCoords = RDFcoords(uri='', prefix='rdfs:')
owlCoords = RDFcoords(uri='', prefix='owl:')
# Basic utilities to format triples / shortened triples in TTL format
#
# Format full triple
def triple(subject, predicate, object1):
    """Return 'subject predicate object' joined by single spaces (no terminator)."""
    return ' '.join((subject, predicate, object1))
# Format entry in predicate list (no subject)
def doublet(predicate, object1):
    """Return ' predicate object': a predicate-list entry with a leading space and no subject."""
    # The empty first item yields the leading space once the parts are joined
    return ' '.join(('', predicate, object1))
# Format entry in object list (object only)
def singlet(object1):
    """Return the object prefixed by a single space: an object-list entry."""
    return ' ' + object1
# Line endings appended after a triple / doublet / singlet
continueLine1 = ' ;\n' # Continues a predicate list: use when the FOLLOWING triple has the same subject
continueLine2 = ' ,\n' # Continues an object list: use when the FOLLOWING triple has the same subject and predicate
closeLine = ' .\n' # Ends a triple / a block of triples
def writeTTLHeader(output):
    """Write one '@prefix' declaration per known repository, followed by a blank line.

    output: any object with a write(str) method (e.g. an open text file).
    """
    # Same declaration order as the generated files have always used
    declared = (
        museoCoords,
        foafCoords,
        autCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        rdfsCoords,
        owlCoords,
        aspoCoords,
    )
    for coords in declared:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')
# NOTE(review): not referenced anywhere in the visible code; presumably a cap
# on the number of processed rows -- confirm it is still needed.
max_entries = None
def parsefromfile(mapfilename, formFields, infile, outfilename):
    """Decode an in-memory CSV payload and hand its rows to parse().

    mapfilename: path of the JSON mapping file (forwarded to parse()).
    formFields: CSV column names to substitute (forwarded to parse()).
    infile: the CSV content as UTF-8 bytes, with or without a leading BOM.
    outfilename: path of the TTL file to write (forwarded to parse()).
    """
    import io  # local import: only this function needs it

    # 'utf-8-sig' drops a leading byte-order mark, which would otherwise
    # become part of the first column name and break lookups by field name.
    inputFile = infile.decode('utf-8-sig')

    # Give the csv module a newline='' stream rather than str.splitlines():
    # splitlines() discards newlines embedded in quoted fields and also breaks
    # rows on characters such as form feed or U+2028.
    reader = csv.DictReader(io.StringIO(inputFile, newline=''), skipinitialspace=True)
    csv_dicts = [dict(row) for row in reader]

    parse(mapfilename, formFields, csv_dicts, outfilename)
def _term_value(term):
    """Return the template string of a subject/object given either as a plain string or as a dict with a 'value' key."""
    return term['value'] if isinstance(term, dict) else term


def _resolve_nested_refs(allRefs):
    """Expand reference placeholders inside the referenced values themselves, in place, until a pass changes nothing."""
    # NOTE(review): a value that (directly or indirectly) references itself
    # changes on every pass, so this loop would not terminate -- confirm that
    # mapping files cannot contain such cycles.
    changed = True
    while changed:
        changed = False
        for group in ('subjects_with_refs', 'objects_with_refs'):
            for ref in allRefs[group]:
                expanded = replaceRefs(allRefs, ref['value'])
                if expanded != ref['value']:
                    ref['value'] = expanded
                    changed = True


def parse(mapfilename, formFields, csv_dicts, outfilename):
    """Generate a TTL file from CSV rows according to a JSON mapping.

    mapfilename: path of a JSON file holding a list of triple blocks; each
        block has a 'subject' and a 'content' list of entries with a
        'predicate' and an 'object'. Subjects and objects are either plain
        strings or dicts with a 'value' (and optionally a 'ref').
    formFields: CSV column names whose '#csv:<name>#' placeholders are filled.
    csv_dicts: iterable of row dicts (column name -> cell text).
    outfilename: path of the TTL file to create or overwrite.

    For every CSV row, every mapping entry produces one full triple per
    content item, each ended by closeLine, followed by a blank line.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    # The file is only needed while loading, so it is closed right away.
    with open(mapfilename, encoding='utf-8') as mapfile:
        triple_blocks = json.load(mapfile)

    allRefs = getRefs(triple_blocks)
    _resolve_nested_refs(allRefs)

    # Reference expansion does not depend on the CSV row: do it once per
    # mapping entry instead of once per row.
    templates = [
        (
            replaceRefs(allRefs, _term_value(entry['subject'])),
            [
                (content['predicate'], replaceRefs(allRefs, _term_value(content['object'])))
                for content in entry['content']
            ],
        )
        for entry in triple_blocks
    ]

    # Turtle documents are UTF-8 encoded, whatever the platform default is
    with open(outfilename, 'w', encoding='utf-8') as outputfile:
        writeTTLHeader(outputfile)
        for csvrow in csv_dicts:
            for subject_template, pairs in templates:
                subject = replace_csv_values(formFields, csvrow, subject_template)
                for predicate, object_template in pairs:
                    object1 = replace_csv_values(formFields, csvrow, object_template)
                    outputfile.write(triple(subject, predicate, object1))
                    outputfile.write(closeLine)
                # Blank line after the triples of one mapping entry
                # NOTE(review): nesting reconstructed from unindented source -- confirm
                outputfile.write('\n')
def getRefs(triple_blocks: list):
    """Collect every subject and object of the mapping that declares a 'ref'.

    triple_blocks: sequence of mapping blocks; each block is indexed by
        'subject' and 'content', and each content entry by 'object'.

    Returns a dict with two lists, 'subjects_with_refs' and
    'objects_with_refs', holding the very same dict objects found in
    triple_blocks (not copies), in mapping order. Terms that are plain
    strings, lack a 'ref' key, or have 'ref' set to None are skipped.

    Raises KeyError if a block has no 'content' key.
    """
    subjects_with_refs = []
    for block in triple_blocks:
        try:
            subject_ref = block['subject']['ref']
        except (KeyError, TypeError):
            # Missing 'subject', plain-string subject, or dict without 'ref'
            continue
        if subject_ref is not None:
            subjects_with_refs.append(block['subject'])

    objects_with_refs = []
    for block in triple_blocks:
        for content in block['content']:
            try:
                object_ref = content['object']['ref']
            except (KeyError, TypeError):
                # Missing 'object', plain-string object, or dict without 'ref'
                continue
            if object_ref is not None:
                objects_with_refs.append(content['object'])

    return {'subjects_with_refs': subjects_with_refs, 'objects_with_refs': objects_with_refs}
def replace_csv_values(formFields: list, csvrow: dict, val: str):
    """Fill the '#csv:<field>#' placeholders of val with values from csvrow.

    formFields: column names to substitute, processed in order.
    csvrow: mapping from column name to cell text; must contain every name
        listed in formFields.
    val: template string.

    Returns the template with each listed placeholder replaced.
    """
    result = val
    for name in formFields:
        placeholder = '#csv:' + name + '#'
        result = result.replace(placeholder, csvrow[name])
    return result
def replaceRefs(allRefs, val):
    """Expand reference placeholders in val.

    allRefs: dict with 'subjects_with_refs' and 'objects_with_refs' lists,
        whose items each carry a 'ref' name and a 'value' string.
    val: template string.

    '#ref:<name>#' is replaced by the value of the subject with that ref name,
    '#obj_ref:<name>#' by the value of the object with that ref name.
    Subjects are processed before objects.
    """
    expanded = val
    markers = (('#ref:', 'subjects_with_refs'), ('#obj_ref:', 'objects_with_refs'))
    for opening, group in markers:
        for entry in allRefs[group]:
            placeholder = opening + entry['ref'] + '#'
            expanded = expanded.replace(placeholder, entry['value'])
    return expanded