## IMPORTS

# Utilities to read/write csv files
import csv
import json
from operator import truediv


# Custom class to store URIs + related infos for the ontologies/repositories
class RDFcoords:
    """Coordinates of an RDF vocabulary: namespace URI, TTL prefix, optional code."""

    def __init__(self, uri, prefix, code=None):
        self.uri = uri
        self.prefix = prefix
        self.code = code


# Repositories
# NOTE(review): every URI below is an empty string, so the emitted @prefix
# lines carry no IRI -- confirm the intended namespace values.
museoCoords = RDFcoords('', 'mpp:')
aspoCoords = RDFcoords('', 'aspo:')
autCoords = RDFcoords('', 'aut:')
foafCoords = RDFcoords('', 'foaf:')
cidocCoords = RDFcoords('', 'crm:')
aatCoords = RDFcoords('', 'aat:')
nsCoords = RDFcoords('', 'rdf:')
schemaCoords = RDFcoords('', 'schema:')
rdfsCoords = RDFcoords('', 'rdfs:')
owlCoords = RDFcoords('', 'owl:')


# Basic utilities to format triples / shortened triples in TTL format
#
# Format full triple
def triple(subject, predicate, object1):
    """Return 'subject predicate object' without any line terminator."""
    line = subject + ' ' + predicate + ' ' + object1
    return line


# Format entry in predicate list (no subject)
def doublet(predicate, object1):
    """Return ' predicate object', i.e. a triple whose subject is implied."""
    line = ' ' + predicate + ' ' + object1
    return line


# Format entry in object list (object only)
def singlet(object1):
    """Return ' object', i.e. a triple whose subject and predicate are implied."""
    line = ' ' + object1
    return line


# Line endings
continueLine1 = ' ;\n'  # Before a predicate list, that is if the FOLLOWING triple has the same subject
continueLine2 = ' ,\n'  # Before an object list, that is if the FOLLOWING triple has the same subject and predicate
closeLine = ' .\n'  # To end a triple / a triples block


def writeTTLHeader(output):
    """Write one '@prefix' declaration per repository, then a blank line.

    `output` is any object with a `write(str)` method. The declaration order
    is fixed: mpp, foaf, aut, crm, aat, schema, rdf, rdfs, owl, aspo.
    """
    # The tuple is built at call time so that rebinding one of the
    # module-level *Coords names is still honoured.
    declared = (
        museoCoords,
        foafCoords,
        autCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        rdfsCoords,
        owlCoords,
        aspoCoords,
    )
    for coords in declared:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')


max_entries = None


def parsefromfile(mapfilename, formFields, infile, outfilename):
    """Decode raw CSV bytes into row dicts and delegate to `parse`.

    `infile` is the CSV content as `bytes`. It is decoded with 'utf-8-sig'
    so that a leading byte-order mark does not end up glued to the first
    column name; input without a BOM decodes exactly as plain UTF-8.
    """
    inputFile = infile.decode('utf-8-sig')
    reader = csv.DictReader(inputFile.splitlines(), skipinitialspace=True)
    csv_dicts = [dict(row) for row in reader]
    parse(mapfilename, formFields, csv_dicts, outfilename)


def _templateOf(node):
    """Return the template string of a subject/object given as str or dict."""
    return node['value'] if isinstance(node, dict) else node


def _hasRef(node):
    """Tell whether a subject/object node is a dict carrying a non-None 'ref'."""
    return isinstance(node, dict) and node.get('ref') is not None


def _resolveRefs(allRefs):
    """Expand reference placeholders inside the referenced values, in place.

    Passes are repeated until a pass changes nothing. With n referenced
    nodes an acyclic dependency chain settles within n passes, so still
    seeing changes after n + 1 passes means the references are circular.

    Raises:
        ValueError: if the values keep changing, i.e. a reference depends
            on itself directly or through other references.
    """
    refs = allRefs['subjects_with_refs'] + allRefs['objects_with_refs']
    for _ in range(len(refs) + 1):
        changed = False
        for ref in refs:
            newVal = replaceRefs(allRefs, ref['value'])
            if newVal != ref['value']:
                ref['value'] = newVal
                changed = True
        if not changed:
            return
    raise ValueError('circular #ref:/#obj_ref: references in the mapping file')


def parse(mapfilename, formFields, csv_dicts, outfilename):
    """Generate a TTL file from CSV rows using a JSON mapping of triple blocks.

    The mapping file is read as a JSON list of blocks. Each block has a
    'subject' (a template string, or a dict with 'value' and optionally
    'ref') and a 'content' list of entries with 'predicate' and 'object'
    (same two shapes as the subject). For every CSV row and every block,
    one triple is written per content entry, each terminated by `closeLine`,
    followed by a blank line after the block.

    Args:
        mapfilename: path of the JSON mapping file (read as UTF-8).
        formFields: CSV column names usable as '#csv:<name>#' placeholders.
        csv_dicts: iterable of row dicts (column name -> string value).
        outfilename: path of the TTL file to write (UTF-8, overwritten).

    Raises:
        ValueError: if the mapping contains circular references. This is
            detected before the output file is opened.
    """
    with open(mapfilename, encoding='utf-8') as mapfile:
        triple_blocks = json.load(mapfile)

    allRefs = getRefs(triple_blocks)
    _resolveRefs(allRefs)

    with open(outfilename, 'w', encoding='utf-8') as outputfile:
        writeTTLHeader(outputfile)
        for csvrow in csv_dicts:
            for entry in triple_blocks:
                subject = replaceRefs(allRefs, _templateOf(entry['subject']))
                subject = replace_csv_values(formFields, csvrow, subject)
                for content in entry['content']:
                    attribute = content['predicate']
                    object1 = replaceRefs(allRefs, _templateOf(content['object']))
                    object1 = replace_csv_values(formFields, csvrow, object1)
                    outputfile.write(triple(subject, attribute, object1))
                    outputfile.write(closeLine)
                outputfile.write('\n')


def getRefs(triple_blocks: list):
    """Collect the subject and object nodes that declare a 'ref' name.

    Returns a dict with two lists, 'subjects_with_refs' and
    'objects_with_refs'. The lists hold the very same dict objects found in
    `triple_blocks`, so updating a node's 'value' is visible in both places.
    Nodes that are plain strings, or dicts without a 'ref', are skipped.
    """
    subjects_with_refs = [
        block['subject']
        for block in triple_blocks
        if _hasRef(block.get('subject'))
    ]
    objects_with_refs = [
        content['object']
        for block in triple_blocks
        for content in block['content']
        if _hasRef(content.get('object'))
    ]
    return {
        'subjects_with_refs': subjects_with_refs,
        'objects_with_refs': objects_with_refs,
    }


def replace_csv_values(formFields: list, csvrow: dict, val: str):
    """Replace each '#csv:<field>#' placeholder in `val` with the row's value.

    A field is looked up in `csvrow` only when its placeholder actually
    occurs in the string, so columns missing from the row are harmless
    unless the template uses them (in which case KeyError is raised).
    """
    outStr = val
    for field in formFields:
        placeholder = '#csv:' + field + '#'
        if placeholder in outStr:
            outStr = outStr.replace(placeholder, csvrow[field])
    return outStr


def replaceRefs(allRefs, val):
    """Expand '#ref:<name>#' and '#obj_ref:<name>#' placeholders in `val`.

    Subject references use the '#ref:' form and object references the
    '#obj_ref:' form; each is replaced by the current 'value' of the node
    registered under that name in `allRefs` (see `getRefs`).
    """
    outStr = val
    for ref in allRefs['subjects_with_refs']:
        outStr = outStr.replace('#ref:' + ref['ref'] + '#', ref['value'])
    for ref in allRefs['objects_with_refs']:
        outStr = outStr.replace('#obj_ref:' + ref['ref'] + '#', ref['value'])
    return outStr