## IMPORTS
# Utilities to read/write CSV and JSON files
- import csv, json
- from operator import truediv
# Custom class storing a namespace URI, its prefix and optional related info for the ontologies/repositories
class RDFcoords:
    """Namespace coordinates of an ontology or repository.

    Attributes:
        uri: namespace IRI, stored exactly as given.
        prefix: prefix label, stored exactly as given.
        code: optional extra value; None when the caller does not pass one.
    """

    def __init__(self, uri, prefix, code=None):
        # Values are kept verbatim; no validation or normalisation happens.
        self.uri, self.prefix, self.code = uri, prefix, code
# Namespace coordinates of the repositories and ontologies used in the output.
# Each URI is already wrapped in angle brackets and each prefix ends with a
# colon, so both can be concatenated straight into Turtle text.

# Museum works pages (palazzopretorio.prato.it)
museoCoords = RDFcoords(
    uri='<https://palazzopretorio.prato.it/it/le-opere/alcuni-capolavori/>',
    prefix='mpp:',
)
# Archive records (archiviodistato.prato.it)
aspoCoords = RDFcoords(
    uri='<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>',
    prefix='aspo:',
)
# Authors pages (palazzopretorio.prato.it)
autCoords = RDFcoords(
    uri='<https://palazzopretorio.prato.it/it/opere/autori/>',
    prefix='aut:',
)
# FOAF vocabulary
foafCoords = RDFcoords(uri='<http://xmlns.com/foaf/0.1/>', prefix='foaf:')
# CIDOC CRM ontology
cidocCoords = RDFcoords(uri='<http://www.cidoc-crm.org/cidoc-crm/>', prefix='crm:')
# Getty AAT vocabulary
aatCoords = RDFcoords(uri='<http://vocab.getty.edu/aat/>', prefix='aat:')
# RDF syntax namespace
nsCoords = RDFcoords(uri='<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', prefix='rdf:')
# schema.org vocabulary
# NOTE(review): schema.org documents its namespace without the "www." host
# part - confirm whether this IRI is intentional before changing it.
schemaCoords = RDFcoords(uri='<http://www.schema.org/>', prefix='schema:')
# RDF Schema vocabulary
rdfsCoords = RDFcoords(uri='<http://www.w3.org/2000/01/rdf-schema#>', prefix='rdfs:')
# OWL vocabulary
owlCoords = RDFcoords(uri='<http://www.w3.org/2002/07/owl#>', prefix='owl:')
# Basic utilities to format triples / abbreviated triples in TTL (Turtle) format

# Format a full triple
def triple(subject, predicate, object1):
    """Return 'subject predicate object1' separated by single spaces.

    No line terminator is appended; the caller adds the wanted ending.
    """
    return ' '.join((subject, predicate, object1))
# Format an entry of a predicate list (no subject)
def doublet(predicate, object1):
    """Return ' predicate object1': a predicate-list entry with a leading space.

    No line terminator is appended.
    """
    return ' ' + ' '.join((predicate, object1))
# Format an entry of an object list (object only)
def singlet(object1):
    """Return ' object1': an object-list entry with a leading space.

    No line terminator is appended.
    """
    return ''.join((' ', object1))
# Line endings (Turtle statement terminators)

# Use before a predicate-list entry, i.e. when the FOLLOWING triple has the
# same subject.
continueLine1 = ' ;\n'

# Use before an object-list entry, i.e. when the FOLLOWING triple has the same
# subject and predicate.
continueLine2 = ' ,\n'

# Ends a triple / a block of triples.
closeLine = ' .\n'
def writeTTLHeader(output):
    """Write one '@prefix' declaration per namespace, then an empty line.

    output: object exposing write(str), e.g. a file opened in text mode.
    """
    # The order below is the order in which the declarations are emitted.
    namespaces = (
        museoCoords,
        foafCoords,
        autCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        rdfsCoords,
        owlCoords,
        aspoCoords,
    )
    for coords in namespaces:
        declaration = ' '.join(('@prefix', coords.prefix, coords.uri))
        output.write(declaration + closeLine)
    output.write('\n')
# Not read by any code visible in this file.
# NOTE(review): presumably meant as an optional cap on the number of CSV rows
# to process - confirm, or remove if it is dead.
max_entries = None
def parsefromfile(mapfilename, formFields, infile, outfilename):
    """Decode an in-memory CSV payload and hand its rows to parse().

    mapfilename: forwarded unchanged to parse().
    formFields:  forwarded unchanged to parse().
    infile:      object exposing decode() (e.g. bytes) holding the CSV text;
                 decoded with decode()'s default codec.
    outfilename: forwarded unchanged to parse().
    """
    text = infile.decode()
    # skipinitialspace drops blanks that directly follow a delimiter.
    reader = csv.DictReader(text.splitlines(), skipinitialspace=True)
    rows = [dict(row) for row in reader]
    parse(mapfilename, formFields, rows, outfilename)
def parse(mapfilename, formFields, csv_dicts, outfilename):
    """Write a Turtle file built from CSV rows and a JSON triple mapping.

    The mapping file holds a list of entries. Each entry has a 'subject' and
    a 'content' list of {'predicate', 'object'} items. A subject or object is
    either a plain string or a dict whose 'value' is the template string.
    Templates may contain '#ref:NAME#' / '#obj_ref:NAME#' placeholders
    (expanded from the mapping itself) and '#csv:FIELD#' placeholders
    (expanded from each CSV row).

    mapfilename: path of the JSON mapping file.
    formFields:  CSV field names whose '#csv:FIELD#' placeholders are filled.
    csv_dicts:   iterable of dicts, one per CSV row.
    outfilename: path of the Turtle file to create or overwrite.

    Raises:
        ValueError: if the reference placeholders never settle, i.e. they
            refer to each other in a cycle.
    """
    # Read the mapping as UTF-8 instead of the locale's default encoding.
    with open(mapfilename, encoding='utf-8') as mapfile:
        triple_blocks = json.load(mapfile)

    allRefs = getRefs(triple_blocks)
    # Subject references first, then object references.
    ref_nodes = allRefs['subjects_with_refs'] + allRefs['objects_with_refs']

    # Referenced values may themselves contain placeholders: expand them in
    # place until a full pass changes nothing. A non-cyclic set of N
    # references settles within N passes, so exceeding that means a cycle
    # (which would otherwise loop forever).
    for _ in range(len(ref_nodes) + 1):
        changed = False
        for ref in ref_nodes:
            expanded = replaceRefs(allRefs, ref['value'])
            if expanded != ref['value']:
                ref['value'] = expanded
                changed = True
        if not changed:
            break
    else:
        raise ValueError(
            'Circular #ref:/#obj_ref: placeholders in mapping file '
            + str(mapfilename)
        )

    # Reference expansion does not depend on the CSV row, so it is done once
    # per mapping entry rather than once per row.
    templates = []
    for entry in triple_blocks:
        subject = entry['subject']
        if isinstance(subject, dict):
            subject = subject['value']
        statements = []
        for content in entry['content']:
            object1 = content['object']
            if isinstance(object1, dict):
                object1 = object1['value']
            statements.append(
                (content['predicate'], replaceRefs(allRefs, object1))
            )
        templates.append((replaceRefs(allRefs, subject), statements))

    # Write the output as UTF-8 regardless of the platform default.
    with open(outfilename, 'w', encoding='utf-8') as outputfile:
        writeTTLHeader(outputfile)
        for csvrow in csv_dicts:
            for subject_template, statements in templates:
                subject = replace_csv_values(formFields, csvrow, subject_template)
                for predicate, object_template in statements:
                    object1 = replace_csv_values(formFields, csvrow, object_template)
                    outputfile.write(triple(subject, predicate, object1))
                    outputfile.write(closeLine)
                # Blank separator line; whitespace only, so it does not
                # change the triples.
                # NOTE(review): assumed to follow every mapping entry -
                # confirm against the intended output layout.
                outputfile.write('\n')
-
def getRefs(triple_blocks: list) -> dict:
    """Collect the subject and object nodes of the mapping that declare a 'ref'.

    triple_blocks: list of mapping entries; each entry has a 'content' list
        and normally a 'subject'. Subjects and objects may be plain strings
        or dicts; only dicts with a non-None 'ref' are collected.

    Returns a dict with two lists, 'subjects_with_refs' and
    'objects_with_refs'. The lists hold the very same dict objects found in
    triple_blocks (not copies), so changing a collected node also changes
    the mapping.

    Raises:
        KeyError: if an entry has no 'content' key.
    """
    def _declares_ref(node):
        # Plain strings and dicts without a usable 'ref' carry no reference.
        return isinstance(node, dict) and node.get('ref') is not None

    subjects_with_refs = [
        block['subject']
        for block in triple_blocks
        if _declares_ref(block.get('subject'))
    ]

    objects_with_refs = [
        content['object']
        for block in triple_blocks
        for content in block['content']
        if _declares_ref(content.get('object'))
    ]

    return {
        'subjects_with_refs': subjects_with_refs,
        'objects_with_refs': objects_with_refs,
    }
def replace_csv_values(formFields: list, csvrow: dict, val: str) -> str:
    """Fill the '#csv:FIELD#' placeholders of val with values from csvrow.

    formFields: field names to substitute, processed in the given order.
    csvrow:     one CSV row as a dict mapping field name to cell text.
    val:        template string.

    Fields whose placeholder does not occur in the template are skipped, so
    they need not be present in csvrow. A cell that is None (what
    csv.DictReader supplies for a row shorter than the header) is treated
    like an empty cell. Replacement is sequential: text inserted for one
    field is still scanned for the placeholders of later fields.

    Raises:
        KeyError: if a placeholder is present but its field is not in csvrow.
    """
    outStr = val
    for field in formFields:
        placeholder = '#csv:' + field + '#'
        if placeholder not in outStr:
            continue
        cell = csvrow[field]
        outStr = outStr.replace(placeholder, '' if cell is None else cell)
    return outStr
def replaceRefs(allRefs, val):
    """Expand the reference placeholders found in val.

    allRefs: dict with the lists 'subjects_with_refs' and 'objects_with_refs';
        every list item is a dict with a 'ref' name and a 'value' string.
    val:     template string.

    '#ref:NAME#' is replaced by the value of the subject reference NAME and
    '#obj_ref:NAME#' by the value of the object reference NAME. Subject
    references are applied first, then object references, each in list
    order. This is a single pass: placeholders brought in by a replacement
    are expanded only if their reference comes later in that order.
    """
    sources = (
        ('#ref:', 'subjects_with_refs'),
        ('#obj_ref:', 'objects_with_refs'),
    )
    expanded = val
    for marker, group in sources:
        for node in allRefs[group]:
            expanded = expanded.replace(marker + node['ref'] + '#', node['value'])
    return expanded
|