## IMPORTS

# Utilities to read/write csv files
import csv
import io
import json
from operator import truediv  # NOTE(review): looks unused in the visible code; kept in case other code relies on it


# Custom class to store URIs + related infos for the ontologies/repositories
class RDFcoords:
    """Coordinates of an RDF vocabulary/repository.

    Attributes:
        uri: namespace URI, written verbatim after the prefix in the TTL header.
        prefix: prefix label including the trailing colon (e.g. ``'rdf:'``).
        code: optional extra identifier; not used by the code in this module.
    """

    def __init__(self, uri, prefix, code = None):
        self.uri = uri
        self.prefix = prefix
        self.code = code


# Repositories
museoCoords = RDFcoords('', 'mpp:')
autCoords = RDFcoords('', 'aut:')
foafCoords = RDFcoords('', 'foaf:')
cidocCoords = RDFcoords('', 'crm:')
aatCoords = RDFcoords('', 'aat:')
nsCoords = RDFcoords('', 'rdf:')
schemaCoords = RDFcoords('', 'schema:')
rdfsCoords = RDFcoords('', 'rdfs:')


# Basic utilities to format triples / shortened triples in TTL format
#
# Format full triple
def triple(subject, predicate, object1):
    """Return ``subject predicate object1`` separated by single spaces."""
    line = subject + ' ' + predicate + ' ' + object1
    return line


# Format entry in predicate list (no subject)
def doublet(predicate, object1):
    """Return a predicate/object pair with a leading space (no subject)."""
    line = ' ' + predicate + ' ' + object1
    return line


# Format entry in object list (object only)
def singlet(object1):
    """Return a lone object with a leading space (no subject, no predicate)."""
    line = ' ' + object1
    return line


# Line endings
continueLine1 = ' ;\n'  # Before a predicate list, that is if the FOLLOWING triple has the same subject
continueLine2 = ' ,\n'  # Before an object list, that is if the FOLLOWING triple has the same subject and predicate
closeLine = ' .\n'      # To end a triple / a triples block


def writeTTLHeader(output):
    """Write one ``@prefix`` declaration per known repository, then a blank line.

    Args:
        output: writable text stream (anything exposing ``write(str)``).
    """
    # The order below is the order in which the declarations appear in the file.
    header_coords = (
        museoCoords,
        foafCoords,
        autCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        rdfsCoords,
    )
    for coords in header_coords:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')


# Maximum number of CSV data rows to triplify (None = no limit); meant for testing
max_entries = None


def parsefromfile(mapfilename, infile, outfilename):
    """Read a JSON mapping file and raw CSV bytes, then write the TTL output.

    Args:
        mapfilename: path of the JSON file holding the list of mapping nodes.
        infile: CSV content as UTF-8 encoded ``bytes`` (a leading BOM is tolerated).
        outfilename: path of the TTL file to create/overwrite.
    """
    # 'utf-8-sig' strips a leading BOM so it does not end up glued to the
    # name of the first column (which would break the mapping lookups).
    inputFile = infile.decode('utf-8-sig')
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing line breaks survive intact. restval='' makes rows that
    # are shorter than the header yield empty strings instead of None.
    reader = csv.DictReader(
        io.StringIO(inputFile, newline=''),
        skipinitialspace=True,
        restval='',
    )
    csv_dicts = [dict(row) for row in reader]
    with open(mapfilename, encoding='utf-8') as mapfile:
        json_dicts = json.load(mapfile)
    parse(json_dicts, csv_dicts, outfilename)


def _columnValue(csvrow, column):
    """Return the CSV value(s) for a mapping "colonna" entry.

    A list of column names yields the list of matching values; a single
    column name yields a single value.
    """
    if isinstance(column, list):
        return [csvrow[col] for col in column]
    return csvrow[column]


def parse(json_dicts, csv_dicts, outfilename):
    """Write the TTL triples for every CSV row according to the mapping.

    For each mapping node whose CSV value is not empty, a ``rdf:type`` triple
    is written; when the node names a parent ("sottoelementodi") that exists
    in the mapping, a second triple links the parent to the node through the
    node's "relazione" predicate. A blank line closes each processed row.

    Args:
        json_dicts: list of mapping nodes; the keys read here are "colonna",
            "uri", "tipo", "sottoelementodi", "relazione" and, for parent
            lookup, "identificatore".
        csv_dicts: list of CSV rows as dicts (column name -> value).
        outfilename: path of the TTL file to create/overwrite.

    Raises:
        KeyError: if a mapping node lacks one of the keys above or refers to
            a column that is missing from a CSV row.
    """
    # Turtle documents are UTF-8, so do not depend on the platform default.
    with open(outfilename, 'w', encoding='utf-8') as outputfile:
        writeTTLHeader(outputfile)

        for ii, csvrow in enumerate(csv_dicts):
            # Skip the first line as it carries info we don't want to triplify
            if ii == 0:
                continue

            # To limit number of entries processed (if desired for testing
            # purposes). Checked BEFORE processing so that exactly
            # max_entries data rows are emitted.
            if max_entries is not None and ii > max_entries:
                break

            for node in json_dicts:
                csvvalue = _columnValue(csvrow, node["colonna"])
                if checkEmptyValue(csvvalue):
                    continue

                node_uri = settripleuri(csvvalue, node["uri"])
                line = triple(node_uri, nsCoords.prefix + 'type', node["tipo"]) + closeLine
                outputfile.write(line)

                if node["sottoelementodi"] == '':
                    continue

                # First mapping node whose identifier matches the parent reference
                parent = next(
                    (el for el in json_dicts
                     if el["identificatore"] == node["sottoelementodi"]),
                    None,
                )
                if parent is None:
                    continue

                parent_csvvalue = _columnValue(csvrow, parent["colonna"])
                parent_uri = settripleuri(parent_csvvalue, parent["uri"])
                line = triple(parent_uri, node["relazione"], node_uri) + closeLine
                outputfile.write(line)

            outputfile.write('\n')


def settripleuri(csvvalue, nodeuri):
    """Fill the placeholders of a URI template and wrap it in double quotes.

    Args:
        csvvalue: a single string, substituted for ``$VALORE_CSV$``, or a list
            of strings, where item *i* is substituted for ``$VALORE_CSV_i$``
            (empty items become ``N/A``).
        nodeuri: URI template taken from the mapping.

    Returns:
        The quoted template with every matching placeholder replaced.
    """
    output = '"' + nodeuri + '"'
    if isinstance(csvvalue, list):
        for ii, value in enumerate(csvvalue):
            replacement = 'N/A' if value == '' else value
            output = output.replace('$VALORE_CSV_' + str(ii) + '$', replacement)
    else:
        output = output.replace('$VALORE_CSV$', csvvalue)
    return output


def checkEmptyValue(csvvalue):
    """Return True when the value is ``''`` or a list made only of ``''``.

    An empty list counts as empty too.
    """
    if isinstance(csvvalue, list):
        return all(el == '' for el in csvvalue)
    return csvvalue == ''