## IMPORTS
# Utilities to read/write csv files
import csv, json
import io
from operator import truediv
# Custom class to store URIs + related info for the ontologies/repositories
class RDFcoords:
    """Plain holder for the coordinates of an ontology/repository.

    Attributes:
        uri: value stored as given by the caller.
        prefix: value stored as given by the caller (e.g. ``'rdf:'``).
        code: optional extra value, ``None`` when not supplied.
    """

    def __init__(self, uri, prefix, code=None):
        self.uri, self.prefix, self.code = uri, prefix, code
# Repositories: one RDFcoords instance per namespace used in the generated triples.
# NOTE(review): every uri below is the empty string, so the '@prefix' lines built
# from these objects carry no IRI - confirm whether the IRIs were lost upstream.
museoCoords = RDFcoords(uri='', prefix='mpp:')
autCoords = RDFcoords(uri='', prefix='aut:')
foafCoords = RDFcoords(uri='', prefix='foaf:')
cidocCoords = RDFcoords(uri='', prefix='crm:')
aatCoords = RDFcoords(uri='', prefix='aat:')
nsCoords = RDFcoords(uri='', prefix='rdf:')
schemaCoords = RDFcoords(uri='', prefix='schema:')
rdfsCoords = RDFcoords(uri='', prefix='rdfs:')
# Basic utilities to format triples / shortened triples in TTL format
#
# Format a full triple (subject, predicate and object)
def triple(subject, predicate, object1):
    """Return the three terms separated by single spaces, without a line ending."""
    return ' '.join((subject, predicate, object1))
# Format an entry of a predicate list (predicate and object, no subject)
def doublet(predicate, object1):
    """Return ``predicate object1`` preceded by a single leading space."""
    return ' ' + ' '.join((predicate, object1))
# Format an entry of an object list (object only)
def singlet(object1):
    """Return the object preceded by a single leading space."""
    return ' ' + object1
# Line endings
# Goes before a predicate list, i.e. when the FOLLOWING triple has the same subject
continueLine1 = ' ;\n'
# Goes before an object list, i.e. when the FOLLOWING triple has the same subject and predicate
continueLine2 = ' ,\n'
# Ends a triple / a block of triples
closeLine = ' .\n'
def writeTTLHeader(output):
    """Write one ``@prefix`` line per known repository, followed by a blank line.

    Args:
        output: object exposing ``write(str)``, e.g. an open text file.
    """
    # The tuple order is the order in which the declarations are written.
    declared = (
        museoCoords,
        foafCoords,
        autCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        rdfsCoords,
    )
    for coords in declared:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')
# Row-index threshold checked by parse() to stop early; None disables the limit.
max_entries = None
def parsefromfile(mapfilename, infile, outfilename):
    """Triplify CSV content held in memory, driven by a JSON mapping file.

    Args:
        mapfilename: path of the JSON mapping file; its decoded content is
            handed to ``parse`` unchanged.
        infile: the CSV content as an object with ``decode()`` (e.g. ``bytes``);
            it is decoded with the default codec of ``decode()``.
        outfilename: path of the output file, forwarded to ``parse``.
    """
    text = infile.decode()
    # Hand the text to csv through StringIO(newline=''): unlike str.splitlines(),
    # this keeps the line endings, so newlines inside quoted fields survive and
    # rows are only split on \n, \r and \r\n.
    reader = csv.DictReader(io.StringIO(text, newline=''), skipinitialspace=True)
    csv_dicts = [dict(row) for row in reader]
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(mapfilename, encoding='utf-8') as mapfile:
        json_dicts = json.load(mapfile)
    parse(json_dicts, csv_dicts, outfilename)
def _columnvalue(csvrow, node):
    """Return the cell(s) of ``csvrow`` named by ``node["colonna"]`` (a name or a list of names)."""
    column = node["colonna"]
    if isinstance(column, list):
        return [csvrow[col] for col in column]
    return csvrow[column]


def parse(json_dicts, csv_dicts, outfilename):
    """Write to a Turtle file the triples that a mapping describes for each CSV row.

    For every row except the first, and for every mapping node whose value is
    not empty, a type triple is written. When the node names a parent through
    "sottoelementodi" and a node with that "identificatore" exists, a second
    triple links the parent URI to the node URI through "relazione".

    Args:
        json_dicts: list of mapping nodes (dicts). The keys read here are
            "colonna", "uri", "tipo", "sottoelementodi", "relazione" and, on
            candidate parents, "identificatore".
        csv_dicts: list of dicts mapping column name to cell value, one per row.
        outfilename: path of the output file; it is overwritten.

    Raises:
        KeyError: if a node lacks one of the keys read, or names a column
            missing from a row.
    """
    # Turtle documents are UTF-8, so do not depend on the locale default encoding.
    with open(outfilename, 'w', encoding='utf-8') as outputfile:
        writeTTLHeader(outputfile)
        for ii, csvrow in enumerate(csv_dicts):
            # Skip the first row as it carries info we don't want to triplify
            if ii == 0:
                continue
            for node in json_dicts:
                csvvalue = _columnvalue(csvrow, node)
                if checkEmptyValue(csvvalue):
                    continue
                node_uri = settripleuri(csvvalue, node["uri"])
                type_triple = triple(node_uri, nsCoords.prefix + 'type', node["tipo"])
                outputfile.write(type_triple + closeLine)
                if node["sottoelementodi"] == '':
                    continue
                # The first node whose identifier matches is the parent.
                parent = next(
                    (el for el in json_dicts
                     if el["identificatore"] == node["sottoelementodi"]),
                    None,
                )
                if parent is None:
                    continue
                parent_uri = settripleuri(_columnvalue(csvrow, parent), parent["uri"])
                link_triple = triple(parent_uri, node["relazione"], node_uri)
                outputfile.write(link_triple + closeLine)
            # Blank line after the triples of each processed row
            outputfile.write('\n')
            # To limit the number of entries processed (if desired for testing
            # purposes): stop once a row with index above max_entries is done.
            if max_entries is not None and ii > max_entries:
                break
def settripleuri(csvvalue, nodeuri):
    """Fill the placeholders of a URI template and wrap the result in double quotes.

    Args:
        csvvalue: either a single cell value, substituted for ``$VALORE_CSV$``,
            or a list of cell values where item ``ii`` is substituted for
            ``$VALORE_CSV_<ii>$``.
        nodeuri: the template string.

    Returns:
        The template between double quotes with the placeholders replaced.
        In the list form an empty item is rendered as ``N/A``.

    ``None`` (what ``csv.DictReader`` yields for cells missing from a short
    row) is handled like an empty string instead of raising ``TypeError``.
    """
    output = '"' + nodeuri + '"'
    if isinstance(csvvalue, list):
        for ii, value in enumerate(csvvalue):
            replacement = 'N/A' if value is None or value == '' else value
            output = output.replace('$VALORE_CSV_' + str(ii) + '$', replacement)
    else:
        replacement = '' if csvvalue is None else csvvalue
        output = output.replace('$VALORE_CSV$', replacement)
    return output
def checkEmptyValue(csvvalue):
    """Tell whether a cell value, or every item of a list of cell values, is empty.

    Both ``''`` and ``None`` count as empty; ``None`` is what ``csv.DictReader``
    yields for cells missing from a short row. A list with no items is empty.

    Args:
        csvvalue: a single cell value or a list of cell values.

    Returns:
        True when there is nothing to triplify, False otherwise.
    """
    if isinstance(csvvalue, list):
        return all(item is None or item == '' for item in csvvalue)
    return csvvalue is None or csvvalue == ''