# TEAMOVI / Semantization_Interface

## IMPORTS

# Utilities to read/write csv files
import csv
import io
import json
from operator import truediv


# Custom class to store URIs + related infos for the ontologies/repositories

class RDFcoords:
    """Plain record holding the coordinates of one ontology/repository namespace.

    The constructor stores its arguments unchanged on the instance:

    * ``uri``    -- namespace IRI (the module-level constants pass it already
      wrapped in angle brackets)
    * ``prefix`` -- prefix label (the module-level constants include the
      trailing colon)
    * ``code``   -- optional extra value, ``None`` when not given
    """

    def __init__(self, uri, prefix, code = None):
        self.uri, self.prefix, self.code = uri, prefix, code


# Repositories
# Namespaces under the palazzopretorio.prato.it / archiviodistato.prato.it sites, plus FOAF
museoCoords = RDFcoords(uri='<https://palazzopretorio.prato.it/it/le-opere/alcuni-capolavori/>', prefix='mpp:')
aspoCoords = RDFcoords(uri='<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', prefix='aspo:')
autCoords = RDFcoords(uri='<https://palazzopretorio.prato.it/it/opere/autori/>', prefix='aut:')
foafCoords = RDFcoords(uri='<http://xmlns.com/foaf/0.1/>', prefix='foaf:')

# CIDOC-CRM, Getty AAT, RDF, schema.org, RDFS and OWL namespaces
cidocCoords = RDFcoords(uri='<http://www.cidoc-crm.org/cidoc-crm/>', prefix='crm:')
aatCoords = RDFcoords(uri='<http://vocab.getty.edu/aat/>', prefix='aat:')
nsCoords = RDFcoords(uri='<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', prefix='rdf:')
schemaCoords = RDFcoords(uri='<http://www.schema.org/>', prefix='schema:')
rdfsCoords = RDFcoords(uri='<http://www.w3.org/2000/01/rdf-schema#>', prefix='rdfs:')
owlCoords = RDFcoords(uri='<http://www.w3.org/2002/07/owl#>', prefix='owl:')


# Basic utilities to format triples / shortened triples in TTL format
#
# Format full triple
def triple(subject, predicate, object1):
    """Return the three terms joined by single spaces.

    No line ending is appended; the caller adds the terminator it needs.
    """
    return ' '.join((subject, predicate, object1))

# Format entry in predicate list (no subject)
def doublet(predicate, object1):
    """Return ``predicate object1`` indented by four spaces (no subject).

    No line ending is appended.
    """
    indent = ' ' * 4
    return ''.join((indent, predicate, ' ', object1))

# Format entry in object list (object only)
def singlet(object1):
    """Return ``object1`` indented by eight spaces (object only).

    No line ending is appended.
    """
    indent = ' ' * 8
    return indent + object1

# Line endings: each is a Turtle punctuation mark preceded by a space and followed by a newline
continueLine1 = ' ;\n' # Ends a line when the FOLLOWING entry has the same subject (predicate list continues)
continueLine2 = ' ,\n' # Ends a line when the FOLLOWING entry has the same subject and predicate (object list continues)
closeLine = ' .\n' # Ends a triple / a block of triples


def writeTTLHeader(output):
    """Write one ``@prefix`` declaration per known namespace to *output*.

    *output* only needs a ``write(str)`` method. Each declaration is written
    with its own ``write`` call and ends with ``closeLine``; a single newline
    is written after the last declaration.
    """
    # The tuple order is the order in which the declarations are emitted.
    declared = (
        museoCoords,
        foafCoords,
        autCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        rdfsCoords,
        owlCoords,
        aspoCoords,
    )
    for coords in declared:
        output.write(' '.join(('@prefix', coords.prefix, coords.uri)) + closeLine)
    output.write('\n')

# NOTE(review): not referenced by any function visible in this file; presumably
# meant as an optional cap on the number of processed entries -- confirm before relying on it.
max_entries = None

def parsefromfile(mapfilename, formFields, infile, outfilename):
    """Decode in-memory CSV bytes and convert them to Turtle via ``parse``.

    Args:
        mapfilename: path of the JSON mapping file; passed through to ``parse``.
        formFields: CSV column names to substitute; passed through to ``parse``.
        infile: the CSV content as UTF-8 encoded bytes (anything with a
            ``decode`` method accepting an encoding name).
        outfilename: path of the Turtle file to write; passed through to ``parse``.

    Raises:
        UnicodeDecodeError: if *infile* is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 as well, but drops a leading byte-order
    # mark so it cannot end up glued to the first column name.
    text = infile.decode('utf-8-sig')

    # Hand the reader a file-like object opened with newline='' instead of
    # text.splitlines(): splitlines() strips the line breaks that belong to
    # quoted multi-line fields and also splits on characters such as \x0b,
    # \x0c or \u2028 that are not CSV record separators.
    reader = csv.DictReader(io.StringIO(text, newline=''), skipinitialspace=True)
    csv_dicts = [dict(row) for row in reader]

    parse(mapfilename, formFields, csv_dicts, outfilename)


def parse(mapfilename, formFields, csv_dicts, outfilename):
    """Write a Turtle file with one group of triples per CSV row.

    The JSON mapping file holds a list of blocks. Each block has a
    ``subject`` and a ``content`` list of ``predicate``/``object`` entries;
    a subject or object is either a plain string or a dict with a ``value``
    key (and optionally a ``ref`` key, see ``getRefs``). Reference
    placeholders are expanded with ``replaceRefs`` and CSV placeholders with
    ``replace_csv_values``.

    Args:
        mapfilename: path of the JSON mapping file (read as UTF-8).
        formFields: CSV column names whose placeholders are substituted.
        csv_dicts: iterable of row dicts (column name -> cell value).
        outfilename: path of the Turtle file to create (written as UTF-8).

    Raises:
        ValueError: if the references in the mapping file refer to each
            other in a cycle.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(mapfilename, encoding='utf-8') as mapfile:
        triple_blocks = json.load(mapfile)

    allRefs = getRefs(triple_blocks)
    _resolve_nested_refs(allRefs)

    # Expanding references does not depend on the CSV row, so do it once per
    # mapping entry here instead of once per entry and row in the loop below.
    templates = []
    for entry in triple_blocks:
        subject_template = replaceRefs(allRefs, _template_of(entry['subject']))
        contents = []
        for content in entry['content']:
            object_template = replaceRefs(allRefs, _template_of(content['object']))
            contents.append((content['predicate'], object_template))
        templates.append((subject_template, contents))

    # Turtle documents are UTF-8; the platform default encoding (e.g. cp1252
    # on Windows) would corrupt or reject non-ASCII CSV values.
    with open(outfilename, 'w', encoding='utf-8') as outputfile:

        writeTTLHeader(outputfile)

        for csvrow in csv_dicts:
            for subject_template, contents in templates:
                subject = replace_csv_values(formFields, csvrow, subject_template)
                for predicate, object_template in contents:
                    object1 = replace_csv_values(formFields, csvrow, object_template)
                    outputfile.write(triple(subject, predicate, object1))
                    outputfile.write(closeLine)

            # Blank line between the triples of consecutive CSV rows.
            outputfile.write('\n')


def _template_of(node):
    """Return the template string of a mapping node (plain string or dict with 'value')."""
    return node['value'] if isinstance(node, dict) else node


def _resolve_nested_refs(allRefs):
    """Expand, in place, reference placeholders inside the referenced values themselves."""
    refs = allRefs['subjects_with_refs'] + allRefs['objects_with_refs']
    # Every pass resolves at least one more level of nesting, so an acyclic
    # set of references settles within len(refs) passes. The extra pass only
    # confirms that nothing changes any more.
    for _ in range(len(refs) + 1):
        changed = False
        for ref in refs:
            newVal = replaceRefs(allRefs, ref['value'])
            if newVal != ref['value']:
                ref['value'] = newVal
                changed = True
        if not changed:
            return
    # Still changing: some reference (directly or indirectly) contains
    # itself. Without this guard the loop would grow the values forever.
    raise ValueError('cyclic reference between mapping entries; cannot resolve placeholders')
                

def getRefs(triple_blocks: list) -> dict:
    """Collect every subject/object node of the mapping that carries a reference name.

    A node is collected when it can be indexed with ``'ref'`` and that value
    is not ``None``. The collected items are the very same objects found in
    *triple_blocks* (not copies), so changing a collected node changes the
    mapping as well.

    Args:
        triple_blocks: list of mapping blocks, each with a ``content`` list
            and, normally, a ``subject``.

    Returns:
        ``{'subjects_with_refs': [...], 'objects_with_refs': [...]}``, each
        list in mapping order.

    Raises:
        KeyError: if a block has no ``content`` key.
    """
    subjects_with_refs = []
    objects_with_refs = []
    for block in triple_blocks:
        subject = _node_with_ref(block, 'subject')
        if subject is not None:
            subjects_with_refs.append(subject)

        for content in block['content']:
            object1 = _node_with_ref(content, 'object')
            if object1 is not None:
                objects_with_refs.append(object1)

    return {'subjects_with_refs': subjects_with_refs, 'objects_with_refs': objects_with_refs}


def _node_with_ref(container, key):
    """Return container[key] if it carries a non-None 'ref', otherwise None."""
    try:
        node = container[key]
        ref = node['ref']
    except (KeyError, TypeError, IndexError):
        # Missing key, or a plain string / None where a dict was possible:
        # the node simply has no reference. Anything else is a real error
        # and must propagate (a bare ``except`` would also hide
        # KeyboardInterrupt and SystemExit).
        return None
    return node if ref is not None else None


def replace_csv_values(formFields: list, csvrow: dict, val: str):
    """Substitute every ``#csv:<field>#`` placeholder in *val* with the row's cell.

    Args:
        formFields: column names to substitute; each one is looked up in
            *csvrow* whether or not its placeholder occurs in *val*.
        csvrow: one CSV row as a dict (column name -> cell value).
        val: template string.

    Returns:
        *val* with the placeholders of all *formFields* replaced.
        Placeholders of columns not listed in *formFields* are left as is.
        The cell text is inserted verbatim, without any Turtle escaping.

    Raises:
        KeyError: if a name in *formFields* is not a key of *csvrow*.
    """
    outStr = val
    for field in formFields:
        cell = csvrow[field]
        # csv.DictReader fills the cells missing from a short row with None,
        # and rows built elsewhere may hold non-string values; str.replace
        # accepts neither, so normalise to text first.
        replacement = '' if cell is None else str(cell)
        outStr = outStr.replace('#csv:' + field + '#', replacement)

    return outStr


def replaceRefs(allRefs, val):
    """Return *val* with reference placeholders replaced by the referenced values.

    ``#ref:<name>#`` is replaced using the entries of
    ``allRefs['subjects_with_refs']`` and ``#obj_ref:<name>#`` using those of
    ``allRefs['objects_with_refs']``; each entry supplies ``'ref'`` (the name)
    and ``'value'`` (the replacement text). Placeholders with no matching
    entry are left untouched.
    """
    result = val
    # Subject references are expanded before object references.
    groups = (
        ('subjects_with_refs', '#ref:'),
        ('objects_with_refs', '#obj_ref:'),
    )
    for group, marker in groups:
        for entry in allRefs[group]:
            placeholder = marker + entry['ref'] + '#'
            result = result.replace(placeholder, entry['value'])

    return result