In [31]:
# Utilities to read/write csv files
import csv
# Utilities to handle character encodings
import unicodedata
# Ordered Dicts
from collections import OrderedDict

import json


# OPTIONAL IMPORTS

# For timestamping/simple speed tests
from datetime import datetime
# Random number generator
from random import *
# System & command line utilities
import sys
# Json for the dictionary
import json

In [32]:
# Folder the CSV files are read from, and folder the TTL files are written to.
# Both end with '/' because file names are appended by plain concatenation.
import_dir, export_dir = (
    '/Users/federicaspinelli/Google Drive/OVI:CNR/CSV/ASPO/datini/',
    '/Users/federicaspinelli/Google Drive/OVI:CNR/RDF/ASPO/datini/',
)

In [33]:
# Custom class to store URIs + related infos for the ontologies/repositories

class RDFcoords:
    """Plain record describing one ontology/repository namespace.

    Attributes are set verbatim from the constructor arguments:
      uri    -- the namespace URI
      prefix -- the short prefix used in place of the URI (e.g. 'dt:')
      code   -- optional code associated with the namespace; None when omitted
    """

    def __init__(self, uri, prefix, code = None):
        # No validation or normalisation: values are stored as given
        self.uri, self.prefix, self.code = uri, prefix, code


# Namespace of the repository the entries belong to
datiniCoords = RDFcoords(uri='', prefix='dt:')

# Namespaces used as predicates (W3/CIDOC)
hasTypeCoords = RDFcoords(uri='', prefix='tp:')
carriesCoords = RDFcoords(uri='', prefix='ca:')
identifiedByCoords = RDFcoords(uri='', prefix='ib:')
labelCoords = RDFcoords(uri='', prefix='lb:')

# Namespaces of CIDOC classes; the third value is the class code (E22, E73, ...)
manMadeObjectCoords = RDFcoords(uri='', prefix='mo:', code='E22')
informationObjectCoords = RDFcoords(uri='', prefix='io:', code='E73')
titleCoords = RDFcoords(uri='', prefix='ti:', code='E35')
placeAppellationCoords = RDFcoords(uri='', prefix='pa:', code='E44')
identifierCoords = RDFcoords(uri='', prefix='id:', code='E42')

In [34]:
# Basic functions for triples / shortened triples in TTL format

def triple(subject, predicate, object1):
    """Return 'subject predicate object1' as one space-separated string.

    No line ending is appended; the caller adds the terminator it needs.
    """
    return ' '.join((subject, predicate, object1))

def doublet(predicate, object1):
    """Return ' predicate object1': a shortened triple with the subject omitted.

    The result starts with a single space and carries no line ending.
    """
    # The empty first item yields the leading space
    return ' '.join(('', predicate, object1))

def singlet(object1):
    """Return ' object1': a shortened triple reduced to the object alone.

    The result starts with a single space and carries no line ending.
    """
    return ' ' + object1

# Statement terminators in TTL format, each followed by a newline:
#   closeLine     -- ' .' ends the statement
#   continueLine1 -- ' ;' the next predicate/object pair keeps the same subject
#   continueLine2 -- ' ,' the next object keeps the same subject and predicate
closeLine = ' .\n'
continueLine1 = ' ;\n'
continueLine2 = ' ,\n'

In [35]:
def writeTTLHeader(output):
    """Write the '@prefix' declarations of a TTL file, then one blank line.

    output -- any object with a write(str) method (e.g. an open text file).

    One line of the form '@prefix <prefix> <uri> .' is written for each of
    the namespaces listed below, in that order.
    NOTE(review): the uri is emitted verbatim; Turtle expects the IRI wrapped
    in angle brackets, so confirm the RDFcoords uris already carry them.
    """
    declared_namespaces = (
        datiniCoords,
        hasTypeCoords,
        manMadeObjectCoords,
        carriesCoords,
        informationObjectCoords,
        identifiedByCoords,
        titleCoords,
        labelCoords,
        identifierCoords,
    )
    for coords in declared_namespaces:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    # Blank line separating the header from the triples that follow
    output.write('\n')


In [36]:
filePrefix = 'data_'
fileType = 'file'
# Upper bound used to stop early when only a few entries are wanted for testing
max_entries = 1000000000


def _ttl_string(text):
    """Return `text` as a double-quoted Turtle string literal.

    Backslashes and double quotes are escaped, and so are line feeds and
    carriage returns: a short ("...") Turtle literal may not contain raw
    line breaks, while a CSV field may (the input is opened with newline=""
    so the csv module hands them over untouched).
    """
    escaped = (
        text.replace('\\', '\\\\')   # must come first, before adding new backslashes
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
    )
    return '"' + escaped + '"'


# Reads <import_dir><filePrefix><fileType>.csv and writes the matching .ttl file.
# Turtle documents are UTF-8, so the output encoding is set explicitly instead
# of depending on the platform default. The input keeps the default encoding.
with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, \
        open(export_dir + filePrefix + fileType + '.ttl', 'w', encoding='utf-8') as output:
    reader = csv.DictReader(csv_file)
    writeTTLHeader(output)
    # ii counts the rows read so far (1-based, the skipped row included); it is
    # used to process a limited number of entries for testing purposes
    ii = 0
    for ii, row in enumerate(reader, start=1):
        # Skip the first row as it carries info we don't want to triplify.
        # DictReader has already consumed the header line, so this is the
        # first row after the header.
        if ii == 1:
            continue

        entry = datiniCoords.prefix + row['id']
        info_object = entry + informationObjectCoords.code

        # E22 Man Made Object & E73 Information Object -- written for every entry
        output.write(triple(entry, hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine)
        output.write(triple(entry, carriesCoords.prefix, info_object) + closeLine)
        output.write(triple(info_object, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine)

        # Title-related triples, written unless 'titolo_aspo' holds the text 'None'
        # (presumably how the CSV export marks a missing title -- verify)
        if row['titolo_aspo'] != 'None':
            title = entry + titleCoords.code
            output.write(triple(info_object, identifiedByCoords.prefix, title) + closeLine)
            output.write(triple(title, hasTypeCoords.prefix, titleCoords.prefix) + closeLine)
            output.write(triple(title, labelCoords.prefix, _ttl_string(row['titolo_aspo'])) + closeLine)

        # Blank line between entries
        output.write('\n')

        # Limit number of entries processed (if desired)
        if ii > max_entries:
            break
 