123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322 |
- # Utilities to read/write csv files
- import csv
- # Utilities to handle character encodings
- import unicodedata
- # Ordered Dicts
- from collections import OrderedDict
- import json
- # OPTIONAL IMPORTS
- # For timestamping/simple speed tests
- from datetime import datetime
- # Random number generator
- from random import *
- # System & command line utilities
- import sys
- # Json for the dictionary
- import json
# Folder holding the source CSV tables. Exported files are written to the same
# folder, with 'E9_' prepended to their file name.
import_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Ospedale/mod/'
export_dir = import_dir + 'E9_'
# Custom class to store URIs + related infos for the ontologies/repositories
class RDFcoords:
    """Coordinates of one vocabulary: its URI, its prefix and an optional code.

    Attributes:
        uri: The URI, stored exactly as given (callers in this file pass it
            already wrapped in angle brackets).
        prefix: The short prefix, stored exactly as given (e.g. ``'crm:'``).
        code: Optional extra identifier; ``None`` when not supplied.
    """

    def __init__(self, uri, prefix, code=None):
        self.uri, self.prefix, self.code = uri, prefix, code
# Repositories: URI and prefix of every vocabulary used in the export
museoCoords = RDFcoords(
    uri='<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>',
    prefix='mpp:')
cidocCoords = RDFcoords(
    uri='<http://www.cidoc-crm.org/cidoc-crm/>',
    prefix='crm:')
aatCoords = RDFcoords(
    uri='<http://vocab.getty.edu/aat/>',
    prefix='aat:')
nsCoords = RDFcoords(
    uri='<http://www.w3.org/1999/02/22-rdf-syntax-ns#>',
    prefix='rdf:')
schemaCoords = RDFcoords(
    uri='<http://www.w3.org/2000/01/rdf-schema#>',
    prefix='rdfs:')
# Basic functions for triples / shortened triples in TTL format
def triple(subject, predicate, object1):
    """Return subject, predicate and object joined by single spaces.

    No line terminator is appended; the caller adds the line ending.
    """
    return ' '.join((subject, predicate, object1))
def doublet(predicate, object1):
    """Return a predicate/object pair preceded by a single space.

    This is the shortened form of a triple whose subject is omitted.
    """
    return ' '.join(('', predicate, object1))
def singlet(object1):
    """Return the object alone, preceded by a single space."""
    return ' '.join(('', object1))
# Line endings in TTL format, each followed by a newline:
#   continueLine1 -> ' ;'   continueLine2 -> ' ,'   closeLine -> ' .'
continueLine1, continueLine2, closeLine = ' ;\n', ' ,\n', ' .\n'
def writeTTLHeader(output):
    """Write the '@prefix' declarations followed by one blank line.

    One declaration is written per vocabulary, in the fixed order
    mpp, crm, aat, rdfs, rdf.

    Args:
        output: Object with a ``write(str)`` method (the open TTL file).
    """
    vocabularies = (museoCoords, cidocCoords, aatCoords, schemaCoords, nsCoords)
    for coords in vocabularies:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')
# Export of the moves (crm:E9_Move) of every record of one CSV table to Turtle.
#
# NOTE(review): this section was reconstructed from a listing that had lost its
# indentation. The nesting below is the most plausible reading; in particular
# the PRDU value is only appended to a time-span when PRDI is present. Please
# confirm against the original script.
filePrefix = 'SR20OA_'
fileType = 'Ospedale'
# Upper bound on the number of CSV rows processed (lower it for test runs)
max_entries = 1000000000


def _ttl_literal(text):
    """Return text as a double-quoted Turtle string literal.

    Backslash, double quote, line feed and carriage return are escaped, since
    they are not allowed unescaped inside a short Turtle string. Text without
    those characters is emitted unchanged between the quotes.
    """
    escaped = text.replace('\\', '\\\\').replace('"', '\\"')
    escaped = escaped.replace('\n', '\\n').replace('\r', '\\r')
    return '"' + escaped + '"'


def _emit(output, subject, predicate, object1):
    """Write one complete 'subject predicate object .' statement."""
    output.write(triple(subject, predicate, object1) + closeLine)


def _origin_parts(row, suffix):
    """Return (label phrase, identifier fragment) for column PRCD+suffix.

    Both are empty strings when the column is empty; otherwise the phrase is
    ' da <value>' and the fragment is the value with spaces removed.
    """
    name = row['PRCD' + suffix]
    if name == '':
        return '', ''
    return ' da ' + name, name.replace(' ', '')


def _stay_timespan(row, suffix):
    """Return (label, identifier fragment) built from PRDI/PRDU+suffix.

    The label is 'PRDI' or 'PRDI - PRDU'; the fragment is the label with
    spaces and slashes removed. Both are '' when PRDI is empty.
    """
    label = ''
    fragment = ''
    if row['PRDI' + suffix] != '':
        label = row['PRDI' + suffix]
        if row['PRDU' + suffix] != '':
            label = label + ' - ' + row['PRDU' + suffix]
        fragment = label.replace(' ', '').replace('/', '')
    return label, fragment


def _write_move(output, item, move, label, timespan, timespan_node,
                destination, origin, origin_label):
    """Write the ten statements describing one move, in a fixed order.

    Args:
        output: Open TTL file.
        item: Node of the moved object.
        move: Node of the E9_Move event.
        label: Plain-text label of the move.
        timespan: Plain-text label of the time-span.
        timespan_node: Node of the E52_Time-Span.
        destination: Node the object was moved to.
        origin: Node the object was moved from (typed as E74_Group).
        origin_label: Plain-text label of the origin node.
    """
    crm = cidocCoords.prefix
    rdf_type = nsCoords.prefix + 'type'
    rdfs_label = schemaCoords.prefix + 'label'

    _emit(output, item, crm + 'P25i_moved_by', move)
    _emit(output, move, rdf_type, crm + 'E9_Move')
    _emit(output, move, rdfs_label, _ttl_literal(label))
    # E9 P4 E52
    _emit(output, move, crm + 'P4_has_time-span', timespan_node)
    _emit(output, timespan_node, rdf_type, crm + 'E52_Time-Span')
    _emit(output, timespan_node, rdfs_label, _ttl_literal(timespan))
    # E9 P26 (moved to). The destination node always carries at least the
    # prefix, so the former "!= ''" guard could never be false and was dropped.
    _emit(output, move, crm + 'P26_moved_to', destination)
    # E9 P27 (moved from)
    _emit(output, move, crm + 'P27_moved_from', origin)
    _emit(output, origin, rdf_type, crm + 'E74_Group')
    _emit(output, origin, rdfs_label, _ttl_literal(origin_label))


def _write_row_moves(output, row):
    """Write every move recorded in one CSV row, then a blank line.

    Columns are addressed by suffix: '' for the first group of columns and
    '1', '2', ... for the following ones (PRCD, PRCD1, PRCD2, ...).
    """
    item = museoCoords.prefix + row['URL']
    title = 'Trasferimento di ' + row['SGTI']

    tcl = [name for name in row if 'TCL' in name]
    filled = sum(1 for name in tcl if row[name] != '')
    # NOTE(review): with zero or one filled TCL column this yields '-1' / '0',
    # while the first group of columns is addressed with an empty suffix in
    # the loop below -- confirm that 'PRCD0' etc. exist in the table.
    last = str(filled - 1)

    # Moves between two recorded locations: PRCD<f> -> PRCD<k>
    for i in range(len(tcl) - 2):
        k = str(i + 1)
        f = '' if i == 0 else str(i)
        if row['TCL' + k] == '':
            continue
        destination_phrase = ''
        if row['PRCD' + k] != '':
            destination_phrase = ' a ' + row['PRCD' + k]
        origin_phrase, origin_id = _origin_parts(row, f)
        timespan, timespan_id = _stay_timespan(row, f)

        # Label of the origin: empty components are skipped
        origin_label = row['PRCD' + f]
        if row['PRCU' + f] != '':
            origin_label = origin_label + ', ' + row['PRCU' + f]
        if row['PRVC' + f] != '':
            origin_label = origin_label + ', ' + row['PRVC' + f]
        if row['PRVP' + f] != '':
            origin_label = origin_label + ' (' + row['PRVP' + f] + ')'
        if row['PRVR' + f] != '':
            origin_label = origin_label + ', ' + row['PRVR' + f]
        if row['PRVS' + f] != '':
            origin_label = origin_label + ', ' + row['PRVS' + f]

        _write_move(
            output, item,
            move=item + '_E9_' + k,
            label=title + origin_phrase + destination_phrase,
            timespan=timespan,
            timespan_node=item + '_' + timespan_id,
            destination=item + '_' + row['PRCD' + k].replace(' ', ''),
            origin=item + '_' + origin_id,
            origin_label=origin_label)

    # Final move: PRCD<last> -> LDCN (the '_E53' node of the record)
    destination_phrase = ''
    if row['LDCN'] != '':
        destination_phrase = ' a ' + row['LDCN']
    origin_phrase, origin_id = _origin_parts(row, last)
    timespan, timespan_id = _stay_timespan(row, last)
    # NOTE(review): unlike the loop above, empty components are NOT skipped
    # here, so labels such as 'X, ,  (), , ' are possible. Kept as-is so the
    # exported data does not change.
    origin_label = (row['PRCD' + last] + ', ' + row['PRCU' + last] + ', '
                    + row['PRVC' + last] + ' (' + row['PRVP' + last] + '), '
                    + row['PRVR' + last] + ', ' + row['PRVS' + last])
    _write_move(
        output, item,
        move=item + '_E9',
        label=title + origin_phrase + destination_phrase,
        timespan=timespan,
        timespan_node=item + '_' + timespan_id,
        destination=item + '_E53',
        origin=item + '_' + origin_id,
        origin_label=origin_label)
    output.write('\n')


# The output is written as UTF-8, as required for Turtle documents. The input
# keeps the platform default encoding so that existing CSV files still load.
with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
        export_dir + filePrefix + fileType + '.ttl', 'w', encoding='utf-8') as output:
    reader = csv.DictReader(csv_file)
    writeTTLHeader(output)
    # The index ii is used to process a limited number of entries for testing purposes
    for ii, row in enumerate(reader, start=1):
        # Only rows whose RVEL column is empty or '0' are exported
        if row['RVEL'] == '' or row['RVEL'] == '0':
            _write_row_moves(output, row)
        # Limit number of entries processed (if desired)
        if ii > max_entries:
            break
|