# Utilities to read/write csv files
import csv
# Utilities to handle character encodings
import unicodedata
# Ordered Dicts
from collections import OrderedDict
from urllib.request import urlopen
from bs4 import BeautifulSoup
import json
# OPTIONAL IMPORTS
# For timestamping/simple speed tests
from datetime import datetime
# Random number generator
from random import *
# System & command line utilities
import sys
# Json for the dictionary
import json
# Absolute, machine-specific folders: the CSV inputs are read from import_dir
# and the generated TTL file is written to export_dir.
import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/corretti/'
export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/MPP/'
# Custom class to store URIs + related infos for the ontologies/repositories
class RDFcoords:
    """Coordinates of one ontology/repository namespace.

    The three values are stored exactly as passed in:
      uri    -- text written after the prefix in the '@prefix' declaration
      prefix -- short prefix (e.g. 'crm:') prepended to local names
      code   -- optional extra code; None unless supplied
    """

    def __init__(self, uri, prefix, code=None):
        self.uri, self.prefix, self.code = uri, prefix, code
# Repositories
# One RDFcoords per namespace used in the output; the second argument is the
# prefix that gets prepended to local names when triples are built.
# NOTE(review): every uri argument below is an empty string, so the '@prefix'
# lines written by writeTTLHeader carry no IRI -- confirm the namespace IRIs.
museoCoords = RDFcoords('', 'mpp:')
autCoords = RDFcoords('', 'aut:')
cidocCoords = RDFcoords('', 'crm:')
aatCoords = RDFcoords('', 'aat:')
nsCoords = RDFcoords('', 'rdf:')
schemaCoords = RDFcoords('', 'rdfs:')
xsdCoords = RDFcoords('', 'xsd:')
iconCoords = RDFcoords('', 'ico:')
# Basic functions for triples / shortened triples in TTL format
def triple(subject, predicate, object1):
    """Return 'subject predicate object1' separated by single spaces.

    No statement terminator is appended; the caller adds the line ending.
    """
    return ' '.join((subject, predicate, object1))
def doublet(predicate, object1):
    """Return ' predicate object1': a predicate/object pair with a leading space.

    No statement terminator is appended; the caller adds the line ending.
    """
    return ' ' + ' '.join((predicate, object1))
def singlet(object1):
    """Return the object term preceded by a single space.

    No statement terminator is appended; the caller adds the line ending.
    """
    return ' ' + object1
# Line endings in TTL format
# ' ;' continues the statement with another predicate for the same subject,
# ' ,' continues it with another object, ' .' terminates the statement.
continueLine1 = ' ;\n'
continueLine2 = ' ,\n'
closeLine = ' .\n'
def writeTTLHeader(output):
    """Write the '@prefix' declarations for all namespaces, then an empty line.

    output: writable text stream. One line of the form
    '@prefix <prefix> <uri> .' is written per namespace, in a fixed order.
    """
    namespaces = (
        museoCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        autCoords,
        xsdCoords,
        iconCoords,
    )
    for coords in namespaces:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')
# The input CSV and output TTL share the base name filePrefix + fileType;
# fileType also selects the authors CSV opened by get_aut_url.
filePrefix = '00_SR20OA_'
fileType = 'Datini'
# Upper bound on the rows processed by the main loop (checked against the row counter ii)
max_entries = 1000000000
def get_aut_url(code, path=None):
    """Look up an author record by its numeric AUTH code.

    Parameters:
        code: author code; converted with int() before comparison, so
            '12' and 12 match the same row.
        path: CSV file to search. When omitted, the file
            import_dir + 'AR20AUT_' + fileType + '.csv' is used.

    Returns:
        [URL, AUTQ] of the first row whose AUTH equals code (AUTQ is ''
        when the role cell is empty), or None when no row matches.

    Raises:
        ValueError: if code cannot be converted to an integer.
    """
    if path is None:
        path = import_dir + 'AR20AUT_' + fileType + '.csv'
    # Convert once instead of on every row.
    wanted = int(code)
    # The context manager closes the file even on the early return.
    with open(path, newline="") as aut_file:
        for row in csv.DictReader(aut_file):
            try:
                auth = int(row['AUTH'])
            except (TypeError, ValueError):
                # A blank or non-numeric AUTH cell cannot match any code;
                # skip the row instead of aborting the whole lookup.
                continue
            if auth == wanted:
                return [row['URL'], row['AUTQ']]
    return None
def get_role(role, path=None):
    """Return the AAT value associated with a role label.

    Parameters:
        role: label compared (exact string match) with the 'Label' column.
        path: CSV file to search. When omitted, the AAT_RUOLI.csv file
            at its fixed location is used.

    Returns:
        The 'AAT' cell of the first matching row, or None when no row matches.
    """
    if path is None:
        path = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_RUOLI.csv'
    # The context manager closes the file even on the early return.
    with open(path, newline="") as role_file:
        for row in csv.DictReader(role_file):
            if row['Label'] == role:
                return row['AAT']
    return None
def get_elem(mtc, path=None):
    """Return the AAT value and type recorded for a material/technique term.

    Parameters:
        mtc: term compared (exact string match) with the 'MTC' column.
        path: CSV file to search. When omitted, the AAT_MTC.csv file
            at its fixed location is used.

    Returns:
        [AAT, Type] of the first matching row, or None when no row matches.
    """
    if path is None:
        path = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_MTC.csv'
    # The context manager closes the file even on the early return.
    with open(path, newline="") as mtc_file:
        for row in csv.DictReader(mtc_file):
            if row['MTC'] == mtc:
                return [row['AAT'], row['Type']]
    return None
# Main conversion: reads import_dir + filePrefix + fileType + '.csv' and writes
# the triples generated for every row to export_dir + filePrefix + fileType + '.ttl'.
with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
export_dir + filePrefix + fileType + '.ttl', 'w') as output:
reader = csv.DictReader(csv_file)
# The '@prefix' declarations are written before any triple
writeTTLHeader(output)
# NOTE(review): 'first' is never read in the visible code
first = True
# Counts the rows read so far; compared with max_entries at the end of the loop body
ii = 0
for row in reader:
# The index ii is used to process a limited number of entries for testing purposes
ii = ii + 1
# Human-readable label of the object, used further down as its rdfs:label:
# 'OGTD (ACC) SGTI', followed by ' in LDCN' when LDCN is filled in.
# sb only receives the 'OGTD (ACC) ' part when SGTI is non-empty.
sb = ''
subj = ''
pp = row['OGTD'] + ' (' + row['ACC'] + ') '
if row['SGTI'] != '':
sb = pp + row['SGTI']
if row['LDCN'] != '':
subj = sb + ' in ' + row['LDCN']
else:
subj = sb
# Triplify the 'codice' -- should exist for every entry
# codice is NCTR followed by NCTN and stays '' unless both are filled in
codice = ''
if (row['NCTR'] != '' and row['NCTN'] != ''):
codice = row['NCTR'] + row['NCTN']
# NOTE(review): codiceP and place are assigned here but not read again in the visible code
codiceP = ''
if (row['AUTH'] != ''):
codiceP = row['AUTH']
place = ''
if (row['PRVC'] != ''):
place = row['PRVC']
# Column names of the CSV, used below to detect optional and repeated columns
columnName = list(row)
# The URL column provides the local name of the object under the museum prefix
url = row['URL']
# placeHolders
# Prefixed names of the nodes generated for this row. The suffix mirrors the
# class the node is typed with further down (e.g. '_E42' -> E42_Identifier).
datplaceHolder = museoCoords.prefix + url
e1placeHolder = museoCoords.prefix + url + '_E1'
# NOTE(review): unlike the other placeholders, this suffix has no leading '_'
e3placeHolder = museoCoords.prefix + url + 'E3'
e10placeHolder = museoCoords.prefix + url + '_E10'
e12placeHolder = museoCoords.prefix + url + '_E12'
e13placeHolder = museoCoords.prefix + url + '_E13'
e21placeHolder = museoCoords.prefix + url + '_InE21'
e25placeHolder = museoCoords.prefix + url + '_E25'
e34placeHolder = museoCoords.prefix + url + '_E34'
e35placeHolder1 = museoCoords.prefix + url + '_E35'
e42placeHolder = museoCoords.prefix + url + '_E42'
e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
e65placeHolder = museoCoords.prefix + url + '_InE65'
e73placeHolder = museoCoords.prefix + url + '_E73'
e74placeHolder = museoCoords.prefix + url + '_E74'
# Identifier, written only when codice was built from NCTR and NCTN:
# object --P1_is_identified_by--> E42_Identifier node labelled with the code
if (codice != ''):
line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
output.write(line)
line = triple(e42placeHolder, nsCoords.prefix + 'type',
cidocCoords.prefix + 'E42_Identifier') + closeLine
output.write(line)
line = triple(e42placeHolder,
schemaCoords.prefix + 'label',
'\"' + codice + '\"') + closeLine
output.write(line)
###
# The kind of identifier is written as a plain string literal
line = triple(e42placeHolder,
cidocCoords.prefix + 'P2_has_type',
'\"Codice univoco del bene (NCT)\"') + closeLine
output.write(line)
# Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
line = triple(datplaceHolder, nsCoords.prefix + 'type',
cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
output.write(line)
# Added by AS
# The object's label is the 'subj' string built at the top of the loop
line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
output.write(line)
# End AS
# The physical object carries an E73 Information Object node
line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
output.write(line)
line = triple(e73placeHolder, nsCoords.prefix + 'type',
cidocCoords.prefix + 'E73_Information_Object') + closeLine
output.write(line)
# AS
# Label of the information object; 'senza titolo' replaces an empty SGTI
ss = ''
if row['SGTI'] != '':
ss = row['SGTI']
else:
ss = 'senza titolo'
line = triple(e73placeHolder, schemaCoords.prefix + 'label',
'\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
output.write(line)
# E73 - P2 - E55
# OGTD is mapped to an AAT identifier for four known object types.
# NOTE(review): tt stays '' for any other OGTD value -- check that the two
# triples after the chain are not written with an empty term in that case.
# NOTE(review): typeLabel is not read in the visible code.
tt = ''
typeLabel = ''
if row['OGTD'] == 'dipinto':
tt = aatCoords.prefix + "300033618"
elif row['OGTD'] == 'rilievo':
tt = aatCoords.prefix + "300047230"
elif row['OGTD'] == 'polittico':
tt = aatCoords.prefix + "300178235"
elif row['OGTD'] == 'predella':
tt = aatCoords.prefix + "300003745"
line = triple(e73placeHolder,
cidocCoords.prefix + 'P2_has_type',
tt) + closeLine
output.write(line)
# The AAT term is labelled with the original OGTD text
line = triple(tt, schemaCoords.prefix + 'label',
'\"' + row['OGTD'] + '\"') + closeLine
output.write(line)
# E73 - P1 - E35
# Title node, written only when SGTT is filled in
if row['SGTT'] != '':
line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
output.write(line)
line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
output.write(line)
line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
output.write(line)
# E22 - P62 - E1
# Depicted subject, written only when SGTI is filled in; its type is a plain string literal
if row['SGTI'] != '':
line = triple(datplaceHolder,
cidocCoords.prefix + 'P62_depicts',
e1placeHolder) + closeLine
output.write(line)
line = triple(e1placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
output.write(line)
line = triple(e1placeHolder,
schemaCoords.prefix + 'label', '\"' +
row['SGTI'] + '\"') + closeLine
output.write(line)
line = triple(e1placeHolder,
cidocCoords.prefix + 'P2_has_type',
'\"Identificazione Iconografica\"') + closeLine
output.write(line)
# Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
# NOTE(review): the owner term is the empty string '' both as object of P52 and
# as subject of the type/label triples, so these lines name no node for the
# museum -- confirm the intended IRI.
if row['ESC'] == 'C100005':
line = triple(datplaceHolder,
cidocCoords.prefix + 'P52_has_current_owner',
'') + closeLine
output.write(line)
line = triple('',
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E74_Group') + closeLine
output.write(line)
line = triple('',
schemaCoords.prefix + 'label',
'\"Museo di Palazzo Pretorio\"') + closeLine
output.write(line)
# Current location text: LDCS, LDCM and LDCN (each only when filled in) joined
# with ', ', followed by ', PVCC (PVCP)'. It is written as a string literal.
# NOTE(review): when LDCS is empty the text starts with ', ' because the
# separator is added before every later part; the 'else' branches are no-ops.
currentLocation = ''
# E22 - P54 - E53
if row['LDCN'] != '':
if row['LDCS'] != '':
currentLocation = row['LDCS']
else:
currentLocation = currentLocation
if row['LDCM'] != '':
currentLocation = currentLocation + ', ' + row['LDCM']
else:
currentLocation = currentLocation
if row['LDCN'] != '':
currentLocation = currentLocation + ', ' + row['LDCN']
else:
currentLocation = currentLocation
currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
'\"' + currentLocation + '\"') + closeLine
output.write(line)
# A second production node ('_E12F') is prepared only when the start date
# (DTSI) and the end date (DTSF) differ; '' means a single production node.
e12FplaceHolder = ''
if row['DTSI'] != row['DTSF']:
e12FplaceHolder = museoCoords.prefix + url + '_E12F'
# Write E12 Production -- should exist for every entry?
# E12 P108 E22
line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
output.write(line)
line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
output.write(line)
# E73 P108i E12
line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
output.write(line)
# With two nodes, '_E12F' is described as the end of production ('Fine') and
# '_E12' as its start ('Inizio'); otherwise '_E12' gets a plain 'Produzione di' label.
if e12FplaceHolder != '':
line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
output.write(line)
line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
cidocCoords.prefix + 'E12_Production') + closeLine
output.write(line)
line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
'\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
output.write(line)
# E73 P108i E12
line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
output.write(line)
# E12 P140i E13
line = triple(e12FplaceHolder,
cidocCoords.prefix + 'P140i_was_attributed_by',
e13placeHolder) + closeLine
output.write(line)
# E12 P2
line = triple(e12FplaceHolder,
cidocCoords.prefix + 'P2_has_type',
'\"Fine\"^^xsd:string') + closeLine
output.write(line)
line = triple(e12placeHolder,
cidocCoords.prefix + 'P2_has_type',
'\"Inizio\"^^xsd:string') + closeLine
output.write(line)
line = triple(e12placeHolder, schemaCoords.prefix + 'label',
'\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
output.write(line)
else:
line = triple(e12placeHolder, schemaCoords.prefix + 'label',
'\"Produzione di ' + row['SGTI'] + '\"') + closeLine
output.write(line)
# Collect every column whose name contains 'TCL'
tcl = []
for name in columnName:
if 'TCL' in name:
tcl.append(name)
# E12 - P7 - E53
# A TCL column holding 'luogo di produzione' links the production node(s) to a
# place named after a PRVC column.
# NOTE(review): i is set to 0 as the first statement of the loop body, so the
# 'i == 0' test is always true and only the unsuffixed 'PRVC' column is used.
# The 'else' branch would also fail, because 'PRVC' + i adds a str and an int.
for el in tcl:
i = 0
if row[el] == 'luogo di produzione':
pl = ''
if i == 0:
pl = row['PRVC']
else:
pl = row['PRVC' + i]
line = triple(e12placeHolder,
cidocCoords.prefix + 'P7_took_place_at',
museoCoords.prefix + pl) + closeLine
output.write(line)
if e12FplaceHolder != '':
line = triple(e12FplaceHolder,
cidocCoords.prefix + 'P7_took_place_at',
museoCoords.prefix + pl) + closeLine
output.write(line)
i = i + 1
# E12 - PC14 - E21
# First author: a node named 'mpp:_' + AUTN + '_' + role (spaces and commas
# removed) is linked to the production node(s) through P01_has_domain.
# NOTE(review): get_aut_url returns None when no row matches, in which case
# aut[0] raises; AuthorPlaceholder is assigned but not used in these lines.
if row['AUTH'] != '':
aut = get_aut_url(row['AUTH'])
aut_url = aut[0]
aut_role = aut[1]
ll = row['AUTN'] + '_' + aut_role
lab = ll.replace(' ', '')
label = lab.replace(',', '')
AuthorPlaceholder = autCoords.prefix + aut_url
line = triple(museoCoords.prefix + '_' + label,
cidocCoords.prefix + 'P01_has_domain',
e12placeHolder) + closeLine
output.write(line)
if e12FplaceHolder != '':
line = triple(museoCoords.prefix + '_' + label,
cidocCoords.prefix + 'P01_has_domain',
e12FplaceHolder) + closeLine
output.write(line)
# Second author: same treatment, applied only when the CSV has an AUTH1 column
if 'AUTH1' in columnName:
if row['AUTH1'] != '':
aut = get_aut_url(row['AUTH1'])
aut_url = aut[0]
aut_role = aut[1]
ll = row['AUTN1'] + '_' + aut_role
lab = ll.replace(' ', '')
label = lab.replace(',', '')
AuthorPlaceholder = autCoords.prefix + aut_url
line = triple(museoCoords.prefix + '_' + label,
cidocCoords.prefix + 'P01_has_domain',
e12placeHolder) + closeLine
output.write(line)
if e12FplaceHolder != '':
line = triple(museoCoords.prefix + '_' + label,
cidocCoords.prefix + 'P01_has_domain',
e12FplaceHolder) + closeLine
output.write(line)
# E12 - PC14 - E21
# Commissioner (CMMN), handled only when the CSV has that column and it is
# filled in. The name with spaces and commas removed gives two local names:
# 'mpp:_commit_<name>' for the PC14_carried_out_by node and 'mpp:_<name>' for
# the E39_Actor it points to; the role is the shared 'mpp:_client' E55_Type.
if 'CMMN' in columnName:
if row['CMMN'] != '':
cc = row['CMMN']
cm = cc.replace(' ', '')
cmmn = cm.replace(',', '')
cmmPlaceholder = museoCoords.prefix + '_' + cmmn
line = triple(museoCoords.prefix + '_commit_' + cmmn,
cidocCoords.prefix + 'P01_has_domain',
e12placeHolder) + closeLine
output.write(line)
if e12FplaceHolder != '':
line = triple(museoCoords.prefix + '_commit_' + cmmn,
cidocCoords.prefix + 'P01_has_domain',
e12FplaceHolder) + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_commit_' + cmmn,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_commit_' + cmmn,
schemaCoords.prefix + 'label',
'\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_commit_' + cmmn,
cidocCoords.prefix + 'P02_has_range',
cmmPlaceholder) + closeLine
output.write(line)
line = triple(cmmPlaceholder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E39_Actor') + closeLine
output.write(line)
line = triple(cmmPlaceholder,
schemaCoords.prefix + 'label',
'\"' + row['CMMN'] + '\"') + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_commit_' + cmmn,
cidocCoords.prefix + 'P14.1_in_the_role_of',
museoCoords.prefix + '_client') + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_client',
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E55_Type') + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_client',
schemaCoords.prefix + 'label',
'\"Committente\"') + closeLine
output.write(line)
# E12 - P4 - E52
# The raw DTSI value is used both as the local name of the E52_Time-Span node
# and as its label; the end-of-production node does the same with DTSF.
if row['DTSI'] != '':
line = triple(e12placeHolder,
cidocCoords.prefix + 'P4_has_time-span',
museoCoords.prefix + row['DTSI']) + closeLine
output.write(line)
line = triple(museoCoords.prefix + row['DTSI'],
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E52_Time-Span') + closeLine
output.write(line)
line = triple(museoCoords.prefix + row['DTSI'],
schemaCoords.prefix + 'label',
'\"' + row['DTSI'] + '\"') + closeLine
output.write(line)
if e12FplaceHolder != '':
line = triple(e12FplaceHolder,
cidocCoords.prefix + 'P4_has_time-span',
museoCoords.prefix + row['DTSF']) + closeLine
output.write(line)
line = triple(museoCoords.prefix + row['DTSF'],
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E52_Time-Span') + closeLine
output.write(line)
line = triple(museoCoords.prefix + row['DTSF'],
schemaCoords.prefix + 'label',
'\"' + row['DTSF'] + '\"') + closeLine
output.write(line)
# Custody history. The columns whose name contains 'TCL' are collected again.
tcl = []
for name in columnName:
if 'TCL' in name:
tcl.append(name)
# j counts the non-empty TCL columns; 'last' is that count minus one, as a
# string, and is used after this loop as a column-name suffix.
# NOTE(review): with one filled-in TCL column 'last' is '0' and with none it
# is '-1' -- confirm that such suffixed columns exist, since the loop below
# addresses the first step without any suffix.
j = 0
for el in tcl:
if row[el] != '':
j = j + 1
last = str(j - 1)
n = len(tcl) - 1
# k is the suffix of the step being reached; f is the suffix of the step
# before it ('' for the first one, whose columns carry no number).
for i in range(n):
k = str(i + 1)
if i + 1 == 1:
w = ''
else:
w = i
f = str(w)
# Each filled-in 'TCL' + k column yields one E10_Transfer_of_Custody node
# named '<url>_E10_<k>', labelled 'Passaggio di <SGTI> da <old> a <new>'
if row['TCL' + k] != '':
pastActor = ''
newActor = ''
pl = ''
if row['PRCD' + k] != '':
newActor = ' a ' + row['PRCD' + k]
if row['PRCD' + f] != '':
pastActor = ' da ' + row['PRCD' + f]
pl = row['PRCD' + f].replace(' ', '')
newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
line = triple(newe10placeHolder,
cidocCoords.prefix + 'P30_transferred_custody_of',
datplaceHolder) + closeLine
output.write(line)
line = triple(newe10placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
output.write(line)
line = triple(newe10placeHolder,
schemaCoords.prefix + 'label',
'\"Passaggio di ' + row['SGTI'] + pastActor +
newActor + '\"') + closeLine
output.write(line)
# The time-span node is named after 'PRDI' + f with spaces, dots and slashes removed
if row['PRDI' + f] != '':
timespan = row['PRDI' + f]
tt = timespan.replace(' ', '')
tp = tt.replace('.', '')
ts = tp.replace('/', '')
timespanPlaceholder = museoCoords.prefix + '_' + ts
# E10 P4 E52
line = triple(newe10placeHolder,
cidocCoords.prefix + 'P4_has_time-span',
timespanPlaceholder) + closeLine
output.write(line)
line = triple(timespanPlaceholder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E52_Time-Span') + closeLine
output.write(line)
line = triple(timespanPlaceholder,
schemaCoords.prefix + 'label',
'\"' + timespan + '\"') + closeLine
output.write(line)
pastActorPlaceholder = museoCoords.prefix + '_' + pl
newLoc = row['PRCD' + k].replace(' ', '')
newActorPlaceholder = museoCoords.prefix + '_' + newLoc
# E10 P29: custody received by the new keeper
# NOTE(review): newActorPlaceholder always contains the prefix and '_', so
# this test is true even when 'PRCD' + k is empty
if newActorPlaceholder != '':
line = triple(newe10placeHolder,
cidocCoords.prefix + 'P29_custody_received_by',
newActorPlaceholder) + closeLine
output.write(line)
# E10 P28: custody surrendered by the previous keeper, typed as E39_Actor
pastActorLabel = row['PRCD' + f]
line = triple(newe10placeHolder,
cidocCoords.prefix + 'P28_custody_surrendered_by',
pastActorPlaceholder) + closeLine
output.write(line)
line = triple(pastActorPlaceholder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E39_Actor') + closeLine
output.write(line)
line = triple(pastActorPlaceholder,
schemaCoords.prefix + 'label',
'\"' + pastActorLabel + '\"') + closeLine
output.write(line)
line = triple(datplaceHolder,
cidocCoords.prefix + 'P49_has_former_or_current_keeper',
pastActorPlaceholder) + closeLine
output.write(line)
# Previous keeper --P74--> place named after 'PRVC' + f
# NOTE(review): pastResidenceLabel is assigned but not written to the output here
pastResidenceLabel = row['PRVC' + f]
pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
line = triple(pastActorPlaceholder,
cidocCoords.prefix + 'P74_has_current_or_former_residence',
pastResidencePlaceHolder) + closeLine
output.write(line)
####
# Final transfer of custody, written on the '<url>_E10' node: from the keeper
# in 'PRCD' + last to the current location (LDCN is used in the label only).
pastActor = ''
newActor = ''
pl = ''
if row['LDCN'] != '':
newActor = ' a ' + row['LDCN']
if row['PRCD' + last] != '':
pastActor = ' da ' + row['PRCD' + last]
pl = row['PRCD' + last].replace(' ', '')
line = triple(e10placeHolder,
cidocCoords.prefix + 'P30_transferred_custody_of',
datplaceHolder) + closeLine
output.write(line)
line = triple(e10placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
output.write(line)
line = triple(e10placeHolder,
schemaCoords.prefix + 'label',
'\"Passaggio di ' + row['SGTI'] + pastActor +
newActor + '\"') + closeLine
output.write(line)
# The time-span node is named after 'PRDU' + last with spaces and slashes removed
if row['PRDU' + last] != '':
timespan = row['PRDU' + last]
tt = timespan.replace(' ', '')
ts = tt.replace('/', '')
timespanPlaceholder = museoCoords.prefix + '_' + ts
# E10 P4 E52
line = triple(e10placeHolder,
cidocCoords.prefix + 'P4_has_time-span',
timespanPlaceholder) + closeLine
output.write(line)
line = triple(timespanPlaceholder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E52_Time-Span') + closeLine
output.write(line)
line = triple(timespanPlaceholder,
schemaCoords.prefix + 'label',
'\"' + timespan + '\"') + closeLine
output.write(line)
pastActorPlaceholder = museoCoords.prefix + '_' + pl
newLocPlaceholder = e74placeHolder
# E10 P29: custody received by the '<url>_E74' node
# NOTE(review): e74placeHolder is never empty, so this test is always true
if newLocPlaceholder != '':
line = triple(e10placeHolder,
cidocCoords.prefix + 'P29_custody_received_by',
newLocPlaceholder) + closeLine
output.write(line)
# E10 P28: custody surrendered by the previous keeper, typed as E39_Actor
pastActorLabel = row['PRCD' + last]
line = triple(e10placeHolder,
cidocCoords.prefix + 'P28_custody_surrendered_by',
pastActorPlaceholder) + closeLine
output.write(line)
line = triple(pastActorPlaceholder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E39_Actor') + closeLine
output.write(line)
line = triple(pastActorPlaceholder,
schemaCoords.prefix + 'label',
'\"' + pastActorLabel + '\"') + closeLine
output.write(line)
line = triple(datplaceHolder,
cidocCoords.prefix + 'P49_has_former_or_current_keeper',
pastActorPlaceholder) + closeLine
output.write(line)
# Previous keeper --P74--> E53_Place named after 'PRVC' + last
# NOTE(review): pastResidenceLabel is extended with PRVP/PRVR/PRVS but is not
# written to the output in the visible code
pastResidenceLabel = row['PRVC' + last]
pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
if row['PRVP' + last] != '':
pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
if row['PRVR' + last] != '':
pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
if row['PRVS' + last] != '':
pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
line = triple(pastActorPlaceholder,
cidocCoords.prefix + 'P74_has_current_or_former_residence',
pastResidencePlaceHolder) + closeLine
output.write(line)
line = triple(pastResidencePlaceHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E53_Place') + closeLine
output.write(line)
# E22 P44 E3
# Condition state, written only when STCC is filled in; the STCC text itself
# is attached as a string literal through P2_has_type
if row['STCC'] != '':
line = triple(datplaceHolder,
cidocCoords.prefix + 'P44_has_condition',
e3placeHolder) + closeLine
output.write(line)
line = triple(e3placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E3_Condition_State') + closeLine
output.write(line)
line = triple(e3placeHolder,
schemaCoords.prefix + 'label',
'\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
output.write(line)
line = triple(e3placeHolder,
cidocCoords.prefix + 'P2_has_type',
'\"' + row['STCC'] + '\"') + closeLine
output.write(line)
# E22 P56 E25, then E25 P128 E34
# Inscription, written only when ISRI is filled in: the object bears an
# E25_Man-Made_Feature node, which in turn carries the E34_Inscription node
if (row['ISRI'] != ''):
line = triple(datplaceHolder,
cidocCoords.prefix + 'P56_bears_feature',
e25placeHolder) + closeLine
output.write(line)
line = triple(e25placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
output.write(line)
line = triple(e25placeHolder,
schemaCoords.prefix + 'label',
'\"Iscrizione su ' + subj + '\"') + closeLine
output.write(line)
line = triple(e25placeHolder,
cidocCoords.prefix + 'P128_carries',
e34placeHolder) + closeLine
output.write(line)
line = triple(e34placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E34_Inscription') + closeLine
output.write(line)
line = triple(e34placeHolder,
schemaCoords.prefix + 'label',
'\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
output.write(line)
# NOTE(review): pl and pla are computed but not read afterwards in the visible code
pl = row['ISRI'].replace(' ', '-')
pla = pl.replace('.', '')
# The inscription text is also attached as a note
line = triple(e34placeHolder,
cidocCoords.prefix + 'P3_has_note',
'\"' + row['ISRI'] + '\"') + closeLine
output.write(line)
# E34 P2 E55
# The type is written as a string literal; rr is computed but not used
if (row['ISRT'] != ''):
rr = row['ISRT'].replace(' ', '')
line = triple(e34placeHolder,
cidocCoords.prefix + 'P2_has_type',
'\"' + row['ISRT'] + '\"') + closeLine
output.write(line)
# E34 P72 E56
# The language node is named after the raw ISRL value
if (row['ISRL'] != ''):
line = triple(e34placeHolder,
cidocCoords.prefix + 'P72_has_language',
museoCoords.prefix + '_' + row['ISRL']) + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_' + row['ISRL'],
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E56_Language') + closeLine
output.write(line)
line = triple(museoCoords.prefix + '_' + row['ISRL'],
schemaCoords.prefix + 'label',
'\"' + row['ISRL'] + '\"') + closeLine
output.write(line)
# ISRS: creation event of the inscription and the technique it used
if row['ISRS'] != '':
line = triple(e34placeHolder,
cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
e65placeHolder) + closeLine
output.write(line)
line = triple(e65placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E65_Creation') + closeLine
output.write(line)
line = triple(e65placeHolder,
schemaCoords.prefix + 'label',
'\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
output.write(line)
# Technique node named '<url>_' + ISRS without spaces
if row['ISRS']:
ss = row['ISRS'].replace(' ', '')
tecPlaceholder = museoCoords.prefix + url + '_' + ss
line = triple(e65placeHolder,
cidocCoords.prefix + 'P32_used_general_technique',
tecPlaceholder) + closeLine
output.write(line)
line = triple(tecPlaceholder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E55_Type') + closeLine
output.write(line)
line = triple(tecPlaceholder,
schemaCoords.prefix + 'label',
'\"' + row['ISRS'] + '\"') + closeLine
output.write(line)
# ISRP is attached to the feature node as a typed string note
if row['ISRP'] != '':
line = triple(e25placeHolder,
cidocCoords.prefix + 'P3_has_note',
'\"' + row['ISRP'] + '\"^^xsd:string') + closeLine
output.write(line)
# Measurements: MISA (height) and MISL (width), with MISU as the unit text.
# NOTE(review): unit, valueA and valueL are computed here but not read again
# in the visible code; the triples below use the raw column values.
unit = ''
if (row['MISU'] != ''):
unit = row['MISU']
valueA = ''
valueL = ''
if (row['MISA'] != ''):
value = row['MISA']
valueA = value.replace(',', 'v')
if (row['MISL'] != ''):
value = row['MISL']
valueL = value.replace(',', 'v')
# Altezza
# E22 P43 E54
if (row['MISA'] != ''):
line = triple(datplaceHolder,
cidocCoords.prefix + 'P43_has_dimension',
museoCoords.prefix + url + '_Altezza') + closeLine
output.write(line)
line = triple(museoCoords.prefix + url + '_Altezza',
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E54_Dimension') + closeLine
output.write(line)
line = triple(museoCoords.prefix + url + '_Altezza',
schemaCoords.prefix + 'label',
'\"Altezza: ' + row['MISA'] + row['MISU'] + '\"') + closeLine
output.write(line)
# E54 P90 E60
# NOTE(review): the raw MISA text is typed as xsd:integer whatever it contains
line = triple(museoCoords.prefix + url + '_Altezza',
cidocCoords.prefix + 'P90_has_value',
'\"' + row['MISA'] + '\"^^xsd:integer') + closeLine
output.write(line)
# E54 P2 E55
line = triple(museoCoords.prefix + url + '_Altezza',
cidocCoords.prefix + 'P2_has_type',
aatCoords.prefix + '300055644') + closeLine
output.write(line)
line = triple(aatCoords.prefix + '300055644',
schemaCoords.prefix + 'label',
'\"altezza\"') + closeLine
output.write(line)
# E54 P91 E58
# The same AAT identifier is used for the unit whatever MISU contains;
# only its label carries the MISU text
if (row['MISU'] != ''):
line = triple(museoCoords.prefix + url + '_Altezza',
cidocCoords.prefix + 'P91_has_unit',
aatCoords.prefix + '300379098') + closeLine
output.write(line)
line = triple(aatCoords.prefix + '300379098',
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine
output.write(line)
line = triple(aatCoords.prefix + '300379098',
schemaCoords.prefix + 'label',
'\"' + row['MISU'] + '\"') + closeLine
output.write(line)
# Larghezza
# E22 P43 E54
if (row['MISL'] != ''):
line = triple(datplaceHolder,
cidocCoords.prefix + 'P43_has_dimension',
museoCoords.prefix + url + '_Larghezza') + closeLine
output.write(line)
line = triple(museoCoords.prefix + url + '_Larghezza',
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E54_Dimension') + closeLine
output.write(line)
line = triple(museoCoords.prefix + url + '_Larghezza',
schemaCoords.prefix + 'label',
'\"Larghezza: ' + row['MISL'] + row['MISU'] + '\"') + closeLine
output.write(line)
# E54 P90 E60
# NOTE(review): the raw MISL text is typed as xsd:integer whatever it contains
line = triple(museoCoords.prefix + url + '_Larghezza',
cidocCoords.prefix + 'P90_has_value',
'\"' + row['MISL'] + '\"^^xsd:integer') + closeLine
output.write(line)
# E54 P2 E55
line = triple(museoCoords.prefix + url + '_Larghezza',
cidocCoords.prefix + 'P2_has_type',
aatCoords.prefix + '300055647') + closeLine
output.write(line)
line = triple(aatCoords.prefix + '300055647',
schemaCoords.prefix + 'label',
'\"larghezza\"') + closeLine
output.write(line)
# E54 P91 E58
if (row['MISU'] != ''):
line = triple(museoCoords.prefix + url + '_Larghezza',
cidocCoords.prefix + 'P91_has_unit',
aatCoords.prefix + '300379098') + closeLine
output.write(line)
line = triple(aatCoords.prefix + '300379098',
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine
output.write(line)
line = triple(aatCoords.prefix + '300379098',
schemaCoords.prefix + 'label',
'\"' + row['MISU'] + '\"') + closeLine
output.write(line)
# Materials and techniques: MTC may hold several terms separated by '/'.
# Each term (leading whitespace stripped) is looked up with get_elem, which
# returns [AAT id, Type]. Type 'MTC/M' marks a material of the object; any
# other type is treated as a technique of the production node(s).
# NOTE(review): get_elem returns None for an unknown term, in which case
# el[1] raises.
if row['MTC'] != '':
mtcs = []
if '/' in row['MTC']:
mtcs = row['MTC'].split('/')
else:
mtcs.append(row['MTC'])
for tc in mtcs:
mtc = tc.lstrip()
el = get_elem(mtc)
if el[1] == 'MTC/M':
line = triple(datplaceHolder,
cidocCoords.prefix + 'P45_consists_of',
aatCoords.prefix + el[0]) + closeLine
output.write(line)
line = triple(aatCoords.prefix + el[0],
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E57_Material') + closeLine
output.write(line)
line = triple(aatCoords.prefix + el[0],
schemaCoords.prefix + 'label',
'\"' + mtc + '\"') + closeLine
output.write(line)
else: #E12 Production - P32 used technique - E55 Type
line = triple(e12placeHolder,
cidocCoords.prefix + 'P32_used_general_technique',
aatCoords.prefix + el[0]) + closeLine
output.write(line)
if e12FplaceHolder != '':
line = triple(e12FplaceHolder,
cidocCoords.prefix + 'P32_used_general_technique',
aatCoords.prefix + el[0]) + closeLine
output.write(line)
line = triple(aatCoords.prefix + el[0],
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E55_Type') + closeLine
output.write(line)
line = triple(aatCoords.prefix + el[0],
schemaCoords.prefix + 'label',
'\"' + mtc + '\"') + closeLine
output.write(line)
# E12 P140i E13
# Attribution, written only when AUTM (the reason for the attribution) is
# filled in: the production node is attributed by an E13 node whose type is
# the AUTM text and which assigns the author found through AUTH.
# NOTE(review): e55placeHolder is built but not used in the visible code, and
# AUTH is passed to get_aut_url without an emptiness check in these lines.
if row['AUTM'] != '':
mot = row['AUTM'].replace(' ', '_')
e55placeHolder = museoCoords.prefix + url + '_' + mot
line = triple(e12placeHolder,
cidocCoords.prefix + 'P140i_was_attributed_by',
e13placeHolder) + closeLine
output.write(line)
line = triple(e13placeHolder,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
output.write(line)
line = triple(e13placeHolder,
schemaCoords.prefix + 'label',
'\"Motivazione attribuzione\"') + closeLine
output.write(line)
line = triple(e13placeHolder,
cidocCoords.prefix + 'P2_has_type',
'\"' + row['AUTM'] + '\"') + closeLine
output.write(line)
aut = get_aut_url(row['AUTH'])
aut_url = aut[0]
AuthorPlaceholder = autCoords.prefix + aut_url
line = triple(e13placeHolder,
cidocCoords.prefix + 'P141_assigned',
AuthorPlaceholder) + closeLine
output.write(line)
# E22 P3 note
# Free-text note (NSC). Double quotes are replaced with guillemets before the
# text is wrapped in a quoted literal: ' "' becomes ' «', any other '"' becomes '»'
if row['NSC'] != '':
ph = row['NSC'].replace(' "', ' «')
phr = ph.replace('"', '»')
line = triple(datplaceHolder,
cidocCoords.prefix + 'P3_has_note',
'\"' + phr + '\"^^xsd:string') + closeLine
output.write(line)
# Iconclass codes: DESI, with spaces removed, is split on ':' into one or
# more notations.
# NOTE(review): an empty DESI still yields the single entry '', so the loop
# below runs once with an empty notation.
iconclass = row['DESI']
icon = iconclass.replace(' ', '')
list_icon = []
if ':' in icon:
list_icon = icon.split(':')
else:
list_icon.append(icon)
# Every notation is fetched over HTTP and the returned page is parsed.
# NOTE(review): 'url' is reassigned here and no longer holds the row's URL
# column; text, pretty and icon_label are computed but not used in the
# visible code, so the download does not influence the triples written below.
for ic in list_icon:
url = 'http://iconclass.org/rdk/' + str(ic)
html = urlopen(url).read()
soup = BeautifulSoup(html, 'html.parser')
# kill all script and style elements
for script in soup(["script", "style"]):
script.extract() # rip it out
# get text
text = soup.get_text()
pretty = soup.prettify()
# NOTE(review): the second argument of the inner find is a set literal
# ({"class", "ic_notation"}), not a dict -- confirm that {"class": "ic_notation"}
# was intended. Both finds are used without a check for a missing element.
ff = soup.find("div", {"id": "ic_current"})
dd = ff.find("a", {"class", "ic_notation"})
ss = dd.text
# The label is whatever follows the first space of the link text
x = ss.find(' ')
icon_label = ss[x + 1:]
# Parentheses are percent-encoded before the notation is used as a local name
ur = ic.replace("(", "%28")
urr = ur.replace(")", "%29")
line = triple(datplaceHolder,
cidocCoords.prefix + 'P62_depicts',
iconCoords.prefix + urr) + closeLine
output.write(line)
line = triple(iconCoords.prefix + urr,
nsCoords.prefix + 'type',
cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
output.write(line)
# P2 Opera d'arte
# The object's type is written as a plain string literal
line = triple(datplaceHolder,
cidocCoords.prefix + 'P2_has_type',
'\"Opera d\'Arte\"') + closeLine
output.write(line)
# Empty line after the triples
output.write('\n')
#
#
# Limit number of entries processed (if desired)
# NOTE(review): ii is incremented before the row is processed and the test is
# '>', so max_entries + 1 rows are processed before the loop stops
if (ii > max_entries):
break