12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103 |
- # Utilities to read/write csv files
- import csv
- # Utilities to handle character encodings
- import unicodedata
- # Ordered Dicts
- from collections import OrderedDict
- from urllib.request import urlopen
- from bs4 import BeautifulSoup
- import json
- # OPTIONAL IMPORTS
- # For timestamping/simple speed tests
- from datetime import datetime
- # Random number generator
- from random import *
- # System & command line utilities
- import sys
- # Json for the dictionary
- import json
- import_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Ospedale/mod/'  # directory the input CSV tables are read from (hard-coded absolute path)
- export_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Carica/'  # directory the generated .ttl file is written to
- # Custom class to store URIs + related infos for the ontologies/repositories
class RDFcoords:
    """Namespace coordinates of an ontology or repository.

    Attributes:
        uri: namespace IRI, already wrapped in angle brackets.
        prefix: Turtle prefix, including the trailing colon.
        code: optional extra code attached to the namespace (None if absent).
    """

    def __init__(self, uri, prefix, code=None):
        self.uri, self.prefix, self.code = uri, prefix, code
- # Repositories and ontologies: namespace IRI + Turtle prefix pairs used in the output
- museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
- autCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/opere/autori/>', 'aut:')
- cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
- aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
- nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
- schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
- xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
- iconCoords = RDFcoords('<http://iconclass.org/>', 'ico:')
- # Basic functions for triples / shortened triples in TTL format
def triple(subject, predicate, object1):
    """Return 'subject predicate object' separated by single spaces (no terminator)."""
    return ' '.join((subject, predicate, object1))
def doublet(predicate, object1):
    """Return ' predicate object' for a statement that continues the previous subject."""
    return ' ' + ' '.join((predicate, object1))
def singlet(object1):
    """Return ' object' for a statement that continues the previous subject and predicate."""
    return ''.join((' ', object1))
- # Line endings in TTL format
- continueLine1 = ' ;\n'  # Turtle ';': next statement keeps the same subject
- continueLine2 = ' ,\n'  # Turtle ',': next statement keeps the same subject and predicate
- closeLine = ' .\n'  # Turtle '.': end of statement
def writeTTLHeader(output):
    """Write the Turtle '@prefix' declarations, followed by an empty line.

    Args:
        output: writable text stream receiving the header.
    """
    # Declaration order is kept exactly as in the original header.
    declared = (
        museoCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        autCoords,
        xsdCoords,
        iconCoords,
    )
    for coords in declared:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')
- filePrefix = 'SR20OA_'  # file-name prefix shared by the input CSV and the output TTL
- fileType = 'Ospedale'  # dataset name; also part of the authors CSV file name
- max_entries = 1000000000  # upper bound on the number of CSV rows processed
def get_aut_url(code, path=None):
    """Look up an author in the authors table.

    Args:
        code: value matched against the 'AUTH' column.
        path: CSV file to read; defaults to
            import_dir + 'AR20AUT_' + fileType + '.csv'.

    Returns:
        [URL, AUTQ] of the first row whose 'AUTH' equals code, or None when
        no row matches (callers must handle the None case before indexing).
    """
    if path is None:
        path = import_dir + 'AR20AUT_' + fileType + '.csv'
    # 'with' guarantees the file is closed, also on the early return.
    with open(path, newline="") as aut_file:
        for row in csv.DictReader(aut_file):
            if row['AUTH'] == code:
                # The role is simply the AUTQ cell (empty string when blank).
                return [row['URL'], row['AUTQ']]
    return None
def get_role(role, path='/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_RUOLI.csv'):
    """Return the 'AAT' value for a role label.

    Args:
        role: value matched against the 'Label' column.
        path: CSV file to read; defaults to the original hard-coded table.

    Returns:
        The 'AAT' cell of the first matching row, or None when no row matches.
    """
    # 'with' guarantees the file is closed, also on the early return.
    with open(path, newline="") as role_file:
        for row in csv.DictReader(role_file):
            if row['Label'] == role:
                return row['AAT']
    return None
def get_elem(mtc, path='/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_MTC.csv'):
    """Look up a material/technique term.

    Args:
        mtc: value matched against the 'MTC' column.
        path: CSV file to read; defaults to the original hard-coded table.

    Returns:
        [AAT, Type] of the first matching row, or None when no row matches
        (callers must handle the None case before indexing).
    """
    # 'with' guarantees the file is closed, also on the early return.
    with open(path, newline="") as mtc_file:
        for row in csv.DictReader(mtc_file):
            if row['MTC'] == mtc:
                return [row['AAT'], row['Type']]
    return None
- with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(  # Main conversion: read the works CSV and write CIDOC-CRM triples to a .ttl file
- export_dir + filePrefix + fileType + '.ttl', 'w') as output:
- reader = csv.DictReader(csv_file)
- writeTTLHeader(output)
- first = True  # NOTE(review): never read in the visible code
- ii = 0
- for row in reader:
- # The index ii is used to process a limited number of entries for testing purposes
- ii = ii + 1
- if row['RVEL'] == '' or row['RVEL'] == '0':  # NOTE(review): indentation was lost in this view, so the extent of this branch cannot be determined here
- sb = ''
- subj = ''
- pp = row['OGTD'] + ' (' + row['ACC'] + ') '
- if row['SGTI'] != '':
- sb = pp + row['SGTI']
- if row['LDCN'] != '':
- subj = sb + ' in ' + row['LDCN']
- else:
- subj = sb
- # Triplify the 'codice' (NCTR + NCTN) -- should exist for every entry
- codice = ''
- if (row['NCTR'] != '' and row['NCTN'] != ''):
- codice = row['NCTR'] + row['NCTN']
- codiceP = ''  # NOTE(review): assigned but not used in the visible code
- if (row['AUTH'] != ''):
- codiceP = row['AUTH']
- place = ''  # NOTE(review): assigned but not used in the visible code
- if (row['PRVC'] != ''):
- place = row['PRVC']
- columnName = list(row)  # column names of the current CSV row
- url = row['URL']  # local name from which all node identifiers of this row are built
- # placeHolders: prefixed names of the nodes created for this row
- datplaceHolder = museoCoords.prefix + url
- e1placeHolder = museoCoords.prefix + url + '_E1'
- e3placeHolder = museoCoords.prefix + url + 'E3'  # NOTE(review): no '_' separator, unlike the other placeholders -- confirm this is intended
- e10placeHolder = museoCoords.prefix + url + '_E10'
- e12placeHolder = museoCoords.prefix + url + '_E12'
- e13placeHolder = museoCoords.prefix + url + '_E13'
- e21placeHolder = museoCoords.prefix + url + '_InE21'
- e25placeHolder = museoCoords.prefix + url + '_E25'
- e34placeHolder = museoCoords.prefix + url + '_E34'
- e35placeHolder1 = museoCoords.prefix + url + '_E35'
- e42placeHolder = museoCoords.prefix + url + '_E42'
- e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'  # NOTE(review): not used in the visible code
- e65placeHolder = museoCoords.prefix + url + '_InE65'
- e73placeHolder = museoCoords.prefix + url + '_E73'
- e74placeHolder = museoCoords.prefix + url + '_E74'
- if (codice != ''):
- line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
- output.write(line)
- line = triple(e42placeHolder, nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E42_Identifier') + closeLine
- output.write(line)
- line = triple(e42placeHolder,
- schemaCoords.prefix + 'label',
- '\"' + codice + '\"') + closeLine
- output.write(line)
- ###
- line = triple(e42placeHolder,
- cidocCoords.prefix + 'P2_has_type',
- '\"Codice univoco del bene (NCT)\"') + closeLine
- output.write(line)
- # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
- line = triple(datplaceHolder, nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
- output.write(line)
- # Added by AS
- line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
- output.write(line)
- # End AS
- line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
- output.write(line)
- line = triple(e73placeHolder, nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E73_Information_Object') + closeLine
- output.write(line)
- # AS
- ss = ''
- if row['SGTI'] != '':
- ss = row['SGTI']
- else:
- ss = 'senza titolo'
- line = triple(e73placeHolder, schemaCoords.prefix + 'label',
- '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
- output.write(line)
- # E73 - P2 - E55: AAT type chosen from the OGTD value
- tt = ''  # NOTE(review): stays '' when OGTD is none of the four values below; the triples that follow would then carry an empty term
- typeLabel = ''  # NOTE(review): assigned but not used in the visible code
- if row['OGTD'] == 'dipinto':
- tt = aatCoords.prefix + "300033618"
- elif row['OGTD'] == 'rilievo':
- tt = aatCoords.prefix + "300047230"
- elif row['OGTD'] == 'polittico':
- tt = aatCoords.prefix + "300178235"
- elif row['OGTD'] == 'predella':
- tt = aatCoords.prefix + "300003745"
- line = triple(e73placeHolder,
- cidocCoords.prefix + 'P2_has_type',
- tt) + closeLine
- output.write(line)
- line = triple(tt, schemaCoords.prefix + 'label',
- '\"' + row['OGTD'] + '\"') + closeLine
- output.write(line)
- # E73 - P1 - E35
- if row['SGTT'] != '':
- line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
- output.write(line)
- line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
- output.write(line)
- line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
- output.write(line)
- # E22 - P62 - E1
- if row['SGTI'] != '':
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P62_depicts',
- e1placeHolder) + closeLine
- output.write(line)
- line = triple(e1placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
- output.write(line)
- line = triple(e1placeHolder,
- schemaCoords.prefix + 'label', '\"' +
- row['SGTI'] + '\"') + closeLine
- output.write(line)
- line = triple(e1placeHolder,
- cidocCoords.prefix + 'P2_has_type',
- '\"Identificazione Iconografica\"') + closeLine
- output.write(line)
- # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
- if row['ESC'] == 'C100005':
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P52_has_current_owner',
- '<http://palazzopretorio.comune.prato.it/it/>') + closeLine
- output.write(line)
- line = triple('<http://palazzopretorio.comune.prato.it/it/>',
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E74_Group') + closeLine
- output.write(line)
- line = triple('<http://palazzopretorio.comune.prato.it/it/>',
- schemaCoords.prefix + 'label',
- '\"Museo di Palazzo Pretorio\"') + closeLine
- output.write(line)
- currentLocation = ''
- # E22 - P54 - E53: the location is written as a plain literal built from LDCS, LDCM, LDCN, PVCC and PVCP
- if row['LDCN'] != '':
- if row['LDCS'] != '':
- currentLocation = row['LDCS']
- else:
- currentLocation = currentLocation  # no-op
- if row['LDCM'] != '':
- currentLocation = currentLocation + ', ' + row['LDCM']
- else:
- currentLocation = currentLocation  # no-op
- if row['LDCN'] != '':
- currentLocation = currentLocation + ', ' + row['LDCN']
- else:
- currentLocation = currentLocation  # no-op
- currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
- line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
- '\"' + currentLocation + '\"') + closeLine
- output.write(line)
- e12FplaceHolder = ''  # second production node; only set when DTSI differs from DTSF
- if row['DTSI'] != row['DTSF']:
- e12FplaceHolder = museoCoords.prefix + url + '_E12F'
- # Write E12 Production -- should exist for every entry?
- # E12 P108 E22
- line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
- output.write(line)
- line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
- output.write(line)
- # E73 P108i E12
- line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
- output.write(line)
- if e12FplaceHolder != '':
- line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
- output.write(line)
- line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E12_Production') + closeLine
- output.write(line)
- line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
- '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
- output.write(line)
- # E73 P108i E12
- line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
- output.write(line)
- # E12 P140i E13
- line = triple(e12FplaceHolder,
- cidocCoords.prefix + 'P140i_was_attributed_by',
- e13placeHolder) + closeLine
- output.write(line)
- # E12 P2
- line = triple(e12FplaceHolder,
- cidocCoords.prefix + 'P2_has_type',
- '\"Fine\"^^xsd:string') + closeLine
- output.write(line)
- line = triple(e12placeHolder,
- cidocCoords.prefix + 'P2_has_type',
- '\"Inizio\"^^xsd:string') + closeLine
- output.write(line)
- line = triple(e12placeHolder, schemaCoords.prefix + 'label',
- '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
- output.write(line)
- else:
- line = triple(e12placeHolder, schemaCoords.prefix + 'label',
- '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
- output.write(line)
- tcl = []  # names of all columns whose name contains 'TCL'
- for name in columnName:
- if 'TCL' in name:
- tcl.append(name)
- # E12 - P7 - E53
- for el in tcl:
- i = 0  # NOTE(review): if this line is inside the loop body, i is reset on every iteration and the else branch below is never taken
- if row[el] == 'luogo di produzione':
- pl = ''
- if i == 0:
- pl = row['PRVC']
- else:
- pl = row['PRVC' + i]  # NOTE(review): i is an int here, so this concatenation would raise TypeError if reached
- line = triple(e12placeHolder,
- cidocCoords.prefix + 'P7_took_place_at',
- museoCoords.prefix + pl) + closeLine
- output.write(line)
- if e12FplaceHolder != '':
- line = triple(e12FplaceHolder,
- cidocCoords.prefix + 'P7_took_place_at',
- museoCoords.prefix + pl) + closeLine
- output.write(line)
- i = i + 1
- # E12 - PC14 - E21
- if row['AUTH'] != '':
- aut = get_aut_url(row['AUTH'])  # NOTE(review): get_aut_url returns None when no row matches; the indexing below would then raise TypeError
- aut_url = aut[0]
- aut_role = aut[1]
- ll = row['AUTN'] + '_' + aut_role
- lab = ll.replace(' ', '')
- label = lab.replace(',', '')
- AuthorPlaceholder = autCoords.prefix + aut_url  # NOTE(review): not used before it is reassigned further down
- line = triple(museoCoords.prefix + '_' + label,
- cidocCoords.prefix + 'P01_has_domain',
- e12placeHolder) + closeLine
- output.write(line)
- if e12FplaceHolder != '':
- line = triple(museoCoords.prefix + '_' + label,
- cidocCoords.prefix + 'P01_has_domain',
- e12FplaceHolder) + closeLine
- output.write(line)
- if 'AUTH1' in columnName:  # optional second author columns (AUTH1 / AUTN1)
- if row['AUTH1'] != '':
- aut = get_aut_url(row['AUTH1'])
- aut_url = aut[0]
- aut_role = aut[1]
- ll = row['AUTN1'] + '_' + aut_role
- lab = ll.replace(' ', '')
- label = lab.replace(',', '')
- AuthorPlaceholder = autCoords.prefix + aut_url
- line = triple(museoCoords.prefix + '_' + label,
- cidocCoords.prefix + 'P01_has_domain',
- e12placeHolder) + closeLine
- output.write(line)
- if e12FplaceHolder != '':
- line = triple(museoCoords.prefix + '_' + label,
- cidocCoords.prefix + 'P01_has_domain',
- e12FplaceHolder) + closeLine
- output.write(line)
- # E12 - PC14 - E39: commissioner (CMMN) in the role of client
- if 'CMMN' in columnName:
- if row['CMMN'] != '':
- cc = row['CMMN']
- cm = cc.replace(' ', '')
- cmmn = cm.replace(',', '')
- cmmPlaceholder = museoCoords.prefix + '_' + cmmn
- line = triple(museoCoords.prefix + '_commit_' + cmmn,
- cidocCoords.prefix + 'P01_has_domain',
- e12placeHolder) + closeLine
- output.write(line)
- if e12FplaceHolder != '':
- line = triple(museoCoords.prefix + '_commit_' + cmmn,
- cidocCoords.prefix + 'P01_has_domain',
- e12FplaceHolder) + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_commit_' + cmmn,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_commit_' + cmmn,
- schemaCoords.prefix + 'label',
- '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_commit_' + cmmn,
- cidocCoords.prefix + 'P02_has_range',
- cmmPlaceholder) + closeLine
- output.write(line)
- line = triple(cmmPlaceholder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E39_Actor') + closeLine
- output.write(line)
- line = triple(cmmPlaceholder,
- schemaCoords.prefix + 'label',
- '\"' + row['CMMN'] + '\"') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_commit_' + cmmn,
- cidocCoords.prefix + 'P14.1_in_the_role_of',
- museoCoords.prefix + '_client') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_client',
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E55_Type') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_client',
- schemaCoords.prefix + 'label',
- '\"Committente\"') + closeLine
- output.write(line)
- # E12 - P4 - E52
- if row['DTSI'] != '':
- line = triple(e12placeHolder,
- cidocCoords.prefix + 'P4_has_time-span',
- museoCoords.prefix + row['DTSI']) + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + row['DTSI'],
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E52_Time-Span') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + row['DTSI'],
- schemaCoords.prefix + 'label',
- '\"' + row['DTSI'] + '\"') + closeLine
- output.write(line)
- if e12FplaceHolder != '':
- line = triple(e12FplaceHolder,
- cidocCoords.prefix + 'P4_has_time-span',
- museoCoords.prefix + row['DTSF']) + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + row['DTSF'],
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E52_Time-Span') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + row['DTSF'],
- schemaCoords.prefix + 'label',
- '\"' + row['DTSF'] + '\"') + closeLine
- output.write(line)
- tcl = []  # rebuilt with the same rule as above: column names containing 'TCL'
- for name in columnName:
- if 'TCL' in name:
- tcl.append(name)
- j = 0  # counts the non-empty TCL cells of this row
- for el in tcl:
- if row[el] != '':
- j = j + 1
- last = str(j - 1)  # used below as a column-name suffix; NOTE(review): this is '-1' when no TCL cell is filled -- confirm such a column exists or cannot occur
- n = len(tcl) - 1
- for i in range(n):
- k = str(i + 1)  # suffix of the current column group (PRCD1, TCL1, ...)
- if i + 1 == 1:
- w = ''
- else:
- w = i
- f = str(w)  # suffix of the preceding column group ('' for the unsuffixed columns)
- if row['TCL' + k] != '':
- pastActor = ''
- newActor = ''
- pl = ''
- if row['PRCD' + k] != '':
- newActor = ' a ' + row['PRCD' + k]
- if row['PRCD' + f] != '':
- pastActor = ' da ' + row['PRCD' + f]
- pl = row['PRCD' + f].replace(' ', '')
- newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
- line = triple(newe10placeHolder,
- cidocCoords.prefix + 'P30_transferred_custody_of',
- datplaceHolder) + closeLine
- output.write(line)
- line = triple(newe10placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
- output.write(line)
- line = triple(newe10placeHolder,
- schemaCoords.prefix + 'label',
- '\"Passaggio di ' + row['SGTI'] + pastActor +
- newActor + '\"') + closeLine
- output.write(line)
- if row['PRDI' + f] != '':
- timespan = row['PRDI' + f]
- tt = timespan.replace(' ', '')
- tp = tt.replace('.', '')
- ts = tp.replace('/', '')
- timespanPlaceholder = museoCoords.prefix + '_' + ts
- # E10 P4 E52
- line = triple(newe10placeHolder,
- cidocCoords.prefix + 'P4_has_time-span',
- timespanPlaceholder) + closeLine
- output.write(line)
- line = triple(timespanPlaceholder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E52_Time-Span') + closeLine
- output.write(line)
- line = triple(timespanPlaceholder,
- schemaCoords.prefix + 'label',
- '\"' + timespan + '\"') + closeLine
- output.write(line)
- pastActorPlaceholder = museoCoords.prefix + '_' + pl
- newLoc = row['PRCD' + k].replace(' ', '')
- newActorPlaceholder = museoCoords.prefix + '_' + newLoc
- # E10 P29 (custody received by) -- the new keeper
- if newActorPlaceholder != '':  # always true: the value starts with the prefix and '_'
- line = triple(newe10placeHolder,
- cidocCoords.prefix + 'P29_custody_received_by',
- newActorPlaceholder) + closeLine
- output.write(line)
- # E10 P28 (custody surrendered by) -- the previous keeper
- pastActorLabel = row['PRCD' + f]
- line = triple(newe10placeHolder,
- cidocCoords.prefix + 'P28_custody_surrendered_by',
- pastActorPlaceholder) + closeLine
- output.write(line)
- line = triple(pastActorPlaceholder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E39_Actor') + closeLine
- output.write(line)
- line = triple(pastActorPlaceholder,
- schemaCoords.prefix + 'label',
- '\"' + pastActorLabel + '\"') + closeLine
- output.write(line)
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P49_has_former_or_current_keeper',
- pastActorPlaceholder) + closeLine
- output.write(line)
- # E74 P74 E53
- pastResidenceLabel = row['PRVC' + f]  # NOTE(review): assigned but never written to the output
- pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
- line = triple(pastActorPlaceholder,
- cidocCoords.prefix + 'P74_has_current_or_former_residence',
- pastResidencePlaceHolder) + closeLine
- output.write(line)
- #### Final transfer of custody: from the keeper in the 'last' column group to the current location (LDCN)
- pastActor = ''
- newActor = ''
- pl = ''
- if row['LDCN'] != '':
- newActor = ' a ' + row['LDCN']
- if row['PRCD' + last] != '':
- pastActor = ' da ' + row['PRCD' + last]
- pl = row['PRCD' + last].replace(' ', '')
- line = triple(e10placeHolder,
- cidocCoords.prefix + 'P30_transferred_custody_of',
- datplaceHolder) + closeLine
- output.write(line)
- line = triple(e10placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
- output.write(line)
- line = triple(e10placeHolder,
- schemaCoords.prefix + 'label',
- '\"Passaggio di ' + row['SGTI'] + pastActor +
- newActor + '\"') + closeLine
- output.write(line)
- if row['PRDU' + last] != '':
- timespan = row['PRDU' + last]
- tt = timespan.replace(' ', '')
- ts = tt.replace('/', '')
- timespanPlaceholder = museoCoords.prefix + '_' + ts
- # E10 P4 E52
- line = triple(e10placeHolder,
- cidocCoords.prefix + 'P4_has_time-span',
- timespanPlaceholder) + closeLine
- output.write(line)
- line = triple(timespanPlaceholder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E52_Time-Span') + closeLine
- output.write(line)
- line = triple(timespanPlaceholder,
- schemaCoords.prefix + 'label',
- '\"' + timespan + '\"') + closeLine
- output.write(line)
- pastActorPlaceholder = museoCoords.prefix + '_' + pl
- newLocPlaceholder = e74placeHolder
- # E10 P29 (custody received by) -- the row's E74 node
- if newLocPlaceholder != '':  # always true: e74placeHolder is never empty
- line = triple(e10placeHolder,
- cidocCoords.prefix + 'P29_custody_received_by',
- newLocPlaceholder) + closeLine
- output.write(line)
- # E10 P28 (custody surrendered by) -- the previous keeper
- pastActorLabel = row['PRCD' + last]
- line = triple(e10placeHolder,
- cidocCoords.prefix + 'P28_custody_surrendered_by',
- pastActorPlaceholder) + closeLine
- output.write(line)
- line = triple(pastActorPlaceholder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E39_Actor') + closeLine
- output.write(line)
- line = triple(pastActorPlaceholder,
- schemaCoords.prefix + 'label',
- '\"' + pastActorLabel + '\"') + closeLine
- output.write(line)
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P49_has_former_or_current_keeper',
- pastActorPlaceholder) + closeLine
- output.write(line)
- # E74 P74 E53
- pastResidenceLabel = row['PRVC' + last]  # NOTE(review): the label built here and below is never written to the output
- pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
- if row['PRVP' + last] != '':
- pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
- if row['PRVR' + last] != '':
- pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
- if row['PRVS' + last] != '':
- pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
- line = triple(pastActorPlaceholder,
- cidocCoords.prefix + 'P74_has_current_or_former_residence',
- pastResidencePlaceHolder) + closeLine
- output.write(line)
- line = triple(pastResidencePlaceHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E53_Place') + closeLine
- output.write(line)
- # E22 P44 E3
- if row['STCC'] != '':
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P44_has_condition',
- e3placeHolder) + closeLine
- output.write(line)
- line = triple(e3placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E3_Condition_State') + closeLine
- output.write(line)
- line = triple(e3placeHolder,
- schemaCoords.prefix + 'label',
- '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
- output.write(line)
- line = triple(e3placeHolder,
- cidocCoords.prefix + 'P2_has_type',
- '\"' + row['STCC'] + '\"') + closeLine
- output.write(line)
- # E22 P56 E25, E25 P128 E34: inscription feature and its text
- if (row['ISRI'] != ''):
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P56_bears_feature',
- e25placeHolder) + closeLine
- output.write(line)
- line = triple(e25placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
- output.write(line)
- line = triple(e25placeHolder,
- schemaCoords.prefix + 'label',
- '\"Iscrizione su ' + subj + '\"') + closeLine
- output.write(line)
- line = triple(e25placeHolder,
- cidocCoords.prefix + 'P128_carries',
- e34placeHolder) + closeLine
- output.write(line)
- line = triple(e34placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E34_Inscription') + closeLine
- output.write(line)
- line = triple(e34placeHolder,
- schemaCoords.prefix + 'label',
- '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
- output.write(line)
- pl = row['ISRI'].replace(' ', '-')
- pla = pl.replace('.', '')  # NOTE(review): pla is not used in the visible code
- line = triple(e34placeHolder,
- cidocCoords.prefix + 'P3_has_note',
- '\"' + row['ISRI'] + '\"') + closeLine
- output.write(line)
- # E34 P2 E55
- if (row['ISRT'] != ''):
- rr = row['ISRT'].replace(' ', '')  # NOTE(review): rr is not used in the visible code
- line = triple(e34placeHolder,
- cidocCoords.prefix + 'P2_has_type',
- '\"' + row['ISRT'] + '\"') + closeLine
- output.write(line)
- # E34 P72 E56
- if (row['ISRL'] != ''):
- line = triple(e34placeHolder,
- cidocCoords.prefix + 'P72_has_language',
- museoCoords.prefix + '_' + row['ISRL']) + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_' + row['ISRL'],
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E56_Language') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + '_' + row['ISRL'],
- schemaCoords.prefix + 'label',
- '\"' + row['ISRL'] + '\"') + closeLine
- output.write(line)
- if (row['ISRA'] != '') or (row['ISRS'] != ''):  # creation event of the inscription: author (ISRA) and/or technique (ISRS)
- line = triple(e34placeHolder,
- cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
- e65placeHolder) + closeLine
- output.write(line)
- line = triple(e65placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E65_Creation') + closeLine
- output.write(line)
- line = triple(e65placeHolder,
- schemaCoords.prefix + 'label',
- '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
- output.write(line)
- if row['ISRA'] != '':
- line = triple(e65placeHolder,
- cidocCoords.prefix + 'P14_carried_out_by',
- e21placeHolder) + closeLine
- output.write(line)
- line = triple(e21placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E21_Person') + closeLine
- output.write(line)
- line = triple(e21placeHolder,
- schemaCoords.prefix + 'label',
- '\"' + row['ISRA'] + '\"') + closeLine
- output.write(line)
- if row['ISRS']:
- ss = row['ISRS'].replace(' ', '')
- tecPlaceholder = museoCoords.prefix + url + '_' + ss
- line = triple(e65placeHolder,
- cidocCoords.prefix + 'P32_used_general_technique',
- tecPlaceholder) + closeLine
- output.write(line)
- line = triple(tecPlaceholder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E55_Type') + closeLine
- output.write(line)
- line = triple(tecPlaceholder,
- schemaCoords.prefix + 'label',
- '\"' + row['ISRS'] + '\"') + closeLine
- output.write(line)
- if row['ISRP'] != '':
- line = triple(e25placeHolder,
- cidocCoords.prefix + 'P3_has_note',
- '\"' + row['ISRP'] + '\"^^xsd:string') + closeLine
- output.write(line)
- unit = ''  # NOTE(review): unit, valueA and valueL are computed but not used in the visible code
- if (row['MISU'] != ''):
- unit = row['MISU']
- valueA = ''
- valueL = ''
- if (row['MISA'] != ''):
- value = row['MISA']
- valueA = value.replace(',', 'v')
- if (row['MISL'] != ''):
- value = row['MISL']
- valueL = value.replace(',', 'v')
- # Height (MISA)
- # E22 P43 E54
- if (row['MISA'] != ''):
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P43_has_dimension',
- museoCoords.prefix + url + '_Altezza') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + url + '_Altezza',
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E54_Dimension') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + url + '_Altezza',
- schemaCoords.prefix + 'label',
- '\"Altezza: ' + row['MISA'] + row['MISU'] + '\"') + closeLine
- output.write(line)
- # E54 P90 E60
- line = triple(museoCoords.prefix + url + '_Altezza',
- cidocCoords.prefix + 'P90_has_value',
- '\"' + row['MISA'] + '\"^^xsd:integer') + closeLine  # NOTE(review): the raw cell is typed xsd:integer; verify MISA never holds decimal values
- output.write(line)
- # E54 P2 E55
- line = triple(museoCoords.prefix + url + '_Altezza',
- cidocCoords.prefix + 'P2_has_type',
- aatCoords.prefix + '300055644') + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + '300055644',
- schemaCoords.prefix + 'label',
- '\"altezza\"') + closeLine
- output.write(line)
- # E54 P91 E58
- if (row['MISU'] != ''):
- line = triple(museoCoords.prefix + url + '_Altezza',
- cidocCoords.prefix + 'P91_has_unit',
- aatCoords.prefix + '300379098') + closeLine  # the unit node is this fixed AAT id whatever MISU contains; only its label uses MISU
- output.write(line)
- line = triple(aatCoords.prefix + '300379098',
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + '300379098',
- schemaCoords.prefix + 'label',
- '\"' + row['MISU'] + '\"') + closeLine
- output.write(line)
- # Width (MISL)
- # E22 P43 E54
- if (row['MISL'] != ''):
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P43_has_dimension',
- museoCoords.prefix + url + '_Larghezza') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + url + '_Larghezza',
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E54_Dimension') + closeLine
- output.write(line)
- line = triple(museoCoords.prefix + url + '_Larghezza',
- schemaCoords.prefix + 'label',
- '\"Larghezza: ' + row['MISL'] + row['MISU'] + '\"') + closeLine
- output.write(line)
- # E54 P90 E60
- line = triple(museoCoords.prefix + url + '_Larghezza',
- cidocCoords.prefix + 'P90_has_value',
- '\"' + row['MISL'] + '\"^^xsd:integer') + closeLine  # NOTE(review): the raw cell is typed xsd:integer; verify MISL never holds decimal values
- output.write(line)
- # E54 P2 E55
- line = triple(museoCoords.prefix + url + '_Larghezza',
- cidocCoords.prefix + 'P2_has_type',
- aatCoords.prefix + '300055647') + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + '300055647',
- schemaCoords.prefix + 'label',
- '\"larghezza\"') + closeLine
- output.write(line)
- # E54 P91 E58
- if (row['MISU'] != ''):
- line = triple(museoCoords.prefix + url + '_Larghezza',
- cidocCoords.prefix + 'P91_has_unit',
- aatCoords.prefix + '300379098') + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + '300379098',
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + '300379098',
- schemaCoords.prefix + 'label',
- '\"' + row['MISU'] + '\"') + closeLine
- output.write(line)
- if row['MTC'] != '':  # materials / techniques: one or more terms separated by '/'
- mtcs = []
- if '/' in row['MTC']:
- mtcs = row['MTC'].split('/')
- else:
- mtcs.append(row['MTC'])
- for tc in mtcs:
- mtc = tc.lstrip()
- el = get_elem(mtc)  # NOTE(review): get_elem returns None when the term is not in the table; el[1] would then raise TypeError
- if el[1] == 'MTC/M':
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P45_consists_of',
- aatCoords.prefix + el[0]) + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + el[0],
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E57_Material') + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + el[0],
- schemaCoords.prefix + 'label',
- '\"' + mtc + '\"') + closeLine
- output.write(line)
- else: #E12 Production - P32 used technique - E55 Type
- line = triple(e12placeHolder,
- cidocCoords.prefix + 'P32_used_general_technique',
- aatCoords.prefix + el[0]) + closeLine
- output.write(line)
- if e12FplaceHolder != '':
- line = triple(e12FplaceHolder,
- cidocCoords.prefix + 'P32_used_general_technique',
- aatCoords.prefix + el[0]) + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + el[0],
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E55_Type') + closeLine
- output.write(line)
- line = triple(aatCoords.prefix + el[0],
- schemaCoords.prefix + 'label',
- '\"' + mtc + '\"') + closeLine
- output.write(line)
- # E12 P140i E13
- if row['AUTM'] != '':
- mot = row['AUTM'].replace(' ', '_')
- e55placeHolder = museoCoords.prefix + url + '_' + mot  # NOTE(review): not used in the visible code
- line = triple(e12placeHolder,
- cidocCoords.prefix + 'P140i_was_attributed_by',
- e13placeHolder) + closeLine
- output.write(line)
- line = triple(e13placeHolder,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
- output.write(line)
- line = triple(e13placeHolder,
- schemaCoords.prefix + 'label',
- '\"Motivazione attribuzione\"') + closeLine
- output.write(line)
- line = triple(e13placeHolder,
- cidocCoords.prefix + 'P2_has_type',
- '\"' + row['AUTM'] + '\"') + closeLine
- output.write(line)
- aut = get_aut_url(row['AUTH'])  # NOTE(review): returns None when AUTH has no match (e.g. is empty); aut[0] would then raise TypeError
- aut_url = aut[0]
- AuthorPlaceholder = autCoords.prefix + aut_url
- line = triple(e13placeHolder,
- cidocCoords.prefix + 'P141_assigned',
- AuthorPlaceholder) + closeLine
- output.write(line)
- # E22 P3 E62: free-text note (NSC)
- if row['NSC'] != '':
- ph = row['NSC'].replace(' "', ' «')  # straight double quotes are replaced by guillemets before the text is wrapped in a quoted literal
- phr = ph.replace('"', '»')
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P3_has_note',
- '\"' + phr + '\"^^xsd:string') + closeLine
- output.write(line)
- iconclass = row['DESI']  # Iconclass notation(s) in DESI, separated by ':'
- icon = iconclass.replace(' ', '')
- list_icon = []
- if ':' in icon:
- list_icon = icon.split(':')
- else:
- list_icon.append(icon)  # NOTE(review): an empty DESI yields [''], so a request is still made with an empty code
- for ic in list_icon:
- url = 'http://iconclass.org/rdk/' + str(ic)  # NOTE(review): rebinds 'url', which until here held the row's URL cell
- html = urlopen(url).read()  # one HTTP request per notation
- soup = BeautifulSoup(html, 'html.parser')
- # kill all script and style elements
- for script in soup(["script", "style"]):
- script.extract() # rip it out
- # get text
- text = soup.get_text()  # NOTE(review): text and pretty are not used in the visible code
- pretty = soup.prettify()
- ff = soup.find("div", {"id": "ic_current"})
- dd = ff.find("a", {"class", "ic_notation"})  # NOTE(review): second argument is a set literal (comma), not a dict (colon) -- confirm intent
- ss = dd.text
- x = ss.find(' ')
- icon_label = ss[x + 1:]  # text after the first space; NOTE(review): never written to the output
- ur = ic.replace("(", "%28")  # percent-encode parentheses for use in the prefixed name
- urr = ur.replace(")", "%29")
- line = triple(datplaceHolder,
- cidocCoords.prefix + 'P62_depicts',
- iconCoords.prefix + urr) + closeLine
- output.write(line)
- line = triple(iconCoords.prefix + urr,
- nsCoords.prefix + 'type',
- cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
- output.write(line)
- output.write('\n')
- #
- #
- # Limit number of entries processed (if desired)
- if (ii > max_entries):
- break
|