"""Convert the ICCD "OA" (art object) CSV export of a collection into Turtle.

Every CSV row describes one object and is expanded into CIDOC-CRM triples
(identifier, production, custody transfers, condition, inscription,
dimensions, materials, attribution, notes and Iconclass subjects).

NOTE(review): this module was rebuilt from a copy whose line breaks and
indentation had been lost.  Block nesting was reconstructed from the
statements themselves; places where more than one nesting was possible are
marked with ``NOTE(review)`` and should be checked against the original.
"""

# Utilities to read/write csv files
import csv
# Utilities to handle character encodings
import unicodedata
# Ordered Dicts
from collections import OrderedDict
from functools import lru_cache
from urllib.request import urlopen
import json

# OPTIONAL IMPORTS
# For timestamping/simple speed tests
from datetime import datetime
# Random number generator
from random import *
# System & command line utilities
import sys

import_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Datini/mod/'
export_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Carica/'


# Custom class to store URIs + related infos for the ontologies/repositories
class RDFcoords:
    """Namespace URI, Turtle prefix and optional code of one vocabulary."""

    def __init__(self, uri, prefix, code=None):
        self.uri = uri
        self.prefix = prefix
        self.code = code


# Repositories
# NOTE(review): every namespace URI is an empty string here, so the emitted
# '@prefix' lines carry no IRI.  Presumably the '<...>' values were lost --
# restore them before loading the output into a triple store.
museoCoords = RDFcoords('', 'mpp:')
autCoords = RDFcoords('', 'aut:')
cidocCoords = RDFcoords('', 'crm:')
aatCoords = RDFcoords('', 'aat:')
nsCoords = RDFcoords('', 'rdf:')
schemaCoords = RDFcoords('', 'rdfs:')
xsdCoords = RDFcoords('', 'xsd:')
iconCoords = RDFcoords('', 'ico:')


# Basic functions for triples / shortened triples in TTL format
def triple(subject, predicate, object1):
    """Return 'subject predicate object' (without the closing punctuation)."""
    return subject + ' ' + predicate + ' ' + object1


def doublet(predicate, object1):
    """Return ' predicate object', a continuation after a ';' line ending."""
    return ' ' + predicate + ' ' + object1


def singlet(object1):
    """Return ' object', a continuation after a ',' line ending."""
    return ' ' + object1


# Line endings in TTL format
continueLine1 = ' ;\n'
continueLine2 = ' ,\n'
closeLine = ' .\n'


def writeTTLHeader(output):
    """Write one '@prefix' declaration per vocabulary, then a blank line."""
    for coords in (museoCoords, cidocCoords, aatCoords, schemaCoords,
                   nsCoords, autCoords, xsdCoords, iconCoords):
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
    output.write('\n')


filePrefix = 'SR20OA_'
fileType = 'Datini'
max_entries = 1000000000

# AAT concept used as P2_has_type of the information object, keyed by OGTD.
_OBJECT_TYPES = {
    'dipinto': '300033618',
    'rilievo': '300047230',
    'polittico': '300178235',
    'predella': '300003745',
}


@lru_cache(maxsize=None)
def _read_table(path):
    """Return all rows of the CSV file at *path*; each file is read only once.

    The lookup functions below are called for every object row, so the file
    is parsed once and closed immediately instead of being reopened (and
    left open) on each call.
    """
    with open(path, newline="") as handle:
        return tuple(csv.DictReader(handle))


def get_aut_url(code):
    """Return ``[URL, AUTQ]`` of the first author row whose AUTH is *code*.

    Returns ``None`` when no row matches.
    """
    for row in _read_table(import_dir + 'AR20AUT_' + fileType + '.csv'):
        if row['AUTH'] == code:
            return [row['URL'], row['AUTQ']]
    return None


def get_role(role):
    """Return the AAT value of the first row whose Label is *role*, else None."""
    path = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_RUOLI.csv'
    for row in _read_table(path):
        if row['Label'] == role:
            return row['AAT']
    return None


def get_elem(mtc):
    """Return ``[AAT, Type]`` of the first row whose MTC is *mtc*, else None."""
    path = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_MTC.csv'
    for row in _read_table(path):
        if row['MTC'] == mtc:
            return [row['AAT'], row['Type']]
    return None


class _TripleWriter:
    """Write complete, '.'-terminated triples to a text stream."""

    def __init__(self, output):
        self._output = output

    def emit(self, subject, predicate, object1):
        """Write one triple exactly as given."""
        self._output.write(triple(subject, predicate, object1) + closeLine)

    def crm(self, subject, prop, object1):
        """Write a triple whose predicate is the CIDOC-CRM property *prop*."""
        self.emit(subject, cidocCoords.prefix + prop, object1)

    def typed(self, subject, crm_class):
        """Write 'subject rdf:type crm:<crm_class>'."""
        self.emit(subject, nsCoords.prefix + 'type',
                  cidocCoords.prefix + crm_class)

    def label(self, subject, text):
        """Write 'subject rdfs:label "<text>"' (text is not escaped)."""
        self.emit(subject, schemaCoords.prefix + 'label', '"' + text + '"')


def _suffix(index):
    """Return the column suffix of a repeated field: '' first, then '1', '2'..."""
    return '' if index == 0 else str(index)


def _subject_label(row):
    """Build the human readable label of the object.

    NOTE(review): the two tests are treated as independent; in the collapsed
    source the LDCN test could also have been nested inside the SGTI test.
    """
    label = ''
    if row['SGTI'] != '':
        label = row['OGTD'] + ' (' + row['ACC'] + ') ' + row['SGTI']
    if row['LDCN'] != '':
        label = label + ' in ' + row['LDCN']
    return label


def _current_location(row):
    """Return 'LDCS, LDCM, LDCN, PVCC (PVCP)', leaving out empty LDC* parts.

    Joining only the non-empty parts avoids the leading ', ' the label used
    to get whenever LDCS was empty.
    """
    parts = [row[key] for key in ('LDCS', 'LDCM', 'LDCN') if row[key] != '']
    return ', '.join(parts) + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'


def _write_object(w, row, obj, e73, subj):
    """Write identifier, E22 object, E73 information object, title, owner, place."""
    # Triplify the 'codice' -- should exist for every entry
    if row['NCTR'] != '' and row['NCTN'] != '':
        e42 = obj + '_E42'
        w.crm(obj, 'P1_is_identified_by', e42)
        w.typed(e42, 'E42_Identifier')
        w.label(e42, row['NCTR'] + row['NCTN'])
        w.crm(e42, 'P2_has_type', '"Codice univoco del bene (NCT)"')

    # E22 Man Made Object & E73 Information Object
    w.typed(obj, 'E22_Man-Made_Object')
    w.label(obj, subj)
    w.crm(obj, 'P128_carries', e73)
    w.typed(e73, 'E73_Information_Object')
    title = row['SGTI'] if row['SGTI'] != '' else 'senza titolo'
    w.label(e73, "Opera d'arte raffigurante " + title)

    # E73 - P2 - E55
    # Object types without an AAT mapping get no type triple: writing one
    # with an empty object/subject would make the Turtle file unparsable.
    aat_code = _OBJECT_TYPES.get(row['OGTD'])
    if aat_code is not None:
        object_type = aatCoords.prefix + aat_code
        w.crm(e73, 'P2_has_type', object_type)
        w.label(object_type, row['OGTD'])

    # E73 - P1 - E35
    if row['SGTT'] != '':
        e35 = obj + '_E35'
        w.crm(e73, 'P1_is_identified_by', e35)
        w.typed(e35, 'E35_Title')
        w.label(e35, row['SGTT'])

    # E22 - P62 - E1
    if row['SGTI'] != '':
        e1 = obj + '_E1'
        w.crm(obj, 'P62_depicts', e1)
        w.typed(e1, 'E1_CRM_Entity')
        w.label(e1, row['SGTI'])
        w.crm(e1, 'P2_has_type', '"Identificazione Iconografica"')

    # Attention: these triples are identified only for C100005 Museo di
    # Palazzo Pretorio
    if row['ESC'] == 'C100005':
        # NOTE(review): the owner node is an empty string in the source,
        # presumably a '<...>' IRI that was lost; the three triples below are
        # not valid Turtle until it is restored.
        owner = ''
        w.crm(obj, 'P52_has_current_owner', owner)
        w.typed(owner, 'E74_Group')
        w.label(owner, 'Museo di Palazzo Pretorio')

    # E22 - P54 - E53
    if row['LDCN'] != '':
        w.crm(obj, 'P54_has_current_permanent_location',
              '"' + _current_location(row) + '"')


def _write_author(w, code, name, productions):
    """Link the 'carried out by' node of author *name* to every production.

    Raises TypeError when *code* is missing from the authority file, because
    ``get_aut_url`` then returns None.
    """
    role = get_aut_url(code)[1]
    key = (name + '_' + role).replace(' ', '').replace(',', '')
    carried_out = museoCoords.prefix + '_' + key
    for production in productions:
        w.crm(carried_out, 'P01_has_domain', production)


def _write_production(w, row, obj, e73, columns):
    """Write the E12 production(s) and return ``(e12, e13, productions)``.

    A second production node ('_E12F', the end of the production) exists only
    when DTSI and DTSF differ; *productions* lists the nodes that exist.
    """
    e12 = obj + '_E12'
    e13 = obj + '_E13'
    e12f = obj + '_E12F' if row['DTSI'] != row['DTSF'] else ''

    # E12 P108 E22
    w.crm(e12, 'P108_has_produced', obj)
    w.typed(e12, 'E12_Production')
    # E73 P108i E12
    w.crm(e73, 'P108i_was_produced_by', e12)
    if e12f != '':
        w.crm(e12f, 'P108_has_produced', obj)
        w.typed(e12f, 'E12_Production')
        w.label(e12f, 'Fine produzione di ' + row['SGTI'])
        # E73 P108i E12
        w.crm(e73, 'P108i_was_produced_by', e12f)
        # E12 P140i E13
        w.crm(e12f, 'P140i_was_attributed_by', e13)
        # E12 P2
        w.crm(e12f, 'P2_has_type', '"Fine"^^xsd:string')
        w.crm(e12, 'P2_has_type', '"Inizio"^^xsd:string')
        w.label(e12, 'Inizio produzione di ' + row['SGTI'])
    else:
        w.label(e12, 'Produzione di ' + row['SGTI'])
    productions = [e12] if e12f == '' else [e12, e12f]

    # E12 - P7 - E53
    # The n-th TCL column pairs with the n-th PRVC column (PRVC, PRVC1, ...).
    # The counter used to be reset on every iteration, so the first PRVC was
    # always used, and the suffixed lookup concatenated str and int.
    tcl = [name for name in columns if 'TCL' in name]
    for index, column in enumerate(tcl):
        if row[column] == 'luogo di produzione':
            place = museoCoords.prefix + row['PRVC' + _suffix(index)]
            for production in productions:
                w.crm(production, 'P7_took_place_at', place)

    # E12 - PC14 - E21
    if row['AUTH'] != '':
        _write_author(w, row['AUTH'], row['AUTN'], productions)
    if 'AUTH1' in columns and row['AUTH1'] != '':
        _write_author(w, row['AUTH1'], row['AUTN1'], productions)

    # E12 - PC14 - E21 (commissioner)
    if 'CMMN' in columns and row['CMMN'] != '':
        key = row['CMMN'].replace(' ', '').replace(',', '')
        actor = museoCoords.prefix + '_' + key
        carried_out = museoCoords.prefix + '_commit_' + key
        client = museoCoords.prefix + '_client'
        for production in productions:
            w.crm(carried_out, 'P01_has_domain', production)
        w.typed(carried_out, 'PC14_carried_out_by')
        w.label(carried_out, row['CMMN'] + ' nel ruolo di committente')
        w.crm(carried_out, 'P02_has_range', actor)
        w.typed(actor, 'E39_Actor')
        w.label(actor, row['CMMN'])
        w.crm(carried_out, 'P14.1_in_the_role_of', client)
        w.typed(client, 'E55_Type')
        w.label(client, 'Committente')

    # E12 - P4 - E52
    # NOTE(review): the end date is written only when a start date exists;
    # the collapsed source also allows the two tests to be independent.
    if row['DTSI'] != '':
        dated = [(e12, row['DTSI'])]
        if e12f != '':
            dated.append((e12f, row['DTSF']))
        for production, date in dated:
            span = museoCoords.prefix + date
            w.crm(production, 'P4_has_time-span', span)
            w.typed(span, 'E52_Time-Span')
            w.label(span, date)

    return e12, e13, productions


def _write_transfer(w, obj, transfer, title, past_name, receiver, date,
                    date_noise, residence):
    """Write one E10 transfer of custody of *obj*.

    *past_name* is the raw name of the surrendering actor, *receiver* the node
    receiving custody, *date* the raw date ('' for none), *date_noise* the
    characters removed from the date to build its node name and *residence*
    the place node of the surrendering actor.
    """
    past_actor = museoCoords.prefix + '_' + past_name.replace(' ', '')
    w.crm(transfer, 'P30_transferred_custody_of', obj)
    w.typed(transfer, 'E10_Transfer_of_Custody')
    w.label(transfer, title)
    if date != '':
        key = date
        for noise in date_noise:
            key = key.replace(noise, '')
        span = museoCoords.prefix + '_' + key
        # E10 P4 E52
        w.crm(transfer, 'P4_has_time-span', span)
        w.typed(span, 'E52_Time-Span')
        w.label(span, date)
    # E10 P29 (custody received by)
    w.crm(transfer, 'P29_custody_received_by', receiver)
    # E10 P28 (custody surrendered by)
    w.crm(transfer, 'P28_custody_surrendered_by', past_actor)
    w.typed(past_actor, 'E39_Actor')
    w.label(past_actor, past_name)
    w.crm(obj, 'P49_has_former_or_current_keeper', past_actor)
    # E74 P74 E53
    w.crm(past_actor, 'P74_has_current_or_former_residence', residence)


def _transfer_title(row, past_name, new_name):
    """Return 'Passaggio di <SGTI>[ da <past>][ a <new>]'."""
    title = 'Passaggio di ' + row['SGTI']
    if past_name != '':
        title = title + ' da ' + past_name
    if new_name != '':
        title = title + ' a ' + new_name
    return title


def _write_custody(w, row, obj, columns):
    """Write the chain of custody transfers recorded in the TCL/PRCD columns."""
    tcl = [name for name in columns if 'TCL' in name]
    filled = sum(1 for name in tcl if row[name] != '')
    # NOTE(review): with fewer than two non-empty TCL columns this looks up
    # 'PRCD0' or 'PRCD-1' below -- confirm the data never hits that case.
    last = str(filled - 1)

    # One transfer from each recorded keeper to the next one.
    for index in range(len(tcl) - 1):
        new = str(index + 1)
        old = _suffix(index)
        if row['TCL' + new] == '':
            continue
        _write_transfer(
            w, obj, obj + '_E10_' + new,
            _transfer_title(row, row['PRCD' + old], row['PRCD' + new]),
            row['PRCD' + old],
            museoCoords.prefix + '_' + row['PRCD' + new].replace(' ', ''),
            row['PRDI' + old], ' ./',
            museoCoords.prefix + row['PRVC' + old])

    # Final transfer from the last recorded keeper to the current location.
    residence = museoCoords.prefix + row['PRVC' + last]
    _write_transfer(
        w, obj, obj + '_E10',
        _transfer_title(row, row['PRCD' + last], row['LDCN']),
        row['PRCD' + last], obj + '_E74',
        row['PRDU' + last], ' /', residence)
    w.typed(residence, 'E53_Place')


def _write_condition(w, row, obj):
    """Write the E3 condition state (E22 P44 E3)."""
    if row['STCC'] == '':
        return
    # The 'E3' suffix has no underscore, unlike the other nodes; kept as is
    # so that the generated identifiers do not change.
    e3 = obj + 'E3'
    w.crm(obj, 'P44_has_condition', e3)
    w.typed(e3, 'E3_Condition_State')
    w.label(e3, 'Condizione di: ' + row['SGTI'])
    w.crm(e3, 'P2_has_type', '"' + row['STCC'] + '"')


def _write_inscription(w, row, obj, subj):
    """Write the E25 feature and the E34 inscription it carries.

    NOTE(review): type, language, technique and position are written only
    when ISRI is set; confirm this nesting against the original.
    """
    if row['ISRI'] == '':
        return
    e25 = obj + '_E25'
    e34 = obj + '_E34'
    w.crm(obj, 'P56_bears_feature', e25)
    w.typed(e25, 'E25_Man-Made_Feature')
    w.label(e25, 'Iscrizione su ' + subj)
    w.crm(e25, 'P128_carries', e34)
    w.typed(e34, 'E34_Inscription')
    w.label(e34, 'Iscrizione: ' + row['ISRI'])
    w.crm(e34, 'P3_has_note', '"' + row['ISRI'] + '"')
    # E34 P2 E55
    if row['ISRT'] != '':
        w.crm(e34, 'P2_has_type', '"' + row['ISRT'] + '"')
    # E34 P72 E56
    if row['ISRL'] != '':
        language = museoCoords.prefix + '_' + row['ISRL']
        w.crm(e34, 'P72_has_language', language)
        w.typed(language, 'E56_Language')
        w.label(language, row['ISRL'])
    if row['ISRS'] != '':
        e65 = obj + '_InE65'
        technique = obj + '_' + row['ISRS'].replace(' ', '')
        w.crm(e34, 'P92i_was_brought_into_existence_by', e65)
        w.typed(e65, 'E65_Creation')
        w.label(e65, "Creazione dell'Iscrizione " + row['ISRI'])
        w.crm(e65, 'P32_used_general_technique', technique)
        w.typed(technique, 'E55_Type')
        w.label(technique, row['ISRS'])
    if row['ISRP'] != '':
        w.crm(e25, 'P3_has_note', '"' + row['ISRP'] + '"^^xsd:string')


def _write_dimension(w, row, obj, column, name, aat_code):
    """Write the E54 dimension *name* taken from *column* (E22 P43 E54)."""
    if row[column] == '':
        return
    dimension = obj + '_' + name
    kind = aatCoords.prefix + aat_code
    w.crm(obj, 'P43_has_dimension', dimension)
    w.typed(dimension, 'E54_Dimension')
    w.label(dimension, name + ': ' + row[column] + row['MISU'])
    # E54 P90 E60
    w.crm(dimension, 'P90_has_value', '"' + row[column] + '"^^xsd:integer')
    # E54 P2 E55
    w.crm(dimension, 'P2_has_type', kind)
    w.label(kind, name.lower())
    # E54 P91 E58
    if row['MISU'] != '':
        unit = aatCoords.prefix + '300379098'
        w.crm(dimension, 'P91_has_unit', unit)
        w.typed(unit, 'E58_Measurement_Unit')
        w.label(unit, row['MISU'])


def _write_materials(w, row, obj, productions):
    """Write materials (P45) and techniques (P32) listed in MTC, '/'-separated.

    Raises TypeError when an entry is missing from the AAT table, because
    ``get_elem`` then returns None.
    """
    if row['MTC'] == '':
        return
    for part in row['MTC'].split('/'):
        mtc = part.lstrip()
        aat_code, kind = get_elem(mtc)
        concept = aatCoords.prefix + aat_code
        if kind == 'MTC/M':
            w.crm(obj, 'P45_consists_of', concept)
            w.typed(concept, 'E57_Material')
        else:
            # E12 Production - P32 used technique - E55 Type
            for production in productions:
                w.crm(production, 'P32_used_general_technique', concept)
            w.typed(concept, 'E55_Type')
        w.label(concept, mtc)


def _write_attribution(w, row, e12, e13):
    """Write the E13 attribute assignment motivating the authorship."""
    if row['AUTM'] == '':
        return
    # E12 P140i E13
    w.crm(e12, 'P140i_was_attributed_by', e13)
    w.typed(e13, 'E13_Attribute_Assignment')
    w.label(e13, 'Motivazione attribuzione')
    w.crm(e13, 'P2_has_type', '"' + row['AUTM'] + '"')
    author = autCoords.prefix + get_aut_url(row['AUTH'])[0]
    w.crm(e13, 'P141_assigned', author)


def _fetch_iconclass_label(code):
    """Download the iconclass.org page of *code* and return its notation label.

    Raises whatever ``urlopen`` raises when the page cannot be fetched, and
    AttributeError when the page lacks the expected elements.
    """
    # Third-party and needed only here: imported lazily so that the rest of
    # the module can be used without Beautiful Soup installed.
    from bs4 import BeautifulSoup

    with urlopen('http://iconclass.org/rdk/' + str(code)) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    # kill all script and style elements
    for element in soup(["script", "style"]):
        element.extract()
    current = soup.find("div", {"id": "ic_current"})
    notation = current.find("a", {"class": "ic_notation"})
    text = notation.text
    return text[text.find(' ') + 1:]


def _write_iconclass(w, row, obj):
    """Write one P62_depicts triple per ':'-separated Iconclass notation in DESI."""
    for code in row['DESI'].replace(' ', '').split(':'):
        if code == '':
            # No notation recorded: nothing to look up and nothing to depict.
            continue
        # NOTE(review): the label is fetched but never written to the output.
        # The request is kept because it makes a wrong notation fail loudly;
        # decide whether the label should become an rdfs:label.
        _fetch_iconclass_label(code)
        entity = iconCoords.prefix + code.replace("(", "%28").replace(")", "%29")
        w.crm(obj, 'P62_depicts', entity)
        w.typed(entity, 'E1_CRM_Entity')


def _write_row(row, output):
    """Write every triple derived from one CSV *row* to *output*."""
    w = _TripleWriter(output)
    columns = list(row)
    obj = museoCoords.prefix + row['URL']
    e73 = obj + '_E73'
    subj = _subject_label(row)

    _write_object(w, row, obj, e73, subj)
    e12, e13, productions = _write_production(w, row, obj, e73, columns)
    _write_custody(w, row, obj, columns)
    _write_condition(w, row, obj)
    _write_inscription(w, row, obj, subj)
    _write_dimension(w, row, obj, 'MISA', 'Altezza', '300055644')
    _write_dimension(w, row, obj, 'MISL', 'Larghezza', '300055647')
    _write_materials(w, row, obj, productions)
    _write_attribution(w, row, e12, e13)
    # E22 P3 (free-text note); inner double quotes become guillemets so that
    # they cannot terminate the Turtle literal.
    if row['NSC'] != '':
        note = row['NSC'].replace(' "', ' «').replace('"', '»')
        w.crm(obj, 'P3_has_note', '"' + note + '"^^xsd:string')
    _write_iconclass(w, row, obj)
    output.write('\n')


def main(csv_path=None, ttl_path=None, limit=None):
    """Convert the object CSV into a Turtle file.

    csv_path -- input file; defaults to import_dir + filePrefix + fileType + '.csv'
    ttl_path -- output file; defaults to export_dir + filePrefix + fileType + '.ttl'
    limit    -- maximum number of rows to convert; defaults to max_entries
    """
    if csv_path is None:
        csv_path = import_dir + filePrefix + fileType + '.csv'
    if ttl_path is None:
        ttl_path = export_dir + filePrefix + fileType + '.ttl'
    if limit is None:
        limit = max_entries

    with open(csv_path, newline="") as csv_file, open(ttl_path, 'w') as output:
        writeTTLHeader(output)
        for count, row in enumerate(csv.DictReader(csv_file), start=1):
            # Checked before writing, so exactly `limit` rows are converted
            # (the check used to come after the row, converting one extra).
            if count > limit:
                break
            _write_row(row, output)


if __name__ == '__main__':
    main()