|
@@ -0,0 +1,178 @@
|
|
|
+
|
|
|
+import csv
|
|
|
+
|
|
|
+import unicodedata
|
|
|
+
|
|
|
+from collections import OrderedDict
|
|
|
+
|
|
|
+from urllib.request import urlopen
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+
|
|
|
+import json
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+from datetime import datetime
|
|
|
+
|
|
|
+from random import *
|
|
|
+
|
|
|
+import sys
|
|
|
+
|
|
|
+import json
|
|
|
+
|
|
|
# Directory the input CSV files are read from. File names are appended with
# plain string concatenation, so the trailing slash is required.
import_dir = "/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/corretti/"
# Directory the generated .ttl file is written to (same trailing-slash rule).
export_dir = "/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/MPP/"
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class RDFcoords:
    """Plain holder for a namespace URI, its prefix and an optional code.

    Attributes are stored exactly as passed; no validation is performed.
    """

    def __init__(self, uri, prefix, code=None):
        # uri:    namespace URI text (callers in this file pass it wrapped in <...>)
        # prefix: prefix label (callers in this file include the trailing colon)
        # code:   optional extra value, None unless supplied
        self.uri, self.prefix, self.code = uri, prefix, code
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Namespace URI / prefix pairs. Each is emitted as an '@prefix' line by
# writeTTLHeader; the prefix text is also used to build prefixed names.
museoCoords = RDFcoords(
    uri='<http://palazzopretorio.prato.it/it/le-opere/alcuni-capolavori/>',
    prefix='mpp:',
)
autCoords = RDFcoords(
    uri='<http://palazzopretorio.prato.it/it/opere/autori/>',
    prefix='aut:',
)
cidocCoords = RDFcoords(
    uri='<http://www.cidoc-crm.org/cidoc-crm/>',
    prefix='crm:',
)
aatCoords = RDFcoords(
    uri='<http://vocab.getty.edu/aat/>',
    prefix='aat:',
)
nsCoords = RDFcoords(
    uri='<http://www.w3.org/1999/02/22-rdf-syntax-ns#>',
    prefix='rdf:',
)
schemaCoords = RDFcoords(
    uri='<http://www.w3.org/2000/01/rdf-schema#>',
    prefix='rdfs:',
)
xsdCoords = RDFcoords(
    uri='<http://www.w3.org/2001/XMLSchema#>',
    prefix='xsd:',
)
iconCoords = RDFcoords(
    uri='<http://iconclass.org/>',
    prefix='ico:',
)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def triple(subject, predicate, object1):
    """Return subject, predicate and object joined by single spaces.

    No terminator is appended; the caller adds it.
    """
    return ' '.join((subject, predicate, object1))
|
|
|
+
|
|
|
+
|
|
|
def doublet(predicate, object1):
    """Return ' <predicate> <object1>' (leading space, no subject, no terminator)."""
    return ''.join((' ', predicate, ' ', object1))
|
|
|
+
|
|
|
+
|
|
|
def singlet(object1):
    """Return *object1* prefixed with a single space (no terminator)."""
    return ''.join((' ', object1))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Line endings appended after a written statement fragment.
# In Turtle syntax ';' and ',' continue a statement and '.' ends it.
continueLine1 = " ;\n"
continueLine2 = " ,\n"
closeLine = " .\n"
|
|
|
+
|
|
|
+
|
|
|
def writeTTLHeader(output):
    """Write one '@prefix' line per namespace, then a blank line.

    Args:
        output: object exposing ``write(str)``; receives the header text.
    """
    # Emission order is kept exactly as the declarations were originally written.
    declared = (
        museoCoords,
        cidocCoords,
        aatCoords,
        schemaCoords,
        nsCoords,
        autCoords,
        xsdCoords,
        iconCoords,
    )
    for coords in declared:
        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)

    output.write('\n')
|
|
|
+
|
|
|
+
|
|
|
# Input and output file names are built as <dir> + filePrefix + fileType + extension.
filePrefix = "00_SR20OA_"
fileType = "Datini"
# Upper bound checked against the running row counter in the export loop.
max_entries = 1_000_000_000
|
|
|
+
|
|
|
def get_aut_url(code):
    """Look up an author row by numeric code in the authors CSV.

    Reads ``import_dir + 'AR20AUT_' + fileType + '.csv'`` and compares the
    ``AUTH`` column of each row with *code*, both converted with ``int``.

    Args:
        code: author code; any value accepted by ``int()``.

    Returns:
        ``[URL, AUTQ]`` column values of the first matching row (``AUTQ`` is
        returned as read, so it may be an empty string), or ``None`` when no
        row matches.

    Raises:
        ValueError: if *code*, or an ``AUTH`` cell read before a match is
            found, cannot be converted to ``int``.
    """
    wanted = int(code)  # convert once instead of once per row
    # The context manager closes the file on every exit path, including the
    # early return on a match.
    with open(import_dir + 'AR20AUT_' + fileType + '.csv', newline="") as aut_file:
        for row in csv.DictReader(aut_file):
            if int(row['AUTH']) == wanted:
                return [row['URL'], row['AUTQ']]
    return None
|
|
|
+
|
|
|
def get_role(role, roles_csv='/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_RUOLI.csv'):
    """Return the ``AAT`` value of the first row whose ``Label`` equals *role*.

    Args:
        role: label text to match exactly against the ``Label`` column.
        roles_csv: path of the lookup CSV; defaults to the original
            hard-coded location, so existing one-argument calls are unchanged.

    Returns:
        The ``AAT`` column value of the first matching row, or ``None`` when
        no row matches.
    """
    # The context manager closes the file even on the early return.
    with open(roles_csv, newline="") as role_file:
        for row in csv.DictReader(role_file):
            if row['Label'] == role:
                return row['AAT']
    return None
|
|
|
+
|
|
|
def get_elem(mtc, mtc_csv='/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_MTC.csv'):
    """Return ``[AAT, Type]`` of the first row whose ``MTC`` equals *mtc*.

    Args:
        mtc: text to match exactly against the ``MTC`` column.
        mtc_csv: path of the lookup CSV; defaults to the original hard-coded
            location, so existing one-argument calls are unchanged.

    Returns:
        A two-item list with the ``AAT`` and ``Type`` column values of the
        first matching row, or ``None`` when no row matches.
    """
    # The context manager closes the file even on the early return.
    with open(mtc_csv, newline="") as mtc_file:
        for row in csv.DictReader(mtc_file):
            if row['MTC'] == mtc:
                return [row['AAT'], row['Type']]
    return None
|
|
|
+
|
|
|
def _ttl_string(value):
    """Return *value* as a double-quoted Turtle string literal.

    Backslash, double quote, line feed and carriage return are escaped so
    the value cannot terminate the literal early.
    """
    escaped = (
        value.replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )
    return '"' + escaped + '"'


# Export: for every CSV row that has both NCTR and NCTN, write one
# 'P1_is_identified_by' triple whose subject is the 'mpp:'-prefixed URL
# column and whose object is the concatenated NCTR + NCTN code.
# The output is opened as UTF-8, the encoding Turtle documents use.
with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
        export_dir + filePrefix + fileType + '.ttl', 'w', encoding='utf-8') as output:
    reader = csv.DictReader(csv_file)
    writeTTLHeader(output)

    for ii, row in enumerate(reader, start=1):
        # Checked before writing so that at most max_entries rows are
        # exported (checking after the write let one extra row through).
        if ii > max_entries:
            break

        if row['NCTR'] == '' or row['NCTN'] == '':
            continue  # no complete catalogue code: nothing to write for this row

        codice = row['NCTR'] + row['NCTN']
        datplaceHolder = museoCoords.prefix + row['URL']

        line = triple(
            datplaceHolder,
            cidocCoords.prefix + 'P1_is_identified_by',
            _ttl_string(codice),
        ) + closeLine
        output.write(line)
        output.write('\n')
|