|
@@ -0,0 +1,222 @@
|
|
|
+
|
|
|
+import csv
|
|
|
+import re
|
|
|
+
|
|
|
+import unicodedata
|
|
|
+
|
|
|
+from collections import OrderedDict
|
|
|
+
|
|
|
+import json
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+from datetime import datetime
|
|
|
+
|
|
|
+from random import *
|
|
|
+
|
|
|
+import sys
|
|
|
+
|
|
|
+import json
|
|
|
+
|
|
|
# The CSV input and the generated Turtle file live in the same directory.
import_dir = export_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_ASPO/'
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class RDFcoords:
    """Hold the coordinates of one RDF namespace.

    Attributes:
        uri: namespace URI, stored exactly as passed in.
        prefix: prefix string, stored exactly as passed in.
        code: optional extra value; ``None`` when not supplied.
    """

    def __init__(self, uri, prefix, code=None):
        self.uri, self.prefix, self.code = uri, prefix, code
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Namespaces used in the generated Turtle: each pairs the bracketed namespace
# URI with the prefix under which it is declared and referenced.
aspoCoords = RDFcoords(
    uri='<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>',
    prefix='aspo:',
)
foafCoords = RDFcoords(uri='<http://xmlns.com/foaf/0.1/>', prefix='foaf:')
cidocCoords = RDFcoords(uri='<http://www.cidoc-crm.org/cidoc-crm/>', prefix='crm:')
schemaCoords = RDFcoords(uri='<http://schema.org/>', prefix='schema:')
personCoords = RDFcoords(uri='<http://www.w3.org/ns/person#>', prefix='person:')
nsCoords = RDFcoords(uri='<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', prefix='rdf:')
rdfsCoords = RDFcoords(uri='<http://www.w3.org/2000/01/rdf-schema#>', prefix='rdfs:')
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def triple(subject, predicate, object1):
    """Return subject, predicate and object joined by single spaces.

    No terminator is appended; the caller adds one.
    """
    return ' '.join((subject, predicate, object1))
|
|
|
+
|
|
|
def doublet(predicate, object1):
    """Return a predicate/object pair with a leading space.

    The result is ``' ' + predicate + ' ' + object1``, with no terminator.
    """
    return ''.join((' ', predicate, ' ', object1))
|
|
|
+
|
|
|
def singlet(object1):
    """Return *object1* prefixed with a single space, with no terminator."""
    return ''.join((' ', object1))
|
|
|
+
|
|
|
+
|
|
|
# Statement terminators appended after each written fragment.
continueLine1 = " ;\n"  # semicolon + newline
continueLine2 = " ,\n"  # comma + newline
closeLine = " .\n"      # full stop + newline
|
|
|
+
|
|
|
def writeTTLHeader(output):
    """Write one ``@prefix`` line per namespace to *output*, then a blank line.

    *output* only needs a ``write(str)`` method. The namespaces are written in
    a fixed order: aspo, foaf, crm, person, schema, rdf, rdfs.
    """
    namespaces = (
        aspoCoords,
        foafCoords,
        cidocCoords,
        personCoords,
        schemaCoords,
        nsCoords,
        rdfsCoords,
    )
    for coords in namespaces:
        output.write(' '.join(('@prefix', coords.prefix, coords.uri)) + closeLine)
    output.write('\n')
|
|
|
+
|
|
|
+
|
|
|
filePrefix = 'Onomastica_'
fileType = 'Datini'
# Maximum number of CSV rows to read; lower it to convert only a sample.
max_entries = 1000000000


def _cell(row, column):
    """Return the text of *column* in *row*, or '' when the row is short.

    csv.DictReader fills the missing trailing fields of a short row with None;
    a column that is absent from the header still raises KeyError.
    """
    return row[column] or ''


def _ttl_literal(text):
    """Return *text* as a double-quoted Turtle string literal.

    Backslash, double quote, LF and CR may not appear raw inside a
    double-quoted Turtle string, so they are written as escapes.
    """
    # Backslash goes first so the escapes added afterwards are not doubled.
    escaped = text.replace('\\', '\\\\').replace('"', '\\"')
    escaped = escaped.replace('\n', '\\n').replace('\r', '\\r')
    return '"' + escaped + '"'


def _local_name(text):
    """Return *text* reduced to its ASCII letters, for use after ``aspo:``.

    NOTE(review): different labels can reduce to the same (possibly empty)
    local name and therefore end up sharing one IRI.
    """
    return re.sub(r'[^A-Za-z]', '', text)


def _write_triple(output, subject, predicate, object1):
    """Write one complete, terminated triple to *output*."""
    output.write(triple(subject, predicate, object1) + closeLine)


def _write_text_columns(output, subject, row, columns):
    """Write one literal-valued triple for every non-empty column listed.

    *columns* holds ``(csv column, predicate, newline_sub)`` tuples; when
    ``newline_sub`` is not None, line feeds in the cell are replaced by it.
    """
    for column, predicate, newline_sub in columns:
        text = _cell(row, column)
        if text == '':
            continue
        if newline_sub is not None:
            text = text.replace('\n', newline_sub)
        _write_triple(output, subject, predicate, _ttl_literal(text))


def _write_linked_resource(output, subject, predicate, label_for_iri, rdf_class, label):
    """Link *subject* to an ``aspo:`` resource, then type and label that resource."""
    resource = aspoCoords.prefix + _local_name(label_for_iri)
    _write_triple(output, subject, predicate, resource)
    _write_triple(output, resource, nsCoords.prefix + 'type', rdf_class)
    _write_triple(output, resource, rdfsCoords.prefix + 'label', _ttl_literal(label))


# Literal columns written before the occupation/ancestor triples.
_NAME_COLUMNS = (
    ('nome proprio', foafCoords.prefix + 'givenName', ''),
    ('nome di famiglia', foafCoords.prefix + 'familyName', ''),
    ('Alias', schemaCoords.prefix + 'alternateName', None),
    ('genere', foafCoords.prefix + 'gender', None),
    ('patronimico/matronimico', personCoords.prefix + 'patronymicName', ''),
)

# Literal columns written after the honorific prefixes.
_TRAILING_COLUMNS = (
    ('place_occupation_Qualifica', schemaCoords.prefix + 'workLocation', None),
    ('biogHist p', schemaCoords.prefix + 'description', ' '),
)

# The output is opened as UTF-8 because that is the encoding Turtle requires.
# NOTE(review): the CSV is still read with the platform default encoding --
# confirm the encoding of the export and pass it explicitly.
with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
        export_dir + filePrefix + fileType + '.ttl', 'w', encoding='utf-8') as output:
    reader = csv.DictReader(csv_file)
    writeTTLHeader(output)
    ii = 0
    for ii, row in enumerate(reader, start=1):
        if row['entityType'] == 'person':
            id_aspo = row['recordId']
            aspoPlaceHolder = aspoCoords.prefix + id_aspo

            # The FOAF class is spelled with a capital P, as used for the
            # ancestor resources below.
            _write_triple(output, aspoPlaceHolder,
                          nsCoords.prefix + 'type',
                          foafCoords.prefix + 'Person')
            _write_triple(output, aspoPlaceHolder,
                          foafCoords.prefix + 'name',
                          _ttl_literal(_cell(row, 'nameEntry@normal')))

            _write_text_columns(output, aspoPlaceHolder, row, _NAME_COLUMNS)

            occupation = _cell(row, 'occupation')
            if occupation not in ('', '\n'):
                _write_linked_resource(output, aspoPlaceHolder,
                                       schemaCoords.prefix + 'hasOccupation',
                                       occupation,
                                       schemaCoords.prefix + 'Occupation',
                                       occupation)

            for column in ('avo 1', 'avo 2'):
                ancestor = _cell(row, column)
                if ancestor != '':
                    # Every 'di ' occurrence is removed before the name is used.
                    ancestor = ancestor.replace('di ', '')
                    _write_linked_resource(output, aspoPlaceHolder,
                                           schemaCoords.prefix + 'relatedTo',
                                           ancestor,
                                           foafCoords.prefix + 'Person',
                                           ancestor)

            qualifiche = _cell(row, 'Qualifica')
            if qualifiche != '':
                # split() returns a one-item list when there is no '|'.
                for qualifica in qualifiche.split('|'):
                    _write_triple(output, aspoPlaceHolder,
                                  schemaCoords.prefix + 'honorificPrefix',
                                  _ttl_literal(qualifica.replace('\n', '')))

            _write_text_columns(output, aspoPlaceHolder, row, _TRAILING_COLUMNS)

            output.write('\n')

        # Stop once max_entries rows have been read; the previous `>` test
        # let one extra row through.
        if ii >= max_entries:
            break
|