Browse Source

Add all parsers

federicaspinelli 2 years ago
parent
commit
891b31c62e
45 changed files with 2004 additions and 856 deletions
  1. 253 0
      ASPO/CSV_to_RDF/ceppo/ASPO_CSV_to_RDF_onomastica_ceppo.py
  2. 31 14
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_file.ipynb
  3. 30 41
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_fonds.ipynb
  4. 6 20
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item.ipynb
  5. 4 10
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_creation.ipynb
  6. 2 7
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_creation_actor.ipynb
  7. 2 10
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_creation_date.ipynb
  8. 3 8
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange.ipynb
  9. 3 7
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_date.ipynb
  10. 4 8
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_place.ipynb
  11. 2 5
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_receiver.ipynb
  12. 3 7
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_sender.ipynb
  13. 3 6
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_extent.ipynb
  14. 15 11
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_id.ipynb
  15. 18 16
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_phydesc.ipynb
  16. 45 43
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_physfacet.ipynb
  17. 23 23
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_phystech.ipynb
  18. 8 9
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_scopecontent.ipynb
  19. 9 9
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_segnatura.ipynb
  20. 3 9
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_title.ipynb
  21. 22 18
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_type.ipynb
  22. 24 7
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_series.ipynb
  23. 243 0
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_subfonds.ipynb
  24. 0 203
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_subfonds_genreform.ipynb
  25. 31 22
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_subseries.ipynb
  26. 0 0
      ASPO/CSV_to_RDF/datini/CSV_to_RDF_onomastica_datini.py
  27. 45 24
      ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_fonds.ipynb
  28. 189 41
      ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_item.ipynb
  29. 393 0
      ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_item_person.ipynb
  30. 27 36
      ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_type.ipynb
  31. 240 0
      ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_date.ipynb
  32. 64 25
      ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_fonds.ipynb
  33. 54 21
      ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_item.ipynb
  34. 2 2
      ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_item_event_exchange_date_place.ipynb
  35. 21 20
      ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_type.ipynb
  36. 32 20
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_all.ipynb
  37. 75 55
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_eac.ipynb
  38. 26 12
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file.ipynb
  39. 1 7
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_dimensions.ipynb
  40. 3 17
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_event_creation.ipynb
  41. 3 9
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_event_creation_date.ipynb
  42. 2 7
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_extent.ipynb
  43. 20 20
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_physfacet.ipynb
  44. 19 23
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_phystech.ipynb
  45. 1 4
      ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_scopecontent.ipynb

+ 253 - 0
ASPO/CSV_to_RDF/ceppo/ASPO_CSV_to_RDF_onomastica_ceppo.py

@@ -0,0 +1,253 @@
+#Parser to convert the Ceppo Vecchio onomastics CSV file into TTL format
+
+# Utilities to read/write csv files
+import csv
+# Utilities to handle character encodings
+import unicodedata
+# Ordered Dicts
+from collections import OrderedDict
+
+import json
+import re
+
+
+# OPTIONAL IMPORTS
+
+# For timestamping/simple speed tests
+from datetime import datetime
+# Random number generator
+from random import *
+# System & command line utilities
+import sys
+# Json for the dictionary
+import json
+
+# Folder the source CSV is read from and folder the generated TTL is written to.
+# Both are used as plain string prefixes, so they must end with a path separator.
+# NOTE(review): these are absolute paths on one specific machine -- adjust before running elsewhere.
+import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/ceppo/'
+export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/ceppo/'
+
+# Custom class to store URIs + related infos for the ontologies/repositories
+
+class RDFcoords:
+    """Coordinates of an ontology/repository namespace.
+
+    uri    -- namespace URI, already wrapped in angle brackets
+    prefix -- prefix used for the namespace, trailing colon included
+    code   -- optional short code attached to the namespace (None when not given)
+    """
+
+    def __init__(self, uri, prefix, code = None):
+        self.uri, self.prefix, self.code = uri, prefix, code
+
+# Repositories: one RDFcoords per namespace, pairing the namespace URI with the
+# prefix used in the generated file. writeTTLHeader emits one @prefix line for each.
+# Subjects are built as aspo:<recordId>
+aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/patrimonio/complessi-archivistici-e-soggetti-produttori/>', 'aspo:')
+foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
+cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
+schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
+personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
+# Named "ns" but bound to the rdf: prefix (used for rdf:type)
+nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
+rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
+
+# Basic functions for triples / shortened triples in TTL format
+
+def triple(subject, predicate, object1):
+    """Return 'subject predicate object' separated by single spaces (no terminator)."""
+    return ' '.join((subject, predicate, object1))
+
+def doublet(predicate, object1):
+    """Return 'predicate object' indented by four spaces, for a statement whose subject is omitted."""
+    return ''.join(('    ', predicate, ' ', object1))
+
+def singlet(object1):
+    """Return the object alone, indented by eight spaces."""
+    return ''.join(('        ', object1))
+
+# Line endings in TTL format
+# ';' -- the next line continues with the same subject
+continueLine1 = ' ;\n'
+# ',' -- the next line continues with the same subject and predicate
+continueLine2 = ' ,\n'
+# '.' -- ends the statement
+closeLine = ' .\n'
+
+def writeTTLHeader(output):
+    """Write one '@prefix' declaration per namespace to output, then an empty line.
+
+    output -- object with a write() method, e.g. the TTL file opened for writing
+    """
+    # The tuple order is the order of the @prefix lines in the file
+    namespaces = (aspoCoords, foafCoords, cidocCoords, personCoords,
+                  schemaCoords, nsCoords, rdfsCoords)
+    for coords in namespaces:
+        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
+
+    output.write('\n')
+
+
+filePrefix = 'onomastica_'
+fileType = 'ceppo_vecchio'
+# Upper bound on the number of CSV rows read (lower it for quick test runs)
+max_entries = 1000000000
+
+# Base of the URIs minted for the occupation and ancestor nodes of a person
+schedaBaseUri = 'http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/'
+
+# CSV column -> predicate, for the values written as plain string literals.
+# Two groups, so that the triples of a person are always written in the same order:
+# the first group precedes the occupation/ancestor nodes, the second follows them.
+leadingLiterals = (
+    ('nome proprio', foafCoords.prefix + 'givenName'),
+    ('nome di famiglia', foafCoords.prefix + 'familyName'),
+    ('Alias', schemaCoords.prefix + 'alternateName'),
+    ('genere', foafCoords.prefix + 'gender'),
+    ('patronimico/matronimico', personCoords.prefix + 'patronymicName'),
+)
+trailingLiterals = (
+    ('Qualifica', schemaCoords.prefix + 'honorificPrefix'),
+    ('place_occupation_Qualifica', schemaCoords.prefix + 'workLocation'),
+    ('biogHist p', cidocCoords.prefix + 'P3_has_note'),
+)
+
+
+def cleanValue(text):
+    """Return text with every run of whitespace (newlines included) collapsed to one space and the ends trimmed.
+
+    None (what csv.DictReader yields for a row shorter than the header) is treated as an empty string.
+    """
+    if text is None:
+        return ''
+    return re.sub(r'\s+', ' ', text).strip()
+
+
+def literal(text):
+    """Return text as a double-quoted Turtle string literal.
+
+    Backslashes, double quotes and line breaks are escaped, since none of them
+    may appear verbatim inside a "..." literal.
+    """
+    escaped = (text.replace('\\', '\\\\')
+                   .replace('"', '\\"')
+                   .replace('\n', '\\n')
+                   .replace('\r', '\\r'))
+    return '"' + escaped + '"'
+
+
+def writeTriple(output, subject, predicate, object1):
+    """Write one complete 'subject predicate object .' statement to output."""
+    output.write(triple(subject, predicate, object1) + closeLine)
+
+
+def writeLiterals(output, subject, row, properties):
+    """Write one literal triple for each (column, predicate) pair whose cleaned value is not empty."""
+    for column, predicate in properties:
+        value = cleanValue(row[column])
+        if value:
+            writeTriple(output, subject, predicate, literal(value))
+
+
+# The encoding is stated explicitly so the result does not depend on the platform default
+with open(import_dir + filePrefix + fileType + '.csv', newline="", encoding='utf-8') as csv_file, open(
+        export_dir + filePrefix + fileType + '.ttl', 'w', encoding='utf-8') as output:
+    reader = csv.DictReader(csv_file)
+    writeTTLHeader(output)
+    # ii counts the rows read so far; it is used to process a limited number of entries for testing purposes
+    for ii, row in enumerate(reader, start=1):
+        name = cleanValue(row['nameEntry@normal'])
+        # Rows without a normalised name entry produce no triples
+        if name:
+
+            id_aspo = row['recordId']
+
+            #placeHolders
+            aspoPlaceHolder = aspoCoords.prefix + id_aspo
+            identifierPlaceHolder = aspoPlaceHolder + "_E42"
+
+            # Identifier node carrying the record id
+            writeTriple(output, aspoPlaceHolder,
+                        cidocCoords.prefix + 'P1_is_identified_by',
+                        identifierPlaceHolder)
+            writeTriple(output, identifierPlaceHolder,
+                        nsCoords.prefix + 'type',
+                        cidocCoords.prefix + 'E42_Identifier')
+            writeTriple(output, identifierPlaceHolder,
+                        rdfsCoords.prefix + 'label',
+                        literal(id_aspo))
+
+            # The FOAF class is foaf:Person (capital P); term names are case-sensitive
+            writeTriple(output, aspoPlaceHolder,
+                        nsCoords.prefix + 'type',
+                        foafCoords.prefix + 'Person')
+            writeTriple(output, aspoPlaceHolder,
+                        foafCoords.prefix + 'name',
+                        literal(name))
+
+            writeLiterals(output, aspoPlaceHolder, row, leadingLiterals)
+
+            # The occupation is a node of its own, labelled with the CSV value
+            occupation = cleanValue(row['occupation'])
+            if occupation:
+                occupationPlaceHolder = '<' + schedaBaseUri + id_aspo + "/occupation>"
+                writeTriple(output, aspoPlaceHolder,
+                            schemaCoords.prefix + 'hasOccupation',
+                            occupationPlaceHolder)
+                writeTriple(output, occupationPlaceHolder,
+                            nsCoords.prefix + 'type',
+                            schemaCoords.prefix + 'Occupation')
+                writeTriple(output, occupationPlaceHolder,
+                            rdfsCoords.prefix + 'label',
+                            literal(occupation))
+
+            # Each ancestor ("avo") becomes a related foaf:Person node
+            for suffix, column in (('avo1', 'avo 1'), ('avo2', 'avo 2')):
+                ancestor = cleanValue(row[column])
+                if ancestor:
+                    avoPlaceHolder = '<' + schedaBaseUri + id_aspo + '/' + suffix + '>'
+                    writeTriple(output, aspoPlaceHolder,
+                                schemaCoords.prefix + 'relatedTo',
+                                avoPlaceHolder)
+                    writeTriple(output, avoPlaceHolder,
+                                nsCoords.prefix + 'type',
+                                foafCoords.prefix + 'Person')
+                    writeTriple(output, avoPlaceHolder,
+                                rdfsCoords.prefix + 'label',
+                                literal(ancestor))
+
+            writeLiterals(output, aspoPlaceHolder, row, trailingLiterals)
+
+        # An empty line follows every row read, whether or not it produced triples
+        output.write('\n')
+        #
+        # Limit number of entries processed (if desired)
+        if ii >= max_entries:
+            break

+ 31 - 14
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_file.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -61,6 +61,7 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -68,12 +69,13 @@
     "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -99,7 +101,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -115,12 +117,14 @@
     "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,11 +144,12 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
     "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
+    "        # E73 Information Object\n",
     "        e73placeHolder =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\" \n",
     "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
@@ -152,8 +157,7 @@
     "        output.write(line)\n",
     "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "        \n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        # E35 Title\n",
     "        if(row['titolo_aspo'] != 'None'):\n",
     "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
     "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
@@ -162,8 +166,7 @@
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        #\n",
-    "        # Triplify the 'segnatura' -- should exist for every entry\n",
+    "        # E42 Identifier\n",
     "        if(row['segnatura_parent'] != ''):\n",
     "            e42placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \">\"\n",
     "            line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
@@ -172,7 +175,7 @@
     "            output.write(line)\n",
     "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Segnatura: Fondo Datini, ' + row['segnatura_parent'] + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
+    "        # Tipologia\n",
     "        if(row['tipologia'] != ''):\n",
     "            tipologie = []\n",
     "            pipe = \"|\" \n",
@@ -185,10 +188,24 @@
     "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
     "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
     "                output.write(line)    \n",
-    "        \n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        E53placeHolder = '<http://www.archiviodistato.prato.it>'\n",
+    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 30 - 41
ASPO/CSV_to_RDF/datini/ASPO_CSV_to_RDF_datini_all.ipynb → ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_fonds.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -60,6 +60,8 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -72,7 +74,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -98,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -114,17 +116,19 @@
     "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
     "filePrefix = 'data_'\n",
-    "fileType = 'subfonds'\n",
+    "fileType = 'fonds'\n",
     "max_entries = 1000000000\n",
     "\n",
     "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
@@ -139,11 +143,12 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
     "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
+    "        # E73 Information Object\n",
     "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
@@ -151,8 +156,7 @@
     "        output.write(line)\n",
     "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "\n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        # E35 Title\n",
     "        if(row['titolo_aspo'] != 'None'):\n",
     "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
     "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
@@ -161,39 +165,24 @@
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
-    "        if(row['tipologia'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['tipologia']:\n",
-    "                tipologie = row['tipologia'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "        \n",
-    "        #Genreform disponibile solo per subfonds \n",
-    "        if(row['genere'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['genere']:\n",
-    "                tipologie = row['genere'].split('|')\n",
-    "                for type in tipologie:\n",
-    "                    tipo = type\n",
-    "                    e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipo.replace(\" \", \"\") + \">\"\n",
-    "                    line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        E53placeHolder = '<http://www.archiviodistato.prato.it>'\n",
+    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 6 - 20
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item.ipynb

@@ -53,7 +53,6 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -61,10 +60,11 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
     "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -73,7 +73,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -127,6 +126,8 @@
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -154,11 +155,12 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
     "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
+    "        # E73 Information Object\n",
     "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
@@ -166,22 +168,6 @@
     "        output.write(line)\n",
     "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line) \n",
-    "             \n",
-    "        if(row['tipologia'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['tipologia']:\n",
-    "                tipologie = row['tipologia'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 4 - 10
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_creation.ipynb

@@ -75,7 +75,6 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
@@ -166,9 +165,8 @@
     "        # Skip the first line as it carries info we don't want to triplify\n",
     "        if(first):\n",
     "            first = False\n",
-    "            continue\n",
-    "        \n",
-    "        #Evento creazione\n",
+    "            continue        \n",
+    "        # E65 Creation\n",
     "        if(row['tipologia'] != 'carteggio'):\n",
     "            e22placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \">\"\n",
     "            e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
@@ -190,8 +188,7 @@
     "            output.write(line)\n",
     "            line = triple(e65FplaceHolder, hasTypePCoords.prefix, '\\\"Fine\\\"^^xsd:string') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
-    "            #Luogo creazione\n",
+    "            # E65 Creation - E53 Place\n",
     "            if(row['luogo_luogo'] != ''):\n",
     "                auth_luogo = row['luogo_luogo']\n",
     "                authcode_luogo = re.sub('{luogo: .* ', '', auth_luogo)\n",
@@ -206,11 +203,8 @@
     "                geogname = row['luogo_luogo'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
     "                place = re.sub('}', '', geogname).strip()\n",
     "                line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place + '\\\"') + closeLine\n",
-    "                output.write(line)          \n",
-    "                                \n",
+    "                output.write(line)                                          \n",
     "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 2 - 7
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_creation_actor.ipynb

@@ -75,7 +75,6 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
@@ -179,8 +178,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        #Autore della creazione del documento\n",
+    "        # E65 Creation - E39 Actor\n",
     "        if(row['tipologia'] != 'carteggio'):\n",
     "            e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
     "            e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
@@ -244,10 +242,7 @@
     "                output.write(line)\n",
     "                line = triple(P11placeHolder, labelCoords.prefix, \"\\\"Compagnia\\\"\" ) + closeLine\n",
     "                output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
+    "            output.write('\\n')\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 2 - 10
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_creation_date.ipynb

@@ -13,7 +13,6 @@
     "from collections import OrderedDict\n",
     "import json\n",
     "\n",
-    "\n",
     "# OPZIONAL IMPORTS\n",
     "\n",
     "# For timestamping/simple speed tests\n",
@@ -78,12 +77,11 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n"
+    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')"
    ]
   },
   {
@@ -146,7 +144,6 @@
     "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -174,8 +171,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        #Periodo creazione\n",
+    "        # E65 Creation - E52 Time Span\n",
     "        if(row['tipologia'] != 'carteggio'):\n",
     "            e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
     "            e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
@@ -197,7 +193,6 @@
     "                output.write(line)\n",
     "                #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale inferiore\\\"') + closeLine\n",
     "                #output.write(line)\n",
-    "\n",
     "            if(row['data_fine'] != ''):\n",
     "                e52FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \"F>\"\n",
     "                line = triple(e65FplaceHolder, hasTimeSpanCoords.prefix, e52FplaceHolder) + closeLine\n",
@@ -215,10 +210,7 @@
     "                output.write(line)\n",
     "                #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale superiore\\\"') + closeLine\n",
     "                #output.write(line)\n",
-    "\n",
     "            output.write('\\n')\n",
-    "            #\n",
-    "            #\n",
     "            # Limit number of entries processed (if desired)\n",
     "            if(ii>max_entries):\n",
     "                break\n",

+ 3 - 8
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange.ipynb

@@ -80,14 +80,12 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
     "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:')\n",
-    "\n",
     "# New classes (subclasses of E7 Activity) - Exchange, Sending, Recive Letters\n",
     "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
     "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
@@ -186,11 +184,10 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue   \n",
-    "\n",
+    "        # EL1 Exchange Letters\n",
     "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
     "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        \n",
+    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"        \n",
     "        if(row['luogo_partenza'] != '' and row['luogo_arrivo'] != ''):\n",
     "            line = triple(exchangeLettersCoords.prefix, labelCoords.prefix, '\\\"Exchange of letters\\\"') + closeLine\n",
     "            output.write(line)\n",
@@ -216,9 +213,7 @@
     "            output.write(line)\n",
     "            line = triple(el3placeHolder, hasTypeCoords.prefix, receiveLetterCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 3 - 7
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_date.ipynb

@@ -82,7 +82,6 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
@@ -189,12 +188,10 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        #Evento send letter\n",
+    "        # EL2 Send Letter - EL3 Receive Letter\n",
     "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
     "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "\n",
-    "        # Data invio\n",
+    "        # EL2 Send Letter - E52 Time Span\n",
     "        if(row['data_inizio'] != ''):\n",
     "            e52PplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
     "            line = triple(el2placeHolder, hasTimeSpanCoords.prefix, e52PplaceHolder) + closeLine\n",
@@ -213,7 +210,7 @@
     "            #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale inferiore\\\"') + closeLine\n",
     "            #output.write(line)\n",
     "\n",
-    "\n",
+    "        # EL3 Receive Letter - E52 Time Span\n",
     "        if(row['data_fine'] != ''):\n",
     "            e52AplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
     "            line = triple(el3placeHolder, hasTimeSpanCoords.prefix, e52AplaceHolder) + closeLine\n",
@@ -231,7 +228,6 @@
     "            #output.write(line)\n",
     "            #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale superiore\\\"') + closeLine\n",
     "            #output.write(line)\n",
-    "\n",
     "        output.write('\\n')\n",
     "        #\n",
     "        #\n",

+ 4 - 8
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_place.ipynb

@@ -82,7 +82,6 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
@@ -188,11 +187,10 @@
     "            first = False\n",
     "            continue\n",
     "        \n",
-    "        #Evento exchange letters\n",
+    "        # EL1 Exchange Letter - E53 Place\n",
     "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
     "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
     "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "          \n",
     "        if(row['luogo_partenza'] != '' and row['luogo_arrivo'] != ''):\n",
     "            partenza = row['luogo_partenza'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
     "            place_partenza = re.sub('IT-ASPO-GEO0001-[0-9]*}', '', partenza).strip()\n",
@@ -204,7 +202,7 @@
     "            auth_arrivo = row['luogo_arrivo']\n",
     "            authcode_arrivo = re.sub('{luogo: .* ', '', auth_arrivo)\n",
     "            authcodeprefix_arrivo = authcode_arrivo.replace('IT-ASPO-GEO0001-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            #Luogo partenza\n",
+    "            # EL2 Send Letter - E53 Place\n",
     "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_partenza +  \">\"\n",
     "            line = triple(el2placeHolder, movedFromCoords.prefix, e53placeHolder) + closeLine\n",
     "            output.write(line)\n",
@@ -212,18 +210,16 @@
     "            output.write(line)\n",
     "            line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place_partenza + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "            #Luogo arrivo\n",
+    "            # EL3 Receive Letter - E53 Place\n",
     "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_arrivo +  \">\"\n",
     "            line = triple(el3placeHolder, movedToCoords.prefix, e53placeHolder) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place_arrivo + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            \n",
+    "            output.write(line)     \n",
     "        output.write('\\n')\n",
     "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 2 - 5
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_receiver.ipynb

@@ -84,7 +84,6 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
@@ -194,8 +193,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        #Evento exchange letters\n",
+    "        # EL3 Receive Letter\n",
     "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
     "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
     "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
@@ -203,7 +201,6 @@
     "        PC14rplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
     "        E55splaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + typeCoords.code + \">\"       \n",
     "        E55rplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "    \n",
     "        if(row['persona_destinatario'] != ''):\n",
     "            destinatario = row['persona_destinatario'].replace('{\"nome\":', '').replace(',', '').replace('\"authID\":', '')\n",
     "            name_destinatario = re.sub('IT-ASPO-AU00003-[0-9].*}', '', destinatario).replace('\"', '').replace('}', '').strip()\n",
@@ -223,7 +220,7 @@
     "            output.write(line)\n",
     "            line = triple(PC14rplaceHolder, hasRangeCoords.prefix, actorplaceHolder) + closeLine\n",
     "            output.write(line)\n",
-    "\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 3 - 7
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_event_exchange_sender.ipynb

@@ -83,7 +83,6 @@
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "#placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
     "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
@@ -192,9 +191,8 @@
     "        # Skip the first line as it carries info we don't want to triplify\n",
     "        if(first):\n",
     "            first = False\n",
-    "            continue\n",
-    "        \n",
-    "        #Evento exchange letters\n",
+    "            continue        \n",
+    "        # EL2 Send Letter\n",
     "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
     "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
     "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
@@ -202,7 +200,6 @@
     "        PC14rplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
     "        E55splaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + typeCoords.code + \">\"       \n",
     "        E55rplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "    \n",
     "        if(row['persona_mittente'] != ''):\n",
     "            mittente = row['persona_mittente'].replace('{\"nome\":', '').replace(',', '').replace('\"authID\":', '')\n",
     "            name_mittente = re.sub('IT-ASPO-AU00003-[0-9].*}', '', mittente).replace('\"', '').replace('}', '').strip()\n",
@@ -222,8 +219,7 @@
     "            output.write(line)\n",
     "            line = triple(PC14splaceHolder, hasRangeCoords.prefix, actorplaceHolder) + closeLine\n",
     "            output.write(line)\n",
-    "\n",
-    "        #\n",
+    "            output.write('\\n')\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 3 - 6
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_extent.ipynb

@@ -156,7 +156,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # NUMERO DI CARTE - CONSISTENZA\n",
+    "        # E54 Dimensions\n",
     "        if(row['numero'] != ''):\n",
     "            e54placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
     "            line = triple(datiniCoords.prefix + row['id'], hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
@@ -164,8 +164,7 @@
     "            line = triple(e54placeHolder, hasTypeCoords.prefix, dimensionsCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"Consistenza: carte ' + row['numero'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
+    "            output.write(line)        \n",
     "        if(row['extent'] != ''):\n",
     "            e54placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
     "            line = triple(datiniCoords.prefix + row['id'], hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
@@ -173,8 +172,7 @@
     "            line = triple(e54placeHolder, hasTypeCoords.prefix, dimensionsCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"Consistenza: carte ' + row['extent'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
+    "            output.write(line)        \n",
     "        if(row['consistenza'] != ''):\n",
     "            e54placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
     "            line = triple(datiniCoords.prefix + row['id'], hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
@@ -183,7 +181,6 @@
     "            output.write(line)\n",
     "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"Consistenza: carte ' + row['consistenza'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
     "        output.write('\\n')\n",
     "        #\n",
     "        #\n",

+ 15 - 11
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_repository.ipynb → ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_id.ipynb

@@ -53,7 +53,6 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -61,11 +60,11 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
     "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
     "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -74,8 +73,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -146,11 +143,16 @@
     "fileType = 'item'\n",
     "max_entries = 1000000000\n",
     "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_permanent_location.ttl', 'w') as output:\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_id.ttl', 'w') as output:\n",
     "    reader = csv.DictReader(csv_file)\n",
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    E53placeHolder = '<http://www.archiviodistato.prato.it>'\n",
+    "    line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "    output.write(line)        \n",
+    "    line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -158,17 +160,19 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
-    "        E53placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + placeCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
     "        output.write(line)\n",
-    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
-    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
     "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 18 - 16
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_phydesc.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -23,12 +23,13 @@
     "# System & command line utilities\n",
     "import sys\n",
     "# Json for the dictionary\n",
-    "import json"
+    "import json\n",
+    "import re"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -38,7 +39,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -80,7 +81,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -106,7 +107,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -130,13 +131,12 @@
     "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "\n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 12,
    "metadata": {
     "tags": []
    },
@@ -151,6 +151,9 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    e55placeHolder = \"<http://archiviodistato.prato.it/E22_\" + stringCoords.code + \"_\" + typeCoords.code + \">\"\n",
+    "    line = triple(e55placeHolder, labelCoords.prefix, '\\\"Nota descrizione fisica\\\"') + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -158,23 +161,22 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Physdesc\n",
+    "        # E62 String\n",
     "        if(row['descrizione_fisica'] != ''):\n",
     "            e62placeHolder= \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/E22_\" + stringCoords.code + \">\"\n",
     "            line = triple(datiniCoords.prefix + row['id'], hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['descrizione_fisica'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            # Strip space+newline sequences, then drop each pair of adjacent whitespace characters (single spaces are kept):\n",
+    "            txt = row['descrizione_fisica']\n",
+    "            x = re.sub(\" \\n\", \"\", txt)\n",
+    "            y = re.sub(\"\\s\\s\", \"\", x)\n",
+    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + y.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/E22_\" + stringCoords.code + \"_\" + typeCoords.code + \">\"\n",
     "            line = triple(e62placeHolder, hasTypeNCoords.prefix, e55placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Nota descrizione fisica\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 45 - 43
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_physfacet.ipynb

@@ -2,7 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Utilities to read/write csv files\n",
     "import csv\n",
@@ -22,23 +24,23 @@
     "import sys\n",
     "# Json for the dictionary\n",
     "import json"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/datini/'\n",
     "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/datini/'"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Custom class to store URIs + related infos for the ontologies/repositories\n",
     "\n",
@@ -51,7 +53,6 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -63,7 +64,6 @@
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
     "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
     "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
     "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
@@ -74,13 +74,13 @@
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
     "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Basic functions for triples / shortened triples in TTL format\n",
     "\n",
@@ -100,13 +100,13 @@
     "continueLine1 = ' ;\\n'\n",
     "continueLine2 = ' ,\\n'\n",
     "closeLine = ' .\\n'"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "def writeTTLHeader(output):\n",
     "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
@@ -127,15 +127,16 @@
     "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "   \n",
     "    output.write('\\n')\n"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 12,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
    "source": [
     "filePrefix = 'data_'\n",
     "fileType = 'item'\n",
@@ -146,6 +147,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    e57placeHolder = \"<http://archiviodistato.prato.it/\" + materialCoords.code + \">\"\n",
+    "    line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(e57placeHolder, labelCoords.prefix, '\\\"Supporto\\\"') + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -153,34 +159,33 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Physfacet - supporto\n",
+    "        # E57 Material\n",
     "        if(row['supporto'] != ''):\n",
-    "            e57placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + materialCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], consistCoords.prefix, e57placeHolder) + closeLine\n",
+    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/\" + materialCoords.code + \"_\" + typeCoords.code + \"_\" + row['supporto'].replace('|','').replace(' ','_').replace('__','_').replace(',','').replace(')','').replace('(','') + \">\"  \n",
+    "            line = triple(datiniCoords.prefix + row['id'], consistCoords.prefix, e55placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e57placeHolder, labelCoords.prefix, '\\\"' + row['supporto'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            line = triple(e55placeHolder, hasTypePCoords.prefix, e57placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
+    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            \n",
-    "        output.write('\\n')\n",
-    "        #\n",
+    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"' + row['supporto'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",
     "        "
-   ],
-   "outputs": [],
-   "metadata": {
-    "tags": []
-   }
+   ]
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+  },
   "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3.9.0 64-bit"
+   "display_name": "Python 3.9.0 64-bit",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -198,11 +203,8 @@
    "interpreter": {
     "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
    }
-  },
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}

+ 23 - 23
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_phystech.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -13,7 +13,6 @@
     "from collections import OrderedDict\n",
     "import json\n",
     "\n",
-    "\n",
     "# OPTIONAL IMPORTS\n",
     "\n",
     "# For timestamping/simple speed tests\n",
@@ -23,12 +22,13 @@
     "# System & command line utilities\n",
     "import sys\n",
     "# Json for the dictionary\n",
-    "import json"
+    "import json\n",
+    "import re"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -53,7 +53,6 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -74,14 +73,13 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -107,7 +105,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -130,13 +128,12 @@
     "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    \n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 30,
    "metadata": {
     "tags": []
    },
@@ -151,6 +148,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    e3placeHolder = \"<http://archiviodistato.prato.it/\" + conditionCoords.code + \">\"\n",
+    "    line = triple(e3placeHolder, labelCoords.prefix, '\\\"Stato di conservazione\\\"') + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(e3placeHolder, hasTypeCoords.prefix, conditionCoords.prefix) + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -158,24 +160,22 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # STATO DI CONSERVAZIONE\n",
+    "        # E3 Condition State\n",
     "        if(row['conservazione'] != ''):\n",
-    "            e3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + conditionCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], hasConditionCoords.prefix,  e3placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e3placeHolder, hasTypeCoords.prefix, conditionCoords.prefix) + closeLine\n",
+    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/\" + conditionCoords.code + \"_\" + typeCoords.code + \"_\" + row['conservazione'].replace('|','').replace(' ','_').replace('__','_').replace(',','').replace(')','').replace('(','') + \">\"  \n",
+    "            line = triple(datiniCoords.prefix + row['id'], hasConditionCoords.prefix, e55placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e3placeHolder, labelCoords.prefix, '\\\"Stato di conservazione\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + conditionCoords.code + \"_\" + typeCoords.code + \">\"  \n",
-    "            line = triple(e3placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "            line = triple(e55placeHolder, hasTypePCoords.prefix, e3placeHolder) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"'+ row['conservazione'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            # Strip space+newline sequences, then drop each pair of adjacent whitespace characters (single spaces are kept):\n",
+    "            txt = row['conservazione']\n",
+    "            x = re.sub(\" \\n\", \"\", txt)\n",
+    "            y = re.sub(\"\\s\\s\", \"\", x)\n",
+    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"'+ y.replace('\\\\','\\\\\\\\').replace('\\\"','').replace('|',',').replace(' ,',',') + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",

+ 8 - 9
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_scopecontent.ipynb

@@ -11,8 +11,7 @@
     "import unicodedata\n",
     "# Ordered Dicts\n",
     "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
+    "import re\n",
     "\n",
     "# OPZIONAL IMPORTS\n",
     "\n",
@@ -53,7 +52,6 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -131,7 +129,6 @@
     "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "   \n",
     "    output.write('\\n')\n"
    ]
   },
@@ -159,7 +156,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Scopecontent\n",
+    "        # E1 Entity - Scope and Content\n",
     "        if(row['scope-content_body'] != ''):\n",
     "            e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "            e1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/E73_\" + entityCoords.code + \">\"\n",
@@ -167,7 +164,11 @@
     "            output.write(line)\n",
     "            line = triple(e1placeHolder, hasTypeCoords.prefix, entityCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e1placeHolder, labelCoords.prefix, '\\\"' + row['scope-content_body'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            # Strip space+newline sequences, then drop each pair of adjacent whitespace characters (single spaces are kept):\n",
+    "            txt = row['scope-content_body']\n",
+    "            x = re.sub(\" \\n\", \"\", txt)\n",
+    "            y = re.sub(\"\\s\\s\", \"\", x)\n",
+    "            line = triple(e1placeHolder, labelCoords.prefix, '\\\"' + y.replace('\\\\','\\\\\\\\').replace('\\\"','') + '\\\"') + closeLine\n",
     "            output.write(line)\n",
     "            e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + entityCoords.code + \"_\" + typeCoords.code + \">\"\n",
     "            line = triple(e1placeHolder, refersHasTypeCoords.prefix, e55placeHolder) + closeLine\n",
@@ -176,9 +177,7 @@
     "            output.write(line)\n",
     "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Scope and Content\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 9 - 9
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_segnatura.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -79,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -105,7 +105,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -130,7 +130,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 6,
    "metadata": {
     "tags": []
    },
@@ -152,6 +152,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
+    "        # E42 Identifier\n",
     "        segnatura = ''\n",
     "        if(row['segnatura_registri_1'] != ''):\n",
     "            segnatura = row['segnatura_registri_1']\n",
@@ -171,8 +172,7 @@
     "            output.write(line)\n",
     "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Segnatura: Fondo Datini, ' + segnatura + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "    \n",
-    "        output.write('\\n')\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
@@ -200,7 +200,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.9.0"
   },
   "metadata": {
    "interpreter": {

+ 3 - 9
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_title.ipynb

@@ -53,7 +53,6 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -61,7 +60,6 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
     "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
@@ -73,7 +71,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -154,18 +151,16 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
-    "        e37placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "      \n",
+    "        # E35 Title\n",
+    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
     "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e37placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
+    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
     "        if(row['titolo_originale'] != 'None' and row['titolo_originale'] != ''):\n",
     "            line = triple(e35placeHolder1, hasAlternativeFormCoords.prefix, \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/original_title>\") + closeLine\n",
     "            output.write(line)\n",
@@ -173,7 +168,6 @@
     "            output.write(line)\n",
     "            line = triple(\"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/original_title>\", labelCoords.prefix, '\\\"' + row['titolo_originale'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
     "        output.write('\\n')\n",
     "        #\n",
     "        #\n",

+ 22 - 18
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_permanent_location.ipynb → ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_item_type.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -65,6 +65,7 @@
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
     "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
     "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -73,14 +74,13 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -106,7 +106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -134,7 +134,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 6,
    "metadata": {
     "tags": []
    },
@@ -144,7 +144,7 @@
     "fileType = 'item'\n",
     "max_entries = 1000000000\n",
     "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_permanent_location.ttl', 'w') as output:\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_type.ttl', 'w') as output:\n",
     "    reader = csv.DictReader(csv_file)\n",
     "    writeTTLHeader(output)\n",
     "    first = True\n",
@@ -156,16 +156,20 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        E53placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
+    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
+    "        # E55 Type\n",
+    "        if(row['tipologia'] != ''):\n",
+    "            tipologie = []\n",
+    "            pipe = \"|\" \n",
+    "            if pipe in row['tipologia']:\n",
+    "                tipologie = row['tipologia'].split('|')\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "            else:\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 24 - 7
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_series.ipynb

@@ -62,6 +62,8 @@
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -115,6 +117,8 @@
     "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -140,11 +144,12 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
     "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
+    "        # E73 Information Object\n",
     "        e73placeHolder =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
@@ -152,9 +157,7 @@
     "        output.write(line)\n",
     "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "\n",
-    "        #\n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        # E35 Title\n",
     "        if(row['titolo_aspo'] != 'None'):\n",
     "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
     "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
@@ -163,7 +166,7 @@
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "            \n",
+    "        # E55 Type\n",
     "        if(row['tipologia'] != ''):\n",
     "            tipologie = []\n",
     "            pipe = \"|\" \n",
@@ -176,10 +179,24 @@
     "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
     "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
     "                output.write(line)\n",
-    "                   \n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        E53placeHolder = '<http://www.archiviodistato.prato.it>'\n",
+    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 243 - 0
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_subfonds.ipynb

@@ -0,0 +1,243 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Utilities to read/write csv files\n",
+    "import csv\n",
+    "# Utilities to handle character encodings\n",
+    "import unicodedata\n",
+    "# Ordered Dicts\n",
+    "from collections import OrderedDict\n",
+    "\n",
+    "import json\n",
+    "\n",
+    "\n",
+    "# OPTIONAL IMPORTS\n",
+    "\n",
+    "# For timestamping/simple speed tests\n",
+    "from datetime import datetime\n",
+    "# Random number generator\n",
+    "from random import *\n",
+    "# System & command line utilities\n",
+    "import sys\n",
+    "# Json for the dictionary\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/datini/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/datini/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
+    "\n",
+    "class RDFcoords:\n",
+    "    def __init__(self, uri, prefix, code = None):\n",
+    "        self.uri = uri\n",
+    "        self.prefix = prefix\n",
+    "        self.code = code\n",
+    "\n",
+    "\n",
+    "# Repositories\n",
+    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
+    "# W3/CIDOC Predicates\n",
+    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
+    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
+    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
+    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
+    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
+    "hasTypeNCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3.1_has_type>', 'tn:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
+    "\n",
+    "# CIDOC Objects\n",
+    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
+    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
+    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
+    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
+    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
+    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
+    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'sr:', 'E62')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Basic functions for triples / shortened triples in TTL format\n",
+    "\n",
+    "def triple(subject, predicate, object1):\n",
+    "    line = subject + ' ' + predicate + ' ' + object1\n",
+    "    return line\n",
+    "\n",
+    "def doublet(predicate, object1):\n",
+    "    line = '    ' + predicate + ' ' + object1\n",
+    "    return line\n",
+    "\n",
+    "def singlet(object1):\n",
+    "    line = '        ' + object1\n",
+    "    return line\n",
+    "\n",
+    "# Line endings in TTL format\n",
+    "continueLine1 = ' ;\\n'\n",
+    "continueLine2 = ' ,\\n'\n",
+    "closeLine = ' .\\n'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def writeTTLHeader(output):\n",
+    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
+    "    output.write('\\n')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filePrefix = 'data_'\n",
+    "fileType = 'subfonds'\n",
+    "max_entries = 1000000000\n",
+    "\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
+    "    reader = csv.DictReader(csv_file)\n",
+    "    writeTTLHeader(output)\n",
+    "    first = True\n",
+    "    ii = 0\n",
+    "    for row in reader:\n",
+    "        # The index ii is used to process a limited number of entries for testing purposes\n",
+    "        ii = ii+1\n",
+    "        # Skip the first line as it carries info we don't want to triplify\n",
+    "        if(first):\n",
+    "            first = False\n",
+    "            continue\n",
+    "        # E22 Man Made Object\n",
+    "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        # E73 Information Object\n",
+    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        # E35 Title \n",
+    "        if(row['titolo_aspo'] != 'None'):\n",
+    "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
+    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "        # E55 Type\n",
+    "        if(row['genere'] != ''):\n",
+    "            tipologie = []\n",
+    "            pipe = \"|\" \n",
+    "            if pipe in row['genere']:\n",
+    "                tipologie = row['genere'].split('|')\n",
+    "                for type in tipologie:\n",
+    "                    tipo = type\n",
+    "                    e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipo.replace(\" \", \"\") + \">\"\n",
+    "                    line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                    output.write(line)\n",
+    "            else:\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)       \n",
+    "        # E53 Place\n",
+    "        E53placeHolder = '<http://www.archiviodistato.prato.it>'\n",
+    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        output.write(line)\n",
+    "        output.write('\\n')\n",
+    "        #\n",
+    "        # Limit number of entries processed (if desired)\n",
+    "        if(ii>max_entries):\n",
+    "            break\n",
+    "        "
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.0 64-bit",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 0 - 203
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_subfonds_genreform.ipynb

@@ -1,203 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPZIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ],
-   "outputs": [],
-   "metadata": {}
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/datini/'"
-   ],
-   "outputs": [],
-   "metadata": {}
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypeNCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3.1_has_type>', 'tn:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'sr:', 'E62')"
-   ],
-   "outputs": [],
-   "metadata": {}
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ],
-   "outputs": [],
-   "metadata": {}
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('\\n')\n"
-   ],
-   "outputs": [],
-   "metadata": {}
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'subfonds'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_genreform.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # <genreform> - genere\n",
-    "        if(row['genere'] != ''):\n",
-    "            e62placeHolder= \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + stringCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['genere'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"').replace(' |',',') + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + stringCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "            line = triple(e62placeHolder, hasTypeNCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Genere\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "                        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ],
-   "outputs": [],
-   "metadata": {}
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3.9.0 64-bit"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  },
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 31 - 22
ASPO/CSV_to_RDF/datini/CSV_to_RDF_datini_subseries.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -62,6 +62,8 @@
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -73,7 +75,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -99,7 +101,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -115,12 +117,14 @@
     "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,11 +144,12 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
     "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
+    "        # E73 Information Object\n",
     "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
@@ -152,16 +157,7 @@
     "        output.write(line)\n",
     "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "        e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + typeCoords.code + \">\"        \n",
-    "        if(row['tipologia'] != ''):\n",
-    "            line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)            \n",
-    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"' + row['tipologia'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        #\n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        # E35 Title\n",
     "        if(row['titolo_aspo'] != 'None'):\n",
     "            e35placeHolder1 =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
     "            output.write(line)\n",
@@ -171,8 +167,7 @@
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        #\n",
-    "        # Triplify the 'segnatura'\n",
+    "        # E42 Identifier\n",
     "        if(row['segnatura_parent'] != ''):\n",
     "            e42placeHolder =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \">\"\n",
     "            line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
@@ -181,7 +176,7 @@
     "            output.write(line)\n",
     "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Segnatura: Fondo Datini, ' + row['segnatura_parent'] + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
+    "        # E55 Type\n",
     "        if(row['tipologia'] != ''):\n",
     "            tipologie = []\n",
     "            pipe = \"|\" \n",
@@ -194,10 +189,24 @@
     "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
     "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
     "                output.write(line)\n",
-    "        \n",
+    "        # E42 Identifier (record id)\n",
+    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        E53placeHolder = '<http://www.archiviodistato.prato.it>'\n",
+    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",

+ 0 - 0
ASPO/CSV_to_RDF/datini/ASPO_CSV_to_RDF_onomastica_datini.py → ASPO/CSV_to_RDF/datini/CSV_to_RDF_onomastica_datini.py


+ 45 - 24
ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_fonds.ipynb

@@ -34,8 +34,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI:CNR/CSV/ASPO/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI:CNR/RDF/ASPO/gettatelli/'"
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
    ]
   },
   {
@@ -55,16 +55,17 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
+    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -72,7 +73,7 @@
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "# Added by FS CIDOC entity\n",
+    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -111,9 +112,9 @@
    "source": [
     "def writeTTLHeader(output):\n",
     "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
@@ -121,11 +122,12 @@
     "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -144,6 +146,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "    line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -151,25 +158,36 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + continueLine1\n",
+    "        # E22 Man Made Object\n",
+    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939>\", hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
-    "        e37placeHolder = '_:' + informationObjectCoords.code + '_' + row['id'].replace('IT-ASPO-GT001-', '')\n",
-    "        line = doublet(carriesCoords.prefix, e37placeHolder) + closeLine\n",
+    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939>\", labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"') + '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "        line = triple(e37placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
+    "        # E73 Information Object\n",
+    "        e73placeHolder =\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939/\" + informationObjectCoords.code + \">\"\n",
+    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939>\", carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
-    "        #   \n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"') + '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        # E35 Title\n",
     "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 = '_:' + titleCoords.code + '_' + row['id'].replace('IT-ASPO-GT001-', '') + '_a'\n",
-    "            line = triple(e37placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
+    "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939/\" + titleCoords.code + \">\"\n",
+    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + continueLine1\n",
+    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            line = doublet(labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
     "        #\n",
@@ -188,9 +206,12 @@
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+  },
   "kernelspec": {
-   "name": "python373jvsc74a57bd031f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6",
-   "display_name": "Python 3.7.3 64-bit"
+   "display_name": "Python 3.7.3 64-bit",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -202,7 +223,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.9.0"
   },
   "metadata": {
    "interpreter": {
@@ -212,4 +233,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}

+ 189 - 41
ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_item.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,17 +30,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 122,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI:CNR/CSV/ASPO/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI:CNR/RDF/ASPO/gettatelli/'"
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 123,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,19 +52,26 @@
     "        self.prefix = prefix\n",
     "        self.code = code\n",
     "\n",
-    "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
+    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
+    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
+    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
+    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
+    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
+    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
+    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -72,14 +79,25 @@
     "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "# Added by FS CIDOC entity\n",
+    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
+    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
+    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
+    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
+    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
+    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
+    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
+    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
+    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
+    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
+    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -105,15 +123,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
     "def writeTTLHeader(output):\n",
     "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
@@ -121,17 +139,37 @@
     "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
+    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 126,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -144,44 +182,151 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "    line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
     "        # Skip the first line as it carries info we don't want to triplify\n",
     "        if(first):\n",
     "            first = False\n",
-    "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + continueLine1\n",
-    "        output.write(line)\n",
-    "        e37placeHolder = '_:' + informationObjectCoords.code + '_' + row['id'].replace('IT-ASPO-GT001-', '')\n",
-    "        line = doublet(carriesCoords.prefix, e37placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e37placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
+    "            continue \n",
+    "        # E22 Man Made Object\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
-    "        #   \n",
-    "        # Add by FS Person\n",
-    "        # If the 'persname' property is not empty for the given entry, write down triples\n",
     "        bambino = ''\n",
     "        if(row['cognome_bambino'] != ''):\n",
     "            bambino = bambino + row['cognome_bambino'] + ' '\n",
     "        if(row['nome_bambino'] != ''):\n",
     "            bambino = bambino + row['nome_bambino']\n",
     "        if(bambino != ''):\n",
-    "            url = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/' + bambino.replace(' ', '_') + '>'\n",
-    "            line = triple(url, hasTypeCoords.prefix, personCoords.prefix) + closeLine\n",
+    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), labelCoords.prefix, '\\\"Documento fisico: ' + row['tipologia'] + ' di ' + bambino + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(url, labelCoords.prefix, '\\\"' + bambino + '\\\"') +  closeLine\n",
+    "        else:\n",
+    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), labelCoords.prefix, '\\\"Documento fisico: ' + row['tipologia'] + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
-    "        # Add by FS Matricola Bambino\n",
-    "        if(row['matricola'] != ''):\n",
-    "            url = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/' + row['matricola'].replace('/', '') + '>'\n",
-    "            line = triple(url, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        # E73 Information Object\n",
+    "        e73placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), carriesCoords.prefix, e73placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['tipologia'] + ' di ' + bambino + '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        # E55 Type\n",
+    "        if(row['tipologia'] != ''):\n",
+    "            tipologie = []\n",
+    "            pipe = \"|\" \n",
+    "            if pipe in row['tipologia']:\n",
+    "                tipologie = row['tipologia'].split('|')\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "            else:\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "        elif(row['genere'] != ''):\n",
+    "            tipologie = []\n",
+    "            pipe = \"|\" \n",
+    "            if pipe in row['genere']:\n",
+    "                tipologie = row['genere'].split('|')\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "            else:\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
+    "        # E65 Creation\n",
+    "        e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \">\"\n",
+    "        e65placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
+    "        line = triple(e22placeHolder, wasBroughtCoords.prefix, e65placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e65placeHolder, hasTypeCoords.prefix, creationCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e65placeHolder, labelCoords.prefix, '\\\"Creazione di ' + row['tipologia'] + ' di ' + bambino + '\\\"') + closeLine\n",
+    "        output.write(line)      \n",
+    "        # E42 Identifier (register reference)\n",
+    "        if(row['riferimento_registro'] != ''):\n",
+    "            e42placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \">\"\n",
+    "            line = triple(e22placeHolder, identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(url, labelCoords.prefix, '\\\"' + row['matricola'] + '\\\"') + closeLine\n",
+    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Riferimento registro: ' + row['riferimento_registro']+ '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "        # E57 Material\n",
+    "        if(row['descrizione_contrassegno'] != ''):\n",
+    "            e57placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + materialCoords.code + \">\"\n",
+    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), consistCoords.prefix, e57placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e57placeHolder, labelCoords.prefix, '\\\"' + row['descrizione_contrassegno'].replace('\"','')+ '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda\" + row['id'] + \"/\" + materialCoords.code  + \"_\" + typeCoords.code + \">\"  \n",
+    "            line = triple(e57placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Descrizione contrassegno\\\"') + closeLine\n",
+    "            output.write(line)  \n",
+    "        # E3 Condition\n",
+    "        if(row['conservazione'] != ''):\n",
+    "            e3placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + conditionCoords.code + \">\"\n",
+    "            line = triple( \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \">\", hasConditionCoords.prefix,  e3placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e3placeHolder, hasTypeCoords.prefix, conditionCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e3placeHolder, labelCoords.prefix, '\\\"'+ row['conservazione'].replace('\"','')+ '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda\" + row['id'] + \"/\" + conditionCoords.code + \"_\" + typeCoords.code + \">\"  \n",
+    "            line = triple(e3placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Stato di conservazione\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "        # E1 Entity - Scope and Content\n",
+    "        if(row['scope-content_body'] != ''):\n",
+    "            e1placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda\" + row['id'] + \"/E73_\" + entityCoords.code + \">\"\n",
+    "            line = triple(e73placeHolder, refersCoords.prefix, e1placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e1placeHolder, hasTypeCoords.prefix, entityCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e1placeHolder, labelCoords.prefix, '\\\"' + row['scope-content_body'].replace('\"','') + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda\" + row['id'] + \"/\" + entityCoords.code + \"_\" + typeCoords.code + \">\"\n",
+    "            line = triple(e1placeHolder, refersHasTypeCoords.prefix, e55placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Descrizione biglietto\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "        # E62 String - Note (nota)\n",
+    "        if(row['nota'] != ''):\n",
+    "            e62placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/E73_\" + stringCoords.code + \">\"\n",
+    "            line = triple(e73placeHolder, hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['nota'].replace('\"','')+ '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "\n",
     "        output.write('\\n')\n",
     "        #\n",
     "        #\n",
@@ -200,9 +345,12 @@
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+  },
   "kernelspec": {
-   "name": "python373jvsc74a57bd031f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6",
-   "display_name": "Python 3.7.3 64-bit"
+   "display_name": "Python 3.7.3 64-bit",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -214,7 +362,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.9.0"
   },
   "metadata": {
    "interpreter": {
@@ -224,4 +372,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}

+ 393 - 0
ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_item_person.ipynb

@@ -0,0 +1,393 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Utilities to read/write csv files\n",
+    "import csv\n",
+    "# Utilities to handle character encodings\n",
+    "import unicodedata\n",
+    "# Ordered Dicts\n",
+    "from collections import OrderedDict\n",
+    "\n",
+    "import json\n",
+    "\n",
+    "\n",
+    "# OPTIONAL IMPORTS\n",
+    "\n",
+    "# For timestamping/simple speed tests\n",
+    "from datetime import datetime\n",
+    "# Random number generator\n",
+    "from random import *\n",
+    "# System & command line utilities\n",
+    "import sys\n",
+    "# Json for the dictionary\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
+    "\n",
+    "class RDFcoords:\n",
+    "    def __init__(self, uri, prefix, code = None):\n",
+    "        self.uri = uri\n",
+    "        self.prefix = prefix\n",
+    "        self.code = code\n",
+    "\n",
+    "# Repositories\n",
+    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
+    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
+    "# W3/CIDOC Predicates\n",
+    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
+    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
+    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
+    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
+    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
+    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
+    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
+    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
+    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
+    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
+    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
+    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
+    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
+    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
+    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
+    "\n",
+    "# CIDOC Objects\n",
+    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
+    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
+    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
+    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
+    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
+    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
+    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
+    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
+    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
+    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
+    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
+    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
+    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
+    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
+    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
+    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
+    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Basic functions for triples / shortened triples in TTL format\n",
+    "\n",
+    "def triple(subject, predicate, object1):\n",
+    "    line = subject + ' ' + predicate + ' ' + object1\n",
+    "    return line\n",
+    "\n",
+    "def doublet(predicate, object1):\n",
+    "    line = '    ' + predicate + ' ' + object1\n",
+    "    return line\n",
+    "\n",
+    "def singlet(object1):\n",
+    "    line = '        ' + object1\n",
+    "    return line\n",
+    "\n",
+    "# Line endings in TTL format\n",
+    "continueLine1 = ' ;\\n'\n",
+    "continueLine2 = ' ,\\n'\n",
+    "closeLine = ' .\\n'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def writeTTLHeader(output):\n",
+    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
+    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
+    "\n",
+    "    output.write('\\n')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filePrefix = 'data_'\n",
+    "fileType = 'item'\n",
+    "max_entries = 1000000000\n",
+    "\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_person.ttl', 'w') as output:\n",
+    "    reader = csv.DictReader(csv_file)\n",
+    "    writeTTLHeader(output)\n",
+    "    first = True\n",
+    "    ii = 0\n",
+    "    for row in reader:\n",
+    "        # The index ii is used to process a limited number of entries for testing purposes\n",
+    "        ii = ii+1\n",
+    "        # Skip the first data row (csv.DictReader has already consumed the header line), as it carries info we don't want to triplify\n",
+    "        if(first):\n",
+    "            first = False\n",
+    "            continue\n",
+    "        \n",
+    "        e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '>'\n",
+    "        e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \">\"\n",
+    "        e65placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
+    "\n",
+    "        line = triple(e65placeHolder, refersCoords.prefix, e21placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        e55placeHolder = '<http://www.archiviodistato.prato.it/gettatello' + \">\"\n",
+    "        line = triple(e21placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e55placeHolder, labelCoords.prefix, '\\\"Gettatello\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        \n",
+    "        # If the child's surname and/or given name is present for the given entry, write down the person triples\n",
+    "        bambino = ''\n",
+    "        if(row['cognome_bambino'] != ''):\n",
+    "            bambino = bambino + row['cognome_bambino'] + ' '\n",
+    "        if(row['nome_bambino'] != ''):\n",
+    "            bambino = bambino + row['nome_bambino']\n",
+    "        if(bambino != ''):\n",
+    "            line = triple(e21placeHolder, hasTypeCoords.prefix, personCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e21placeHolder, foafCoords.prefix + 'givenName', '\\\"' + bambino + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e21placeHolder, labelCoords.prefix, '\\\"' + bambino + '\\\"') +  closeLine\n",
+    "            output.write(line)\n",
+    "            e62placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') +'/E62>'\n",
+    "            line = triple(e21placeHolder, noteCoords.prefix, e62placeHolder) +  closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) +  closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"Fonte: Archivio di Stato di Prato - Fondo Ospedale della Misericordia e Dolce\\\"') +  closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "        #Matricola Bambino\n",
+    "        e42placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E42>'\n",
+    "        if(row['matricola'] != ''):\n",
+    "            line = triple(e21placeHolder, identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Matricola: ' + row['matricola'] + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "        \n",
+    "        #Nascita\n",
+    "        if row['data_nascita'] != '':\n",
+    "            e67placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E67>'\n",
+    "            line = triple(e21placeHolder, wasBornCoords.prefix, e67placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e67placeHolder, hasTypeCoords.prefix, birthCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e67placeHolder, labelCoords.prefix, '\\\"Nascita di ' + bambino + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "            e52NplaceHolder =  '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E52_N>'\n",
+    "            \n",
+    "            line = triple(e67placeHolder, hasTimeSpanCoords.prefix, e52NplaceHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52NplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52NplaceHolder, labelCoords.prefix, '\\\"' + row['data_nascita'] + '\\\"' ) + closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "        #Morte\n",
+    "        if row['data_morte'] != '':\n",
+    "            e69placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E69>'\n",
+    "            line = triple(e21placeHolder, diedCoords.prefix, e69placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e69placeHolder, hasTypeCoords.prefix, deathCoords.prefix) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e69placeHolder, labelCoords.prefix, '\\\"Morte di ' + bambino + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            \n",
+    "            e52MplaceHolder =  '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E52_M>'\n",
+    "            \n",
+    "            line = triple(e69placeHolder, hasTimeSpanCoords.prefix, e52MplaceHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52MplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52MplaceHolder, labelCoords.prefix, '\\\"' + row['data_morte'] + '\\\"' ) + closeLine\n",
+    "            output.write(line)\n",
+    "        \n",
+    "        #Ritrovamento\n",
+    "        if row['data_ritrovamento'] != '':\n",
+    "            e5placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E5_R>'\n",
+    "            line = triple(e5placeHolder, labelCoords.prefix, '\\\"Ritrovamento\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e21placeHolder, wasPresentCoords.prefix, e5placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            e5RplaceHolder = '<http://www.archiviodistato.prato.it/gettatelli_ritrovamento>'\n",
+    "            line = triple(e5RplaceHolder, nsCoords.prefix + 'type', '\\\"Ritrovamento\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e5placeHolder, hasTypeCoords.prefix, '\\\"Ritrovamento di ' + bambino + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "            e52RplaceHolder =  '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E52_R>'\n",
+    "            line = triple(e5placeHolder, hasTimeSpanCoords.prefix, e52RplaceHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52RplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52RplaceHolder, labelCoords.prefix, '\\\"' + row['data_ritrovamento'] + '\\\"' ) + closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "        #Ricongiungimento\n",
+    "        if row['data_ricongiungimento'] != '':\n",
+    "            e5placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E5_RC>'\n",
+    "            line = triple(e21placeHolder, wasPresentCoords.prefix, e5placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            e5RCplaceHolder = '<http://www.archiviodistato.prato.it/gettatelli_ricongiungimento>'\n",
+    "            line = triple(e5RCplaceHolder, nsCoords.prefix + 'type', '\\\"Ricongiungimento\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e5placeHolder, labelCoords.prefix, '\\\"Ricongiungimento\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e5placeHolder, hasTypeCoords.prefix, '\\\"Ricongiungimento di ' + bambino + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "            e52RCplaceHolder =  '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E52_RC>'   \n",
+    "            line = triple(e5placeHolder, hasTimeSpanCoords.prefix, e52RCplaceHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52RCplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52RCplaceHolder, labelCoords.prefix, '\\\"' + row['data_ricongiungimento'] + '\\\"' ) + closeLine\n",
+    "            output.write(line)\n",
+    "        \n",
+    "        #Adozione\n",
+    "        if row['data_adozione'] != '':\n",
+    "            e5placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E5_A>'\n",
+    "            line = triple(e21placeHolder, wasPresentCoords.prefix, e5placeHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            e5AplaceHolder = '<http://www.archiviodistato.prato.it/gettatelli_adozione>'\n",
+    "            line = triple(e5AplaceHolder, nsCoords.prefix + 'type', '\\\"Adozione\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e5placeHolder, labelCoords.prefix, '\\\"Adozione\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e5placeHolder, hasTypeCoords.prefix, '\\\"Adozione di ' + bambino + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "            e52AplaceHolder =  '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E52_A>'\n",
+    "            line = triple(e5placeHolder, hasTimeSpanCoords.prefix, e52AplaceHolder) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52AplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
+    "            output.write(line)\n",
+    "            line = triple(e52AplaceHolder, labelCoords.prefix, '\\\"' + row['data_adozione'] + '\\\"' ) + closeLine\n",
+    "            output.write(line)\n",
+    "\n",
+    "        #Anno unico\n",
+    "        e52UplaceHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001-' + row['id'].replace('IT-ASPO-GT001-', '') + '/E52_U>'\n",
+    "        line = triple(e52UplaceHolder, refersCoords.prefix, e21placeHolder, ) + closeLine\n",
+    "        output.write(line)  \n",
+    "        line = triple(e52UplaceHolder, hasTimeSpanCoords.prefix, '\\\"' + row['data_periodo'] + '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e52UplaceHolder, labelCoords.prefix, '\\\"' + row['data_periodo'] + '\\\"') + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e52UplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
+    "        output.write(line)\n",
+    "\n",
+    "        output.write('\\n')\n",
+    "        #\n",
+    "        #\n",
+    "        # Limit number of entries processed (if desired)\n",
+    "        if(ii>max_entries):\n",
+    "            break\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.7.3 64-bit",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 27 - 36
ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_item_permanent_location.ipynb → ASPO/CSV_to_RDF/gettatelli/CSV_to_RDF_gettatelli_type.ipynb

@@ -1,18 +1,23 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Parser per estrarre tutte le tipologie di documenti ed associarle solo una volta ad ogni record"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Utilities to read/write csv files\n",
     "import csv\n",
-    "# Utilities to handle character encodings\n",
     "import unicodedata\n",
     "# Ordered Dicts\n",
     "from collections import OrderedDict\n",
-    "\n",
     "import json\n",
     "\n",
     "\n",
@@ -30,17 +35,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -54,7 +59,7 @@
     "\n",
     "\n",
     "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
+    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -64,9 +69,7 @@
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasDimensionsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P43_has_dimension>', 'hd:')\n",
-    "dimensionsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E54_Dimension>', 'dm:', 'E54')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -76,13 +79,12 @@
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')"
+    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -108,7 +110,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -128,24 +130,23 @@
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDimensionsCoords.prefix + ' ' + hasDimensionsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dimensionsCoords.prefix + ' ' + dimensionsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
+   "execution_count": 12,
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
+    "fileType = 'item_type'\n",
     "max_entries = 1000000000\n",
     "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_permanent_location.ttl', 'w') as output:\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
     "    reader = csv.DictReader(csv_file)\n",
     "    writeTTLHeader(output)\n",
     "    first = True\n",
@@ -157,14 +158,11 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        E53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + placeCoords.code + \">\"\n",
-    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \">\", hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "\n",
+    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
-    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", labelCoords.prefix,  '\\\"' + row['tipologia']+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "        output.write('\\n')\n",
     "        \n",
     "        output.write('\\n')\n",
     "        #\n",
@@ -174,13 +172,6 @@
     "            break\n",
     "        "
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

+ 240 - 0
ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_date.ipynb

@@ -0,0 +1,240 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Utilities to read/write csv files\n",
+    "import csv\n",
+    "# Utilities to handle character encodings\n",
+    "import unicodedata\n",
+    "# Ordered Dicts\n",
+    "from collections import OrderedDict\n",
+    "\n",
+    "import json\n",
+    "\n",
+    "\n",
+    "# OPTIONAL IMPORTS\n",
+    "\n",
+    "# For timestamping/simple speed tests\n",
+    "from datetime import datetime\n",
+    "# Random number generator\n",
+    "from random import *\n",
+    "# System & command line utilities\n",
+    "import sys\n",
+    "# Json for the dictionary\n",
+    "import json\n",
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Custom class to store URIs + related info for the ontologies/repositories\n",
+    "\n",
+    "class RDFcoords:\n",
+    "    def __init__(self, uri, prefix, code = None):\n",
+    "        self.uri = uri\n",
+    "        self.prefix = prefix\n",
+    "        self.code = code\n",
+    "\n",
+    "\n",
+    "# Repositories\n",
+    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
+    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
+    "# W3/CIDOC Predicates\n",
+    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
+    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
+    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
+    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
+    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
+    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
+    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
+    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
+    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
+    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
+    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
+    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
+    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
+    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
+    "\n",
+    "#CIDOC Objects\n",
+    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
+    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
+    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
+    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
+    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
+    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
+    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
+    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
+    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
+    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
+    "\n",
+    "\n",
+    "# New classes (subclasses of E7 Activity) - Exchange, Sending, Receive Letters\n",
+    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
+    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
+    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Basic functions for triples / shortened triples in TTL format\n",
+    "\n",
+    "def triple(subject, predicate, object1):\n",
+    "    line = subject + ' ' + predicate + ' ' + object1\n",
+    "    return line\n",
+    "\n",
+    "def doublet(predicate, object1):\n",
+    "    line = '    ' + predicate + ' ' + object1\n",
+    "    return line\n",
+    "\n",
+    "def singlet(object1):\n",
+    "    line = '        ' + object1\n",
+    "    return line\n",
+    "\n",
+    "# Line endings in TTL format\n",
+    "continueLine1 = ' ;\\n'\n",
+    "continueLine2 = ' ,\\n'\n",
+    "closeLine = ' .\\n'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def writeTTLHeader(output):\n",
+    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + subClassOfCoords.prefix + ' ' + subClassOfCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)\n",
+    "    \n",
+    "    output.write('\\n')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filePrefix = 'data_'\n",
+    "fileType = 'item'\n",
+    "max_entries = 1000000000\n",
+    "\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_date.ttl', 'w') as output:\n",
+    "    reader = csv.DictReader(csv_file)\n",
+    "    writeTTLHeader(output)\n",
+    "    first = True\n",
+    "    ii = 0\n",
+    "    for row in reader:\n",
+    "        # The index ii is used to process a limited number of entries for testing purposes\n",
+    "        ii = ii+1\n",
+    "        # Skip the first line as it carries info we don't want to triplify\n",
+    "        if(first):\n",
+    "            first = False\n",
+    "            continue\n",
+    "        el1placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
+    "        el2placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
+    "        el3placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
+    "        \n",
+    "        # Data invio\n",
+    "        if(row['data_inizio'] != ''):\n",
+    "            e52PplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
+    "            line = triple(el2placeHolder, e52PplaceHolder, '\\\"'+ row['data_inizio'] + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "       \n",
+    "        if(row['data_fine'] != ''):\n",
+    "            e52AplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
+    "            line = triple(el3placeHolder, e52AplaceHolder, '\\\"' + row['data_fine'] + '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "             \n",
+    "        output.write('\\n')\n",
+    "        #\n",
+    "        #\n",
+    "        # Limit number of entries processed (if desired)\n",
+    "        if(ii>max_entries):\n",
+    "            break\n",
+    "        "
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.7.3 64-bit",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 64 - 25
ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_fonds.ipynb

@@ -10,7 +10,7 @@
     "import csv\n",
     "# Utilities to handle character encodings\n",
     "import unicodedata\n",
-    "# Ordered Dicts\n",
+    "# Ordered Dicts \n",
     "from collections import OrderedDict\n",
     "\n",
     "import json\n",
@@ -34,8 +34,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI:CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI:CNR/RDF/ASPO/marcovaldi/'"
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
    ]
   },
   {
@@ -52,10 +52,8 @@
     "        self.prefix = prefix\n",
     "        self.code = code\n",
     "\n",
-    "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -63,9 +61,10 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -74,7 +73,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -127,6 +125,8 @@
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -134,7 +134,19 @@
    "cell_type": "code",
    "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "'genere'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_8989/1915023737.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     43\u001b[0m             \u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     44\u001b[0m         \u001b[0;31m# E55 Type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m         \u001b[0;32mif\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'genere'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     46\u001b[0m             \u001b[0mtipologie\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     47\u001b[0m             \u001b[0mpipe\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"|\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'genere'"
+     ]
+    }
+   ],
    "source": [
     "filePrefix = 'data_'\n",
     "fileType = 'fonds'\n",
@@ -145,6 +157,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "    line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -152,32 +169,54 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
     "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "        e37placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), carriesCoords.prefix, e37placeHolder) + closeLine\n",
+    "        # E73 Information Object\n",
+    "        e73placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
-    "        line = triple(e37placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
+    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
-    "        line = triple(e37placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "        #\n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        # E35 Title        \n",
     "        if(row['titolo_aspo'] != 'None'):\n",
     "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e37placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
+    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
+    "        # E55 Type\n",
+    "        if(row['genere'] != ''):\n",
+    "            tipologie = []\n",
+    "            pipe = \"|\" \n",
+    "            if pipe in row['genere']:\n",
+    "                tipologie = row['genere'].split('|')\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "            else:\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break\n",
@@ -193,9 +232,12 @@
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+  },
   "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3.7.3 64-bit"
+   "display_name": "Python 3.7.3 64-bit",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -207,17 +249,14 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.9.0"
   },
   "metadata": {
    "interpreter": {
     "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
    }
-  },
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}

+ 54 - 21
ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_item.ipynb

@@ -55,7 +55,6 @@
     "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "# Added by FS\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
@@ -63,9 +62,10 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -74,7 +74,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -127,6 +126,8 @@
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -145,6 +146,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "    line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -152,7 +158,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        tt = ''\n",
     "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
     "            tt = row['titolo_aspo']\n",
@@ -160,34 +166,50 @@
     "            output.write(line)\n",
     "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), labelCoords.prefix, '\\\"Documento fisico: ' + tt.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "            e37placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), carriesCoords.prefix, e37placeHolder) + closeLine\n",
+    "        # E73 Information Object\n",
+    "            e73placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
+    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e37placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
+    "            line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e37placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + tt.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + tt.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
+    "        # E55 Type\n",
     "        if(row['tipologia'] != ''):\n",
-    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + typeCoords.code + \">\"\n",
-    "            line = triple(e37placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix,  '\\\"' + row['tipologia'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "            tipologie = []\n",
+    "            pipe = \"|\" \n",
+    "            if pipe in row['tipologia']:\n",
+    "                tipologie = row['tipologia'].split('|')\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "            else:\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "        elif(row['genere'] != ''):\n",
+    "            tipologie = []\n",
+    "            pipe = \"|\" \n",
+    "            if pipe in row['genere']:\n",
+    "                tipologie = row['genere'].split('|')\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)\n",
+    "            else:\n",
+    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
+    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "                output.write(line)       \n",
+    "        # E35 Title        \n",
     "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
     "            tut = row['titolo_aspo']\n",
     "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e37placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
+    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + tut.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        #\n",
-    "        # Triplify the 'segnatura' -- should exist for every entry\n",
+    "        # E42 Identifier\n",
     "        segnatura = ''\n",
     "        if(row['segnatura_busta'] != ''):\n",
     "            segnatura = segnatura + row['segnatura_busta']\n",
@@ -201,7 +223,18 @@
     "            output.write(line)\n",
     "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Segnatura: Fondo Ospedale della Misericordia e Dolce, ' + segnatura + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
     "        #\n",
     "        #\n",

+ 2 - 2
ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_item_event_exchange_date_place.ipynb

@@ -219,7 +219,7 @@
     "            authcode_arrivo = re.sub('{luogo: .* ', '', auth_arrivo)\n",
     "            authcodeprefix_arrivo = authcode_arrivo.replace('IT-ASPO-GEO0001-', '').replace('\"', '').replace('}', '').strip()\n",
     "            #Luogo partenza\n",
-    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_partenza + \"/\" + sendLetterCoords.code + \"_\" + placeCoords.code +  \">\"\n",
+    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_partenza + \">\"\n",
     "            line = triple(el2placeHolder, movedFromCoords.prefix, e53placeHolder) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
@@ -227,7 +227,7 @@
     "            line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place_partenza + '\\\"') + closeLine\n",
     "            output.write(line)\n",
     "            #Luogo arrivo\n",
-    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_arrivo + \"/\" + receiveLetterCoords.code + \"_\" + placeCoords.code +  \">\"\n",
+    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_arrivo + \">\"\n",
     "            line = triple(el3placeHolder, movedToCoords.prefix, e53placeHolder) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",

+ 21 - 20
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_permanent_location.ipynb → ASPO/CSV_to_RDF/marcovaldi/CSV_to_RDF_marcovaldi_type.ipynb

@@ -1,5 +1,12 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Parser per estrarre tutte le tipologie di documenti ed associarle solo una volta ad ogni record"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -8,11 +15,9 @@
    "source": [
     "# Utilities to read/write csv files\n",
     "import csv\n",
-    "# Utilities to handle character encodings\n",
     "import unicodedata\n",
     "# Ordered Dicts\n",
     "from collections import OrderedDict\n",
-    "\n",
     "import json\n",
     "\n",
     "\n",
@@ -34,8 +39,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/ospedale/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/ospedale/'"
+    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
+    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
    ]
   },
   {
@@ -54,17 +59,17 @@
     "\n",
     "\n",
     "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00002->', 'pa:')\n",
+    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
+    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
     "# W3/CIDOC Predicates\n",
     "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
     "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -73,7 +78,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -126,8 +130,7 @@
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -140,10 +143,10 @@
    "outputs": [],
    "source": [
     "filePrefix = 'data_'\n",
-    "fileType = 'file'\n",
-    "max_entries = 100000000\n",
+    "fileType = 'item_type'\n",
+    "max_entries = 1000000000\n",
     "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_permanent_location.ttl', 'w') as output:\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
     "    reader = csv.DictReader(csv_file)\n",
     "    writeTTLHeader(output)\n",
     "    first = True\n",
@@ -156,20 +159,18 @@
     "            first = False\n",
     "            continue\n",
     "\n",
-    "        E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
-    "        line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", labelCoords.prefix,  '\\\"' + row['tipologia']+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
+    "        \n",
     "        output.write('\\n')\n",
-    "        output.write('\\n')       \n",
     "        #\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
-    "            break"
+    "            break\n",
+    "        "
    ]
   }
  ],

+ 32 - 20
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_all.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -62,9 +62,10 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -73,14 +74,13 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 52,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -106,13 +106,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 53,
    "metadata": {},
    "outputs": [],
    "source": [
     "def writeTTLHeader(output):\n",
     "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
@@ -123,24 +122,25 @@
     "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    # Added by FS\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 54,
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
     "filePrefix = 'data_'\n",
+    "# inserire i livelli otherlevel, collection, fonds, recordgrp, series, subfonds, subgrp, subseries\n",
     "fileType = 'subseries'\n",
     "max_entries = 10000000000000000000000000000000\n",
     "\n",
@@ -149,6 +149,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "    line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -156,11 +161,12 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n",
+    "        # E22 Man Made Object\n",
     "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "        output.write(line)\n",
     "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
+    "        # E73 Information Object\n",
     "        e73placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "        output.write(line)\n",
@@ -168,8 +174,7 @@
     "        output.write(line)\n",
     "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "        output.write(line)\n",
-    "        #\n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        # E35 Title\n",
     "        if(row['titolo_aspo'] != 'None'):\n",
     "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
     "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
@@ -178,8 +183,8 @@
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
-    "        if(row['genere'] != ''):\n",
+    "        # E55 Type\n",
+    "        if(fileType != 'collection' and fileType != 'fonds' and fileType != 'recordgrp' and fileType != 'subgrp' and fileType != 'subfonds' and row['genere'] != ''):\n",
     "            tipologie = []\n",
     "            pipe = \"|\" \n",
     "            if pipe in row['genere']:\n",
@@ -191,11 +196,18 @@
     "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
     "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
     "                output.write(line)\n",
-    "\n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)\n",
+    "        # E53 Place\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break    "

+ 75 - 55
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_genreform.ipynb → ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_eac.ipynb

@@ -2,9 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 13,
    "source": [
     "# Utilities to read/write csv files\n",
     "import csv\n",
@@ -26,23 +24,23 @@
     "import sys\n",
     "# Json for the dictionary\n",
     "import json"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 14,
    "source": [
     "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/ospedale/'\n",
     "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/ospedale/'"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 15,
    "source": [
     "# Custom class to store URIs + related infos for the ontologies/repositories\n",
     "\n",
@@ -62,11 +60,9 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
+    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypeNCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3.1_has_type>', 'tn:')\n",
-    "\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -75,16 +71,20 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
+    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'sr:', 'E62')"
-   ]
+    "actorCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E39_Actor>', 'ac:', 'E39')\n",
+    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
+    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
+    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 16,
    "source": [
     "# Basic functions for triples / shortened triples in TTL format\n",
     "\n",
@@ -104,13 +104,13 @@
     "continueLine1 = ' ;\\n'\n",
     "continueLine2 = ' ,\\n'\n",
     "closeLine = ' .\\n'"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 17,
    "source": [
     "def writeTTLHeader(output):\n",
     "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
@@ -128,25 +128,25 @@
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + actorCoords.prefix + ' ' + actorCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
+    "    \n",
     "    output.write('\\n')\n"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
+   "execution_count": 18,
    "source": [
     "filePrefix = 'data_'\n",
-    "fileType = 'file'\n",
-    "max_entries = 100000000\n",
+    "fileType = 'eacAuth02'\n",
+    "max_entries = 100000000000000000000000\n",
     "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_genreform.ttl', 'w') as output:\n",
+    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
     "    reader = csv.DictReader(csv_file)\n",
     "    writeTTLHeader(output)\n",
     "    first = True\n",
@@ -157,40 +157,57 @@
     "        # Skip the first line as it carries info we don't want to triplify\n",
     "        if(first):\n",
     "            first = False\n",
-    "            continue\n",
+    "            continue            \n",
+    "        \n",
+    "        # <URL dell'archivio di stato alla persona/gruppo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.cidoc-crm.org/cidoc-crm/E39_Actor> .\n",
+    "        actorplaceHolder = personAuthCoords.prefix + row[\"recordId\"].replace('IT-ASPO-AU00002-','')\n",
+    "        # <URL dell'archivio di stato alla persona/gruppo> <http://www.w3.org/2000/01/rdf-schema#label> \"Pippo Pandolfi\"\n",
+    "        nome_actor = row[\"nameEntry@normal\"].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')\n",
+    "        line = triple(actorplaceHolder, labelCoords.prefix, '\\\"' + nome_actor.strip() + '\\\"') +  closeLine\n",
+    "        output.write(line)\n",
+    "        # <URL dell'archivio di stato alla persona/gruppo> <has note> \"e62placeHolder:\"\n",
+    "        e62placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row[\"recordId\"] + '/' + stringCoords.code + \">\"\n",
+    "        line = triple(actorplaceHolder, noteCoords.prefix, e62placeHolder) +  closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e62placeHolder, labelCoords.prefix, '\\\"Fonte: Archivio di Stato di Prato - Fondo Ospedale della Misericordia e Dolce\\\"') + closeLine\n",
+    "        output.write(line)\n",
     "\n",
-    "        #Genere\n",
-    "        if(row['genere'] != ''):\n",
-    "            e62placeHolder= \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + stringCoords.code + \">\"\n",
-    "            line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \">\", hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
+    "        # If the entityType is 'person' the CIDOC class is E21 Person\n",
+    "        if(row['entityType'] == 'person'):\n",
+    "            line = triple(actorplaceHolder, hasTypeCoords.prefix, personCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['genere'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"').replace(' |',',') + '\\\"') + closeLine\n",
+    "        # If the entityType is 'corporateBody' the CIDOC class is E74 Group\n",
+    "        if(row['entityType'] == 'corporateBody'):\n",
+    "            line = triple(actorplaceHolder, hasTypeCoords.prefix, groupCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
+    "        # If the entityType is 'family' the CIDOC class is E74 Group\n",
+    "        if(row['entityType'] == 'family'):\n",
+    "            line = triple(actorplaceHolder, hasTypeCoords.prefix, groupCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + stringCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "            line = triple(e62placeHolder, hasTypeNCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Genere\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "                    \n",
+    "\n",
     "        output.write('\\n')\n",
     "        \n",
     "        #\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
-    "            break"
-   ]
+    "            break\n",
+    "        \n",
+    "        \n",
+    "        "
+   ],
+   "outputs": [],
+   "metadata": {
+    "tags": []
+   }
   }
  ],
  "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
   "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
+   "name": "python3",
+   "display_name": "Python 3.9.0 64-bit"
   },
   "language_info": {
    "codemirror_mode": {
@@ -208,8 +225,11 @@
    "interpreter": {
     "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
    }
+  },
+  "interpreter": {
+   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}

+ 26 - 12
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file.ipynb

@@ -62,9 +62,10 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
+    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
+    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
     "\n",
     "# CIDOC Objects\n",
     "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
@@ -73,7 +74,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
    ]
@@ -126,6 +126,8 @@
     "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
+    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
     "    output.write('\\n')\n"
    ]
   },
@@ -146,6 +148,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "    line = triple(E53placeHolder, hasTypeCoords.prefix, placeCoords.prefix ) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(E53placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -153,12 +160,13 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        # Write E22 Man Made Object & E73 Information Object\n",
+    "        # E22 Man Made Object\n",
     "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
     "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
+    "        # E73 Information Object\n",
     "            e73placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), carriesCoords.prefix, e73placeHolder) + closeLine\n",
     "            output.write(line)\n",
@@ -166,7 +174,7 @@
     "            output.write(line)\n",
     "            line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
+    "        # E55 Type\n",
     "        if(row['tipologia'] != ''):\n",
     "            tipologie = []\n",
     "            pipe = \"|\" \n",
@@ -191,8 +199,7 @@
     "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
     "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
     "                output.write(line)\n",
-    "        #\n",
-    "        # If the 'titolo_aspo' property is not empty for the given entry, write down title-related triples\n",
+    "        # E35 Title\n",
     "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
     "            tut = row['titolo_aspo']\n",
     "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
@@ -202,8 +209,7 @@
     "            output.write(line)\n",
     "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + tut.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "\n",
-    "        # Triplify the 'segnatura' -- should exist for every entry\n",
+    "        # E42 Identifier\n",
     "        segnatura = ''\n",
     "        if(row['segnatura_precedente'] != ''):\n",
     "            segnatura = row['segnatura_precedente']\n",
@@ -217,11 +223,19 @@
     "            output.write(line)\n",
     "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Segnatura: Fondo Ospedale della Misericordia e Dolce, ' + segnatura + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "                    \n",
+    "        # E42 Identifier\n",
+    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
+    "        output.write(line)\n",
+    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
+    "        output.write(line)           \n",
+    "        # E53 Place\n",
+    "        E53placeHolder = \"<http://www.archiviodistato.prato.it>\"\n",
+    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-ST00005-', ''), hasCurrentPermanentLocationCoords.prefix, E53placeHolder) + closeLine\n",
+    "        output.write(line)\n",
     "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break"

+ 1 - 7
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_dimensions.ipynb

@@ -155,8 +155,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "\n",
-    "        #DIMENSIONI\n",
+    "        # E54 Dimensions\n",
     "        pipe = \"|\"\n",
     "        if(row['dimensione_altezza_larghezza_spessore'] != ''):\n",
     "            dimensioni = []\n",
@@ -167,12 +166,7 @@
     "            output.write(line)\n",
     "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"Dimensioni (mm): ' + row['dimensione_altezza_larghezza_spessore'].replace(\"|\", \"x\") + '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
-    "                    \n",
     "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break"

+ 3 - 17
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_event_creation.ipynb

@@ -63,7 +63,6 @@
     "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
     "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
     "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
     "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
     "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
     "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
@@ -76,7 +75,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
     "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
@@ -166,8 +164,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        #Evento creazione\n",
+    "        # E65 Creation\n",
     "        if(row['tipologia'] != 'carteggio'):\n",
     "            e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \">\"\n",
     "            e65placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
@@ -179,7 +176,7 @@
     "            output.write(line)\n",
     "            line = triple(e65placeHolder, hasTypePCoords.prefix, '\\\"Inizio\\\"^^xsd:string') + closeLine\n",
     "            output.write(line)\n",
-    "            #Autore dell'Inizio della creazione del documento\n",
+    "            # E39 Actor\n",
     "            pipe = \"|\"\n",
     "            if(row['persona'] != ''):\n",
     "                persone = []\n",
@@ -200,10 +197,8 @@
     "                        output.write(line)\n",
     "                        line = triple(e55placeHolder, labelCoords.prefix, '\\\"Menzionato\\\"') + closeLine\n",
     "                        output.write(line)\n",
-    "\n",
     "                else:\n",
     "                    persone.append(row['persona'])\n",
-    "\n",
     "            elif(row['famiglia'] != ''):\n",
     "                persone = []\n",
     "                if pipe in row['famiglia']:\n",
@@ -225,7 +220,6 @@
     "                        output.write(line)\n",
     "                else:\n",
     "                    persone.append(row['famiglia'])\n",
-    "\n",
     "            elif(row['compagnia'] != ''):\n",
     "                persone = []\n",
     "                if pipe in row['compagnia']:\n",
@@ -247,7 +241,6 @@
     "                        output.write(line)\n",
     "                else:\n",
     "                    persone.append(row['compagnia'])\n",
-    "\n",
     "            e65FplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
     "            line = triple(e22placeHolder, wasBroughtCoords.prefix, e65FplaceHolder) + closeLine\n",
     "            output.write(line)\n",
@@ -257,7 +250,7 @@
     "            output.write(line)\n",
     "            line = triple(e65FplaceHolder, hasTypePCoords.prefix, '\\\"Fine\\\"^^xsd:string') + closeLine\n",
     "            output.write(line)\n",
-    "            #Autore della creazione del documento\n",
+    "            # E39 Actor\n",
     "            pipe = \"|\"\n",
     "            if(row['persona'] != ''):\n",
     "                persone = []\n",
@@ -278,10 +271,8 @@
     "                        output.write(line)\n",
     "                        line = triple(e55placeHolder, labelCoords.prefix, '\\\"Menzionato\\\"') + closeLine\n",
     "                        output.write(line)\n",
-    "\n",
     "                else:\n",
     "                    persone.append(row['persona'])\n",
-    "\n",
     "            elif(row['famiglia'] != ''):\n",
     "                persone = []\n",
     "                if pipe in row['famiglia']:\n",
@@ -303,7 +294,6 @@
     "                        output.write(line)\n",
     "                else:\n",
     "                    persone.append(row['famiglia'])\n",
-    "\n",
     "            elif(row['compagnia'] != ''):\n",
     "                persone = []\n",
     "                if pipe in row['compagnia']:\n",
@@ -325,11 +315,7 @@
     "                        output.write(line)\n",
     "                else:\n",
     "                    persone.append(row['compagnia'])\n",
-    "\n",
     "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break"

+ 3 - 9
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_event_creation_date.ipynb

@@ -135,7 +135,6 @@
     "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    \n",
     "    output.write('\\n')\n"
    ]
   },
@@ -148,7 +147,7 @@
    "outputs": [],
    "source": [
     "filePrefix = 'data'\n",
-    "fileType = '_event_creation'\n",
+    "fileType = 'file_event_creation'\n",
     "max_entries = 100000000\n",
     "\n",
     "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_date.ttl', 'w') as output:\n",
@@ -163,8 +162,7 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        #Periodo creazione\n",
+    "        # E65 Creation - E52 Time Span\n",
     "        #e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
     "        #e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
     "        #e22placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \">\"        \n",
@@ -189,12 +187,8 @@
     "                line = triple(e52FplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
     "                output.write(line)\n",
     "                line = triple(e52FplaceHolder, labelCoords.prefix, '\\\"'+row['ETS'] +'\\\"') + closeLine\n",
-    "                output.write(line)  \n",
-    "                          \n",
+    "                output.write(line)                      \n",
     "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break"

+ 2 - 7
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_extent.ipynb

@@ -155,25 +155,20 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        # NUMERO DI CARTE - CONSISTENZA\n",
+    "        # E54 Dimensions\n",
     "        if(row['numero'] != ''):\n",
     "            e54placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
     "            line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \">\", hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e54placeHolder, hasTypeCoords.prefix, dimensionsCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "            #Remove all white-space characters:\n",
+    "            # Remove all white-space characters:\n",
     "            txt = row['numero']\n",
     "            x = re.sub(\" \\n\", \"\", txt)\n",
     "            y = re.sub(\"\\s\\s\", \"\", x)\n",
     "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"Consistenza: carte ' + y.replace('\\\\','\\\\\\\\').replace('\\\"','')+ '\\\"') + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
     "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
-    "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",
     "            break"

+ 20 - 20
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_physfacet.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,7 +52,6 @@
     "        self.prefix = prefix\n",
     "        self.code = code\n",
     "\n",
-    "\n",
     "# Repositories\n",
     "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005->', 'dt:')\n",
     "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00002->', 'pa:')\n",
@@ -73,7 +72,6 @@
     "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
     "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
     "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
     "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
     "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
     "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')"
@@ -81,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -107,7 +105,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -129,13 +127,12 @@
     "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
     "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "   \n",
     "    output.write('\\n')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 18,
    "metadata": {
     "tags": []
    },
@@ -150,6 +147,11 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    e57placeHolder = \"<http://archiviodistato.prato.it/\" + materialCoords.code + \">\"\n",
+    "    line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(e57placeHolder, labelCoords.prefix, '\\\"Supporto\\\"') + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
@@ -157,20 +159,18 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "        \n",
-    "        if(row['supporto'] != ''):\n",
-    "            e57placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + materialCoords.code + \">\"\n",
-    "            line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \">\", consistCoords.prefix, e57placeHolder) + closeLine\n",
+    "        # E57 Material\n",
+    "        if(row['supporto'] != ''):            \n",
+    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/\" + materialCoords.code + \"_\" + typeCoords.code + \"_\" + row['supporto'].replace('|','').replace(' ','_').replace('__','_').replace(',','').replace(')','').replace('(','') + \">\"  \n",
+    "            line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \">\", consistCoords.prefix, e55placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e57placeHolder, labelCoords.prefix, '\\\"' + row['supporto'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            line = triple(e55placeHolder, hasTypePCoords.prefix, e57placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
+    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
-    "        \n",
-    "                    \n",
-    "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
+    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"' + row['supporto'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
+    "            output.write(line)\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 19 - 23
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_phystech.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 67,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 68,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 69,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -80,7 +80,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 70,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -106,7 +106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 71,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -133,7 +133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 72,
    "metadata": {
     "tags": []
    },
@@ -148,25 +148,24 @@
     "    writeTTLHeader(output)\n",
     "    first = True\n",
     "    ii = 0\n",
+    "    e3placeHolder = \"<http://archiviodistato.prato.it/\" + conditionCoords.code + \">\"\n",
+    "    line = triple(e3placeHolder, labelCoords.prefix, '\\\"Stato di conservazione\\\"') + closeLine\n",
+    "    output.write(line)\n",
+    "    line = triple(e3placeHolder, hasTypeCoords.prefix, conditionCoords.prefix) + closeLine\n",
+    "    output.write(line)\n",
     "    for row in reader:\n",
     "        # The index ii is used to process a limited number of entries for testing purposes\n",
     "        ii = ii+1\n",
     "        # Skip the first line as it carries info we don't want to triplify\n",
     "        if(first):\n",
     "            first = False\n",
-    "            continue\n",
-    "        \n",
-    "        # STATO DI CONSERVAZIONE\n",
+    "            continue \n",
+    "        # E3 Condition State\n",
     "        if(row['conservazione'] != ''):\n",
-    "            e3placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + conditionCoords.code + \">\"\n",
-    "            line = triple( \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \">\", hasConditionCoords.prefix,  e3placeHolder) + closeLine\n",
+    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/\" + conditionCoords.code + \"_\" + typeCoords.code + \"_\" + row['conservazione'].replace('|','').replace(' ','_').replace('__','_').replace(',','').replace(')','').replace('(','') + \">\" \n",
+    "            line = triple( \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \">\", hasConditionCoords.prefix, e55placeHolder) + closeLine\n",
     "            output.write(line)\n",
-    "            line = triple(e3placeHolder, hasTypeCoords.prefix, conditionCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e3placeHolder, labelCoords.prefix, '\\\"Stato di conservazione\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + conditionCoords.code + \"_\" + typeCoords.code + \">\"  \n",
-    "            line = triple(e3placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
+    "            line = triple(e55placeHolder, hasTypePCoords.prefix, e3placeHolder) + closeLine\n",
     "            output.write(line)\n",
     "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
     "            output.write(line)\n",
@@ -174,12 +173,9 @@
     "            txt = row['conservazione']\n",
     "            x = re.sub(\" \\n\", \"\", txt)\n",
     "            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"'+ y.replace('\\\\','\\\\\\\\').replace('\\\"','')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "                    \n",
-    "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
+    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"'+ y.replace('\\\\','\\\\\\\\').replace('\\\"','').replace('|',',').replace(' ,',',') + '\\\"') + closeLine\n",
+    "            output.write(line)       \n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",

+ 1 - 4
ASPO/CSV_to_RDF/ospedale/CSV_to_RDF_ospedale_file_scopecontent.ipynb

@@ -159,7 +159,6 @@
     "        if(first):\n",
     "            first = False\n",
     "            continue\n",
-    "\n",
     "        if(row['scope-content_body'] != ''):\n",
     "            e73placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
     "            e1placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['id'] + \"/E73_\" + entityCoords.code + \">\"\n",
@@ -180,9 +179,7 @@
     "            output.write(line)\n",
     "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Scope and Content\\\"') + closeLine\n",
     "            output.write(line)        \n",
-    "        output.write('\\n')\n",
-    "        \n",
-    "        #\n",
+    "            output.write('\\n')\n",
     "        #\n",
     "        # Limit number of entries processed (if desired)\n",
     "        if(ii>max_entries):\n",