Ver Fonte

add ASPO

Federica há 1 ano atrás
pai
commit
cacaaed76b
61 ficheiros alterados com 0 adições e 17953 exclusões
  1. 0 253
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/ceppo/ASPO_CSV_to_RDF_onomastica_ceppo.py
  2. 0 303
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_all_date_normalization.ipynb
  3. 0 257
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_all_date_normalization_range.ipynb
  4. 0 272
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_date.ipynb
  5. 0 230
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_eac.ipynb
  6. 0 258
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_file.ipynb
  7. 0 229
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_fonds.ipynb
  8. 0 207
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item.ipynb
  9. 0 331
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_date_normalization.ipynb
  10. 0 274
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_date_normalization_range.ipynb
  11. 0 243
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_creation.ipynb
  12. 0 281
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_creation_actor.ipynb
  13. 0 249
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_creation_date.ipynb
  14. 0 253
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange.ipynb
  15. 0 266
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_actor.ipynb
  16. 0 269
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_date.ipynb
  17. 0 258
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_place.ipynb
  18. 0 259
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_receiver.ipynb
  19. 0 257
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_sender.ipynb
  20. 0 228
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_extent.ipynb
  21. 0 218
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_id.ipynb
  22. 0 231
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_list.ipynb
  23. 0 215
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_note.ipynb
  24. 0 216
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_phydesc.ipynb
  25. 0 210
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_physfacet.ipynb
  26. 0 216
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_phystech.ipynb
  27. 0 217
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_scopecontent.ipynb
  28. 0 212
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_segnatura.ipynb
  29. 0 209
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_title.ipynb
  30. 0 209
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_type.ipynb
  31. 0 244
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_series.ipynb
  32. 0 251
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_subfonds.ipynb
  33. 0 254
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_subseries.ipynb
  34. 0 222
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_type.ipynb
  35. 0 318
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini.py
  36. 0 407
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_corporatebody.py
  37. 0 436
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_lettere_mani.py
  38. 0 907
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_person.py
  39. 0 454
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_person_occupation.py
  40. 0 248
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_fonds.ipynb
  41. 0 371
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_item_newdataset_old.ipynb
  42. 0 459
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_item_person_newdataset_old.ipynb
  43. 0 286
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_item_ref_reg.ipynb
  44. 0 536
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_document.ipynb
  45. 0 418
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_event.ipynb
  46. 0 347
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_event_place.ipynb
  47. 0 313
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_img.ipynb
  48. 0 297
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_label.ipynb
  49. 0 527
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_person.ipynb
  50. 0 587
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_person_name.ipynb
  51. 0 205
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_type.ipynb
  52. 0 240
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_date.ipynb
  53. 0 245
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_fonds.ipynb
  54. 0 285
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item.ipynb
  55. 0 300
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_date_normalization.ipynb
  56. 0 252
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_event_creation.ipynb
  57. 0 297
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_event_exchange.ipynb
  58. 0 276
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_event_exchange_date_place.ipynb
  59. 0 214
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_extent.ipynb
  60. 0 222
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_note.ipynb
  61. 0 205
      FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_type.ipynb

+ 0 - 253
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/ceppo/ASPO_CSV_to_RDF_onomastica_ceppo.py

@@ -1,253 +0,0 @@
-#Parser to convert the Ceppo Vecchio onomastics CSV file into TTL format
-
-# Utilities to read/write csv files
-import csv
-# Utilities to handle character encodings
-import unicodedata
-# Ordered Dicts
-from collections import OrderedDict
-
-import json
-import re
-
-
-# OPZIONAL IMPORTS
-
-# For timestamping/simple speed tests
-from datetime import datetime
-# Random number generator
-from random import *
-# System & command line utilities
-import sys
-# Json for the dictionary
-import json
-
-import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/ceppo/'
-export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/ceppo/'
-
-# Custom class to store URIs + related infos for the ontologies/repositories
-
-class RDFcoords:
-    def __init__(self, uri, prefix, code = None):
-        self.uri = uri
-        self.prefix = prefix
-        self.code = code
-
-# Repositories
-aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/patrimonio/complessi-archivistici-e-soggetti-produttori/>', 'aspo:')
-foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
-cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
-schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
-personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
-nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
-rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
-
-# Basic functions for triples / shortened triples in TTL format
-
-def triple(subject, predicate, object1):
-    line = subject + ' ' + predicate + ' ' + object1
-    return line
-
-def doublet(predicate, object1):
-    line = '    ' + predicate + ' ' + object1
-    return line
-
-def singlet(object1):
-    line = '        ' + object1
-    return line
-
-# Line endings in TTL format
-continueLine1 = ' ;\n'
-continueLine2 = ' ,\n'
-closeLine = ' .\n'
-
-def writeTTLHeader(output):
-    output.write('@prefix ' + aspoCoords.prefix + ' ' + aspoCoords.uri + closeLine)
-    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)
-    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
-    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)
-    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
-    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
-    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)
-
-    output.write('\n')
-
-
-filePrefix = 'onomastica_'
-fileType = 'ceppo_vecchio'
-max_entries = 1000000000
-
-with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
-        export_dir + filePrefix + fileType + '.ttl', 'w') as output:
-    reader = csv.DictReader(csv_file)
-    writeTTLHeader(output)
-    first = True
-    ii = 0
-    for row in reader:
-        # The index ii is used to process a limited number of entries for testing purposes
-        ii = ii + 1
-        if row['nameEntry@normal'] != '':
-
-            id_aspo = row['recordId']
-
-            #placeHolders
-            aspoPlaceHolder = aspoCoords.prefix + id_aspo
-            id_aspo = row['recordId']
-
-            line = triple(aspoPlaceHolder,
-                          cidocCoords.prefix + 'P1_is_identified_by',
-                          aspoPlaceHolder + "_E42") + closeLine
-            output.write(line)
-            line = triple(aspoPlaceHolder + "_E42",
-                          nsCoords.prefix + 'type',
-                          cidocCoords.prefix + 'E42_Identifier') + closeLine
-            output.write(line)
-            line = triple(aspoPlaceHolder + "_E42",
-                          rdfsCoords.prefix + 'label',
-                          '\"' + id_aspo + '\"') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          nsCoords.prefix + 'type',
-                          foafCoords.prefix + 'person') + closeLine
-            output.write(line)
-            line = triple(aspoPlaceHolder,
-                          foafCoords.prefix + 'name',
-                          '\"' + row['nameEntry@normal'] + '\"') + closeLine
-            output.write(line)
-
-            if row['nome proprio'] != '':
-                #Remove all white-space characters:
-                txt = row['nome proprio']
-                x = re.sub(" \n", "", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              foafCoords.prefix + 'givenName',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['nome di famiglia'] != '':
-                #Remove all white-space characters:
-                txt = row['nome di famiglia']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              foafCoords.prefix + 'familyName',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-            
-            if row['Alias'] != '' and row['Alias'] != ' ':
-                #Remove all white-space characters:
-                txt = row['Alias']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              schemaCoords.prefix + 'alternateName',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['genere'] != '':
-                #Remove all white-space characters:
-                txt = row['genere']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              foafCoords.prefix + 'gender',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['patronimico/matronimico'] != '':
-                #Remove all white-space characters:
-                txt = row['patronimico/matronimico']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              personCoords.prefix + 'patronymicName',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['occupation'] != '' and row['occupation'] != ' ' :
-                occupationPlaceHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + id_aspo + "/occupation>"
-                #Remove all white-space characters:
-                txt = row['occupation']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              schemaCoords.prefix + 'hasOccupation',
-                              occupationPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(occupationPlaceHolder,
-                              nsCoords.prefix + 'type',
-                              schemaCoords.prefix + 'Occupation') + closeLine
-                output.write(line)
-                line = triple(occupationPlaceHolder,
-                              rdfsCoords.prefix + 'label',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['avo 1'] != '':
-                avo1 = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + id_aspo + "/avo1>"
-                line = triple(aspoPlaceHolder,
-                              schemaCoords.prefix + 'relatedTo',
-                              avo1) + closeLine
-                output.write(line)
-                line = triple(avo1,
-                              nsCoords.prefix + 'type',
-                              foafCoords.prefix + 'Person') + closeLine
-                output.write(line)
-                line = triple(avo1,
-                              rdfsCoords.prefix + 'label',
-                              '\"' + row['avo 1'] + '\"') + closeLine
-                output.write(line)
-
-            if row['avo 2'] != '':
-                avo2 = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + id_aspo + "/avo2>"
-                line = triple(aspoPlaceHolder,
-                              schemaCoords.prefix + 'relatedTo',
-                              avo2) + closeLine
-                output.write(line)
-                line = triple(avo2,
-                              nsCoords.prefix + 'type',
-                              foafCoords.prefix + 'Person') + closeLine
-                output.write(line)
-                line = triple(avo2,
-                              rdfsCoords.prefix + 'label',
-                              '\"' + row['avo 2'] + '\"') + closeLine
-                output.write(line)
-
-            if row['Qualifica'] != '':
-                #Remove all white-space characters:
-                txt = row['Qualifica']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", " ", x)
-                line = triple(aspoPlaceHolder,
-                              schemaCoords.prefix + 'honorificPrefix',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['place_occupation_Qualifica'] != '':
-                #Remove all white-space characters:
-                txt = row['place_occupation_Qualifica']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              schemaCoords.prefix + 'workLocation',
-                              '\"' + row['place_occupation_Qualifica'].replace('\\','\\\\').replace('"','\\"') + '\"') + closeLine
-                output.write(line)
-
-            if row['biogHist p'] != '':
-                #Remove all white-space characters:
-                txt = row['biogHist p']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", " ", x)
-                line = triple(aspoPlaceHolder,
-                              cidocCoords.prefix + 'P3_has_note',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-        output.write('\n')
-        #
-        #
-        # Limit number of entries processed (if desired)
-        if (ii > max_entries):
-            break

+ 0 - 303
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_all_date_normalization.ipynb

@@ -1,303 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "# OPZIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/CSV/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/RDF/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- year: https://www.w3.org/TR/owl-time#year\n",
-    "- month: https://www.w3.org/TR/owl-time#month\n",
-    "- day: https://www.w3.org/TR/owl-time#day"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'subseries_datini_date'\n",
-    "max_entries = 1000000000\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E65 Creation - E52 Time Span\n",
-    "        e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
-    "        e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
-    "        e22placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \">\"        \n",
-    "        line = triple(e22placeHolder, wasBroughtCoords.prefix, e65placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e65placeHolder, hasTypeCoords.prefix, creationCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e65placeHolder, labelCoords.prefix, '\\\"Inizio creazione\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e65placeHolder, hasTypePCoords.prefix, '\\\"Inizio\\\"^^xsd:string') + closeLine\n",
-    "        output.write(line)\n",
-    "        e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
-    "        line = triple(e22placeHolder, wasBroughtCoords.prefix, e65FplaceHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e65FplaceHolder, hasTypeCoords.prefix, creationCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e65FplaceHolder, labelCoords.prefix, '\\\"Fine creazione\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e65FplaceHolder, hasTypePCoords.prefix, '\\\"Fine\\\"^^xsd:string') + closeLine\n",
-    "        output.write(line)\n",
-    "        if(row['data_periodo_inizio'] != ''):\n",
-    "            if(row['data_periodo_inizio'] != ''):            \n",
-    "                e52placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                line = triple(e65placeHolder, hasTimeSpanCoords.prefix, e52placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52placeHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52placeHolder, labelCoords.prefix, '\\\"'+row['data_periodo_inizio'] +'\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "            if(row['data_periodo_fine'] != ''):\n",
-    "                e52FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \"F>\"\n",
-    "                line = triple(e65FplaceHolder, hasTimeSpanCoords.prefix, e52FplaceHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52FplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52FplaceHolder, labelCoords.prefix, '\\\"'+row['data_periodo_fine'] +'\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "            if(row['data_periodo_normalizzata_inizio'] != ''):\n",
-    "                if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):           \n",
-    "                    year = row['data_periodo_normalizzata_inizio'][0:4]\n",
-    "                    month = row['data_periodo_normalizzata_inizio'][4:6]\n",
-    "                    day = row['data_periodo_normalizzata_inizio'][6:8]\n",
-    "                    if(year != '****' and year != '9998'):\n",
-    "                        line = triple(e52placeHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (month != '**' and month != '99'):\n",
-    "                            line = triple(e52placeHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine                            \n",
-    "                            output.write(line)\n",
-    "                        if (day != '**' and day != '98'):\n",
-    "                            line = triple(e52placeHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52placeHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "            if(row['data_periodo_normalizzata_fine'] != ''):\n",
-    "               if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):\n",
-    "                    year = row['data_periodo_normalizzata_fine'][0:4]\n",
-    "                    month = row['data_periodo_normalizzata_fine'][4:6]\n",
-    "                    day = row['data_periodo_normalizzata_fine'][6:8]\n",
-    "                    if(year != '****' and year != '9998'):\n",
-    "                        line = triple(e52FplaceHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (month != '**' and month != '99'):\n",
-    "                            line = triple(e52FplaceHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        if (day != '**' and day != '98'):\n",
-    "                                line = triple(e52FplaceHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                                output.write(line)\n",
-    "                    else:\n",
-    "                        line = triple(e52FplaceHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "            # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 257
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_all_date_normalization_range.ipynb

@@ -1,257 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/CSV/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/RDF/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')\n",
-    "\n",
-    "beginningCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasBeginning>', 'beg:')\n",
-    "endCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasEnd>', 'end:')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- year: https://www.w3.org/TR/owl-time#year\n",
-    "- month: https://www.w3.org/TR/owl-time#month\n",
-    "- day: https://www.w3.org/TR/owl-time#day"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + beginningCoords.prefix + ' ' + beginningCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + endCoords.prefix + ' ' + endCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'subfonds_datini_date'\n",
-    "max_entries = 1000000000\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_range.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E52 Time Span \n",
-    "        year = row['data_periodo_normalizzata_inizio'][0:4]\n",
-    "        month = row['data_periodo_normalizzata_inizio'][4:6]\n",
-    "        day = row['data_periodo_normalizzata_inizio'][6:8]\n",
-    "        yearf = row['data_periodo_normalizzata_fine'][0:4]\n",
-    "        monthf = row['data_periodo_normalizzata_fine'][4:6]\n",
-    "        dayf = row['data_periodo_normalizzata_fine'][6:8]\n",
-    "        if(row['data_periodo_normalizzata_inizio'] != ''):\n",
-    "            if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):\n",
-    "                e52placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                line = triple(e52placeHolder, beginningCoords.prefix, '\\\"'+row['data_periodo_normalizzata_inizio']+'\\\"^^xsd:date') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52placeHolder, endCoords.prefix, '\\\"'+year+month+day+'\\\"^^xsd:date') + closeLine\n",
-    "                output.write(line)\n",
-    "        if(row['data_periodo_normalizzata_fine'] != ''):\n",
-    "            if(row['data_periodo_normalizzata_fine'] != 'Senza data'):\n",
-    "                e52FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \"F>\"\n",
-    "                line = triple(e52FplaceHolder, endCoords.prefix, '\\\"'+row['data_periodo_normalizzata_fine']+'\\\"^^xsd:date') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52FplaceHolder, beginningCoords.prefix, '\\\"'+yearf+monthf+dayf+'\\\"^^xsd:date') + closeLine\n",
-    "                output.write(line)\n",
-    "        output.write('\\n')\n",
-    "            # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 272
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_date.ipynb

@@ -1,272 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "# CIDOC Objects\n",
-    "moveCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E9_Move>', 'mv:', 'E9')\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Send, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)     \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + moveCoords.prefix + ' ' + moveCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_date.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        \n",
-    "        #Evento send letter\n",
-    "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "\n",
-    "        # Data invio\n",
-    "        if(row['data_inizio'] != ''):\n",
-    "            e52PplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el2placeHolder, e52PplaceHolder, '\\\"'+ row['data_inizio'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        if(row['data_fine'] != ''):\n",
-    "            e52AplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el3placeHolder, e52AplaceHolder, '\\\"' + row['data_fine'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 230
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_eac.ipynb

@@ -1,230 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "legalBodyCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E40_Legal_Body>', 'ly:', 'E40')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "actorCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E39_Actor>', 'ac:', 'E39')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + actorCoords.prefix + ' ' + actorCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + legalBodyCoords.prefix + ' ' + legalBodyCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + subClassOfCoords.prefix + ' ' + subClassOfCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'eacAuth03'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "            \n",
-    "\n",
-    "        # <State Archive URL of the person/group> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.cidoc-crm.org/cidoc-crm/E39_Actor> .\n",
-    "        actorplaceHolder = personAuthCoords.prefix + row[\"recordId\"].replace('IT-ASPO-AU00003-','')\n",
-    "        # <State Archive URL of the person/group> <http://www.w3.org/2000/01/rdf-schema#label> \"Pippo Pandolfi\"\n",
-    "        nome_actor = row[\"nameEntry@normal\"].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')\n",
-    "        line = triple(actorplaceHolder, labelCoords.prefix, '\\\"' + nome_actor.strip() + '\\\"') +  closeLine\n",
-    "        output.write(line)\n",
-    "        # <State Archive URL of the person/group> <has note> \"e62placeHolder:\"\n",
-    "        e62placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row[\"recordId\"] + '/' + stringCoords.code + \">\"\n",
-    "        line = triple(actorplaceHolder, noteCoords.prefix, e62placeHolder) +  closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e62placeHolder, labelCoords.prefix, '\\\"Fonte: Archivio di Stato di Prato - Fondo Datini \\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # If the entityType is 'person' the CIDOC class is E21 Person\n",
-    "        if(row['entityType'] == 'person'):\n",
-    "            line = triple(actorplaceHolder, hasTypeCoords.prefix, personCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "        # If the entityType is 'corporateBody' the CIDOC class is E74 Group\n",
-    "        if(row['entityType'] == 'corporateBody'):\n",
-    "            line = triple(actorplaceHolder, hasTypeCoords.prefix, groupCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "        # If the entityType is 'family' the CIDOC class is E74 Group\n",
-    "        if(row['entityType'] == 'family'):\n",
-    "            line = triple(actorplaceHolder, hasTypeCoords.prefix, groupCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 258
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_file.ipynb

@@ -1,258 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'file'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Datini, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\" \n",
-    "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E35 Title\n",
-    "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) +  closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        if(row['segnatura_parent'] != ''):\n",
-    "            e42placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Fondo Datini, ' + row['segnatura_parent'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypePCoords.prefix, '\\\"Segnatura precedente\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # Tipologia\n",
-    "        if(row['tipologia'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['tipologia']:\n",
-    "                tipologie = row['tipologia'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)    \n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "      \n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3.10.4 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 229
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_fonds.ipynb

@@ -1,229 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'fonds'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Datini, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E35 Title\n",
-    "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 207
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item.ipynb

@@ -1,207 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line) \n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 331
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_date_normalization.ipynb

@@ -1,331 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/CSV/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/RDF/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- year: https://www.w3.org/TR/owl-time#year\n",
-    "- month: https://www.w3.org/TR/owl-time#month\n",
-    "- day: https://www.w3.org/TR/owl-time#day"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item_datini_date'\n",
-    "max_entries = 1000000000\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E65 Creation - E52 Time Span\n",
-    "        if(row['tipologia'] != 'carteggio'):\n",
-    "            if(row['data_periodo_normalizzata_inizio'] != ''):\n",
-    "                e52placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):           \n",
-    "                    year = row['data_periodo_normalizzata_inizio'][0:4]\n",
-    "                    month = row['data_periodo_normalizzata_inizio'][4:6]\n",
-    "                    day = row['data_periodo_normalizzata_inizio'][6:8]\n",
-    "                    if(year != '****' and year != '9998'):\n",
-    "                        line = triple(e52placeHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (month != '**' and month != '99'):\n",
-    "                            line = triple(e52placeHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52placeHolder, monthCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        if (day != '**' and day != '98'):\n",
-    "                            line = triple(e52placeHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52placeHolder, dayCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                    else:\n",
-    "                        line = triple(e52placeHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "            if(row['data_periodo_normalizzata_fine'] != ''):\n",
-    "                e52FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \"F>\"\n",
-    "                if(row['data_periodo_normalizzata_fine'] != 'Senza data'):\n",
-    "                    year = row['data_periodo_normalizzata_fine'][0:4]\n",
-    "                    month = row['data_periodo_normalizzata_fine'][4:6]\n",
-    "                    day = row['data_periodo_normalizzata_fine'][6:8]\n",
-    "                    if(year != '****' and year != '9998'):\n",
-    "                        line = triple(e52FplaceHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (month != '**' and month != '99'):\n",
-    "                            line = triple(e52FplaceHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52FplaceHolder, monthCoords.prefix, '\\\"12\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        if (day != '**' and day != '98'):\n",
-    "                            line = triple(e52FplaceHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52FplaceHolder, dayCoords.prefix, '\\\"31\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                    else:\n",
-    "                        line = triple(e52FplaceHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "        else:\n",
-    "            if(row['data_periodo_normalizzata_inizio'] != ''):\n",
-    "                e52PplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):           \n",
-    "                    year = row['data_periodo_normalizzata_inizio'][0:4]\n",
-    "                    month = row['data_periodo_normalizzata_inizio'][4:6]\n",
-    "                    day = row['data_periodo_normalizzata_inizio'][6:8]\n",
-    "                    if(year != '****' and year != '9998'):\n",
-    "                        line = triple(e52PplaceHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (month != '**' and month != '99'):\n",
-    "                            line = triple(e52PplaceHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52PplaceHolder, monthCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        if (day != '**' and day != '98'):\n",
-    "                            line = triple(e52PplaceHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52PplaceHolder, dayCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                    else:\n",
-    "                        line = triple(e52PplaceHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "            if(row['data_periodo_normalizzata_fine'] != ''):\n",
-    "                e52AplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                if(row['data_periodo_normalizzata_fine'] != 'Senza data'):\n",
-    "                    year = row['data_periodo_normalizzata_fine'][0:4]\n",
-    "                    month = row['data_periodo_normalizzata_fine'][4:6]\n",
-    "                    day = row['data_periodo_normalizzata_fine'][6:8]\n",
-    "                    if(year != '****' and year != '9998'):\n",
-    "                        line = triple(e52AplaceHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (month != '**' and month != '99'):\n",
-    "                            line = triple(e52AplaceHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52AplaceHolder, monthCoords.prefix, '\\\"12\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        if (day != '**' and day != '98'):\n",
-    "                            line = triple(e52AplaceHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52AplaceHolder, dayCoords.prefix, '\\\"31\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                    else:\n",
-    "                        line = triple(e52AplaceHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "            # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 274
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_date_normalization_range.ipynb

@@ -1,274 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/CSV/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/DATINI/RDF/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')\n",
-    "beginningCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasBeginning>', 'beg:')\n",
-    "endCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasEnd>', 'end:')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- year: https://www.w3.org/TR/owl-time#year\n",
-    "- month: https://www.w3.org/TR/owl-time#month\n",
-    "- day: https://www.w3.org/TR/owl-time#day"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('@prefix ' + beginningCoords.prefix + ' ' + beginningCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + endCoords.prefix + ' ' + endCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item_datini_date'\n",
-    "max_entries = 1000000000\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_range.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E65 Creation - E52 Time Span\n",
-    "        year = row['data_periodo_normalizzata_inizio'][0:4]\n",
-    "        month = row['data_periodo_normalizzata_inizio'][4:6]\n",
-    "        day = row['data_periodo_normalizzata_inizio'][6:8]\n",
-    "        yearf = row['data_periodo_normalizzata_fine'][0:4]\n",
-    "        monthf = row['data_periodo_normalizzata_fine'][4:6]\n",
-    "        dayf = row['data_periodo_normalizzata_fine'][6:8]\n",
-    "        if(row['tipologia'] != 'carteggio'):\n",
-    "            if(row['data_periodo_normalizzata_inizio'] != ''):\n",
-    "                if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):\n",
-    "                    e52placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                    line = triple(e52placeHolder, beginningCoords.prefix, '\\\"'+row['data_periodo_normalizzata_inizio']+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52placeHolder, endCoords.prefix, '\\\"'+year+month+day+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "            if(row['data_periodo_normalizzata_fine'] != ''):\n",
-    "                if(row['data_periodo_normalizzata_fine'] != 'Senza data'):\n",
-    "                    e52FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \"F>\"\n",
-    "                    line = triple(e52FplaceHolder, endCoords.prefix, '\\\"'+row['data_periodo_normalizzata_fine']+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52FplaceHolder, beginningCoords.prefix, '\\\"'+yearf+monthf+dayf+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "        else:\n",
-    "            if(row['data_periodo_normalizzata_inizio'] != ''):\n",
-    "                if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):           \n",
-    "                    e52PplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                    line = triple(e52PplaceHolder, beginningCoords.prefix, '\\\"'+row['data_periodo_normalizzata_inizio']+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52PplaceHolder, endCoords.prefix, '\\\"'+year+month+day+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "            if(row['data_periodo_normalizzata_fine'] != ''):\n",
-    "                if(row['data_periodo_normalizzata_fine'] != 'Senza data'):\n",
-    "                    e52AplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                    line = triple(e52AplaceHolder, endCoords.prefix, '\\\"'+row['data_periodo_normalizzata_fine']+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52AplaceHolder, beginningCoords.prefix, '\\\"'+yearf+monthf+dayf+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "            # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 243
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_creation.ipynb

@@ -1,243 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_creation.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue        \n",
-    "        # E65 Creation\n",
-    "        if(row['tipologia'] != 'carteggio'):\n",
-    "            e22placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \">\"\n",
-    "            e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
-    "            e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
-    "            line = triple(e22placeHolder, wasBroughtCoords.prefix, e65placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65placeHolder, hasTypeCoords.prefix, creationCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65placeHolder, labelCoords.prefix, '\\\"Inizio creazione di ' + row['titolo_aspo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65placeHolder, hasTypePCoords.prefix, '\\\"Inizio\\\"^^xsd:string') + closeLine\n",
-    "            output.write(line)\n",
-    "            e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
-    "            line = triple(e22placeHolder, wasBroughtCoords.prefix, e65FplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65FplaceHolder, hasTypeCoords.prefix, creationCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65FplaceHolder, labelCoords.prefix, '\\\"Fine creazione di ' + row['titolo_aspo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65FplaceHolder, hasTypePCoords.prefix, '\\\"Fine\\\"^^xsd:string') + closeLine\n",
-    "            output.write(line)\n",
-    "            # E65 Creation - E53 Place\n",
-    "            if(row['luogo_luogo'] != ''):\n",
-    "                auth_luogo = row['luogo_luogo']\n",
-    "                authcode_luogo = re.sub('{luogo: .* ', '', auth_luogo)\n",
-    "                authcodeprefix_luogo = authcode_luogo.replace('\"', '').replace('}', '').strip()\n",
-    "                e53placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + placeCoords.code + \">\"\n",
-    "                line = triple(e65placeHolder, tookPlaceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e65FplaceHolder, tookPlaceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                geogname = row['luogo_luogo'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
-    "                place = re.sub('}', '', geogname).strip()\n",
-    "                line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place + '\\\"') + closeLine\n",
-    "                output.write(line)                                          \n",
-    "        output.write('\\n')\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 281
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_creation_actor.ipynb

@@ -1,281 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "actorCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E39_Actor>', 'ac:', 'E39')\n",
-    "hadParticipantCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P11_had_participant>', 'pt:')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + actorCoords.prefix + ' ' + actorCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hadParticipantCoords.prefix + ' ' + hadParticipantCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' +  roleOfCoords.prefix + ' ' +  roleOfCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_creation_actor.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E65 Creation - E39 Actor\n",
-    "        if(row['tipologia'] != 'carteggio'):\n",
-    "            e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
-    "            e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
-    "            e22placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \">\"        \n",
-    "            PC14placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + pcarriedByCoords.code + \">\"   \n",
-    "            E55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + pcarriedByCoords.code + \"_\" + typeCoords.code + \">\"       \n",
-    "            P11placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/P11\" + \">\"   \n",
-    "            P11E55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/P11\" + \"_\" + typeCoords.code + \">\"\n",
-    "            if(row['persona_tenutario'] != '' and row['tipologia'] != 'carteggio'):\n",
-    "                auth = row['persona_tenutario']\n",
-    "                authcode = re.sub('{\"nome\": .* ', '', auth)\n",
-    "                authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "                e39placeHolder = personAuthCoords.prefix + authcodeprefix\n",
-    "                line = triple(e65placeHolder, hasDomainCoords.prefix, PC14placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e65FplaceHolder, hasDomainCoords.prefix, PC14placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, labelCoords.prefix, \"\\\"Responsabilità della creazione\\\"\") + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, hasRangeCoords.prefix, e39placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, roleOfCoords.prefix, E55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E55placeHolder, labelCoords.prefix, \"\\\"Responsabile del documento\\\"\" ) + closeLine\n",
-    "                output.write(line)\n",
-    "            if(row['persona_mano'] != '' and row['tipologia'] != 'carteggio'):\n",
-    "                auth = row['persona_mano']\n",
-    "                authcode = re.sub('{\"nome\": .* ', '', auth)\n",
-    "                authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "                e39placeHolder = personAuthCoords.prefix + authcodeprefix\n",
-    "                line = triple(e65placeHolder, hasDomainCoords.prefix, PC14placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e65FplaceHolder, hasDomainCoords.prefix, PC14placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, hasRangeCoords.prefix, e39placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, roleOfCoords.prefix, E55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E55placeHolder, labelCoords.prefix, \"\\\"Mano\\\"\" ) + closeLine\n",
-    "                output.write(line)   \n",
-    "            if(row['compagnia'] != '' and row['tipologia'] != 'carteggio'):\n",
-    "                auth = row['compagnia']\n",
-    "                authcode = re.sub('{nome: .* ', '', auth)\n",
-    "                authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "                e39placeHolder = personAuthCoords.prefix + authcodeprefix\n",
-    "                line = triple(e65placeHolder, hadParticipantCoords.prefix, P11placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e65FplaceHolder, hadParticipantCoords.prefix, P11placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(P11placeHolder, hasRangeCoords.prefix, e39placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(P11placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(P11placeHolder, labelCoords.prefix, \"\\\"Compagnia\\\"\" ) + closeLine\n",
-    "                output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 249
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_creation_date.ipynb

@@ -1,249 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_creation_date.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E65 Creation - E52 Time Span\n",
-    "        if(row['tipologia'] != 'carteggio'):\n",
-    "            e65placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
-    "            e65FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
-    "            e22placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \">\"        \n",
-    "            if(row['data_inizio'] != ''):            \n",
-    "                e52placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                line = triple(e65placeHolder, hasTimeSpanCoords.prefix, e52placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52placeHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                #e55placeHolder = \"<\" + row['URL'] + \"_\" + timeSpanCoords.code + \"_ETI_E55>\"\n",
-    "                #line = triple(e52ETIplaceHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                #output.write(line)\n",
-    "                #line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                #output.write(line)\n",
-    "                #line = triple(e52ETIplaceHolder, onGoingTCoords.prefix, '\\\"'+row['ETI'] +'\\\"^^rdfs:Literal') + closeLine\n",
-    "                #output.write(line)\n",
-    "                line = triple(e52placeHolder, labelCoords.prefix, '\\\"'+row['data_inizio'] +'\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale inferiore\\\"') + closeLine\n",
-    "                #output.write(line)\n",
-    "            if(row['data_fine'] != ''):\n",
-    "                e52FplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"_\" + timeSpanCoords.code + \"F>\"\n",
-    "                line = triple(e65FplaceHolder, hasTimeSpanCoords.prefix, e52FplaceHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52FplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                #e55placeHolder = \"<\" + row['URL'] + \"_\" + timeSpanCoords.code + \"_ETS_E55>\"\n",
-    "                #line = triple(e52ETSplaceHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                #output.write(line)\n",
-    "                #line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                #output.write(line)\n",
-    "                #line = triple(e52ETSplaceHolder, onGoingTCoords.prefix, '\\\"'+row['ETS'] +'\\\"^^rdfs:Literal') + closeLine\n",
-    "                #output.write(line)\n",
-    "                line = triple(e52FplaceHolder, labelCoords.prefix, '\\\"'+row['data_fine'] +'\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale superiore\\\"') + closeLine\n",
-    "                #output.write(line)\n",
-    "            output.write('\\n')\n",
-    "            # Limit number of entries processed (if desired)\n",
-    "            if(ii>max_entries):\n",
-    "                break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 253
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange.ipynb

@@ -1,253 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "# CIDOC Objects\n",
-    "moveCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E9_Move>', 'mv:', 'E9')\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:')\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Sending, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)     \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + moveCoords.prefix + ' ' + moveCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + subClassOfCoords.prefix + ' ' + subClassOfCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue   \n",
-    "        # EL1 Exchange Letters\n",
-    "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"        \n",
-    "        if(row['luogo_partenza'] != '' and row['luogo_arrivo'] != ''):\n",
-    "            line = triple(exchangeLettersCoords.prefix, labelCoords.prefix, '\\\"Exchange of letters\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(sendLetterCoords.prefix, labelCoords.prefix, '\\\"Send letter\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(receiveLetterCoords.prefix, labelCoords.prefix, '\\\"Receive letter\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el2placeHolder, subClassOfCoords.prefix, el1placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el3placeHolder, subClassOfCoords.prefix, el1placeHolder) + closeLine\n",
-    "            output.write(line)    \n",
-    "            line = triple(datiniCoords.prefix + row['id'], movedByCoords.prefix, el1placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el1placeHolder, labelCoords.prefix, '\\\"' + row['tipologia'] + ': ' + row['titolo_aspo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el1placeHolder, hasTypeCoords.prefix, exchangeLettersCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el2placeHolder, labelCoords.prefix, '\\\"Invio\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el2placeHolder, hasTypeCoords.prefix, sendLetterCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el3placeHolder, labelCoords.prefix, '\\\"Ricezione\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(el3placeHolder, hasTypeCoords.prefix, receiveLetterCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 266
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_actor.ipynb

@@ -1,266 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "# CIDOC Objects\n",
-    "moveCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E9_Move>', 'mv:', 'E9')\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:')\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Sending, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hadParticipantCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P11_had_participant>', 'pt:')\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)     \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + moveCoords.prefix + ' ' + moveCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + subClassOfCoords.prefix + ' ' + subClassOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' +  roleOfCoords.prefix + ' ' +  roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hadParticipantCoords.prefix + ' ' + hadParticipantCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange_actor.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue   \n",
-    "        # EL1 Exchange Letters\n",
-    "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"        \n",
-    "        # E65 Creation - E39 Actor\n",
-    "        if(row['tipologia'] == 'carteggio'):\n",
-    "            e22placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \">\"        \n",
-    "            PC14placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/PC14>\"   \n",
-    "            E55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/PC14_\" + typeCoords.code + \">\"       \n",
-    "            P11placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/P11\" + \">\"   \n",
-    "            P11E55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/P11\" + \"_\" + typeCoords.code + \">\"\n",
-    "            if(row['persona_mano'] != ''):\n",
-    "                auth = row['persona_mano']\n",
-    "                authcode = re.sub('{\"nome\": .* ', '', auth)\n",
-    "                authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "                e39placeHolder = personAuthCoords.prefix + authcodeprefix\n",
-    "                line = triple(el1placeHolder, hasDomainCoords.prefix, PC14placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, labelCoords.prefix, \"\\\"Responsabilità della scrittura del documento\\\"\") + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, hasRangeCoords.prefix, e39placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(PC14placeHolder, roleOfCoords.prefix, E55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E55placeHolder, labelCoords.prefix, \"\\\"Mano\\\"\" ) + closeLine\n",
-    "                output.write(line)   \n",
-    "            output.write('\\n')\n",
-    "            #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 269
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_date.ipynb

@@ -1,269 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "# CIDOC Objects\n",
-    "moveCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E9_Move>', 'mv:', 'E9')\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Send, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)     \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + moveCoords.prefix + ' ' + moveCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange_date.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first data row (DictReader has already consumed the header row) as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # EL2 Send Letter - EL3 Receive Letter\n",
-    "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        # EL2 Send Letter - E52 Time Span\n",
-    "        if(row['data_inizio'] != ''):\n",
-    "            e52PplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el2placeHolder, hasTimeSpanCoords.prefix, e52PplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52PplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52PplaceHolder, labelCoords.prefix, '\\\"'+ row['data_inizio'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            #e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \"/ETI>\"\n",
-    "            #line = triple(e52PplaceHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e52PplaceHolder, onGoingTCoords.prefix, '\\\"'+row['data_inizio'] +'\\\"^^rdfs:Literal') + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale inferiore\\\"') + closeLine\n",
-    "            #output.write(line)\n",
-    "\n",
-    "        # EL3 Receive Letter - E52 Time Span\n",
-    "        if(row['data_fine'] != ''):\n",
-    "            e52AplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el3placeHolder, hasTimeSpanCoords.prefix, e52AplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52AplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52AplaceHolder, labelCoords.prefix, '\\\"' + row['data_fine'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            #e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \"/ETS>\"\n",
-    "            #line = triple(e52AplaceHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e52AplaceHolder, onGoingTCoords.prefix, '\\\"'+row['data_fine'] +'\\\"^^rdfs:Literal') + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e55placeHolder, labelCoords.prefix, '\\\"Estremo temporale superiore\\\"') + closeLine\n",
-    "            #output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 258
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_place.ipynb

@@ -1,258 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "# CIDOC Objects\n",
-    "moveCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E9_Move>', 'mv:', 'E9')\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Send, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)     \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + moveCoords.prefix + ' ' + moveCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange_place.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first data row (DictReader has already consumed the header row) as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        \n",
-    "        # EL1 Exchange Letter - E53 Place\n",
-    "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        if(row['luogo_partenza'] != '' and row['luogo_arrivo'] != ''):\n",
-    "            partenza = row['luogo_partenza'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
-    "            place_partenza = re.sub('IT-ASPO-GEO0001-[0-9]*}', '', partenza).strip()\n",
-    "            arrivo = row['luogo_arrivo'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
-    "            place_arrivo = re.sub('IT-ASPO-GEO0001-[0-9]*}', '', arrivo).strip()\n",
-    "            auth_partenza = row['luogo_partenza']\n",
-    "            authcode_partenza = re.sub('{luogo: .* ', '', auth_partenza)\n",
-    "            authcodeprefix_partenza = authcode_partenza.replace('IT-ASPO-GEO0001-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            auth_arrivo = row['luogo_arrivo']\n",
-    "            authcode_arrivo = re.sub('{luogo: .* ', '', auth_arrivo)\n",
-    "            authcodeprefix_arrivo = authcode_arrivo.replace('IT-ASPO-GEO0001-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            # EL2 Send Letter - E53 Place\n",
-    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_partenza +  \">\"\n",
-    "            line = triple(el2placeHolder, movedFromCoords.prefix, e53placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            #line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place_partenza + '\\\"') + closeLine\n",
-    "            #output.write(line)\n",
-    "            # EL3 Receive Letter - E53 Place\n",
-    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_arrivo +  \">\"\n",
-    "            line = triple(el3placeHolder, movedToCoords.prefix, e53placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            #line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "            #output.write(line)\n",
-    "            #line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place_arrivo + '\\\"') + closeLine\n",
-    "            #output.write(line)     \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 259
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_receiver.ipynb

@@ -1,259 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "# CIDOC Objects\n",
-    "moveCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E9_Move>', 'mv:', 'E9')\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "actorCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E39_Actor>', 'ac:', 'E39')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Sending, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)     \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + moveCoords.prefix + ' ' + moveCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + actorCoords.prefix + ' ' + actorCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange_receiver.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E55rplaceHolder = \"<http://www.archiviodistato.prato.it/\" + receiveLetterCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "    line = triple(E55rplaceHolder, labelCoords.prefix, \"\\\"Destinatario\\\"\" ) + closeLine\n",
-    "    output.write(line)\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # EL3 Receive Letter\n",
-    "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        PC14splaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
-    "        PC14rplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
-    "        if(row['persona_destinatario'] != ''):\n",
-    "            destinatario = row['persona_destinatario'].replace('{\"nome\":', '').replace(',', '').replace('\"authID\":', '')\n",
-    "            name_destinatario = re.sub('IT-ASPO-AU00003-[0-9].*}', '', destinatario).replace('\"', '').replace('}', '').strip()\n",
-    "            auth = row['persona_destinatario']\n",
-    "            authcode = re.sub('{\"nome\": .* ', '', auth)\n",
-    "            authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            actorplaceHolder = personAuthCoords.prefix + authcodeprefix        \n",
-    "            line = triple(el3placeHolder, hasDomainCoords.prefix, PC14rplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, labelCoords.prefix, \"\\\"\" + name_destinatario + \" nel ruolo di destinatario\" + \"\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, roleOfCoords.prefix, E55rplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, hasRangeCoords.prefix, actorplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 257
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_event_exchange_sender.ipynb

@@ -1,257 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "# CIDOC Objects\n",
-    "moveCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E9_Move>', 'mv:', 'E9')\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Sending, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)     \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + moveCoords.prefix + ' ' + moveCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange_sender.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E55splaceHolder = \"<http://www.archiviodistato.prato.it/\" + sendLetterCoords.code + \"_\" + typeCoords.code + \">\"       \n",
-    "    line = triple(E55splaceHolder, labelCoords.prefix, \"\\\"Mittente\\\"\" ) + closeLine\n",
-    "    output.write(line)\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue        \n",
-    "        # EL2 Send Letter\n",
-    "        el1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        PC14splaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
-    "        PC14rplaceHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
-    "        if(row['persona_mittente'] != ''):\n",
-    "            mittente = row['persona_mittente'].replace('{\"nome\":', '').replace(',', '').replace('\"authID\":', '')\n",
-    "            name_mittente = re.sub('IT-ASPO-AU00003-[0-9].*}', '', mittente).replace('\"', '').replace('}', '').strip()\n",
-    "            auth = row['persona_mittente']\n",
-    "            authcode = re.sub('{\"nome\": .* ', '', auth)\n",
-    "            authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            actorplaceHolder = personAuthCoords.prefix + authcodeprefix        \n",
-    "            line = triple(el2placeHolder, hasDomainCoords.prefix, PC14splaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, labelCoords.prefix, \"\\\"\" + name_mittente + \" nel ruolo di mittente\" + \"\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, roleOfCoords.prefix, E55splaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, hasRangeCoords.prefix, actorplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 228
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_extent.ipynb

@@ -1,228 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "hasDimensionsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P43_has_dimension>', 'hd:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "dimensionsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E54_Dimension>', 'dm:', 'E54')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDimensionsCoords.prefix + ' ' + hasDimensionsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dimensionsCoords.prefix + ' ' + dimensionsCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_extent.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first data row (csv.DictReader already consumed the header line); it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E54 Dimensions\n",
-    "        if(row['numero'] != ''):\n",
-    "            e54placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypeCoords.prefix, dimensionsCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"' + row['numero'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypePCoords.prefix, '\\\"Consistenza carte\\\"') + closeLine\n",
-    "            output.write(line)       \n",
-    "        if(row['extent'] != ''):\n",
-    "            e54placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypeCoords.prefix, dimensionsCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"' + row['extent'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypePCoords.prefix, '\\\"Consistenza carte\\\"') + closeLine\n",
-    "            output.write(line)        \n",
-    "        if(row['consistenza'] != ''):\n",
-    "            e54placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypeCoords.prefix, dimensionsCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"' + row['consistenza'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypePCoords.prefix, '\\\"Consistenza carte\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 218
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_id.ipynb

@@ -1,218 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_id.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Datini, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first data row (csv.DictReader already consumed the header line); it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 - P1 - E42\n",
-    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 231
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_list.ipynb

@@ -1,231 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "thingCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E70_Thing>', 'th:', 'E70')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + thingCoords.prefix + ' ' + thingCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_list.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    e55placeHolder = '<http://www.archiviodistato.prato.it/merce_E55>'\n",
-    "    line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "    output.write(line)\n",
-    "    line = triple(e55placeHolder, labelCoords.prefix, '\\\"Merce\\\"') + closeLine\n",
-    "    output.write(line)\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first data row (csv.DictReader already consumed the header line); it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        if row['lista'] != '' and row['lista'] != ' ':\n",
-    "                item = []\n",
-    "                pipe = \" | \"\n",
-    "                if pipe in row['lista']:\n",
-    "                    item = row['lista'].replace(\"Merci | \",\"\").split(' | ') \n",
-    "                    for merce in item:\n",
-    "                        e70placeHolder = '<http://www.archiviodistato.prato.it/' + merce.lower().replace(\" \",\"\").replace(\" \",\"_\") + '>'\n",
-    "                        line = triple(e73placeHolder, refersCoords.prefix, e70placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e70placeHolder, hasTypeCoords.prefix, thingCoords.prefix) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        lemma = merce.replace(\" \",\"\")\n",
-    "                        line = triple(e70placeHolder, labelCoords.prefix, '\\\"' + merce.lower() + '\\\"') + closeLine\n",
-    "                        output.write(line) \n",
-    "                        line = triple(e70placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                else:\n",
-    "                    e70placeHolder = '<http://www.archiviodistato.prato.it/' + row['lista'].lower().replace(\" \",\"\").replace(\" \",\"_\") + '>'\n",
-    "                    line = triple(e73placeHolder, refersCoords.prefix, e70placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e70placeHolder, hasTypeCoords.prefix, thingCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e70placeHolder, labelCoords.prefix, '\\\"' +  row['lista'].lower() + '\\\"') + closeLine\n",
-    "                    output.write(line) \n",
-    "                    line = triple(e70placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "                output.write('\\n')\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 215
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_note.ipynb

@@ -1,215 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypeNCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3.1_has_type>', 'tn:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'sr:', 'E62')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_note.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first row yielded by DictReader (the header line is already consumed as field names); it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # Note (P3 has note) on the E73 information object\n",
-    "        if(row['nota'] != ''):\n",
-    "            e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "            e62placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/E73_\" + stringCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['nota'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/E73_\" + stringCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "            line = triple(e62placeHolder, hasTypeNCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Nota contenuto informativo\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 216
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_phydesc.ipynb

@@ -1,216 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypeNCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3.1_has_type>', 'tn:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'sr:', 'E62')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_physdesc.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    e55placeHolder = \"<http://archiviodistato.prato.it/E22_\" + stringCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "    line = triple(e55placeHolder, labelCoords.prefix, '\\\"Nota descrizione fisica\\\"') + closeLine\n",
-    "    output.write(line)\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first row yielded by DictReader (the header line is already consumed as field names); it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E62 String\n",
-    "        if(row['descrizione_fisica'] != ''):\n",
-    "            e62placeHolder= \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/E22_\" + stringCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            # Strip space+newline sequences, then pairs of consecutive whitespace characters:\n",
-    "            txt = row['descrizione_fisica']\n",
-    "            x = re.sub(\" \\n\", \"\", txt)\n",
-    "            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + y.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, hasTypeNCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 210
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_physfacet.ipynb

@@ -1,210 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_physfacet.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    e57placeHolder = \"<http://archiviodistato.prato.it/\" + materialCoords.code + \">\"\n",
-    "    line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
-    "    output.write(line)\n",
-    "    line = triple(e57placeHolder, labelCoords.prefix, '\\\"Supporto\\\"') + closeLine\n",
-    "    output.write(line)\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first row yielded by DictReader (the header line is already consumed as field names); it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E57 Material\n",
-    "        if(row['supporto'] != ''):\n",
-    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/\" + materialCoords.code + \"_\" + typeCoords.code + \"_\" + row['supporto'].replace('|','').replace(' ','_').replace('__','_').replace(',','').replace(')','').replace('(','') + \">\"  \n",
-    "            line = triple(datiniCoords.prefix + row['id'], consistCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, hasTypePCoords.prefix, e57placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"' + row['supporto'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 216
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_phystech.ipynb

@@ -1,216 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_phystech.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    e3placeHolder = \"<http://archiviodistato.prato.it/\" + conditionCoords.code + \">\"\n",
-    "    line = triple(e3placeHolder, labelCoords.prefix, '\\\"Stato di conservazione\\\"') + closeLine\n",
-    "    output.write(line)\n",
-    "    line = triple(e3placeHolder, hasTypeCoords.prefix, conditionCoords.prefix) + closeLine\n",
-    "    output.write(line)\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E3 Condition State\n",
-    "        if(row['conservazione'] != ''):\n",
-    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/\" + conditionCoords.code + \"_\" + typeCoords.code + \"_\" + row['conservazione'].replace('|','').replace(' ','_').replace('__','_').replace(',','').replace(')','').replace('(','') + \">\"  \n",
-    "            line = triple(datiniCoords.prefix + row['id'], hasConditionCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, hasTypePCoords.prefix, e3placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            # Remove space-newline sequences and pairs of consecutive whitespace characters:\n",
-    "            txt = row['conservazione']\n",
-    "            x = re.sub(\" \\n\", \"\", txt)\n",
-    "            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"'+ y.replace('\\\\','\\\\\\\\').replace('\\\"','').replace('|',',').replace(' ,',',') + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 217
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_scopecontent.ipynb

@@ -1,217 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import re\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_scopecontent.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E1 Entity - Scope and Content\n",
-    "        if(row['scope-content_body'] != ''):\n",
-    "            e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "            e1placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/E73_\" + entityCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, refersCoords.prefix, e1placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e1placeHolder, hasTypeCoords.prefix, entityCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            # Remove space-newline sequences and pairs of consecutive whitespace characters:\n",
-    "            txt = row['scope-content_body']\n",
-    "            x = re.sub(\" \\n\", \"\", txt)\n",
-    "            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "            line = triple(e1placeHolder, labelCoords.prefix, '\\\"' + y.replace('\\\\','\\\\\\\\').replace('\\\"','') + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            e55placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + entityCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "            line = triple(e1placeHolder, refersHasTypeCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Scope and Content\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 212
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_segnatura.ipynb

@@ -1,212 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_segnatura.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E42 Identifier\n",
-    "        segnatura = ''\n",
-    "        if(row['segnatura_registri_1'] != ''):\n",
-    "            segnatura = row['segnatura_registri_1']\n",
-    "            if(row['segnatura_registri_2'] != ''):\n",
-    "                segnatura = 'registro ' + segnatura + ' ' + row['segnatura_registri_2']\n",
-    "        if(row['segnatura_busta'] != ''):\n",
-    "            segnatura =  'busta ' + row['segnatura_busta']\n",
-    "            if(row['segnatura_inserto'] != ''):\n",
-    "                segnatura = segnatura + ', inserto ' + row['segnatura_inserto']\n",
-    "                if(row['segnatura_codice'] != ''):\n",
-    "                    segnatura = segnatura + ', codice ' + row['segnatura_codice']\n",
-    "        if(segnatura != ''):\n",
-    "            e42placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Fondo Datini, ' + segnatura + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypePCoords.prefix, '\\\"Segnatura\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 209
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_title.ipynb

@@ -1,209 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_title.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E35 Title\n",
-    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
-    "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        if(row['titolo_originale'] != 'None' and row['titolo_originale'] != ''):\n",
-    "            line = triple(e35placeHolder1, hasAlternativeFormCoords.prefix, \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/original_title>\") + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(\"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/original_title>\", hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(\"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/original_title>\", labelCoords.prefix, '\\\"' + row['titolo_originale'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 209
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_item_type.ipynb

@@ -1,209 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# Added by FS\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_type.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        # E55 Type\n",
-    "        if(row['tipologia'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['tipologia']:\n",
-    "                tipologie = row['tipologia'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 244
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_series.ipynb

@@ -1,244 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/e73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "   \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'series'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Datini, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E35 Title\n",
-    "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E55 Type\n",
-    "        if(row['tipologia'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['tipologia']:\n",
-    "                tipologie = row['tipologia'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 251
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_subfonds.ipynb

@@ -1,251 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypeNCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3.1_has_type>', 'tn:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'sr:', 'E62')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'subfonds'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Datini, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"Documento fisico: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"Contenuto informativo: ' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E35 Title \n",
-    "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E55 Type\n",
-    "        if(row['genere'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['genere']:\n",
-    "                tipologie = row['genere'].split('|')\n",
-    "                for type in tipologie:\n",
-    "                    tipo = type\n",
-    "                    e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipo.replace(\" \", \"\") + \">\"\n",
-    "                    line = triple(datiniCoords.prefix + row['id'], hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(datiniCoords.prefix + row['id'], hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
-    "        output.write(line)       \n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 254
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_subseries.ipynb

@@ -1,254 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'subseries'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Datini, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(datiniCoords.prefix + row['id'], labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E35 Title\n",
-    "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            output.write(line)\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        if(row['segnatura_parent'] != ''):\n",
-    "            e42placeHolder =  \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Fondo Datini, ' + row['segnatura_parent'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypePCoords.prefix, '\\\"Segnatura precedente\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E55 Type\n",
-    "        if(row['tipologia'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['tipologia']:\n",
-    "                tipologie = row['tipologia'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://datini.archiviodistato.prato.it/la-ricerca/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'], identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'], hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 222
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_datini_type.ipynb

@@ -1,222 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Parser per estrarre tutte le tipologie di documenti ed associarle solo una volta ad ogni record"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "ename": "UnicodeDecodeError",
-     "evalue": "'utf-8' codec can't decode byte 0x88 in position 2918: invalid start byte",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mUnicodeDecodeError\u001b[0m                        Traceback (most recent call last)",
-      "\u001b[0;32m/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_3354/4120994092.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      8\u001b[0m     \u001b[0mfirst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m     \u001b[0mii\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m     \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     11\u001b[0m         \u001b[0;31m# The index ii is used to process a limited number of entries for testing purposes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m         \u001b[0mii\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mii\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/csv.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    108\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mline_num\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    109\u001b[0m             \u001b[0;31m# Used only for its side effect.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfieldnames\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    111\u001b[0m         \u001b[0mrow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    112\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mline_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mline_num\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/csv.py\u001b[0m in \u001b[0;36mfieldnames\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     95\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fieldnames\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     96\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fieldnames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     98\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     99\u001b[0m                 \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m    320\u001b[0m         \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    321\u001b[0m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m         \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    323\u001b[0m         \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    324\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x88 in position 2918: invalid start byte"
-     ]
-    }
-   ],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'all_type'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "\n",
-    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", labelCoords.prefix,  '\\\"' + row['tipologia']+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 318
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini.py

@@ -1,318 +0,0 @@
-# Parser to convert the person entries of the Datini onomastics CSV file into TTL format
-
-# Utilities to read/write csv files
-import csv
-# Utilities to handle character encodings
-import unicodedata
-# Ordered Dicts
-from collections import OrderedDict
-
-import json
-import re
-
-
-# OPTIONAL IMPORTS
-
-# For timestamping/simple speed tests
-from datetime import datetime
-# Random number generator
-from random import *
-# System & command line utilities
-import sys
-# Json for the dictionary
-import json
-
-# Absolute input/output directories. Both must end with '/' because the file
-# names are appended to them by plain string concatenation.
-import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/Data/DallASPO/'
-export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/Data/DallASPO/RDF/'
-
-# Custom class to store URIs + related infos for the ontologies/repositories
-
-class RDFcoords:
-    """Bundle a namespace IRI, its Turtle prefix and an optional short code."""
-
-    def __init__(self, uri, prefix, code = None):
-        # uri is stored exactly as given (callers pass it already wrapped in <...>)
-        self.uri, self.prefix, self.code = uri, prefix, code
-
-# Repositories
-# Each entry pairs a namespace IRI (already wrapped in <...>) with the Turtle
-# prefix used for it in the output; writeTTLHeader emits them as @prefix lines.
-aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')
-foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
-cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
-schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
-personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
-# The variable is named nsCoords but it holds the rdf: namespace
-nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
-rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
-owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
-
-# Basic functions for triples / shortened triples in TTL format
-
-def triple(subject, predicate, object1):
-    """Return 'subject predicate object1' joined by single spaces (no terminator)."""
-    return ' '.join((subject, predicate, object1))
-
-def doublet(predicate, object1):
-    """Return a 4-space indented 'predicate object1' continuation line (no terminator)."""
-    indent = ' ' * 4
-    return indent + ' '.join((predicate, object1))
-
-def singlet(object1):
-    """Return object1 indented by 8 spaces (no terminator)."""
-    return ' ' * 8 + object1
-
-# Line endings in TTL format
-# ' ;' keeps the current subject and starts a new predicate/object pair
-continueLine1 = ' ;\n'
-# ' ,' keeps the current subject and predicate and starts a new object
-continueLine2 = ' ,\n'
-# ' .' terminates the statement
-closeLine = ' .\n'
-
-def writeTTLHeader(output):
-    """Write one '@prefix' declaration per namespace, then a blank line.
-
-    output is any object with a write(str) method (here, the open .ttl file).
-    """
-    # Declaration order is the order in which the prefixes appear in the file
-    namespaces = (
-        aspoCoords,
-        foafCoords,
-        cidocCoords,
-        personCoords,
-        schemaCoords,
-        nsCoords,
-        rdfsCoords,
-        owlCoords,
-    )
-    for coords in namespaces:
-        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
-    output.write('\n')
-
-
-# Main conversion: read <import_dir>onomastica_datini.csv and write the person
-# records as Turtle to <export_dir>onomastica_datini.ttl.
-filePrefix = 'onomastica_'
-fileType = 'datini'
-# Maximum number of CSV data rows to read (lower it for quick test runs)
-max_entries = 10000000000000
-
-# Base IRI under which occupation resources are minted
-occupationBase = 'http://www.archiviodistato.prato.it/'
-# Base IRI of the ASPO record pages (used for the ancestor resources)
-schedaBase = 'http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/'
-
-# CSV columns written as plain string literals, in output order
-literalColumns = (
-    ('nome proprio', foafCoords.prefix + 'givenName'),
-    ('nome di famiglia', foafCoords.prefix + 'familyName'),
-    ('Alias', schemaCoords.prefix + 'alternateName'),
-    ('genere', foafCoords.prefix + 'gender'),
-    ('patronimico/matronimico', personCoords.prefix + 'patronymicName'),
-)
-
-# Ancestor columns and the IRI suffix used for each of them
-ancestorColumns = (('avo 1', 'avo1'), ('avo 2', 'avo2'))
-
-rdfType = nsCoords.prefix + 'type'
-rdfsLabel = rdfsCoords.prefix + 'label'
-
-
-def _clean(text):
-    """Collapse every run of whitespace (newlines included) to one space and trim."""
-    return re.sub(r'\s+', ' ', text).strip()
-
-
-def _literal(text):
-    """Return text as a double-quoted Turtle string with the mandatory escapes."""
-    # Backslash must be escaped first so the other escapes are not doubled
-    escaped = (text.replace('\\', '\\\\')
-               .replace('"', '\\"')
-               .replace('\n', '\\n')
-               .replace('\r', '\\r')
-               .replace('\t', '\\t'))
-    return '"' + escaped + '"'
-
-
-def _split_values(cell):
-    """Split a pipe-separated cell into cleaned, non-empty values."""
-    # str.split returns the whole string when no '|' is present, so single
-    # values and lists go through the same path
-    cleaned = (_clean(part) for part in cell.split('|'))
-    return [value for value in cleaned if value]
-
-
-def _emit(output, subject, predicate, object1):
-    """Write one complete, terminated triple to output."""
-    output.write(triple(subject, predicate, object1) + closeLine)
-
-
-# NOTE(review): the CSV is still decoded with the platform default encoding,
-# as before; the Turtle output is written as UTF-8 explicitly.
-with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
-        export_dir + filePrefix + fileType + '.ttl', 'w', encoding='utf-8') as output:
-    reader = csv.DictReader(csv_file)
-    writeTTLHeader(output)
-    for ii, row in enumerate(reader, start=1):
-        # ii counts the data rows read so far; stop before exceeding the cap
-        if ii > max_entries:
-            break
-        # Only person records are converted by this script
-        if row['entityType'] != 'person':
-            continue
-
-        id_aspo = row['recordId']
-        aspoPlaceHolder = aspoCoords.prefix + id_aspo
-        identifierPlaceHolder = aspoPlaceHolder + "_E42"
-
-        # Person typing in the three vocabularies
-        _emit(output, aspoPlaceHolder, rdfType, cidocCoords.prefix + 'E21_Person')
-        _emit(output, aspoPlaceHolder, rdfType, personCoords.prefix + 'Person')
-        _emit(output, aspoPlaceHolder, rdfType, foafCoords.prefix + 'Person')
-
-        # Record identifier modelled as a separate E42 node
-        _emit(output, aspoPlaceHolder, cidocCoords.prefix + 'P1_is_identified_by', identifierPlaceHolder)
-        _emit(output, identifierPlaceHolder, rdfType, cidocCoords.prefix + 'E42_Identifier')
-        _emit(output, identifierPlaceHolder, rdfsLabel, _literal(id_aspo))
-
-        # Normalised name, written both as foaf:name and rdfs:label
-        normalName = _literal(row['nameEntry@normal'])
-        _emit(output, aspoPlaceHolder, foafCoords.prefix + 'name', normalName)
-        _emit(output, aspoPlaceHolder, rdfsLabel, normalName)
-
-        # Simple name/gender columns: one literal each, skipped when blank
-        for column, predicate in literalColumns:
-            value = _clean(row[column])
-            if value:
-                _emit(output, aspoPlaceHolder, predicate, _literal(value))
-
-        # Occupations: one shared resource per distinct occupation
-        for occupazione in _split_values(row['occupation']):
-            # Only ASCII letters are kept for the local part of the IRI
-            occ = re.sub(r'[^A-Za-z]', '', occupazione)
-            occupationPlaceHolder = '<' + occupationBase + occ + '>'
-            _emit(output, aspoPlaceHolder, schemaCoords.prefix + 'hasOccupation', occupationPlaceHolder)
-            _emit(output, occupationPlaceHolder, rdfType, schemaCoords.prefix + 'Occupation')
-            _emit(output, occupationPlaceHolder, rdfsLabel, _literal(occupazione))
-
-        # Ancestors: a per-record resource carrying the ancestor name as label
-        for column, suffix in ancestorColumns:
-            if row[column] != '':
-                avo = '<' + schedaBase + id_aspo + '/' + suffix + '>'
-                _emit(output, aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', avo)
-                _emit(output, avo, rdfType, foafCoords.prefix + 'Person')
-                _emit(output, avo, rdfsLabel, _literal(row[column]))
-
-        for qualifica in _split_values(row['Qualifica']):
-            _emit(output, aspoPlaceHolder, schemaCoords.prefix + 'honorificPrefix', _literal(qualifica))
-
-        workLocation = _clean(row['place_occupation_Qualifica'])
-        if workLocation:
-            _emit(output, aspoPlaceHolder, schemaCoords.prefix + 'workLocation', _literal(workLocation))
-
-        # Double quotes are dropped from the biographical note, as before
-        note = _clean(row['biogHist p']).replace('"', '')
-        if note:
-            _emit(output, aspoPlaceHolder, cidocCoords.prefix + 'P3_has_note', _literal(note))
-
-        # Variant records are linked by their record id
-        for variante in _split_values(row['Variante']):
-            _emit(output, aspoPlaceHolder, owlCoords.prefix + 'sameAs', aspoCoords.prefix + variante)
-
-        output.write('\n')

+ 0 - 407
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_corporatebody.py

@@ -1,407 +0,0 @@
-# Parser to convert the corporate-body entries of the Datini onomastics CSV file into TTL format
-
-# Utilities to read/write csv files
-import csv
-# Utilities to handle character encodings
-import unicodedata
-# Ordered Dicts
-from collections import OrderedDict
-
-import json
-import re
-
-
-# OPTIONAL IMPORTS
-
-# For timestamping/simple speed tests
-from datetime import datetime
-# Random number generator
-from random import *
-# System & command line utilities
-import sys
-# Json for the dictionary
-import json
-
-# Absolute input/output directories. Both must end with '/' because the file
-# names are appended to them by plain string concatenation.
-import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'
-export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'
-
-# Custom class to store URIs + related infos for the ontologies/repositories
-
-class RDFcoords:
-    """Bundle a namespace IRI, its Turtle prefix and an optional short code."""
-
-    def __init__(self, uri, prefix, code = None):
-        # uri is stored exactly as given (callers pass it already wrapped in <...>)
-        self.uri, self.prefix, self.code = uri, prefix, code
-
-# Repositories
-# Each entry pairs a namespace IRI (already wrapped in <...>) with the Turtle
-# prefix used for it in the output; writeTTLHeader emits them as @prefix lines.
-aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')
-foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
-cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
-schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
-personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
-# The variable is named nsCoords but it holds the rdf: namespace
-nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
-rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
-owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
-
-# Basic functions for triples / shortened triples in TTL format
-
-def triple(subject, predicate, object1):
-    """Return 'subject predicate object1' joined by single spaces (no terminator)."""
-    return ' '.join((subject, predicate, object1))
-
-def doublet(predicate, object1):
-    """Return a 4-space indented 'predicate object1' continuation line (no terminator)."""
-    indent = ' ' * 4
-    return indent + ' '.join((predicate, object1))
-
-def singlet(object1):
-    """Return object1 indented by 8 spaces (no terminator)."""
-    return ' ' * 8 + object1
-
-# Line endings in TTL format
-# ' ;' keeps the current subject and starts a new predicate/object pair
-continueLine1 = ' ;\n'
-# ' ,' keeps the current subject and predicate and starts a new object
-continueLine2 = ' ,\n'
-# ' .' terminates the statement
-closeLine = ' .\n'
-
-def writeTTLHeader(output):
-    """Write one '@prefix' declaration per namespace, then a blank line.
-
-    output is any object with a write(str) method (here, the open .ttl file).
-    """
-    # Declaration order is the order in which the prefixes appear in the file
-    namespaces = (
-        aspoCoords,
-        foafCoords,
-        cidocCoords,
-        personCoords,
-        schemaCoords,
-        nsCoords,
-        rdfsCoords,
-        owlCoords,
-    )
-    for coords in namespaces:
-        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
-    output.write('\n')
-
-
-filePrefix = 'DATINI - onomastica '
-fileType = '- corporate_body'
-max_entries = 10000000000000
-
-with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
-        export_dir + filePrefix + fileType + '.ttl', 'w') as output:
-    reader = csv.DictReader(csv_file)
-    writeTTLHeader(output)
-    first = True
-    ii = 0
-    for row in reader:
-        # The index ii is used to process a limited number of entries for testing purposes
-        ii = ii + 1
-        if row['entityType'] == 'corporateBody':
-            id_aspo = row['recordId']
-            aspoPlaceHolder = aspoCoords.prefix + id_aspo
-            line = triple(aspoPlaceHolder, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E74_Group') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          nsCoords.prefix + 'type',
-                          foafCoords.prefix + 'Group') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          cidocCoords.prefix + 'P1_is_identified_by',
-                          aspoPlaceHolder + "_E42") + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder + "_E42",
-                          nsCoords.prefix + 'type',
-                          cidocCoords.prefix + 'E42_Identifier') + closeLine
-            output.write(line)
-            line = triple(aspoPlaceHolder + "_E42",
-                          rdfsCoords.prefix + 'label',
-                          '\"' + id_aspo + '\"') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          foafCoords.prefix + 'name',
-                          '\"' + row['nameEntry@normal'].replace("\"", "") + '\"') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          rdfsCoords.prefix + 'label',
-                          '\"' + row['nameEntry@normal'].replace("\"", "") + '\"') + closeLine
-            output.write(line)
-
-            if row['occupation'] != '' and row['occupation'] != ' ' :
-                occupazioni = []
-                pipe = "|"
-                if pipe in row['occupation']:
-                    occupazioni = row['occupation'].split('|') 
-                    for occupazione in occupazioni:
-                        #Remove all white-space characters:
-                        txt = occupazione
-                        x = re.sub("\n", " ", txt)
-                        y = re.sub("\s\s", "", x)
-                        occ = re.sub(r'[^A-Za-z]','', y)
-                        occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-                        line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'hasOccupation',
-                                    occupationPlaceHolder) + closeLine
-                        output.write(line)
-                        line = triple(occupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    schemaCoords.prefix + 'Occupation') + closeLine
-                        output.write(line)
-                        line = triple(occupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                        output.write(line)
-                else:
-                    #Remove all white-space characters:
-                    txt = row['occupation']
-                    x = re.sub("\n", " ", txt)
-                    y = re.sub("\s\s", "", x)
-                    occ = re.sub(r'[^A-Za-z]','', y)
-                    occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-                    line = triple(aspoPlaceHolder,
-                                schemaCoords.prefix + 'hasOccupation',
-                                occupationPlaceHolder) + closeLine
-                    output.write(line)
-                    line = triple(occupationPlaceHolder,
-                                nsCoords.prefix + 'type',
-                                schemaCoords.prefix + 'Occupation') + closeLine
-                    output.write(line)
-                    line = triple(occupationPlaceHolder,
-                                rdfsCoords.prefix + 'label',
-                                '\"' + y + '\"') + closeLine
-                    output.write(line)
-            
-            if row['tipologia'] != '':
-                line = triple(aspoPlaceHolder,
-                              cidocCoords.prefix + 'P2_has_type',
-                              '\"' + row['tipologia'] + '\"') + closeLine
-                output.write(line)
-            
-            if row['intestatario principale: nome o ID se lo abbiamo'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_S_' + row['intestatario principale: nome o ID se lo abbiamo'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['intestatario principale: nome o ID se lo abbiamo'] + ' intestatario principale del gruppo ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/socio_principale>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Socio\"') + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, cidocCoords.prefix + 'P3_has_note', '\"Intestatario principale\"') + closeLine
-                output.write(line)
-                id = row['intestatario principale: nome o ID se lo abbiamo']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['intestatario principale: nome o ID se lo abbiamo']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['intestatario principale: nome o ID se lo abbiamo'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['intestatario principale: nome o ID se lo abbiamo'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)
-
-            if row['socio 2'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_S_' + row['socio 2'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['socio 2'] + ' socio del gruppo ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/socio>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Socio\"') + closeLine
-                output.write(line)
-                id = row['socio 2']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['socio 2']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['socio 2'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['socio 2'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-            
-            if row['socio 3'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_S_' + row['socio 3'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['socio 3'] + ' socio del gruppo ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/socio>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Socio\"') + closeLine
-                output.write(line)
-                id = row['socio 3']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['socio 3']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['socio 3'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['socio 3'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-            
-            if row['socio 4'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_S_' + row['socio 4'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['socio 4'] + ' socio del gruppo ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/socio>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Socio\"') + closeLine
-                output.write(line)
-                id = row['socio 4']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['socio 4']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['socio 4'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['socio 4'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-            
-            if row['sede operativa'] != '':
-                if row['id sede operativa'] != '':
-                    E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['id sede operativa'] + '/SO' +  ">"
-                    line = triple(E13placeHolder, 
-                    nsCoords.prefix + 'type', 
-                    cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                    output.write(line)
-                    line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                    output.write(line)
-                    line = triple(E13placeHolder,
-                                        rdfsCoords.prefix + 'label',
-                                        '\" ' + row['sede operativa'] + ' sede operativa del gruppo ' + row['recordId'] + '\"') + closeLine
-                    output.write(line)
-                    E55placeHolder = '<http://www.archiviodistato.prato.it/sede_operativa>'                                
-                    line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Sede operativa\"') + closeLine
-                    output.write(line)
-                    e53placeHolder = "<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/" + row['id sede operativa'] + ">" 
-                    line = triple(aspoPlaceHolder, 
-                                    cidocCoords.prefix + 'P74_has_current_or_former_residence',
-                                    e53placeHolder) + closeLine
-                    output.write(line)
-
-            output.write('\n')
-        #
-        # Limit number of entries processed (if desired)
-        if (ii > max_entries):
-            break

+ 0 - 436
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_lettere_mani.py

@@ -1,436 +0,0 @@
-#Parser to convert the Datini onomastics CSV file into TTL format
-
-# Utilities to read/write csv files
-import csv
-# Utilities to handle character encodings
-import unicodedata
-# Ordered Dicts
-from collections import OrderedDict
-
-import json
-import re
-
-
-# OPTIONAL IMPORTS
-
-# For timestamping/simple speed tests
-from datetime import datetime
-# Random number generator
-from random import *
-# System & command line utilities
-import sys
-# Json for the dictionary
-import json
-
-import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'  # directory the input CSV is opened from
-export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'  # directory the TTL output is written to; NOTE(review): both paths are absolute and machine-specific
-
-# Custom class to store the namespace URI, the TTL prefix and an optional code for each ontology/repository
-
-class RDFcoords:
-    def __init__(self, uri, prefix, code = None):
-        self.uri = uri  # namespace URI; the instances created in this file pass it already wrapped in '<...>'
-        self.prefix = prefix  # prefix label including the trailing colon, e.g. 'crm:'
-        self.code = code  # optional extra value, None unless the caller supplies it
-
-# Repositories: namespace URI and TTL prefix of every vocabulary declared by writeTTLHeader
-aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')
-foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
-cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
-schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
-personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
-nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
-rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
-owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
-
-# Basic functions for full triples / shortened (subject-less) triples in TTL format
-
-def triple(subject, predicate, object1):  # full statement: the three terms joined by single spaces
-    line = subject + ' ' + predicate + ' ' + object1  # no line ending is added here; callers append closeLine themselves
-    return line
-
-def doublet(predicate, object1):  # predicate + object only; presumably meant to follow a line ended with continueLine1
-    line = '    ' + predicate + ' ' + object1  # four-space indent, no line ending
-    return line
-
-def singlet(object1):  # object only; presumably meant to follow a line ended with continueLine2
-    line = '        ' + object1  # eight-space indent, no line ending
-    return line
-
-# Line endings in TTL format (appended by the caller after triple/doublet/singlet)
-continueLine1 = ' ;\n'  # ';' - in Turtle the next predicate/object pair keeps the same subject
-continueLine2 = ' ,\n'  # ',' - in Turtle the next object keeps the same subject and predicate
-closeLine = ' .\n'  # '.' - terminates the statement
-
-def writeTTLHeader(output):  # writes one '@prefix <prefix> <uri> .' line per namespace to `output` (any object with a text write() method)
-    output.write('@prefix ' + aspoCoords.prefix + ' ' + aspoCoords.uri + closeLine)
-    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)
-    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
-    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)
-    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
-    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
-    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)
-    output.write('@prefix ' + owlCoords.prefix + ' ' + owlCoords.uri + closeLine)
-    output.write('\n')  # blank line separating the prefix header from the triples that follow
-
-
-filePrefix = 'DATINI - onomastica '  # first part of the input/output file name
-fileType = '- mani'  # second part; filePrefix + fileType + '.csv' is read and + '.ttl' is written
-max_entries = 10000000000000  # NOTE(review): presumably the cap on processed rows (effectively unlimited); its use is not visible here - confirm
-
-with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
-        export_dir + filePrefix + fileType + '.ttl', 'w') as output:
-    reader = csv.DictReader(csv_file)
-    writeTTLHeader(output)
-    first = True
-    ii = 0
-    for row in reader:
-        # The index ii is used to process a limited number of entries for testing purposes
-        ii = ii + 1
-        id_aspo = row['recordId']
-        aspoPlaceHolder = aspoCoords.prefix + id_aspo
-        line = triple(aspoPlaceHolder, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E74_Group') + closeLine
-        output.write(line)
-
-        line = triple(aspoPlaceHolder,
-                          nsCoords.prefix + 'type',
-                          foafCoords.prefix + 'Group') + closeLine
-        output.write(line)
-
-        line = triple(aspoPlaceHolder,
-                          cidocCoords.prefix + 'P2_has_type',
-                          '\"Gruppo scrittura lettera\"') + closeLine
-        output.write(line)
-
-        line = triple(aspoPlaceHolder,
-                          cidocCoords.prefix + 'P1_is_identified_by',
-                          aspoPlaceHolder + "_E42") + closeLine
-        output.write(line)
-
-        line = triple(aspoPlaceHolder + "_E42",
-                          nsCoords.prefix + 'type',
-                          cidocCoords.prefix + 'E42_Identifier') + closeLine
-        output.write(line)
-
-        line = triple(aspoPlaceHolder + "_E42",
-                          rdfsCoords.prefix + 'label',
-                          '\"' + id_aspo + '\"') + closeLine
-        output.write(line)
-
-        line = triple(aspoPlaceHolder,
-                          foafCoords.prefix + 'name',
-                          '\"' + row['nameEntry@normal'].replace("\"", "") + '\"') + closeLine
-        output.write(line)
-
-        line = triple(aspoPlaceHolder,
-                          rdfsCoords.prefix + 'label',
-                          '\"' + row['nameEntry@normal'].replace("\"", "") + '\"') + closeLine
-        output.write(line)
-
-        if row['ID prima persona'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_ML_' + row['ID prima persona'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['ID prima persona'] + ' socio del gruppo scrittura lettera ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/manilettere>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Mano lettera\"') + closeLine
-                output.write(line)
-                id = row['ID prima persona']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID prima persona']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID prima persona'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['ID prima persona'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-            
-        if row['ID seconda persona'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_ML_' + row['ID seconda persona'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['ID seconda persona'] + ' socio del gruppo scrittura lettera ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/manilettere>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Mano lettera\"') + closeLine
-                output.write(line)
-                id = row['ID seconda persona']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID seconda persona']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID seconda persona'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['ID seconda persona'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-
-        if row['ID terza persona'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_ML_' + row['ID terza persona'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['ID terza persona'] + ' socio del gruppo scrittura lettera ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/manilettere>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Mano lettera\"') + closeLine
-                output.write(line)
-                id = row['ID terza persona']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID terza persona']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID terza persona'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['ID terza persona'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-
-        if row['ID quarta persona'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_ML_' + row['ID quarta persona'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['ID quarta persona'] + ' socio del gruppo scrittura lettera ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/manilettere>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Mano lettera\"') + closeLine
-                output.write(line)
-                id = row['ID quarta persona']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID quarta persona']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID quarta persona'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['ID quarta persona'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-
-        if row['ID quinta persona'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_ML_' + row['ID quinta persona'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['ID quinta persona'] + ' socio del gruppo scrittura lettera ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/manilettere>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Mano lettera\"') + closeLine
-                output.write(line)
-                id = row['ID quinta persona']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID quinta persona']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID quinta persona'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['ID quinta persona'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-
-        if row['ID sesta persona'] != '':
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + '_ML_' + row['ID sesta persona'].replace(' ', '_') +  ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['ID sesta persona'] + ' socio del gruppo scrittura lettera ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/manilettere>'                                
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder, rdfsCoords.prefix + 'label', '\"Mano lettera\"') + closeLine
-                output.write(line)
-                id = row['ID sesta persona']                
-                if re.match(r'IT-ASPO', id):
-                    socioid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID sesta persona']+ ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socioid) + closeLine
-                    output.write(line)
-                    line = triple(socioid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    socionoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID sesta persona'].replace(' ', '_').lower() + ">"
-                    line = triple(aspoPlaceHolder, cidocCoords.prefix + 'P107_has_current_or_former_member', socionoid) + closeLine
-                    output.write(line)
-                    line = triple(socionoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                    line = triple(socionoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['ID sesta persona'] + '\"') + closeLine
-                    output.write(line)
-                    
-                    line = triple(socionoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(socionoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)            
-
-        output.write('\n')
-        #
-        # Limit number of entries processed (if desired)
-        if (ii > max_entries):
-            break

+ 0 - 907
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_person.py

@@ -1,907 +0,0 @@
-#Parser to convert the Datini onomastics CSV file into TTL format
-
-# Utilities to read/write csv files
-import csv
-# Utilities to handle character encodings
-import unicodedata
-# Ordered Dicts
-from collections import OrderedDict
-
-import json
-import re
-
-
-# OPZIONAL IMPORTS
-
-# For timestamping/simple speed tests
-from datetime import datetime
-# Random number generator
-from random import *
-# System & command line utilities
-import sys
-# Json for the dictionary
-import json
-
-import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'
-export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'
-
-# Custom class to store URIs + related infos for the ontologies/repositories
-
-class RDFcoords:
-    def __init__(self, uri, prefix, code = None):
-        self.uri = uri
-        self.prefix = prefix
-        self.code = code
-
-# Repositories
-aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')
-foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
-cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
-schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
-personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
-nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
-rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
-owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
-
-# Basic functions for triples / shortened triples in TTL format
-
-def triple(subject, predicate, object1):
-    line = subject + ' ' + predicate + ' ' + object1
-    return line
-
-def doublet(predicate, object1):
-    line = '    ' + predicate + ' ' + object1
-    return line
-
-def singlet(object1):
-    line = '        ' + object1
-    return line
-
-# Line endings in TTL format
-continueLine1 = ' ;\n'
-continueLine2 = ' ,\n'
-closeLine = ' .\n'
-
-def writeTTLHeader(output):
-    output.write('@prefix ' + aspoCoords.prefix + ' ' + aspoCoords.uri + closeLine)
-    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)
-    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
-    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)
-    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
-    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
-    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)
-    output.write('@prefix ' + owlCoords.prefix + ' ' + owlCoords.uri + closeLine)
-    output.write('\n')
-
-
-filePrefix = 'DATINI - onomastica '
-fileType = '- persone singole'
-max_entries = 10000000000000
-
-with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
-        export_dir + filePrefix + fileType + '.ttl', 'w') as output:
-    reader = csv.DictReader(csv_file)
-    writeTTLHeader(output)
-    first = True
-    ii = 0
-    for row in reader:
-        # The index ii is used to process a limited number of entries for testing purposes
-        ii = ii + 1
-        if row['entityType'] == 'person':
-            id_aspo = row['recordId']
-            aspoPlaceHolder = aspoCoords.prefix + id_aspo
-            line = triple(aspoPlaceHolder, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder, 
-                          nsCoords.prefix + 'type', 
-                          personCoords.prefix + 'Person') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          nsCoords.prefix + 'type',
-                          foafCoords.prefix + 'person') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          cidocCoords.prefix + 'P1_is_identified_by',
-                          aspoPlaceHolder + "_E42") + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder + "_E42",
-                          nsCoords.prefix + 'type',
-                          cidocCoords.prefix + 'E42_Identifier') + closeLine
-            output.write(line)
-            line = triple(aspoPlaceHolder + "_E42",
-                          rdfsCoords.prefix + 'label',
-                          '\"' + id_aspo + '\"') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          foafCoords.prefix + 'name',
-                          '\"' + row['nameEntry@normal'] + '\"') + closeLine
-            output.write(line)
-
-            line = triple(aspoPlaceHolder,
-                          rdfsCoords.prefix + 'label',
-                          '\"' + row['nameEntry@normal'] + '\"') + closeLine
-            output.write(line)
-
-            if row['nome proprio'] != '':
-                #Remove all white-space characters:
-                txt = row['nome proprio']
-                x = re.sub(" \n", "", txt)
-                y = re.sub("\s\s", "", x)
-                name = re.sub("\n", "", y)
-                line = triple(aspoPlaceHolder,
-                              foafCoords.prefix + 'givenName',
-                              '\"' + name + '\"') + closeLine
-                output.write(line)
-
-            if row['nome di famiglia'] != '':
-                #Remove all white-space characters:
-                txt = row['nome di famiglia']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              foafCoords.prefix + 'familyName',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-            
-            if row['Alias'] != '' and row['Alias'] != ' ':
-                #Remove all white-space characters:
-                txt = row['Alias']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              schemaCoords.prefix + 'alternateName',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['genere'] != '':
-                #Remove all white-space characters:
-                txt = row['genere']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              foafCoords.prefix + 'gender',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            if row['patronimico/matronimico'] != '':
-                #Remove all white-space characters:
-                txt = row['patronimico/matronimico']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", "", x)
-                line = triple(aspoPlaceHolder,
-                              personCoords.prefix + 'patronymicName',
-                              '\"' + y + '\"') + closeLine
-                output.write(line)
-
-            # if row['occupation_1'] != '' and row['occupation_1'] != ' ' :
-            #     occupazioni = []
-            #     pipe = "|"
-            #     if pipe in row['occupation_1']:
-            #         occupazioni = row['occupation_1'].split('|') 
-            #         for occupazione in occupazioni:
-            #             #Remove all white-space characters:
-            #             txt = occupazione
-            #             x = re.sub("\n", " ", txt)
-            #             y = re.sub("\s\s", "", x)
-            #             occ = re.sub(r'[^A-Za-z]','', y)
-            #             occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-            #             line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'hasOccupation',
-            #                         occupationPlaceHolder) + closeLine
-            #             output.write(line)
-            #             line = triple(occupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         schemaCoords.prefix + 'Occupation') + closeLine
-            #             output.write(line)
-            #             line = triple(occupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #             output.write(line)
-
-            #             if row['place occupation 1 ENTE'] != '':
-            #                 placeoccupazioni = []
-            #                 pipe = "|"
-            #                 if pipe in row['place occupation 1 ENTE']:
-            #                     placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-            #                     for placeoccupazione in placeoccupazioni:
-            #                         #Remove all white-space characters:
-            #                         txt = row['place occupation 1 ENTE']
-            #                         x = re.sub("\n", " ", txt)
-            #                         y = re.sub("\s\s", "", x)
-            #                         placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                         line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                         output.write(line)
-            #                         line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-            #                         output.write(line)
-            #                 else: 
-            #                     placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-            #                     txt = row['place occupation 1 ENTE']
-            #                     x = re.sub("\n", " ", txt)
-            #                     y = re.sub("\s\s", "", x)
-            #                     placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                     line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                     output.write(line)
-            #                     line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-            #                     output.write(line)              
-            #     else:
-            #         #Remove all white-space characters:
-            #         txt = row['occupation_1']
-            #         x = re.sub("\n", " ", txt)
-            #         y = re.sub("\s\s", "", x)
-            #         occ = re.sub(r'[^A-Za-z]','', y)
-            #         occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-            #         line = triple(aspoPlaceHolder,
-            #                     schemaCoords.prefix + 'hasOccupation',
-            #                     occupationPlaceHolder) + closeLine
-            #         output.write(line)
-            #         line = triple(occupationPlaceHolder,
-            #                     nsCoords.prefix + 'type',
-            #                     schemaCoords.prefix + 'Occupation') + closeLine
-            #         output.write(line)
-            #         line = triple(occupationPlaceHolder,
-            #                     rdfsCoords.prefix + 'label',
-            #                     '\"' + y + '\"') + closeLine
-            #         output.write(line)
-                    
-            #         if row['place occupation 1 ENTE'] != '':
-            #                 placeoccupazioni = []
-            #                 pipe = "|"
-            #                 if pipe in row['place occupation 1 ENTE']:
-            #                     placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-            #                     for placeoccupazione in placeoccupazioni:
-            #                         #Remove all white-space characters:
-            #                         txt = row['place occupation 1 ENTE']
-            #                         x = re.sub("\n", " ", txt)
-            #                         y = re.sub("\s\s", "", x)
-            #                         placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                         line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                         output.write(line)
-            #                         line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-            #                         output.write(line)
-            #                 else: 
-            #                     placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-            #                     txt = row['place occupation 1 ENTE']
-            #                     x = re.sub("\n", " ", txt)
-            #                     y = re.sub("\s\s", "", x)
-            #                     placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                     line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                     output.write(line)
-            #                     line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-            #                     output.write(line)              
-            
-            # if row['occupation_2'] != '' and row['occupation_2'] != ' ' :
-            #     occupazioni = []
-            #     pipe = "|"
-            #     if pipe in row['occupation_2']:
-            #         occupazioni = row['occupation_2'].split('|') 
-            #         for occupazione in occupazioni:
-            #             #Remove all white-space characters:
-            #             txt = occupazione
-            #             x = re.sub("\n", " ", txt)
-            #             y = re.sub("\s\s", "", x)
-            #             occ = re.sub(r'[^A-Za-z]','', y)
-            #             occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-            #             line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'hasOccupation',
-            #                         occupationPlaceHolder) + closeLine
-            #             output.write(line)
-            #             line = triple(occupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         schemaCoords.prefix + 'Occupation') + closeLine
-            #             output.write(line)
-            #             line = triple(occupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #             output.write(line)
-
-            #             if row['place occupation 2 ENTE'] != '':
-            #                 placeoccupazioni = []
-            #                 pipe = "|"
-            #                 if pipe in row['place occupation 2 ENTE']:
-            #                     placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-            #                     for placeoccupazione in placeoccupazioni:
-            #                         #Remove all white-space characters:
-            #                         txt = row['place occupation 2 ENTE']
-            #                         x = re.sub("\n", " ", txt)
-            #                         y = re.sub("\s\s", "", x)
-            #                         placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                         line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                         output.write(line)
-            #                         line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-            #                         output.write(line)
-            #                 else: 
-            #                     placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-            #                     txt = row['place occupation 2 ENTE']
-            #                     x = re.sub("\n", " ", txt)
-            #                     y = re.sub("\s\s", "", x)
-            #                     placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                     line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                     output.write(line)
-            #                     line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-            #                     output.write(line)              
-            #     else:
-            #         #Remove all white-space characters:
-            #         txt = row['occupation_2']
-            #         x = re.sub("\n", " ", txt)
-            #         y = re.sub("\s\s", "", x)
-            #         occ = re.sub(r'[^A-Za-z]','', y)
-            #         occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-            #         line = triple(aspoPlaceHolder,
-            #                     schemaCoords.prefix + 'hasOccupation',
-            #                     occupationPlaceHolder) + closeLine
-            #         output.write(line)
-            #         line = triple(occupationPlaceHolder,
-            #                     nsCoords.prefix + 'type',
-            #                     schemaCoords.prefix + 'Occupation') + closeLine
-            #         output.write(line)
-            #         line = triple(occupationPlaceHolder,
-            #                     rdfsCoords.prefix + 'label',
-            #                     '\"' + y + '\"') + closeLine
-            #         output.write(line)
-                    
-            #         if row['place occupation 2 ENTE'] != '':
-            #                 placeoccupazioni = []
-            #                 pipe = "|"
-            #                 if pipe in row['place occupation 2 ENTE']:
-            #                     placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-            #                     for placeoccupazione in placeoccupazioni:
-            #                         #Remove all white-space characters:
-            #                         txt = row['place occupation 2 ENTE']
-            #                         x = re.sub("\n", " ", txt)
-            #                         y = re.sub("\s\s", "", x)
-            #                         placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                         line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                         output.write(line)
-            #                         line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                         output.write(line)
-            #                         line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-            #                         output.write(line)
-            #                 else: 
-            #                     placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-            #                     txt = row['place occupation 2 ENTE']
-            #                     x = re.sub("\n", " ", txt)
-            #                     y = re.sub("\s\s", "", x)
-            #                     placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-            #                     line = triple(aspoPlaceHolder,
-            #                                     schemaCoords.prefix + 'workLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(occupationPlaceHolder,
-            #                                     schemaCoords.prefix + 'occupationLocation',
-            #                                     placeoccupationPlaceHolder) + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         rdfsCoords.prefix + 'label',
-            #                         '\"' + y + '\"') + closeLine
-            #                     output.write(line)
-            #                     line = triple(placeoccupationPlaceHolder,
-            #                         nsCoords.prefix + 'type',
-            #                         cidocCoords.prefix + 'E53_Place') + closeLine
-            #                     output.write(line)
-            #                     line = triple(aspoPlaceHolder,
-            #                         schemaCoords.prefix + 'jobTitle',
-            #                         '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-            #                     output.write(line)              
-            
-            if (row['avo 1'] != ''):
-                id = row['avo 1']
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['avo 1'].replace(' ', '_') + '_AVO1_' + row['recordId'] + ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['avo 1'] + ' avo di secondo grado di ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                if re.match(r'IT-ASPO', id):
-                    relazioneid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['avo 1']+ ">"
-                    #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazioneid) + closeLine
-                    #output.write(line)
-                    line = triple(relazioneid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['avo 1'].replace(' ', '_').lower()+ ">"
-                    #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazionenoid) + closeLine
-                    #output.write(line)
-                    line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine
-                    output.write(line)
-                    line = triple(relazionenoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['avo 1'] + '\"') + closeLine
-                    output.write(line)
-                    line = triple(relazionenoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(relazionenoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(relazionenoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/avo_secondo_grado>'
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder,
-                                            rdfsCoords.prefix + 'label',
-                                            '\"Avo di secondo grado\"') + closeLine
-                output.write(line)      
-
-            if (row['avo 2'] != ''):
-                id = row['avo 2']
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['avo 2'].replace(' ', '_') + '_AVO2_' + row['recordId'] + ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"Relazione: ' + row['avo 2'] + ' avo di terzo grado di ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                if re.match(r'IT-ASPO', id):
-                    relazioneid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['avo 2']+ ">"
-                    #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazioneid) + closeLine
-                    #output.write(line)
-                    line = triple(relazioneid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                    output.write(line)
-                else:
-                    relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['avo 2'].replace(' ', '_').lower()+ ">"
-                    #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazionenoid) + closeLine
-                    #output.write(line)
-                    line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine
-                    output.write(line)
-                    line = triple(relazionenoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['avo 2'] + '\"') + closeLine
-                    output.write(line)
-                    line = triple(relazionenoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(relazionenoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(relazionenoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)
-                E55placeHolder = '<http://www.archiviodistato.prato.it/avo_terzo_grado>'
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                output.write(line)
-                line = triple(E55placeHolder,
-                                            rdfsCoords.prefix + 'label',
-                                            '\"Avo di terzo grado\"') + closeLine
-                output.write(line)      
-
-            if row['Qualifica'] != '':
-                qualifiche = []
-                pipe = "|"
-                if pipe in row['Qualifica']:
-                    qualifiche = row['Qualifica'].split('|') 
-                    for qualifica in qualifiche:
-                        #Remove all white-space characters:
-                        txt = qualifica
-                        x = re.sub("\n", " ", txt)
-                        y = re.sub("\s\s", " ", x)
-                        line = triple(aspoPlaceHolder, schemaCoords.prefix + 'honorificPrefix', '\"' + str(y) + '\"') + closeLine
-                        output.write(line)
-                else:
-                #Remove all white-space characters:
-                    txt = row['Qualifica']
-                    x = re.sub("\n", " ", txt)
-                    y = re.sub("\s\s", " ", x)
-                    line = triple(aspoPlaceHolder, schemaCoords.prefix + 'honorificPrefix', '\"' + y + '\"') + closeLine
-                    output.write(line)
-
-            # if row['place_occupation_Qualifica'] != '':
-            #     #Remove all white-space characters:
-            #     txt = row['place_occupation_Qualifica']
-            #     x = re.sub("\n", " ", txt)
-            #     y = re.sub("\s\s", "", x)
-            #     line = triple(aspoPlaceHolder,
-            #                   schemaCoords.prefix + 'workLocation',
-            #                   '\"' + row['place_occupation_Qualifica'].replace('\\','\\\\').replace('"','\\"') + '\"') + closeLine
-            #     output.write(line)
-
-            if row['biogHist p'] != '':
-                #Remove all white-space characters:
-                txt = row['biogHist p']
-                x = re.sub("\n", " ", txt)
-                y = re.sub("\s\s", " ", x)
-                note = re.sub("\"", "", x)
-                line = triple(aspoPlaceHolder,
-                              cidocCoords.prefix + 'P3_has_note',
-                              '\"' + note + '\"') + closeLine
-                output.write(line)
-            
-            if row['Variante'] != '': 
-                varianti = []
-                pipe = "|"
-                if pipe in row['Variante']:
-                    varianti = row['Variante'].split('|')
-                    for variante in varianti: 
-                        line = triple(aspoPlaceHolder,
-                        owlCoords.prefix + 'sameAs',
-                        aspoCoords.prefix + str(variante)) + closeLine
-                        output.write(line)
-                else:
-                    line = triple(aspoPlaceHolder,
-                    owlCoords.prefix + 'sameAs',
-                    aspoCoords.prefix + row['Variante']) + closeLine
-                    output.write(line)
-          
-            if (row['recordID relazione'] != ''):
-                relazioni = []
-                pipe = "|"
-                if pipe in row['recordID relazione']:
-                    relazioni = row['recordID relazione'].split('|')
-                    for relazione in relazioni:
-                        id = relazione
-                        E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + relazione.replace(' ', '_') + '_R_' + row['recordId'] + ">"
-                        line = triple(E13placeHolder, 
-                        nsCoords.prefix + 'type', 
-                        cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                        output.write(line)
-                        line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                        output.write(line)
-                        line = triple(E13placeHolder,
-                                            rdfsCoords.prefix + 'label',
-                                            '\"Relazione: di ' + relazione + ' ' + ' con ' + row['recordId'] + '\"') + closeLine
-                        output.write(line)
-                        if re.match(r'IT-ASPO', id):
-                            relazioneid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + relazione + ">"
-                            #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazioneid) + closeLine
-                            #output.write(line)
-                            line = triple(relazioneid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                            output.write(line)
-                        else:
-                            relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + relazione.replace(' ', '_').lower()+ ">"
-                            #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazionenoid) + closeLine
-                            #output.write(line)
-                            line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine
-                            output.write(line)
-                            cleanlabel = relazione.rstrip()
-                            line = triple(relazionenoid,
-                                            rdfsCoords.prefix + 'label',
-                                            '\"' + cleanlabel + '\"') + closeLine
-                            output.write(line)
-                            line = triple(relazionenoid, 
-                                nsCoords.prefix + 'type', 
-                                cidocCoords.prefix + 'E21_Person') + closeLine
-                            output.write(line)
-
-                            line = triple(relazionenoid, 
-                                        nsCoords.prefix + 'type', 
-                                        personCoords.prefix + 'Person') + closeLine
-                            output.write(line)
-
-                            line = triple(relazionenoid,
-                                        nsCoords.prefix + 'type',
-                                        foafCoords.prefix + 'person') + closeLine
-                            output.write(line)
-                        if (row['nome relazione'] != ''):      
-                                relazioni = []
-                                pipe = "|" 
-                                if pipe in row['nome relazione']:
-                                    relazioni = row['nome relazione'].split('|') 
-                                    for relazione in relazioni:
-                                        #Remove all white-space characters:
-                                        txt = relazione
-                                        x = re.sub("\n", " ", txt)
-                                        y = re.sub("\s\s", "", x)
-                                        rel = re.sub(r'[^A-Za-z]','', y)
-                                        cleanlabel = rel.rstrip().lstrip()
-                                        E55placeHolder = '<http://www.archiviodistato.prato.it/relation_' + cleanlabel.replace(" ","") + '>'
-                                        line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                                        output.write(line)
-                                        line = triple(E55placeHolder,
-                                                    rdfsCoords.prefix + 'label',
-                                                    '\"' + cleanlabel + '\"') + closeLine
-                                        output.write(line)
-                                else:
-                                    cleanlabel = row['nome relazione'].rstrip().lstrip()
-                                    E55placeHolder = '<http://www.archiviodistato.prato.it/relation_' + cleanlabel.replace(' ', '') + '>'
-                                    line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(E55placeHolder,
-                                                rdfsCoords.prefix + 'label',
-                                                '\"' + cleanlabel + '\"') + closeLine
-                                    output.write(line)
-
-                else:
-                        relazione = row['recordID relazione']
-                        id = relazione
-                        E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + relazione.replace(' ', '_') + '_R_' + row['recordId'] + ">"
-                        line = triple(E13placeHolder, 
-                        nsCoords.prefix + 'type', 
-                        cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                        output.write(line)
-                        line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                        output.write(line)
-                        line = triple(E13placeHolder,
-                                            rdfsCoords.prefix + 'label',
-                                            '\"Relazione: di ' + relazione + ' ' + ' con ' + row['recordId'] + '\"') + closeLine
-                        output.write(line)
-                        if re.match(r'IT-ASPO', id):
-                            relazioneid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + relazione + ">"
-                            #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazioneid) + closeLine
-                            #output.write(line)
-                            line = triple(relazioneid, cidocCoords.prefix + 'P141_assigned', E13placeHolder) + closeLine
-                            output.write(line)
-                        else:
-                            relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + relazione.replace(' ', '_').lower()+ ">"
-                            #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazionenoid) + closeLine
-                            #output.write(line)
-                            line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine
-                            output.write(line)
-                            cleanlabel = relazione.rstrip()
-                            line = triple(relazionenoid,
-                                            rdfsCoords.prefix + 'label',
-                                            '\"' + cleanlabel + '\"') + closeLine
-                            output.write(line)
-                            line = triple(relazionenoid, 
-                                nsCoords.prefix + 'type', 
-                                cidocCoords.prefix + 'E21_Person') + closeLine
-                            output.write(line)
-
-                            line = triple(relazionenoid, 
-                                        nsCoords.prefix + 'type', 
-                                        personCoords.prefix + 'Person') + closeLine
-                            output.write(line)
-
-                            line = triple(relazionenoid,
-                                        nsCoords.prefix + 'type',
-                                        foafCoords.prefix + 'person') + closeLine
-                            output.write(line)
-                        if (row['nome relazione'] != ''):      
-                                relazioni = []
-                                pipe = "|" 
-                                if pipe in row['nome relazione']:
-                                    relazioni = row['nome relazione'].split('|') 
-                                    for relazione in relazioni:
-                                        #Remove all white-space characters:
-                                        txt = relazione
-                                        x = re.sub("\n", " ", txt)
-                                        y = re.sub("\s\s", "", x)
-                                        rel = re.sub(r'[^A-Za-z]','', y)
-                                        cleanlabel = rel.rstrip().lstrip()
-                                        E55placeHolder = '<http://www.archiviodistato.prato.it/relation_' + cleanlabel.replace(" ","") + '>'
-                                        line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                                        output.write(line)
-                                        line = triple(E55placeHolder,
-                                                    rdfsCoords.prefix + 'label',
-                                                    '\"' + cleanlabel + '\"') + closeLine
-                                        output.write(line)
-                                else:
-                                    cleanlabel = row['nome relazione'].rstrip().lstrip()
-                                    E55placeHolder = '<http://www.archiviodistato.prato.it/relation_' + cleanlabel.replace(' ', '') + '>'
-                                    line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(E55placeHolder,
-                                                rdfsCoords.prefix + 'label',
-                                                '\"' + cleanlabel + '\"') + closeLine
-                                    output.write(line)
-
-            if (row['recordID relazione2'] != ''):
-                id = row['recordID relazione2']
-                E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordID relazione2'].replace(' ', '_') + '_R_' + row['recordId'] + ">"
-                line = triple(E13placeHolder, 
-                nsCoords.prefix + 'type', 
-                cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', aspoPlaceHolder) + closeLine
-                output.write(line)
-                line = triple(E13placeHolder, rdfsCoords.prefix + 'label', '\"Relazione: ' + row['recordID relazione2']  + ' ' + row['nome relazione2'] + ' di ' + row['recordId'] + '\"') + closeLine
-                output.write(line)
-                if re.match(r'IT-ASPO', id):
-                    relazioneid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordID relazione2']+ ">"
-                    #line = triple(aspoPlaceHolder, schemaCoords.prefix + 'relatedTo', relazioneid) + closeLine
-                    #output.write(line)
-                    line = triple(relazioneid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine
-                    output.write(line)
-                else:
-                    relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordID relazione2'].replace(' ', '_').lower()+ ">"
-                    #line = triple(aspoPlaceHolder,schemaCoords.prefix + 'relatedTo', relazionenoid) + closeLine
-                    #output.write(line)
-                    line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine
-                    output.write(line)
-                    line = triple(relazionenoid,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + row['recordID relazione2'] + '\"') + closeLine
-                    output.write(line)
-                    line = triple(relazionenoid, 
-                          nsCoords.prefix + 'type', 
-                          cidocCoords.prefix + 'E21_Person') + closeLine
-                    output.write(line)
-
-                    line = triple(relazionenoid, 
-                                nsCoords.prefix + 'type', 
-                                personCoords.prefix + 'Person') + closeLine
-                    output.write(line)
-
-                    line = triple(relazionenoid,
-                                nsCoords.prefix + 'type',
-                                foafCoords.prefix + 'person') + closeLine
-                    output.write(line)
-                if (row['nome relazione2'] != ''):
-                        tipologie = []
-                        pipe = "|" 
-                        if pipe in row['nome relazione2']:
-                            relazioni = row['nome relazione2'].split('|') 
-                            for relazione in relazioni:
-                                #Remove all white-space characters:
-                                txt = relazione
-                                x = re.sub("\n", " ", txt)
-                                y = re.sub("\s\s", "", x)
-                                rel = re.sub(r'[^A-Za-z]','', y)
-                                cleanlabel = rel.rstrip().lstrip()
-                                E55placeHolder = '<http://www.archiviodistato.prato.it/relation_' + cleanlabel.replace(" ","") + '>'
-                                line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                                output.write(line)
-                                line = triple(E55placeHolder,
-                                            rdfsCoords.prefix + 'label',
-                                            '\"' + cleanlabel + '\"') + closeLine
-                                output.write(line)
-                        else:
-                            cleanlabel = row['nome relazione2'].rstrip().lstrip()
-                            E55placeHolder = '<http://www.archiviodistato.prato.it/relation_' + cleanlabel.replace(' ', '') + '>'
-                            line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine
-                            output.write(line)
-                            line = triple(E55placeHolder,
-                                        rdfsCoords.prefix + 'label',
-                                        '\"' + cleanlabel + '\"') + closeLine
-                            output.write(line)
-            
-            if row['Riferimenti di attribuzione'] != '':
-               #Remove all white-space characters:
-                txt = row['Riferimenti di attribuzione']
-                x = re.sub(" \n", "", txt)
-                y = re.sub("\s\s", "", x)
-                note = re.sub("\"", "", x)
-                e62placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + "/E62" + ">" 
-                line = triple(aspoPlaceHolder,  cidocCoords.prefix + 'P3_has_note', e62placeHolder) + closeLine
-                output.write(line)
-                line = triple(e62placeHolder, rdfsCoords.prefix + 'label', '\"' + note.replace('\\','\\\\').replace('"','\\"')+ '\"') + closeLine
-                output.write(line)
-                line = triple(e62placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E62_String') + closeLine
-                output.write(line)
-                e55placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['recordId'] + "/E62_E55" + ">" 
-                line = triple(e62placeHolder, cidocCoords.prefix + 'P2_has_type', e55placeHolder) + closeLine
-                output.write(line)
-                line = triple(e55placeHolder, rdfsCoords.prefix + 'label', '\"Riferimenti di attribuzione\"') + closeLine
-                output.write(line)
-            
-            output.write('\n')
-        #
-        # Limit number of entries processed (if desired)
-        if (ii > max_entries):
-            break

+ 0 - 454
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/datini/CSV_to_RDF_onomastica_datini_person_occupation.py

@@ -1,454 +0,0 @@
-#Parser to convert the Datini onomastics CSV file into TTL format
-
-# Utilities to read/write csv files
-import csv
-# Utilities to handle character encodings
-import unicodedata
-# Ordered Dicts
-from collections import OrderedDict
-
-import json
-import re
-
-
-# OPTIONAL IMPORTS
-
-# For timestamping/simple speed tests
-from datetime import datetime
-# Random number generator
-from random import *
-# System & command line utilities
-import sys
-# Json for the dictionary
-import json
-
-import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/datini/'
-export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/datini/'
-
-# Custom class to store URIs + related information for the ontologies/repositories
-
-class RDFcoords:
-    def __init__(self, uri, prefix, code = None):
-        self.uri = uri
-        self.prefix = prefix
-        self.code = code
-
-# Repositories
-aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')
-foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
-cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
-schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
-personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
-nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
-rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
-owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
-
-# Basic functions for triples / shortened triples in TTL format
-
-def triple(subject, predicate, object1):
-    line = subject + ' ' + predicate + ' ' + object1
-    return line
-
-def doublet(predicate, object1):
-    line = '    ' + predicate + ' ' + object1
-    return line
-
-def singlet(object1):
-    line = '        ' + object1
-    return line
-
-# Line endings in TTL format
-continueLine1 = ' ;\n'
-continueLine2 = ' ,\n'
-closeLine = ' .\n'
-
-def writeTTLHeader(output):
-    output.write('@prefix ' + aspoCoords.prefix + ' ' + aspoCoords.uri + closeLine)
-    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)
-    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
-    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)
-    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
-    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
-    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)
-    output.write('@prefix ' + owlCoords.prefix + ' ' + owlCoords.uri + closeLine)
-    output.write('\n')
-
-
-filePrefix = 'association_occupation_DATINI_luoghi_restore'
-fileType = ''
-max_entries = 10000000000000
-
-with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
-        export_dir + filePrefix + fileType + '.ttl', 'w') as output:
-    reader = csv.DictReader(csv_file)
-    writeTTLHeader(output)
-    first = True
-    ii = 0
-    for row in reader:
-        # The index ii is used to process a limited number of entries for testing purposes
-        ii = ii + 1
-        id_aspo = row['recordid']
-        aspoPlaceHolder = aspoCoords.prefix + id_aspo
-
-        if row['occupation_1'] != '' and row['occupation_1'] != ' ' :
-                occupazioni = []
-                pipe = "|"
-                if pipe in row['occupation_1']:
-                    occupazioni = row['occupation_1'].split('|') 
-                    for occupazione in occupazioni:
-                        # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is dropped
-                        txt = occupazione
-                        x = re.sub("\n", " ", txt)
-                        y = re.sub("\s\s", "", x)
-                        occ = re.sub(r'[^A-Za-z]','', y)
-                        occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-                        line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'hasOccupation',
-                                    occupationPlaceHolder) + closeLine
-                        output.write(line)
-                        line = triple(occupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    schemaCoords.prefix + 'Occupation') + closeLine
-                        output.write(line)
-                        line = triple(occupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                        output.write(line)
-                        if row['ID_ente_1'] != '':
-                            placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_1'] + '>'
-                            line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                            line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                        elif row['ID_ente_1'] == '' and row['place occupation 1 ENTE'] != '':
-                            placeoccupazioni = []
-                            pipe = "|"
-                            if pipe in row['place occupation 1 ENTE']:
-                                placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-                                for placeoccupazione in placeoccupazioni:
-                                    # NOTE(review): normalizes the whole column value, not the split item placeoccupazione (newlines -> spaces, whitespace pairs dropped)
-                                    txt = row['place occupation 1 ENTE']
-                                    x = re.sub("\n", " ", txt)
-                                    y = re.sub("\s\s", "", x)
-                                    placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                    line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                    output.write(line)
-                                    line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-                                    output.write(line)
-                            else: 
-                                placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-                                txt = row['place occupation 1 ENTE']
-                                x = re.sub("\n", " ", txt)
-                                y = re.sub("\s\s", "", x)
-                                placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                output.write(line)
-                                line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-                                output.write(line)              
-                else:
-                    # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is dropped
-                    txt = row['occupation_1']
-                    x = re.sub("\n", " ", txt)
-                    y = re.sub("\s\s", "", x)
-                    occ = re.sub(r'[^A-Za-z]','', y)
-                    occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-                    line = triple(aspoPlaceHolder,
-                                schemaCoords.prefix + 'hasOccupation',
-                                occupationPlaceHolder) + closeLine
-                    output.write(line)
-                    line = triple(occupationPlaceHolder,
-                                nsCoords.prefix + 'type',
-                                schemaCoords.prefix + 'Occupation') + closeLine
-                    output.write(line)
-                    line = triple(occupationPlaceHolder,
-                                rdfsCoords.prefix + 'label',
-                                '\"' + y + '\"') + closeLine
-                    output.write(line)
-                    if row['ID_ente_1'] != '':
-                            placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_1'] + '>'
-                            line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                            line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                    elif row['ID_ente_1'] == '' and row['place occupation 1 ENTE'] != '':
-                            placeoccupazioni = []
-                            pipe = "|"
-                            if pipe in row['place occupation 1 ENTE']:
-                                placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-                                for placeoccupazione in placeoccupazioni:
-                                    # NOTE(review): normalizes the whole column value, not the split item placeoccupazione (newlines -> spaces, whitespace pairs dropped)
-                                    txt = row['place occupation 1 ENTE']
-                                    x = re.sub("\n", " ", txt)
-                                    y = re.sub("\s\s", "", x)
-                                    placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                    line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                    output.write(line)
-                                    line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-                                    output.write(line)
-                            else: 
-                                placeoccupazioni = row['place occupation 1 ENTE'].split('|') 
-                                txt = row['place occupation 1 ENTE']
-                                x = re.sub("\n", " ", txt)
-                                y = re.sub("\s\s", "", x)
-                                placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                output.write(line)
-                                line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
-                                output.write(line)              
-            
-        if row['occupation_2'] != '' and row['occupation_2'] != ' ' :
-                occupazioni = []
-                pipe = "|"
-                if pipe in row['occupation_2']:
-                    occupazioni = row['occupation_2'].split('|') 
-                    for occupazione in occupazioni:
-                        # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is dropped
-                        txt = occupazione
-                        x = re.sub("\n", " ", txt)
-                        y = re.sub("\s\s", "", x)
-                        occ = re.sub(r'[^A-Za-z]','', y)
-                        occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-                        line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'hasOccupation',
-                                    occupationPlaceHolder) + closeLine
-                        output.write(line)
-                        line = triple(occupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    schemaCoords.prefix + 'Occupation') + closeLine
-                        output.write(line)
-                        line = triple(occupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                        output.write(line)
-                        
-                        if row['ID_ente_2'] != '':
-                            placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_2'] + '>'
-                            line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                            line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                        elif row['place occupation 2 ENTE'] != '':
-                            placeoccupazioni = []
-                            pipe = "|"
-                            if pipe in row['place occupation 2 ENTE']:
-                                placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-                                for placeoccupazione in placeoccupazioni:
-                                    # NOTE(review): normalizes the whole column value, not the split item placeoccupazione (newlines -> spaces, whitespace pairs dropped)
-                                    txt = row['place occupation 2 ENTE']
-                                    x = re.sub("\n", " ", txt)
-                                    y = re.sub("\s\s", "", x)
-                                    placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                    line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                    output.write(line)
-                                    line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-                                    output.write(line)
-                            else: 
-                                placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-                                txt = row['place occupation 2 ENTE']
-                                x = re.sub("\n", " ", txt)
-                                y = re.sub("\s\s", "", x)
-                                placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                output.write(line)
-                                line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-                                output.write(line)              
-                else:
-                    # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is dropped
-                    txt = row['occupation_2']
-                    x = re.sub("\n", " ", txt)
-                    y = re.sub("\s\s", "", x)
-                    occ = re.sub(r'[^A-Za-z]','', y)
-                    occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
-                    line = triple(aspoPlaceHolder,
-                                schemaCoords.prefix + 'hasOccupation',
-                                occupationPlaceHolder) + closeLine
-                    output.write(line)
-                    line = triple(occupationPlaceHolder,
-                                nsCoords.prefix + 'type',
-                                schemaCoords.prefix + 'Occupation') + closeLine
-                    output.write(line)
-                    line = triple(occupationPlaceHolder,
-                                rdfsCoords.prefix + 'label',
-                                '\"' + y + '\"') + closeLine
-                    output.write(line)
-                    if row['ID_ente_2'] != '':
-                            placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_2'] + '>'
-                            line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                            line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                            output.write(line)
-                    elif row['ID_ente_2'] == '' and row['place occupation 2 ENTE'] != '':
-                        placeoccupazioni = []
-                        pipe = "|"
-                        if pipe in row['place occupation 2 ENTE']:
-                                placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-                                for placeoccupazione in placeoccupazioni:
-                                    # NOTE(review): normalizes the whole column value, not the split item placeoccupazione (newlines -> spaces, whitespace pairs dropped)
-                                    txt = row['place occupation 2 ENTE']
-                                    x = re.sub("\n", " ", txt)
-                                    y = re.sub("\s\s", "", x)
-                                    placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                    line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                    output.write(line)
-                                    line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                    output.write(line)
-                                    line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-                                    output.write(line)
-                        else: 
-                                placeoccupazioni = row['place occupation 2 ENTE'].split('|') 
-                                txt = row['place occupation 2 ENTE']
-                                x = re.sub("\n", " ", txt)
-                                y = re.sub("\s\s", "", x)
-                                placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
-                                line = triple(aspoPlaceHolder,
-                                                schemaCoords.prefix + 'workLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(occupationPlaceHolder,
-                                                schemaCoords.prefix + 'occupationLocation',
-                                                placeoccupationPlaceHolder) + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    rdfsCoords.prefix + 'label',
-                                    '\"' + y + '\"') + closeLine
-                                output.write(line)
-                                line = triple(placeoccupationPlaceHolder,
-                                    nsCoords.prefix + 'type',
-                                    cidocCoords.prefix + 'E53_Place') + closeLine
-                                output.write(line)
-                                line = triple(aspoPlaceHolder,
-                                    schemaCoords.prefix + 'jobTitle',
-                                    '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
-                                output.write(line)              
-            
-        output.write('\n')
-        #
-        # Limit number of entries processed (if desired)
-        if (ii > max_entries):
-            break

+ 0 - 248
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_fonds.ipynb

@@ -1,248 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'fonds'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Ospedale della Misericordia e Dolce, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939>\", hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939>\", labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"') + '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder =\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939>\", carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"') + '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E35 Title\n",
-    "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-ST00005-0005939/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-GT001-', ''), hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.10.4 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 371
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_item_newdataset_old.ipynb

@@ -1,371 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '_eventi_newdataset.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_newdataset.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Ospedale della Misericordia e Dolce, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID partecipante evento'] + '>'\n",
-    "        e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID registro'] + \">\"\n",
-    "        e5placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID gettatello'] + \"_E5_\" + row['ID evento'] + \">\"\n",
-    "        e5typeplaceHolder = '<http://www.archiviodistato.prato.it/' + row['ID evento'] + \">\"\n",
-    "            \n",
-    "        line = triple(e22placeHolder, documentsCoords.prefix, e5placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "\n",
-    "        # E5 E55\n",
-    "        if (row['evento'] != ''):\n",
-    "            line = triple(e5placeHolder, labelCoords.prefix, '\\\"' + row['evento'] + ' di ' + row['nome gettatello'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e5placeHolder, hasTypePCoords.prefix, e5typeplaceHolder ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e5placeHolder, hasTypeCoords.prefix, eventCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e5typeplaceHolder, nsCoords.prefix + 'type', '\\\"' + row['evento'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e5typeplaceHolder, labelCoords.prefix, '\\\"' + row['evento'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
-    "        # E5 E53\n",
-    "        if(row['IDASPO_GEO_EVENTO_micro_microtoponimo'] != ''):\n",
-    "            e53mmplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + row['IDASPO_GEO_EVENTO_micro_microtoponimo'].replace('IT-ASPO-GEO0001-', '') + \">\"\n",
-    "        else:\n",
-    "            e53mmplaceHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0002-'+ row['ID_VOCABOLARIO_EVENTO_micro_microtoponimo'] + '>'      \n",
-    "        if(row['IDASPO_GEO_EVENTO_microtoponimo'] != ''):\n",
-    "            e53mplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + row['IDASPO_GEO_EVENTO_microtoponimo'].replace('IT-ASPO-GEO0001-', '') + \">\"\n",
-    "        else:\n",
-    "            e53mplaceHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0002-'+ row['ID_VOCABOLARIO_EVENTO_microtoponimo'] + '>'      \n",
-    "        if(row['IDASPO_GEO_EVENTO_macrotoponimo'] != ''):\n",
-    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + row['IDASPO_GEO_EVENTO_macrotoponimo'].replace('IT-ASPO-GEO0001-', '') + \">\"\n",
-    "        else:\n",
-    "            e53placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0002-'+ row['ID_VOCABOLARIO_EVENTO_macrotoponimo'] + '>'      \n",
-    "\n",
-    "        if (row['EVENTO_micro_microtoponimo'] != ''):\n",
-    "            line = triple(e5placeHolder, tookPlaceCoords.prefix, e53mmplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53mmplaceHolder, labelCoords.prefix, '\\\"' + row['EVENTO_micro_microtoponimo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53mmplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53mmplaceHolder, fallsCoords.prefix, e53mplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            \n",
-    "        if (row['EVENTO_micro_microtoponimo'] == '' and row['EVENTO_microtoponimo'] != ''):\n",
-    "            line = triple(e5placeHolder, tookPlaceCoords.prefix, e53mplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53mplaceHolder, labelCoords.prefix, '\\\"' + row['EVENTO_microtoponimo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53mplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "            output.write(line) \n",
-    "            line = triple(e53mplaceHolder, fallsCoords.prefix, e53placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
-    "        if (row['EVENTO_microtoponimo'] == '' and row['EVENTO_macrotoponimo'] != ''):\n",
-    "            line = triple(e5placeHolder, tookPlaceCoords.prefix, e53placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + row['EVENTO_macrotoponimo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "            output.write(line) \n",
-    "        \n",
-    "        # E5 E52 \n",
-    "        if row['data normalizzata'] != '':\n",
-    "            e52placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID gettatello'] + \"_E5_\" + row['ID evento'] + '_E52>'\n",
-    "            line = triple(e5placeHolder, hasTimeSpanCoords.prefix, e52placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52placeHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52placeHolder, labelCoords.prefix, '\\\"' + row['data normalizzata'] + '\\\"' ) + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        PC14placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID partecipante evento'] + \"_\" + row['ID evento'] + \"_\" + pcarriedByCoords.code + \">\"   \n",
-    "        E55placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID partecipante evento']  + \"_\" + row['ID evento'] + \"_\" + pcarriedByCoords.code + typeCoords.code + \">\"   \n",
-    "        if(row['ruolo in evento'] != ''):       \n",
-    "            line = triple(e5placeHolder, hasDomainCoords.prefix, PC14placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14placeHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14placeHolder, labelCoords.prefix, \"\\\"\" + row['nome partecipante evento'] + \" nel ruolo di \" + row['ruolo in evento'] + \"\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14placeHolder, roleOfCoords.prefix, E55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(E55placeHolder, labelCoords.prefix, \"\\\"\"+ row['ruolo in evento'] + \"\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14placeHolder, hasRangeCoords.prefix, e21placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        # E42 Identifier\n",
-    "        if(row['CODICE REGISTRO'] != ''):\n",
-    "            e42placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID registro'] + \"/\" + identifierCoords.code + \">\"\n",
-    "            line = triple(e22placeHolder, identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Riferimento registro: ' + row['CODICE REGISTRO']+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        #E62 String - Physdesc\n",
-    "        if(row['note'] != ''):\n",
-    "            e62placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID registro'] + \"/\" + stringCoords.code + \">\"\n",
-    "            line = triple(e5placeHolder, hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['note'].replace('\"','')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 459
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_item_person_newdataset_old.ipynb

@@ -1,459 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "import re\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs and related information for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT002/scheda/IT-ASPO-GT002->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT002->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'eve:', 'E5')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')\n",
-    "owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "residenceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P74_has_current_or_former_residence>', 'res:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + owlCoords.prefix + ' ' + owlCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + aspoCoords.prefix + ' ' + aspoCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + residenceCoords.prefix + ' ' + residenceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '_persone_newdataset.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_persone_newdataset.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
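-    "        # The index ii is meant to cap the number of entries processed (for testing); NOTE: it is never incremented (ii = ii+0), so the max_entries check at the end of the loop never triggers\n",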
-    "        ii = ii+0\n",
-    "        if (row['EAD_EAC'] == 'FALSE'):    \n",
-    "            e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID_persona'] + '>'\n",
-    "            #e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT002/scheda/\" + row['ID_persona'] + \">\"\n",
-    "            e65placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT002/scheda/\" + row['ID_persona'] + \"/\" + creationCoords.code + \">\"\n",
-    "\n",
-    "            #line = triple(e22placeHolder, refersCoords.prefix, e21placeHolder) + closeLine\n",
-    "            #output.write(line)\n",
-    "            if (row['gettatello'] == 'TRUE'):\n",
-    "                e55placeHolder = '<http://www.archiviodistato.prato.it/gettatello' + \">\"\n",
-    "                line = triple(e21placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e55placeHolder, labelCoords.prefix, '\\\"Gettatello\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "            \n",
-    "            # nome rilevato\n",
-    "            if(row['nome rilevato'] != ''):\n",
-    "                line = triple(e21placeHolder, hasTypeCoords.prefix, personCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e21placeHolder, foafCoords.prefix + 'name', '\\\"' + row['nome rilevato'] + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e21placeHolder, labelCoords.prefix, '\\\"' + row['nome rilevato'] + '\\\"') +  closeLine\n",
-    "                output.write(line)\n",
-    "                e62placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID_persona'] +'/E62>'\n",
-    "                line = triple(e21placeHolder, noteCoords.prefix, e62placeHolder) +  closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) +  closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e62placeHolder, labelCoords.prefix, '\\\"Fonte: Archivio di Stato di Prato - Fondo Ospedale della Misericordia e Dolce\\\"') +  closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "            # codice gettatello\n",
-    "            e42placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID_persona'] + '/E42>'\n",
-    "            if(row['gettatello'] == 'TRUE' and row['codice gettatello numero'] != ''):\n",
-    "                line = triple(e21placeHolder, identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e42placeHolder, labelCoords.prefix, '\\\"Matricola: ' + row['codice gettatello numero'] + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "            \n",
-    "            # soprannome\n",
-    "            if row['soprannome'] != '' and row['soprannome'] != ' ':\n",
-    "                    # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is removed:\n",
-    "                    txt = row['soprannome']\n",
-    "                    x = re.sub(\"\\n\", \" \", txt)\n",
-    "                    y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                    line = triple(e21placeHolder,\n",
-    "                                schemaCoords.prefix + 'alternateName',\n",
-    "                                '\\\"' + y + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            if row['nome proprio'] != '':\n",
-    "                    # Clean up whitespace: drop space+newline sequences, then pairs of consecutive whitespace characters, then any remaining newlines:\n",
-    "                    txt = row['nome proprio']\n",
-    "                    x = re.sub(\" \\n\", \"\", txt)\n",
-    "                    y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                    name = re.sub(\"\\n\", \"\", y)\n",
-    "                    line = triple(e21placeHolder,\n",
-    "                                foafCoords.prefix + 'givenName',\n",
-    "                                '\\\"' + name + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "\n",
-    "            if row['cognome'] != '':\n",
-    "                    # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is removed:\n",
-    "                    txt = row['cognome']\n",
-    "                    x = re.sub(\"\\n\", \" \", txt)\n",
-    "                    y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                    line = triple(e21placeHolder,\n",
-    "                                foafCoords.prefix + 'familyName',\n",
-    "                                '\\\"' + y + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "\n",
-    "            # nome_alternativo\n",
-    "            if row['nome_alternativo'] != '': \n",
-    "                line = triple(e21placeHolder,\n",
-    "                owlCoords.prefix + 'sameAs',\n",
-    "                aspoCoords.prefix + row['nome_alternativo'].replace(' ', '_')) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(aspoCoords.prefix + row['nome_alternativo'].replace(' ', '_'),\n",
-    "                rdfsCoords.prefix + 'label',\n",
-    "                '\\\"' + row['nome_alternativo'] + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "            # genere\n",
-    "            if row['m/f'] != '':\n",
-    "                    # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is removed:\n",
-    "                    txt = row['m/f']\n",
-    "                    x = re.sub(\"\\n\", \" \", txt)\n",
-    "                    y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                    line = triple(e21placeHolder,\n",
-    "                                foafCoords.prefix + 'gender',\n",
-    "                                '\\\"' + y + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            # patronimico\n",
-    "            if row['patronimico'] != '':\n",
-    "                    # Normalize whitespace: newlines become spaces, then each pair of consecutive whitespace characters is removed:\n",
-    "                    txt = row['patronimico']\n",
-    "                    x = re.sub(\"\\n\", \" \", txt)\n",
-    "                    y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                    line = triple(e21placeHolder,\n",
-    "                                personCoords.prefix + 'patronymicName',\n",
-    "                                '\\\"' + y + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            # professione     \n",
-    "            if row['professione'] != '' and row['professione'] != ' ' :\n",
-    "                        occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + row['professione'].replace(\" \",\"_\") + '>'\n",
-    "                        line = triple(e21placeHolder,\n",
-    "                                    schemaCoords.prefix + 'hasOccupation',\n",
-    "                                    occupationPlaceHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(occupationPlaceHolder,\n",
-    "                                    nsCoords.prefix + 'type',\n",
-    "                                    schemaCoords.prefix + 'Occupation') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(occupationPlaceHolder,\n",
-    "                                    rdfsCoords.prefix + 'label',\n",
-    "                                    '\\\"' + row['professione'] + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "            # avo 1\n",
-    "            if row['avo 1'] != '':\n",
-    "                    avo1 = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID_persona'] + \"/avo1>\"\n",
-    "                    line = triple(e21placeHolder,\n",
-    "                                schemaCoords.prefix + 'relatedTo',\n",
-    "                                avo1) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(avo1,\n",
-    "                                nsCoords.prefix + 'type',\n",
-    "                                foafCoords.prefix + 'Person') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(avo1,\n",
-    "                                rdfsCoords.prefix + 'label',\n",
-    "                                '\\\"' + row['avo 1'] + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "\n",
-    "            if row['avo 2'] != '':\n",
-    "                    avo2 = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID_persona'] + \"/avo2>\"\n",
-    "                    line = triple(e21placeHolder,\n",
-    "                                schemaCoords.prefix + 'relatedTo',\n",
-    "                                avo2) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(avo2,\n",
-    "                                nsCoords.prefix + 'type',\n",
-    "                                foafCoords.prefix + 'Person') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(avo2,\n",
-    "                                rdfsCoords.prefix + 'label',\n",
-    "                                '\\\"' + row['avo 2'] + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            # titolo \n",
-    "            if row['titolo'] != '':\n",
-    "                txt = row['titolo']\n",
-    "                x = re.sub(\"\\n\", \" \", txt)\n",
-    "                y = re.sub(\"\\s\\s\", \" \", x)\n",
-    "                line = triple(e21placeHolder, schemaCoords.prefix + 'honorificPrefix', '\\\"' + y + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "            if row['titolo 2'] != '':\n",
-    "                txt = row['titolo 2']\n",
-    "                x = re.sub(\"\\n\", \" \", txt)\n",
-    "                y = re.sub(\"\\s\\s\", \" \", x)\n",
-    "                line = triple(e21placeHolder, schemaCoords.prefix + 'honorificPrefix', '\\\"' + y + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "            # provenienza microtoponimo\n",
-    "            if(row['PROVENIENZA_microtoponimo'] != ''):\n",
-    "                if(row['IDASPO_GEO_PROVENIENZA_microtoponimo'] != ''):\n",
-    "                    e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + row['IDASPO_GEO_PROVENIENZA_microtoponimo'].replace('IT-ASPO-GEO0001-', '') + \">\"\n",
-    "                    line = triple(e21placeHolder, residenceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "                else:\n",
-    "                    e53placeHolder = '<http://www.dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_VOCABOLARIO_PROVENIENZA_microtoponimo'] + '>'\n",
-    "                    line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA_microtoponimo'] + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e21placeHolder, residenceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "            # provenienza macrotoponimo \n",
-    "            if(row['PROVENIENZA_macrotoponimo'] != '' and row['PROVENIENZA_microtoponimo'] != ''):\n",
-    "                        if(row['IDASPO_GEO_PROVENIENZA_macrotoponimo'] != ''):\n",
-    "                            e53mplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + row['IDASPO_GEO_PROVENIENZA_macrotoponimo'].replace('IT-ASPO-GEO0001-', '') + \">\"\n",
-    "                            line = triple(e53placeHolder, fallsCoords.prefix, e53mplaceHolder) + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            e53mplaceHolder = '<http://www.dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_VOCABOLARIO_PROVENIENZA_macrotoponimo'] + '>'\n",
-    "                            line = triple(e53mplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA_macrotoponimo'] + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "                            line = triple(e53mplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                            output.write(line)\n",
-    "                            line = triple(e53placeHolder, fallsCoords.prefix, e53mplaceHolder) + closeLine\n",
-    "                            output.write(line)\n",
-    "            elif (row['PROVENIENZA_macrotoponimo'] != '' and row['PROVENIENZA_microtoponimo'] == ''):\n",
-    "                if(row['IDASPO_GEO_PROVENIENZA_macrotoponimo'] != ''):\n",
-    "                    e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + row['IDASPO_GEO_PROVENIENZA_macrotoponimo'].replace('IT-ASPO-GEO0001-', '') + \">\"\n",
-    "                    line = triple(e21placeHolder, residenceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "                else:\n",
-    "                    e53placeHolder = '<http://www.dev.restore.ovi.cnr.it/vocabularies/places/'+ row['ID_VOCABOLARIO_PROVENIENZA_macrotoponimo'] + '>'\n",
-    "                    line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA_macrotoponimo'] + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e21placeHolder, residenceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                    output.write(line)\n",
-    "\n",
-    "            # note\n",
-    "            if(row['note'] != ''):\n",
-    "                e62placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID_persona'] +'/E62P>'\n",
-    "                line = triple(e21placeHolder, noteCoords.prefix, e62placeHolder) +  closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) +  closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e62placeHolder, labelCoords.prefix, '\\\"'+ row['note'] + '\\\"') +  closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "            output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 286
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_item_ref_reg.ipynb

@@ -1,286 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "\n",
-    " "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + 'ref_reg.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_newdataset.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Ospedale della Misericordia e Dolce, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['ID partecipante evento'] + '>'\n",
-    "        e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID registro'] + \">\"\n",
-    "        e5placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID gettatello'] + \"_E5_\" + row['ID evento'] + \">\"\n",
-    "        e5typeplaceHolder = '<http://www.archiviodistato.prato.it/' + row['ID evento'] + \">\"\n",
-    "\n",
-    "        # E42 Identifier\n",
-    "        if(row['CODICE REGISTRO'] != ''):\n",
-    "            e42placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/\" + row['ID registro'] + \"/\" + identifierCoords.code + \">\"\n",
-    "            line = triple(e22placeHolder, identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Riferimento registro: ' + row['CODICE REGISTRO']+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 536
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_document.ipynb

@@ -1,536 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "attributeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment>', 'att:', 'E13')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "assignedAttrCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P140_assigned_attribute_to>', 'ast:')\n",
-    "assignedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P141_assigned>', 'ass:')\n",
-    "composedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P46_is_composed_of>', 'cmp:')\n",
-    "\n",
-    " "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + attributeCoords.prefix + ' ' + attributeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedAttrCoords.prefix + ' ' + assignedAttrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedCoords.prefix + ' ' + assignedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + composedCoords.prefix + ' ' + composedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'gettatelli_'\n",
-    "fileType = 'newdataset'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Ospedale della Misericordia e Dolce, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        # DOCUMENTO\n",
-    "        # E22 Man Made Object - baliatici\n",
-    "        if(row['segnatura_completa'] != ''):\n",
-    "            e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].lstrip().rstrip()  + \">\"\n",
-    "            if row['tipo']!= \"\":\n",
-    "                segnatura = row['segnatura'] + \", \" + row['tipo'] + \". \" + row['carta, foglio, pagina'] + row['r, v']\n",
-    "            else:\n",
-    "                segnatura = row['segnatura'] + \", \" + row['carta, foglio, pagina'] + row['r, v']\n",
-    "            \n",
-    "            line = triple(e22placeHolder, hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e22placeHolder, labelCoords.prefix, '\\\"Documento ' + segnatura  + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            \n",
-    "            # E73 Information Object\n",
-    "            e73placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].lstrip().rstrip()  + \"/\" + informationObjectCoords.code + \">\"\n",
-    "            line = triple(e22placeHolder, carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e73placeHolder, labelCoords.prefix, '\\\"Documento ' + segnatura + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/baliatico>\"\n",
-    "            line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Documento\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        # E42 Identifier\n",
-    "        if(row['segnatura_completa'] != ''):\n",
-    "            e42placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].lstrip().rstrip()  + \"/E42>\"\n",
-    "            line = triple(e22placeHolder, identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypePCoords.prefix, '\\\"Segnatura\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Fondo Ospedale della Misericordia e Dolce, ' + segnatura + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "            if(row['segnatura'] == '5301'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005817\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005817\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                \n",
-    "            elif (row['segnatura'] == '5302'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005818\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005818\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '5303'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005819\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005819\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '5304'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005820\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005820\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '5305'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005821\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005821\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '5306'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005822\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005822\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "        \n",
-    "            elif (row['segnatura'] == '5307'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005823\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005823\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '5308'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005824\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005824\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '5310'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005826\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005826\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '5311'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005827\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005827\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "        \n",
-    "            elif (row['segnatura'] == '5192'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005659\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0005659\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "            \n",
-    "            elif (row['segnatura'] == '464'):\n",
-    "                e22rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0000512\" + \">\"\n",
-    "                e42rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoSt005/scheda/IT-ASPO-ST00005-0000512\" + \"/E42>\"\n",
-    "                \n",
-    "                line = triple(e22rplaceHolder, composedCoords.prefix, e22placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                if (row['CODICE REGISTRO']):\n",
-    "                    e42placeHolderS = \"<http://www.archiviodistato.prato.it/\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \">\"\n",
-    "                    line = triple(e42rplaceHolder, hasAlternativeFormCoords.prefix, e42placeHolderS) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, hasTypePCoords.prefix, '\\\"Sigla registro\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e42placeHolderS, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'].replace(\" \",\"\") + '\\\"') + closeLine\n",
-    "                    output.write(line)    \n",
-    "        \n",
-    "       \n",
-    "            \n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(e22placeHolder, hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(e22placeHolder, hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 418
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_event.ipynb

@@ -1,418 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
-    "#export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/GETTATELLI/RDF/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "attributeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment>', 'att:', 'E13')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "assignedAttrCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P140_assigned_attribute_to>', 'ast:')\n",
-    "assignedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P141_assigned>', 'ass:')\n",
-    "composedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P46_is_composed_of>', 'cmp:')\n",
-    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "beginningCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasBeginning>', 'beg:')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "endCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasEnd>', 'end:')\n",
-    "\n",
-    " "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + attributeCoords.prefix + ' ' + attributeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedAttrCoords.prefix + ' ' + assignedAttrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedCoords.prefix + ' ' + assignedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + composedCoords.prefix + ' ' + composedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + beginningCoords.prefix + ' ' + beginningCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + endCoords.prefix + ' ' + endCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'gettatelli_'\n",
-    "fileType = 'newdataset'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_date_normalization.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "\n",
-    "    baliatico = '<http://www.archiviodistato.prato.it/baliatico>'\n",
-    "    consegna = '<http://www.archiviodistato.prato.it/consegna>'\n",
-    "    licenziamento = '<http://www.archiviodistato.prato.it/licenziamento>'\n",
-    "    nascita = '<http://www.archiviodistato.prato.it/nascita>'\n",
-    "    morte = '<http://www.archiviodistato.prato.it/morte>'\n",
-    "    finebaliatico = '<http://www.archiviodistato.prato.it/finebaliatico>'\n",
-    "    battesimo = '<http://www.archiviodistato.prato.it/battesimo>'\n",
-    "    cresima = '<http://www.archiviodistato.prato.it/cresima>'\n",
-    "    ritrovamento = '<http://www.archiviodistato.prato.it/ritrovamento>'\n",
-    "    restituzione = '<http://www.archiviodistato.prato.it/restituzione>'\n",
-    "    trasferimento = '<http://www.archiviodistato.prato.it/trasferimento>'\n",
-    "    matrimonio = '<http://www.archiviodistato.prato.it/matrimonio>'\n",
-    "    iniziolavoro = '<http://www.archiviodistato.prato.it/iniziolavoro>'\n",
-    "    finelavoro = '<http://www.archiviodistato.prato.it/finelavoro>'\n",
-    "\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        # EVENTO\n",
-    "        if (row['evento'] != ''): \n",
-    "            evento = row['evento'].replace(' ','_').replace('\\'','').lstrip().rstrip()\n",
-    "            data = row['giorno'] + row['mese'] + row['anno'].lstrip().rstrip().replace(\" \",\"\")\n",
-    "            gettatelloevento = row['nome gettatello tenuto o con cui si è in relazione ( trovato o consegnato)'].replace(\"    \",\"\").replace(\"   \",\"\").replace(\"  \",\"\").replace(\" \",\"\").replace(\" \", \"_\").replace(\"'\", \"\").lstrip().rstrip()\n",
-    "            partecipanteevento = row['nome rilevato'].replace(\"    \",\"\").replace(\"   \",\"\").replace(\"  \",\"\").replace(\" \",\"\").replace(\" \", \"_\").replace(\"'\", \"\").lstrip().rstrip()          \n",
-    "            e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \"_\" + row['codice gettatello numero'].replace(\" \", \"\") + '>'\n",
-    "            e73placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].lstrip().rstrip() + \"/\" + informationObjectCoords.code.lstrip().rstrip() + \">\"         \n",
-    "            e5placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].replace(\" \", \"\").lstrip().rstrip() + \"_E5_\" + evento + \"_\" + data + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \",\"\").lstrip().rstrip() + \">\"\n",
-    "            # line = triple(e73placeHolder, documentsCoords.prefix, e5placeHolder) + closeLine\n",
-    "            # output.write(line)\n",
-    "            e5typeplaceHolder = '<http://www.archiviodistato.prato.it/' + evento + \"_E55>\"\n",
-    "            # line = triple(e5typeplaceHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "            # output.write(line)\n",
-    "            # line = triple(e5typeplaceHolder, labelCoords.prefix, '\\\"' + row['evento'] + '\\\"') + closeLine\n",
-    "            # output.write(line)\n",
-    "            # line = triple(e5placeHolder, hasTypePCoords.prefix, e5typeplaceHolder ) + closeLine\n",
-    "            # output.write(line)\n",
-    "            # line = triple(e5placeHolder, hasTypeCoords.prefix, eventCoords.prefix) + closeLine\n",
-    "            # output.write(line)\n",
-    "\n",
-    "            # if (row['nome rilevato'] != ''):\n",
-    "            #     PC14placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/' + row['segnatura_completa'].replace(\" \", \"\").lstrip().rstrip() + \"_\" + evento + \"_\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \",\"\") + \"_\" + pcarriedByCoords.code + \">\"                       \n",
-    "            #     line = triple(e5placeHolder, hasDomainCoords.prefix, PC14placeHolder) + closeLine\n",
-    "            #     output.write(line)\n",
-    "            #     line = triple(PC14placeHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "            #     output.write(line)\n",
-    "            #     if(row['funzione nell\\'evento'] != ''):\n",
-    "            #         line = triple(PC14placeHolder, labelCoords.prefix, \"\\\"\" + row['nome rilevato'] + \" nel ruolo di \" + row['funzione nell\\'evento'] + \"\\\"\" ) + closeLine\n",
-    "            #         output.write(line)\n",
-    "            #     else:\n",
-    "            #         line = triple(PC14placeHolder, labelCoords.prefix, \"\\\"\" + row['nome rilevato'] + \" con ruolo non definito \" + \"\\\"\" ) + closeLine\n",
-    "            #         output.write(line)\n",
-    "            \n",
-    "            # if(row['funzione nell\\'evento'] != ''):\n",
-    "            #     funzioneevento = row['funzione nell\\'evento'].replace(\" \",\"\").replace(\"\\'\",'')\n",
-    "            #     E55placeHolder = '<http://www.archiviodistato.prato.it/ruolo_evento_' + funzioneevento + \">\"                          \n",
-    "            #     line = triple(PC14placeHolder, roleOfCoords.prefix, E55placeHolder) + closeLine\n",
-    "            #     output.write(line)\n",
-    "            #     line = triple(E55placeHolder, labelCoords.prefix, \"\\\"\"+ row['funzione nell\\'evento'] +  \"\\\"\" ) + closeLine\n",
-    "            #     output.write(line)\n",
-    "            #     line = triple(PC14placeHolder, hasRangeCoords.prefix, e21placeHolder) + closeLine\n",
-    "            #     output.write(line)\n",
-    "            \n",
-    "            data = row['giorno'] +'/' + row['mese'] + '/' + row['anno']\n",
-    "            if data  != '':\n",
-    "                dataid = row['giorno'] + row['mese'] + row['anno'].lstrip().rstrip().replace(\" \",\"\")\n",
-    "                e52placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].replace(\" \", \"\").lstrip().rstrip() + \"_E5_\" + evento + \"_\" + dataid + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \",\"\") + '_E52>'\n",
-    "                # line = triple(e5placeHolder, hasTimeSpanCoords.prefix, e52placeHolder) + closeLine\n",
-    "                # output.write(line)\n",
-    "                # line = triple(e52placeHolder, hasTypeCoords.prefix, timeSpanCoords.prefix ) + closeLine\n",
-    "                # output.write(line)\n",
-    "                # line = triple(e52placeHolder, labelCoords.prefix, '\\\"' + data + '\\\"' ) + closeLine\n",
-    "                # output.write(line)\n",
-    "                # aggiungere giorno mese anno\n",
-    "                # if(row['anno'] != ''):\n",
-    "                #     line = triple(e52placeHolder, yearCoords.prefix, '\\\"'+row['anno']+'\\\"^^xsd:integer') + closeLine\n",
-    "                #     output.write(line)\n",
-    "                #     if (row['mese'] != ''):\n",
-    "                #         line = triple(e52placeHolder, monthCoords.prefix, '\\\"'+row['mese']+ '\\\"^^xsd:integer') + closeLine\n",
-    "                #         output.write(line)\n",
-    "                #     else:\n",
-    "                #         mese = '01'\n",
-    "                #         line = triple(e52placeHolder, monthCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                #         output.write(line)\n",
-    "                #     if (row['giorno'] != ''):\n",
-    "                #         line = triple(e52placeHolder, dayCoords.prefix, '\\\"'+row['giorno']+ '\\\"^^xsd:integer') + closeLine\n",
-    "                #         output.write(line)\n",
-    "                #     else:\n",
-    "                #         giorno = '01'\n",
-    "                #         line = triple(e52placeHolder, dayCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                #         output.write(line)\n",
-    "                # else:\n",
-    "                #     line = triple(e52placeHolder, noteCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                #     output.write(line)\n",
-    "                \n",
-    "                if(row['anno'] != '' and row['mese'] != '' and row['giorno'] != ''):\n",
-    "                    line = triple(e52placeHolder, beginningCoords.prefix, '\\\"'+ row['anno'] + row['mese']+ row['giorno'] + '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52placeHolder, endCoords.prefix, '\\\"'+ row['anno'] + row['mese']+ row['giorno'] + '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                elif(row['anno'] != '' and row['mese'] == '' and row['giorno'] != ''):\n",
-    "                    line = triple(e52placeHolder, beginningCoords.prefix, '\\\"'+ row['anno'] + '01' + row['giorno'] + '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52placeHolder, endCoords.prefix, '\\\"'+ row['anno'] + '12' + row['giorno'] + '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                elif(row['anno'] != '' and row['mese'] != '' and row['giorno'] == ''):\n",
-    "                    line = triple(e52placeHolder, beginningCoords.prefix, '\\\"'+ row['anno'] + row['mese'] + '01'+ '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52placeHolder, endCoords.prefix, '\\\"'+ row['anno'] + row['mese'] + '31'+ '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                elif(row['anno'] != '' and row['mese'] == '' and row['giorno'] == ''):\n",
-    "                    line = triple(e52placeHolder, beginningCoords.prefix, '\\\"'+ row['anno'] + '0101' + '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52placeHolder, endCoords.prefix, '\\\"'+ row['anno'] + '1231' + '\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "       \n",
-    "            # note\n",
-    "            # if(row['note'] != ''):\n",
-    "            #     e62placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].replace(\" \", \"\").lstrip().rstrip() + \"_E5_\" + evento + \"_\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \",\"\") + '_E62>'\n",
-    "            #     line = triple(e5placeHolder, noteCoords.prefix, e62placeHolder) +  closeLine\n",
-    "            #     output.write(line)\n",
-    "            #     line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) +  closeLine\n",
-    "            #     output.write(line)\n",
-    "            #     line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['note'].replace(\"\\\"\", \"\") + '\\\"') +  closeLine\n",
-    "            #     output.write(line)\n",
-    "            #     line = triple(e62placeHolder, hasTypePCoords.prefix, '\\\"Nota relativa all\\'evento\\\"') +  closeLine\n",
-    "            #     output.write(line)\n",
-    "            \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 347
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_event_place.ipynb

@@ -1,347 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPZIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/VARIE/TOPONIMI/MICROTOPONIMI/CSV/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "attributeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment>', 'att:', 'E13')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "assignedAttrCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P140_assigned_attribute_to>', 'ast:')\n",
-    "assignedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P141_assigned>', 'ass:')\n",
-    "composedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P46_is_composed_of>', 'cmp:')\n",
-    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "\n",
-    "\n",
-    " "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + attributeCoords.prefix + ' ' + attributeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedAttrCoords.prefix + ' ' + assignedAttrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedCoords.prefix + ' ' + assignedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + composedCoords.prefix + ' ' + composedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "   \n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'gettatelli_'\n",
-    "fileType = 'newdataset_association'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_place.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "\n",
-    "    baliatico = '<http://www.archiviodistato.prato.it/baliatico>'\n",
-    "    consegna = '<http://www.archiviodistato.prato.it/consegna>'\n",
-    "    licenziamento = '<http://www.archiviodistato.prato.it/licenziamento>'\n",
-    "    nascita = '<http://www.archiviodistato.prato.it/nascita>'\n",
-    "    morte = '<http://www.archiviodistato.prato.it/morte>'\n",
-    "    finebaliatico = '<http://www.archiviodistato.prato.it/finebaliatico>'\n",
-    "    battesimo = '<http://www.archiviodistato.prato.it/battesimo>'\n",
-    "    cresima = '<http://www.archiviodistato.prato.it/cresima>'\n",
-    "    ritrovamento = '<http://www.archiviodistato.prato.it/ritrovamento>'\n",
-    "    restituzione = '<http://www.archiviodistato.prato.it/restituzione>'\n",
-    "    trasferimento = '<http://www.archiviodistato.prato.it/trasferimento>'\n",
-    "    matrimonio = '<http://www.archiviodistato.prato.it/matrimonio>'\n",
-    "    iniziolavoro = '<http://www.archiviodistato.prato.it/iniziolavoro>'\n",
-    "    finelavoro = '<http://www.archiviodistato.prato.it/finelavoro>'\n",
-    "\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        # EVENTO\n",
-    "\n",
-    "        if (row['evento'] != ''): \n",
-    "            evento = row['evento'].replace(' ','_').replace('\\'','').lstrip().rstrip()\n",
-    "            data = row['giorno'] + row['mese'] + row['anno'].lstrip().rstrip().replace(\" \",\"\")\n",
-    "            e5placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].replace(\" \", \"\").lstrip().rstrip() + \"_E5_\" + evento + \"_\" + data + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \",\"\").lstrip().rstrip() + \">\"\n",
-    "\n",
-    "            # luogo \n",
-    "            '''if(row['EVENTO micro microtoponimo'] != ''):\n",
-    "                topmmPlaceholder = row['EVENTO micro microtoponimo'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower().replace('ospedaledelmisericordiaedolce', 'ospedaledellamisericordiaedolce')\n",
-    "                e53mmplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topmmPlaceholder + \">\"           \n",
-    "                #line = triple(e53mmplaceHolder, labelCoords.prefix, '\\\"' + row['EVENTO micro microtoponimo'].replace(\"\\\"\", \"\") + '\\\"') + closeLine\n",
-    "                #output.write(line)\n",
-    "                line = triple(e53mmplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "            if(row['EVENTO MICROTOPONIMO'] != ''):\n",
-    "                topmPlaceholder =  row['EVENTO MICROTOPONIMO'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower().replace('ospedaledelmisericordiaedolce', 'ospedaledellamisericordiaedolce')\n",
-    "                e53mplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topmPlaceholder + \">\"\n",
-    "                #line = triple(e53mplaceHolder, labelCoords.prefix, '\\\"' + row['EVENTO MICROTOPONIMO'].replace(\"\\\"\", \"\") + '\\\"') + closeLine                    \n",
-    "                #output.write(line)\n",
-    "                line = triple(e53mplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "            if(row['EVENTO COMUNE'] != ''):\n",
-    "                topcPlaceholder =  row['EVENTO COMUNE'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                e53cplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topcPlaceholder + \">\"\n",
-    "                #line = triple(e53cplaceHolder, labelCoords.prefix, '\\\"' + row['EVENTO COMUNE'].replace(\"\\\"\", \"\") + '\\\"') + closeLine\n",
-    "                #output.write(line)\n",
-    "                line = triple(e53cplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "            if(row['EVENTO PROVINCIA'] != ''):\n",
-    "                toppPlaceholder =  row['EVENTO PROVINCIA'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                e53pplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + toppPlaceholder + \">\"\n",
-    "                #line = triple(e53pplaceHolder, labelCoords.prefix, '\\\"' + row['EVENTO PROVINCIA'].replace(\"\\\"\", \"\") + '\\\"') + closeLine\n",
-    "                #output.write(line)\n",
-    "                line = triple(e53pplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                output.write(line)'''\n",
-    "\n",
-    "            if (row['ID_luogo'] != ''):\n",
-    "                e53pplaceHolder = \"<http://dev.restore.ovi.cnr.it/vocabularies/places/\" + row['ID_luogo'] + \">\"\n",
-    "                line = triple(e5placeHolder, tookPlaceCoords.prefix, e53pplaceHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 313
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_img.ipynb

@@ -1,313 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import re\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "attributeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment>', 'att:', 'E13')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "assignedAttrCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P140_assigned_attribute_to>', 'ast:')\n",
-    "assignedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P141_assigned>', 'ass:')\n",
-    "composedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P46_is_composed_of>', 'cmp:')\n",
-    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
-    "schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')\n",
-    "rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "patrCoords = RDFcoords('<http://www.w3.org/ns/person#patronymicName>', 'patr:')\n",
-    "matrCoords = RDFcoords('<http://www.w3.org/ns/person#matronymicName>', 'matr:')\n",
-    "residenceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P74_has_current_or_former_residence>', 'res:')\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + attributeCoords.prefix + ' ' + attributeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedAttrCoords.prefix + ' ' + assignedAttrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedCoords.prefix + ' ' + assignedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + composedCoords.prefix + ' ' + composedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + matrCoords.prefix + ' ' + matrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + patrCoords.prefix + ' ' + patrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + residenceCoords.prefix + ' ' + residenceCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'corrispondenza_IDgettatello-IMG'\n",
-    "fileType = ''\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_person.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        # PERSONA\n",
-    "\n",
-    "        if(row['link ASPO'] != ''): \n",
-    "                e36placeHolder = '<' + row['link immagine'] + '>'\n",
-    "                e36e42placeHolder = '<' + row['link immagine'] + '_E36_E42>'\n",
-    "                E22cplaceHolder =  '<' + row['link ASPO'] + '>'\n",
-    "\n",
-    "                line = triple(e36placeHolder, cidocCoords.prefix + 'P138_represents', E22cplaceHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e36placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E36_Visual_Item') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e36placeHolder, labelCoords.prefix, '\\\"Immagine digitale del segno gettatello\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                \n",
-    "                line = triple(e36placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e36e42placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e36e42placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E42_Identifier') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e36e42placeHolder, labelCoords.prefix, '\\\"'+row['codice contrassegno']+ '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "\n",
-    "                            \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 297
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_label.ipynb

@@ -1,297 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related information for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "attributeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment>', 'att:', 'E13')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "assignedAttrCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P140_assigned_attribute_to>', 'ast:')\n",
-    "assignedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P141_assigned>', 'ass:')\n",
-    "composedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P46_is_composed_of>', 'cmp:')\n",
-    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "\n",
-    "\n",
-    " "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + attributeCoords.prefix + ' ' + attributeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedAttrCoords.prefix + ' ' + assignedAttrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedCoords.prefix + ' ' + assignedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + composedCoords.prefix + ' ' + composedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "   \n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'gettatelli_'\n",
-    "fileType = 'newdataset_label'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        # EVENTO\n",
-    "        evento = row['evento'].replace(' ','_').replace('\\'','').lstrip().rstrip()\n",
-    "        data = row['giorno'] + row['mese'] + row['anno'].lstrip().rstrip().replace(\" \",\"\")\n",
-    "        e5placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'].replace(\" \", \"\").lstrip().rstrip() + \"_E5_\" + evento + \"_\" + data + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \",\"\").lstrip().rstrip() + \">\"\n",
-    "        line = triple(e5placeHolder, labelCoords.prefix, '\\\"' + row['evento'] + '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 527
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_person.ipynb

@@ -1,527 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import re\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related information for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "attributeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment>', 'att:', 'E13')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "assignedAttrCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P140_assigned_attribute_to>', 'ast:')\n",
-    "assignedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P141_assigned>', 'ass:')\n",
-    "composedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P46_is_composed_of>', 'cmp:')\n",
-    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
-    "schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')\n",
-    "rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "patrCoords = RDFcoords('<http://www.w3.org/ns/person#patronymicName>', 'patr:')\n",
-    "matrCoords = RDFcoords('<http://www.w3.org/ns/person#matronymicName>', 'matr:')\n",
-    "residenceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P74_has_current_or_former_residence>', 'res:')\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + attributeCoords.prefix + ' ' + attributeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedAttrCoords.prefix + ' ' + assignedAttrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedCoords.prefix + ' ' + assignedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + composedCoords.prefix + ' ' + composedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + matrCoords.prefix + ' ' + matrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + patrCoords.prefix + ' ' + patrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + residenceCoords.prefix + ' ' + residenceCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'gettatelli_'\n",
-    "fileType = 'newdataset'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_person.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        # PERSONA\n",
-    "        if (row['nome rilevato'] != ''):\n",
-    "            partecipanteevento = row['nome rilevato'].replace(\"    \",\"\").replace(\"   \",\"\").replace(\"  \",\"\").replace(\" \",\"\").replace(\" \", \"_\").replace(\"'\", \"\")          \n",
-    "            e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + partecipanteevento +  \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \"_\" + row['codice gettatello numero'].replace(\" \", \"\") + '>'\n",
-    "            if row['professione'] != '' and row['professione'] != ' ':\n",
-    "                        occupazioni = []\n",
-    "                        pipe = \"|\"\n",
-    "                        if pipe in row['professione']:\n",
-    "                            occupazioni = row['professione'].split('|') \n",
-    "                            for occupazione in occupazioni:\n",
-    "                                # Normalize whitespace: newlines become spaces, then pairs of consecutive whitespace characters are removed\n",
-    "                                txt = occupazione\n",
-    "                                x = re.sub(\"\\n\", \" \", txt)\n",
-    "                                y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                                occ = re.sub(r'[^A-Za-z]','', y)\n",
-    "                                occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(\" \",\"_\").lstrip().rstrip() + '>'\n",
-    "                                line = triple(e21placeHolder,\n",
-    "                                            schemaCoords.prefix + 'hasOccupation',\n",
-    "                                            occupationPlaceHolder) + closeLine\n",
-    "                                output.write(line)\n",
-    "                                line = triple(occupationPlaceHolder,\n",
-    "                                            nsCoords.prefix + 'type',\n",
-    "                                            schemaCoords.prefix + 'Occupation') + closeLine\n",
-    "                                output.write(line)\n",
-    "                                line = triple(occupationPlaceHolder,\n",
-    "                                            rdfsCoords.prefix + 'label',\n",
-    "                                            '\\\"' + y + '\\\"') + closeLine\n",
-    "                                output.write(line)\n",
-    "                        else:\n",
-    "                            # Normalize whitespace: newlines become spaces, then pairs of consecutive whitespace characters are removed\n",
-    "                            txt = row['professione']\n",
-    "                            x = re.sub(\"\\n\", \" \", txt)\n",
-    "                            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                            occ = re.sub(r'[^A-Za-z]','', y)\n",
-    "                            occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(\" \",\"_\") + '>'\n",
-    "                            line = triple(e21placeHolder,\n",
-    "                                        schemaCoords.prefix + 'hasOccupation',\n",
-    "                                        occupationPlaceHolder) + closeLine\n",
-    "                            output.write(line)\n",
-    "                            line = triple(occupationPlaceHolder,\n",
-    "                                        nsCoords.prefix + 'type',\n",
-    "                                        schemaCoords.prefix + 'Occupation') + closeLine\n",
-    "                            output.write(line)\n",
-    "                            line = triple(occupationPlaceHolder,\n",
-    "                                        rdfsCoords.prefix + 'label',\n",
-    "                                        '\\\"' + y.lstrip().rstrip() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "                    \n",
-    "            if(row['funzione nell\\'evento'] == 'gettatello' and row['codice gettatello numero'] != ''):\n",
-    "                        e42placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\")  + '/E42>'\n",
-    "                        line = triple(e21placeHolder, identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e42placeHolder, labelCoords.prefix, '\\\"' + row['CODICE REGISTRO'] +' ' + row['codice gettatello numero']+  '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e42placeHolder, hasTypePCoords.prefix, '\\\"Matricola\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                    \n",
-    "            if(row['funzione nell\\'evento'] == 'gettatello'):\n",
-    "                        e55placeHolder = '<http://www.archiviodistato.prato.it/gettatello' + \">\"\n",
-    "                        line = triple(e21placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e55placeHolder, labelCoords.prefix, '\\\"Gettatello\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                    \n",
-    "                    # PROVENIENZA micro microtoponimo\n",
-    "            if(row['PROVENIENZA MICRO MICROTOPONIMO'] != '' and row['PROVENIENZA MICROTOPONIMO'] != '' and row['PROVENIENZA COMUNE'] != '' and row['PROVENIENZA macrotoponimo PROVINCIA'] != ''):\n",
-    "                        topPlaceholder = row['PROVENIENZA MICRO MICROTOPONIMO'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                        e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topPlaceholder + \">\"\n",
-    "                        line = triple(e21placeHolder, residenceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        #line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA MICRO MICROTOPONIMO'] + '\\\"') + closeLine\n",
-    "                        #output.write(line)\n",
-    "                        line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (row['PROVENIENZA MICROTOPONIMO'] != ''):\n",
-    "                            topmPlaceholder = row['PROVENIENZA MICROTOPONIMO'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                            e53mplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topmPlaceholder + \">\"\n",
-    "                            line = triple(e53placeHolder, fallsCoords.prefix, e53mplaceHolder) + closeLine\n",
-    "                            output.write(line)\n",
-    "                            #line = triple(e53mplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA MICROTOPONIMO'] + '\\\"') + closeLine\n",
-    "                            #output.write(line)\n",
-    "                            line = triple(e53mplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                            output.write(line)\n",
-    "                            if (row['PROVENIENZA COMUNE'] != ''):\n",
-    "                                topcPlaceholder = row['PROVENIENZA COMUNE'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                                e53cplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topcPlaceholder + \">\"\n",
-    "                                line = triple(e53mplaceHolder, fallsCoords.prefix, e53cplaceHolder) + closeLine\n",
-    "                                output.write(line)\n",
-    "                                #line = triple(e53cplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA COMUNE'] + '\\\"') + closeLine\n",
-    "                                #output.write(line)\n",
-    "                                line = triple(e53cplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                                output.write(line)\n",
-    "                                if (row['PROVENIENZA macrotoponimo PROVINCIA'] != ''):\n",
-    "                                    toppPlaceholder = row['PROVENIENZA macrotoponimo PROVINCIA'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                                    e53pplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + toppPlaceholder + \">\"\n",
-    "                                    line = triple(e53cplaceHolder, fallsCoords.prefix, e53pplaceHolder) + closeLine\n",
-    "                                    output.write(line)\n",
-    "                                    #line = triple(e53pplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA macrotoponimo PROVINCIA'] + '\\\"') + closeLine\n",
-    "                                    #output.write(line)\n",
-    "                                    line = triple(e53pplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                                    output.write(line)\n",
-    "\n",
-    "                    # provenienza microtoponimo \n",
-    "            elif(row['PROVENIENZA MICRO MICROTOPONIMO'] == '' and row['PROVENIENZA MICROTOPONIMO'] != '' and row['PROVENIENZA COMUNE'] != '' and row['PROVENIENZA macrotoponimo PROVINCIA'] != ''):\n",
-    "                        topmPlaceholder = row['PROVENIENZA MICROTOPONIMO'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                        e53mplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topmPlaceholder + \">\"\n",
-    "                        #line = triple(e53mplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA MICROTOPONIMO'] + '\\\"') + closeLine\n",
-    "                        #output.write(line)\n",
-    "                        line = triple(e53mplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (row['PROVENIENZA COMUNE'] != ''):\n",
-    "                            topcPlaceholder = row['PROVENIENZA COMUNE'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                            e53cplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topcPlaceholder + \">\"\n",
-    "                            line = triple(e53mplaceHolder, fallsCoords.prefix, e53cplaceHolder) + closeLine\n",
-    "                            output.write(line)\n",
-    "                            #line = triple(e53cplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA COMUNE'] + '\\\"') + closeLine\n",
-    "                            #output.write(line)\n",
-    "                            line = triple(e53cplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                            output.write(line)\n",
-    "                            if (row['PROVENIENZA macrotoponimo PROVINCIA'] != ''):\n",
-    "                                toppPlaceholder = row['PROVENIENZA macrotoponimo PROVINCIA'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                                e53pplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + toppPlaceholder + \">\"\n",
-    "                                line = triple(e53cplaceHolder, fallsCoords.prefix, e53pplaceHolder) + closeLine\n",
-    "                                output.write(line)\n",
-    "                                #line = triple(e53pplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA macrotoponimo PROVINCIA'] + '\\\"') + closeLine\n",
-    "                                #output.write(line)\n",
-    "                                line = triple(e53pplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                                output.write(line)\n",
-    "\n",
-    "                    # PROVENIENZA comune\n",
-    "            elif(row['PROVENIENZA MICRO MICROTOPONIMO'] == '' and row['PROVENIENZA MICROTOPONIMO'] == '' and row['PROVENIENZA COMUNE'] != '' and row['PROVENIENZA macrotoponimo PROVINCIA'] != ''):\n",
-    "                        topPlaceholder = row['PROVENIENZA COMUNE'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                        e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topPlaceholder + \">\"\n",
-    "                        #line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA COMUNE'] + '\\\"') + closeLine\n",
-    "                        #output.write(line)\n",
-    "                        line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (row['PROVENIENZA macrotoponimo PROVINCIA'] != ''):\n",
-    "                                toppPlaceholder = row['PROVENIENZA macrotoponimo PROVINCIA'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                                e53pplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + toppPlaceholder + \">\"\n",
-    "                                line = triple(e53placeHolder, fallsCoords.prefix, e53pplaceHolder) + closeLine\n",
-    "                                output.write(line)\n",
-    "                                #line = triple(e53pplaceHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA macrotoponimo PROVINCIA'] + '\\\"') + closeLine\n",
-    "                                #output.write(line)\n",
-    "                                line = triple(e53pplaceHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                                output.write(line)\n",
-    "                    \n",
-    "                    # PROVENIENZA macrotoponimo provincia\n",
-    "            elif(row['PROVENIENZA MICRO MICROTOPONIMO'] == '' and row['PROVENIENZA MICROTOPONIMO'] == '' and row['PROVENIENZA COMUNE'] == '' and row['PROVENIENZA macrotoponimo PROVINCIA'] != ''):\n",
-    "                        topPlaceholder = row['PROVENIENZA macrotoponimo PROVINCIA'].replace(' ','').replace('\\'','').replace(',','').replace('ô','').replace('’','').replace('(','').replace(')','').replace('\\\"','').lower()\n",
-    "                        e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + topPlaceholder + \">\"\n",
-    "                        #line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + row['PROVENIENZA macrotoponimo PROVINCIA'] + '\\\"') + closeLine\n",
-    "                        #output.write(line)\n",
-    "                        line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                        output.write(line)\n",
-    "         # E13 Attribute Assignment + E22 contrassegno (the E57 Material code below is disabled inside a string literal)\n",
-    "        if(row['link segno gettatello aspo'] != ''):\n",
-    "            if(row['funzione nell\\'evento'] == 'gettatello'): \n",
-    "                #if (row['funzione nell\\'evento'] == 'gettatello'):\n",
-    "                partecipanteevento = row['nome rilevato'].replace(\"    \",\"\").replace(\"   \",\"\").replace(\"  \",\"\").replace(\" \",\"\").replace(\" \", \"_\").replace(\"'\", \"\")          \n",
-    "                e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + partecipanteevento +  \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \"_\" + row['codice gettatello numero'].replace(\" \", \"\") + '>'     \n",
-    "                #E13 Attribute Assignment\n",
-    "                E13placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\") + \"_\" + attributeCoords.code + \">\"\n",
-    "                line = triple(E13placeHolder, hasTypeCoords.prefix, attributeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E13placeHolder, labelCoords.prefix, '\\\"Documentazione gettatello ' + row['nome rilevato'].title() + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                E22cplaceHolder =  '<' + row['link segno gettatello aspo'] + '>'\n",
-    "                line = triple(E13placeHolder, assignedAttrCoords.prefix, E22cplaceHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                #e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'] + \">\"\n",
-    "                line = triple(E13placeHolder, assignedCoords.prefix, e21placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                #E22 Man Made Object - contrassegno\n",
-    "                line = triple(E22cplaceHolder, hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E22cplaceHolder, labelCoords.prefix, '\\\"Contrassegno di ' + row['nome rilevato'].title() + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E22cplaceHolder, hasTypePCoords.prefix, '\\\"Contrassegno\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "                '''e57placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\") + \"_\" + row['codice gettatello numero'] + \"/C/\" + materialCoords.code + \">\"\n",
-    "                line = triple(E22cplaceHolder, consistCoords.prefix, e57placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e57placeHolder, labelCoords.prefix, '\\\"' + row['descrizione segno'].replace('\"','')+ '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\") + \"_\" + row['codice gettatello numero'] + \"/C/\" + materialCoords.code  + \"_\" + typeCoords.code + \">\"  \n",
-    "                line = triple(e57placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e55placeHolder, labelCoords.prefix, '\\\"Descrizione contrassegno\\\"') + closeLine\n",
-    "                output.write(line)'''\n",
-    "\n",
-    "            elif (row['funzione nell\\'evento'] != 'gettatello') :\n",
-    "                #if (row['funzione nell\\'evento'] == 'gettatello'):\n",
-    "                partecipanteevento = row['nome gettatello tenuto o con cui si è in relazione ( trovato o consegnato)'].replace(\"    \",\"\").replace(\"   \",\"\").replace(\"  \",\"\").replace(\" \",\"\").replace(\" \", \"_\").replace(\"'\", \"\")          \n",
-    "                e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + partecipanteevento +  \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \"_\" + row['codice gettatello numero'].replace(\" \", \"\") + '>'     \n",
-    "                #E13 Attribute Assignment\n",
-    "                E13placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\") + \"_\" + attributeCoords.code + \">\"\n",
-    "                line = triple(E13placeHolder, hasTypeCoords.prefix, attributeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E13placeHolder, labelCoords.prefix, '\\\"Documentazione gettatello ' + row['nome gettatello tenuto o con cui si è in relazione ( trovato o consegnato)'].title() + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                E22cplaceHolder =  '<' + row['link segno gettatello aspo'] + '>'\n",
-    "                line = triple(E13placeHolder, assignedAttrCoords.prefix, E22cplaceHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + row['segnatura_completa'] + \">\"\n",
-    "                line = triple(E13placeHolder, assignedCoords.prefix, e21placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                #E22 Man Made Object - contrassegno\n",
-    "                line = triple(E22cplaceHolder, hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E22cplaceHolder, labelCoords.prefix, '\\\"Contrassegno di ' + row['nome gettatello tenuto o con cui si è in relazione ( trovato o consegnato)'].title() + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(E22cplaceHolder, hasTypePCoords.prefix, '\\\"Contrassegno\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "\n",
-    "                '''e57placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\") + \"_\" + row['codice gettatello numero'] + \"/C/\" + materialCoords.code + \">\"\n",
-    "                line = triple(E22cplaceHolder, consistCoords.prefix, e57placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e57placeHolder, labelCoords.prefix, '\\\"' + row['descrizione segno'].replace('\"','')+ '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e57placeHolder, hasTypeCoords.prefix, materialCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda\" + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\") + \"_\" + row['codice gettatello numero'] + \"/C/\" + materialCoords.code  + \"_\" + typeCoords.code + \">\"  \n",
-    "                line = triple(e57placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e55placeHolder, labelCoords.prefix, '\\\"Descrizione contrassegno\\\"') + closeLine\n",
-    "                output.write(line)'''\n",
-    "                            \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 587
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_newdata_person_name.ipynb

@@ -1,587 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import re\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoGT001/scheda/IT-ASPO-GT001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GT001->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "wasBornCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P98i_was_born>', 'wbc:')\n",
-    "diedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P100i_died_in>', 'di:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "noteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'nt:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "consistCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P45_consist_of>', 'cf:')\n",
-    "hasConditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P44_has_condition>', 'hc:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'st:', 'E62')\n",
-    "birthCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E67_Birth>', 'th:', 'E67')\n",
-    "deathCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E69_Death>', 'dh:', 'E69')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "materialCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E57_Material>', 'mt:', 'E57')\n",
-    "conditionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E3_Condition_State>', 'cs:', 'E3')\n",
-    "entityCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E1_Entity>', 'ey:', 'E1')\n",
-    "refersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67_refers_to>', 'rt:')\n",
-    "refersHasTypeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P67.1_type>', 'rh:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')\n",
-    "eventCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E5_Event>', 'event:', 'E5')\n",
-    "wasPresentCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P12i_was_present_at>', 'wp:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "documentsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P70_documents>', 'doc:')\n",
-    "fallsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P89_falls_within>', 'fw:')\n",
-    "attributeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment>', 'att:', 'E13')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "assignedAttrCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P140_assigned_attribute_to>', 'ast:')\n",
-    "assignedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P141_assigned>', 'ass:')\n",
-    "composedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P46_is_composed_of>', 'cmp:')\n",
-    "foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')\n",
-    "schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')\n",
-    "rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "patrCoords = RDFcoords('<http://www.w3.org/ns/person#patronymicName>', 'patr:')\n",
-    "matrCoords = RDFcoords('<http://www.w3.org/ns/person#matronymicName>', 'matr:')\n",
-    "residenceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P74_has_current_or_former_residence>', 'res:')\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + noteCoords.prefix + ' ' + noteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBornCoords.prefix + ' ' + wasBornCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + diedCoords.prefix + ' ' + diedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + birthCoords.prefix + ' ' + birthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + deathCoords.prefix + ' ' + deathCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + materialCoords.prefix + ' ' + materialCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + consistCoords.prefix + ' ' + consistCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + conditionCoords.prefix + ' ' + conditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasConditionCoords.prefix + ' ' + hasConditionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersCoords.prefix + ' ' + refersCoords.uri + closeLine)  \n",
-    "    output.write('@prefix ' + entityCoords.prefix + ' ' + entityCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + refersHasTypeCoords.prefix + ' ' + refersHasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + eventCoords.prefix + ' ' + eventCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasPresentCoords.prefix + ' ' + wasPresentCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + documentsCoords.prefix + ' ' + documentsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + fallsCoords.prefix + ' ' + fallsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + attributeCoords.prefix + ' ' + attributeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedAttrCoords.prefix + ' ' + assignedAttrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + assignedCoords.prefix + ' ' + assignedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + composedCoords.prefix + ' ' + composedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + matrCoords.prefix + ' ' + matrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + patrCoords.prefix + ' ' + patrCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + residenceCoords.prefix + ' ' + residenceCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'gettatelli_'\n",
-    "fileType = 'newdataset'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_person_name.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue \n",
-    "        \n",
-    "        # PERSONA\n",
-    "        if (row['nome rilevato'] != ''):\n",
-    "                    partecipanteevento = row['nome rilevato'].replace(\"    \",\"\").replace(\"   \",\"\").replace(\"  \",\"\").replace(\" \",\"\").replace(\" \", \"_\").replace(\"'\", \"\")          \n",
-    "                    e21placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + partecipanteevento +  \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") +  \"_\" + row['codice gettatello numero'].replace(\" \", \"\") + '>'\n",
-    "                    line = triple(e21placeHolder, hasTypeCoords.prefix, personCoords.prefix) + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e21placeHolder, foafCoords.prefix + 'name', '\\\"' + row['nome rilevato'].replace(\"  \",\"\").lstrip().rstrip().title() + '\\\"') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e21placeHolder, labelCoords.prefix, '\\\"' + row['nome rilevato'].replace(\"  \",\"\").lstrip().rstrip().title() + '\\\"') +  closeLine\n",
-    "                    output.write(line)\n",
-    "                    e62placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\")  +'/E62>'\n",
-    "                    line = triple(e21placeHolder, noteCoords.prefix, e62placeHolder) +  closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) +  closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e62placeHolder, hasTypePCoords.prefix, '\\\"Fonte\\\"') +  closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e62placeHolder, labelCoords.prefix, '\\\"Fonte: Archivio di Stato di Prato - Fondo Ospedale della Misericordia e Dolce\\\"') +  closeLine\n",
-    "                    output.write(line)\n",
-    "\n",
-    "                    if row['funzione nell\\'evento'] != '' and row['nome gettatello tenuto o con cui si è in relazione ( trovato o consegnato)'] != '':\n",
-    "                        e62placeHolder1 = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + partecipanteevento + \"_\" + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\")  +'/E62_G>'\n",
-    "                        line = triple(e21placeHolder, noteCoords.prefix, e62placeHolder1) +  closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e62placeHolder1, hasTypeCoords.prefix, stringCoords.prefix) +  closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e62placeHolder1, hasTypePCoords.prefix, '\\\"Nota collegamento con gettatello\\\"') +  closeLine\n",
-    "                        output.write(line)\n",
-    "                        professione = str(row['funzione nell\\'evento'].title())\n",
-    "                        collegamento = str(row['nome gettatello tenuto o con cui si è in relazione ( trovato o consegnato)'].title())\n",
-    "                        line = triple(e62placeHolder1, labelCoords.prefix, '\\\"' + professione + ' di ' + collegamento + '\\\"') +  closeLine\n",
-    "                        output.write(line)\n",
-    "                \n",
-    "                    if row['titolo1'] != '' and row['titolo1'] != ' ':\n",
-    "                        txt = row['titolo1']\n",
-    "                        x = re.sub(\"\\n\", \" \", txt)\n",
-    "                        y = re.sub(\"\\s\\s\", \" \", x)\n",
-    "                        y = y.lstrip().rstrip()\n",
-    "                        line = triple(e21placeHolder, schemaCoords.prefix + 'honorificPrefix', '\\\"' + y + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                    \n",
-    "                    if row['titolo2'] != '' and row['titolo2'] != ' ':\n",
-    "                        txt = row['titolo2']\n",
-    "                        x = re.sub(\"\\n\", \" \", txt)\n",
-    "                        y = re.sub(\"\\s\\s\", \" \", x)\n",
-    "                        y = y.lstrip().rstrip()\n",
-    "                        line = triple(e21placeHolder, schemaCoords.prefix + 'honorificPrefix', '\\\"' + y + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "            \n",
-    "                    if row['soprannome'] != '' and row['soprannome'] != ' ':\n",
-    "                        # Replace newlines with spaces, then delete each pair of adjacent whitespace characters:\n",
-    "                        txt = row['soprannome']\n",
-    "                        x = re.sub(\"\\n\", \" \", txt)\n",
-    "                        y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                        line = triple(e21placeHolder,\n",
-    "                                    schemaCoords.prefix + 'alternateName',\n",
-    "                                    '\\\"' + y.title() + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                    if row['m/f'] != '':\n",
-    "                        # Replace newlines with spaces, then delete each pair of adjacent whitespace characters:\n",
-    "                        txt = row['m/f']\n",
-    "                        x = re.sub(\"\\n\", \" \", txt)\n",
-    "                        y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                        line = triple(e21placeHolder,\n",
-    "                                    foafCoords.prefix + 'gender',\n",
-    "                                    '\\\"' + y + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                    if row['nome proprio'] != '' and row['nome proprio'] != ' ' :\n",
-    "                        # Join the four given-name columns with single spaces (leading/trailing whitespace is stripped when the triple is written):\n",
-    "                        txt = row['nome proprio'] + \" \" +  row['2° nome proprio'] + \" \" + row['3° nome proprio']+ \" \" + row['4° nome proprio']\n",
-    "                        #x = re.sub(\" \\n\", \"\", txt)\n",
-    "                        #y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                        #name = re.sub(\"\\n\", \"\", y)\n",
-    "                        #name = name.lstrip().rstrip()\n",
-    "                        line = triple(e21placeHolder, foafCoords.prefix + 'givenName', '\\\"' + txt.title().lstrip().rstrip() + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                    if row['cognome'] != '' and row['cognome'] != ' ':\n",
-    "                        # Replace newlines with spaces, then delete each pair of adjacent whitespace characters:\n",
-    "                        txt = row['cognome']\n",
-    "                        x = re.sub(\"\\n\", \" \", txt)\n",
-    "                        y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                        line = triple(e21placeHolder,\n",
-    "                                        foafCoords.prefix + 'familyName',\n",
-    "                                        '\\\"' + y.lstrip().rstrip().title() + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                    \n",
-    "                    if row['patronimico'] != '' and row['patronimico'] != ' ':\n",
-    "                        # Replace newlines with spaces, then delete each pair of adjacent whitespace characters:\n",
-    "                        txt = row['patronimico']\n",
-    "                        x = re.sub(\"\\n\", \" \", txt)\n",
-    "                        y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                        line = triple(e21placeHolder,\n",
-    "                                        patrCoords.prefix,\n",
-    "                                        '\\\"' + y.title() + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                    \n",
-    "                    if row['matronimico 1° nome'] != '' or row['matronimico 2° nome'] != '' or row['patronimico della madre']  != '':\n",
-    "                        txt = row['matronimico 1° nome'] + \" \" + row['matronimico 2° nome'] + \" \" + row['patronimico della madre']\n",
-    "                        #x = re.sub(\"\\n\", \" \", txt)\n",
-    "                        #y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                        name = '\\\"' + txt + '\\\"'\n",
-    "                        namecomplete = name.replace(\"    \",\"   \").replace(\"   \",\"  \").replace(\"  \",\" \").replace(\"\\\" \",\"\\\"\").replace(\" \\\"\",\"\\\"\").title()\n",
-    "                        if namecomplete != \"\\\"\\\"\":\n",
-    "                            line = triple(e21placeHolder, matrCoords.prefix, namecomplete) + closeLine\n",
-    "                            output.write(line)\n",
-    "                    \n",
-    "                    if (row['1° nome proprio marito'] != '' and row['1° nome proprio marito'] != ' '):\n",
-    "                        if (row['cognome del marito'] != ''):\n",
-    "                            idmarito = (row['1° nome proprio marito'] + row['2° nome proprio marito'] + row['patronimico del marito'] + row['cognome del marito']).replace(' ','').replace('\\'','')+ row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\")\n",
-    "                        else:\n",
-    "                            idmarito = (row['1° nome proprio marito'] + row['2° nome proprio marito'] + row['patronimico del marito'] + row['cognome']).replace(' ','').replace('\\'','')+ row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\")\n",
-    "\n",
-    "                        relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + idmarito + \">\"\n",
-    "                        E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + idmarito + '_MARITO_' + row['CODICE REGISTRO'].replace(\" \",\"\").replace(\"°\",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\")  + \">\"\n",
-    "                        line = triple(E13placeHolder,\n",
-    "                        nsCoords.prefix + 'type', \n",
-    "                        cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', e21placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (row['cognome del marito'] != ''):\n",
-    "                            line = triple(E13placeHolder,\n",
-    "                                                rdfsCoords.prefix + 'label',\n",
-    "                                                '\\\"' + (row['1° nome proprio marito'] + ' ' + row['2° nome proprio marito']+ ' ' + row['patronimico del marito'] + row['cognome del marito']).title() + ' coniuge di ' + (row['nome rilevato']).title() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "                            line = triple(relazionenoid,\n",
-    "                            foafCoords.prefix + 'name',\n",
-    "                            '\\\"' + row['1° nome proprio marito'].title() + ' ' + row['2° nome proprio marito'].title() + ' ' + row['patronimico del marito'].title() + row['cognome del marito'].title() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(E13placeHolder,\n",
-    "                                                rdfsCoords.prefix + 'label',\n",
-    "                                                '\\\"' + (row['1° nome proprio marito'] + ' ' + row['2° nome proprio marito']+ ' ' + row['patronimico del marito'] + row['cognome']).title() + ' coniuge di ' + (row['nome rilevato']).title() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "                            line = triple(relazionenoid,\n",
-    "                            foafCoords.prefix + 'name',\n",
-    "                            '\\\"' + row['1° nome proprio marito'].title() + ' ' + row['2° nome proprio marito'].title() + ' ' + row['patronimico del marito'].title() + row['cognome'].title() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "\n",
-    "                        \n",
-    "\n",
-    "                        line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        \n",
-    "                        if (row['cognome del marito'] != ''):\n",
-    "                            labelmarito = row['1° nome proprio marito'].title() + ' ' + row['2° nome proprio marito'].title() + ' ' + row['patronimico del marito'].title()  + row['cognome del marito'].title()\n",
-    "                        else:\n",
-    "                            labelmarito = row['1° nome proprio marito'].title() + ' ' + row['2° nome proprio marito'].title() + ' ' + row['patronimico del marito'].title()  + row['cognome'].title()\n",
-    "                        \n",
-    "                        line = triple(relazionenoid,\n",
-    "                                            rdfsCoords.prefix + 'label',\n",
-    "                                            '\\\"' + labelmarito + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        \n",
-    "                        line = triple(relazionenoid, \n",
-    "                                nsCoords.prefix + 'type', \n",
-    "                                personCoords.prefix) + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                        line = triple(relazionenoid, \n",
-    "                                        nsCoords.prefix + 'type', \n",
-    "                                        personCoords.prefix + 'Person') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                        line = triple(relazionenoid,\n",
-    "                                        nsCoords.prefix + 'type',\n",
-    "                                        foafCoords.prefix + 'person') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        E55placeHolder = '<http://www.archiviodistato.prato.it/moglie>'\n",
-    "                        line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E55placeHolder,\n",
-    "                                                    rdfsCoords.prefix + 'label',\n",
-    "                                                    '\\\"Moglie\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                        if row['1° nome proprio marito'] != '':\n",
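-    "                            # Build the husband's given name (first name plus second name when present), then strip newlines and pairs of adjacent whitespace characters:\n",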
-    "                            txt = row['1° nome proprio marito']\n",
-    "                            if row['2° nome proprio marito'] != '':\n",
-    "                                txt = row['1° nome proprio marito'] + ' ' + row['2° nome proprio marito']\n",
-    "                            x = re.sub(\" \\n\", \"\", txt)\n",
-    "                            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                            name = re.sub(\"\\n\", \"\", y)\n",
-    "                            line = triple(relazionenoid,\n",
-    "                                        foafCoords.prefix + 'givenName',\n",
-    "                                        '\\\"' + name.title() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "\n",
-    "                        if row['cognome del marito'] != '':\n",
-    "                            #Turn line breaks into spaces, then drop any pair of consecutive whitespace characters (single spaces are kept):\n",
-    "                            txt = row['cognome del marito']\n",
-    "                            x = re.sub(\"\\n\", \" \", txt)\n",
-    "                            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                            line = triple(relazionenoid,\n",
-    "                                        foafCoords.prefix + 'familyName',\n",
-    "                                        '\\\"' + y.title() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "\n",
-    "                        if row['patronimico del marito'] != '':\n",
-    "                            #Turn line breaks into spaces, then drop any pair of consecutive whitespace characters (single spaces are kept):\n",
-    "                            txt = row['patronimico del marito']\n",
-    "                            x = re.sub(\"\\n\", \" \", txt)\n",
-    "                            y = re.sub(\"\\s\\s\", \"\", x)\n",
-    "                            line = triple(relazionenoid,\n",
-    "                                        personCoords.prefix + 'patronymicName',\n",
-    "                                        '\\\"' + y.title() + '\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "\n",
-    "\n",
-    "                \n",
-    "                    '''if (row['avo 1'] != '' and row['avo 1'] != ' '):\n",
-    "                        idavo1 = (row['avo 1'] + row['matronimico avo 1']).replace(' ','').replace('\\'','')\n",
-    "                        E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + idavo1 + '_AVO1_' + row['CODICE REGISTRO'].replace(\" \",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\")  + \">\"\n",
-    "                        line = triple(E13placeHolder, \n",
-    "                        nsCoords.prefix + 'type', \n",
-    "                        cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', e21placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E13placeHolder,\n",
-    "                                            rdfsCoords.prefix + 'label',\n",
-    "                                            '\\\"Relazione: ' + row['avo 1'] + ' ' + row['matronimico avo 1'] + ' avo di secondo grado di ' + row['nome rilevato'] + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + idavo1 + \">\"\n",
-    "                        line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(relazionenoid,\n",
-    "                                            rdfsCoords.prefix + 'label',\n",
-    "                                            '\\\"' + row['avo 1'] + ' ' + row['matronimico avo 1'] + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(relazionenoid, \n",
-    "                                nsCoords.prefix + 'type', \n",
-    "                                cidocCoords.prefix + 'E21_Person') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                        line = triple(relazionenoid, \n",
-    "                                        nsCoords.prefix + 'type', \n",
-    "                                        personCoords.prefix + 'Person') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                        line = triple(relazionenoid,\n",
-    "                                        nsCoords.prefix + 'type',\n",
-    "                                        foafCoords.prefix + 'person') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        E55placeHolder = '<http://www.archiviodistato.prato.it/avo_secondo_grado>'\n",
-    "                        line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E55placeHolder,\n",
-    "                                                    rdfsCoords.prefix + 'label',\n",
-    "                                                    '\\\"Avo di secondo grado\\\"') + closeLine\n",
-    "                        output.write(line)      \n",
-    "\n",
-    "                    if (row['avo 2'] != '' and row['avo 1'] != ' '):\n",
-    "                        idavo2 = (row['avo 2'] + row['matronimico avo 2']).replace(' ','').replace('\\'','')\n",
-    "                        E13placeHolder = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + idavo2 + '_AVO2_' + row['CODICE REGISTRO'].replace(\" \",\"\") + \"_\" + row['codice gettatello numero'].replace(\" \", \"\")  +\">\"\n",
-    "                        line = triple(E13placeHolder, \n",
-    "                        nsCoords.prefix + 'type', \n",
-    "                        cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E13placeHolder, cidocCoords.prefix + 'P141_assigned', e21placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E13placeHolder,\n",
-    "                                            rdfsCoords.prefix + 'label',\n",
-    "                                            '\\\"Relazione: ' + row['avo 2'] + ' ' + row['matronimico avo 2'] + ' avo di terzo grado di ' + row['nome rilevato'] + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        relazionenoid = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + idavo2 + \">\"\n",
-    "                        line = triple(relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder ) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(relazionenoid,\n",
-    "                                            rdfsCoords.prefix + 'label',\n",
-    "                                            '\\\"' + row['avo 2'] + ' ' + row['matronimico avo 2'] + '\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(relazionenoid, \n",
-    "                                nsCoords.prefix + 'type', \n",
-    "                                cidocCoords.prefix + 'E21_Person') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                        line = triple(relazionenoid, \n",
-    "                                        nsCoords.prefix + 'type', \n",
-    "                                        personCoords.prefix + 'Person') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                        line = triple(relazionenoid,\n",
-    "                                        nsCoords.prefix + 'type',\n",
-    "                                        foafCoords.prefix + 'person') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        E55placeHolder = '<http://www.archiviodistato.prato.it/avo_terzo_grado>'\n",
-    "                        line = triple(E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder) + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(E55placeHolder,\n",
-    "                                                    rdfsCoords.prefix + 'label',\n",
-    "                                                    '\\\"Avo di terzo grado\\\"') + closeLine\n",
-    "                        output.write(line)'''                     \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 205
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/gettatelli/CSV_to_RDF_gettatelli_type.ipynb

@@ -1,205 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Parser per estrarre tutte le tipologie di documenti ed associarle solo una volta ad ogni record"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/gettatelli/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/gettatelli/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item_type'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "\n",
-    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", labelCoords.prefix,  '\\\"' + row['tipologia']+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 240
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_date.ipynb

@@ -1,240 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "\n",
-    "#CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "\n",
-    "\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Sending, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + subClassOfCoords.prefix + ' ' + subClassOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_date.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        el1placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        \n",
-    "        # Data invio\n",
-    "        if(row['data_inizio'] != ''):\n",
-    "            e52PplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el2placeHolder, e52PplaceHolder, '\\\"'+ row['data_inizio'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "       \n",
-    "        if(row['data_fine'] != ''):\n",
-    "            e52AplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el3placeHolder, e52AplaceHolder, '\\\"' + row['data_fine'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "             \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 245
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_fonds.ipynb

@@ -1,245 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts \n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'fonds'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Ospedale della Misericordia e Dolce, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E73 Information Object\n",
-    "        e73placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e73placeHolder, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        # E35 Title        \n",
-    "        if(row['titolo_aspo'] != 'None'):\n",
-    "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + row['titolo_aspo'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 285
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item.ipynb

@@ -1,285 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasCurrentPermanentLocationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P54_has_current_permanent_location>', 'ap:')\n",
-    "hasCurrentOwnerCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P52_has_current_owner>', 'ow:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "groupCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E74_Group>', 'gp:', 'E74')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentPermanentLocationCoords.prefix + ' ' + hasCurrentPermanentLocationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasCurrentOwnerCoords.prefix + ' ' + hasCurrentOwnerCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + groupCoords.prefix + ' ' + groupCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    E74placeHolder = '<http://www.archiviodistato.prato.it>'\n",
-    "    line = triple(E74placeHolder, hasTypeCoords.prefix, groupCoords.prefix ) + closeLine\n",
-    "    output.write(line)        \n",
-    "    line = triple(E74placeHolder, labelCoords.prefix, \"\\\"Archivio di Stato di Prato\\\"\") + closeLine\n",
-    "    output.write(line)\n",
-    "    currentLocation = \"\\\"Fondo Ospedale della Misericordia e Dolce, Archivio di Stato di Prato, Prato (PO)\\\"\"\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E22 Man Made Object\n",
-    "        tt = ''\n",
-    "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
-    "            tt = row['titolo_aspo']\n",
-    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasTypeCoords.prefix, manMadeObjectCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), labelCoords.prefix, '\\\"' + tt.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E73 Information Object\n",
-    "            e73placeHolder =  \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), carriesCoords.prefix, e73placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e73placeHolder, hasTypeCoords.prefix, informationObjectCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e73placeHolder, labelCoords.prefix, '\\\"' + tt.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E55 Type\n",
-    "        if(row['tipologia'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['tipologia']:\n",
-    "                tipologie = row['tipologia'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(e73placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "        elif(row['genere'] != ''):\n",
-    "            tipologie = []\n",
-    "            pipe = \"|\" \n",
-    "            if pipe in row['genere']:\n",
-    "                tipologie = row['genere'].split('|')\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + tipologie.replace(\" \", \"\") + \">\"\n",
-    "                line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "            else:\n",
-    "                e55placeHolder = \"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['genere'].replace(\" \", \"\") + \">\"\n",
-    "                line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasTypePCoords.prefix, e55placeHolder) + closeLine\n",
-    "                output.write(line)       \n",
-    "        # E35 Title        \n",
-    "        if(row['titolo_aspo'] != 'None' and row['titolo_aspo'] != ''):\n",
-    "            tut = row['titolo_aspo']\n",
-    "            e35placeHolder1 = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + titleCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, identifiedByCoords.prefix, e35placeHolder1) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, hasTypeCoords.prefix, titleCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e35placeHolder1, labelCoords.prefix, '\\\"' + tut.replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        segnatura = ''\n",
-    "        if(row['segnatura_busta'] != ''):\n",
-    "            segnatura = 'busta '+row['segnatura_busta']\n",
-    "        if(row['segnatura_codice'] != ''):\n",
-    "            segnatura = segnatura + ', codice ' + row['segnatura_codice']\n",
-    "        if(segnatura != ''):\n",
-    "            e42placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \">\"\n",
-    "            line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), identifiedByCoords.prefix, e42placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, labelCoords.prefix, '\\\"Fondo Ospedale della Misericordia e Dolce, ' + segnatura + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e42placeHolder, hasTypePCoords.prefix, \"\\\"Segnatura\\\"\") + closeLine\n",
-    "            output.write(line)\n",
-    "        # E42 Identifier\n",
-    "        e42placeHolderID = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + identifierCoords.code + \"ID>\"\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), identifiedByCoords.prefix, e42placeHolderID) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, hasTypeCoords.prefix, identifierCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(e42placeHolderID, labelCoords.prefix, '\\\"' + row['id'].replace(\" \", \"\") + '\\\"') + closeLine            \n",
-    "        output.write(line)\n",
-    "        # E22 - P52 - E74\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasCurrentOwnerCoords.prefix, E74placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        # E22 - P54 - E53\n",
-    "        line = triple(datiniCoords.prefix + row['id'].replace('IT-ASPO-MV001-', ''), hasCurrentPermanentLocationCoords.prefix, currentLocation) + closeLine\n",
-    "        output.write(line)\n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 300
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_date_normalization.ipynb

@@ -1,300 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/MARCOVALDI/CSV/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/DATE/CORRETTE/MARCOVALDI/RDF/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3 Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# CIDOC Predicates\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "hasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108_has_produced>', 'hp:')\n",
-    "wasProducedCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P108i_was_produced_by>', 'wp:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "onGoingTCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P81_ongoing_throughout>', 'gt:')\n",
-    "nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')\n",
-    "schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')\n",
-    "yearCoords = RDFcoords('<https://www.w3.org/TR/owl-time#year>', 'year:')\n",
-    "monthCoords = RDFcoords('<https://www.w3.org/TR/owl-time#month>', 'month:')\n",
-    "dayCoords = RDFcoords('<https://www.w3.org/TR/owl-time#day>', 'day:')\n",
-    "# CIDOC Objects\n",
-    "productionCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E12_Production>', 'pr:', 'E12')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')\n",
-    "xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')\n",
-    "cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')\n",
-    "\n",
-    "beginningCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasBeginning>', 'beg:')\n",
-    "endCoords = RDFcoords('<https://www.w3.org/TR/owl-time#hasEnd>', 'end:')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- year: https://www.w3.org/TR/owl-time#year\n",
-    "- month: https://www.w3.org/TR/owl-time#month\n",
-    "- day: https://www.w3.org/TR/owl-time#day"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine)        \n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)   \n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasProducedCoords.prefix + ' ' + hasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasProducedCoords.prefix + ' ' + wasProducedCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + productionCoords.prefix + ' ' + productionCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + creationCoords.prefix + ' ' + creationCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + onGoingTCoords.prefix + ' ' + onGoingTCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + yearCoords.prefix + ' ' + yearCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + monthCoords.prefix + ' ' + monthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dayCoords.prefix + ' ' + dayCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + beginningCoords.prefix + ' ' + beginningCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + endCoords.prefix + ' ' + endCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item_marcovaldi_date'\n",
-    "max_entries = 1000000000\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_range.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        # E52 Time Span\n",
-    "        if(row['data_periodo_normalizzata_inizio'] != ''):\n",
-    "                e52PplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "                if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):\n",
-    "                    if(len(row['data_periodo_normalizzata_inizio']) == 8):           \n",
-    "                        year = row['data_periodo_normalizzata_inizio'][0:4]\n",
-    "                        month = row['data_periodo_normalizzata_inizio'][4:6]\n",
-    "                        day = row['data_periodo_normalizzata_inizio'][6:8]\n",
-    "                        if(year != '****' and year != '9998'):\n",
-    "                            line = triple(e52PplaceHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                            if (month != '**' and month != '99'):\n",
-    "                                line = triple(e52PplaceHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine\n",
-    "                                output.write(line)\n",
-    "                            else:\n",
-    "                                line = triple(e52PplaceHolder, monthCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                                output.write(line)\n",
-    "                            if (day != '**' and day != '98'):\n",
-    "                                line = triple(e52PplaceHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                                output.write(line)\n",
-    "                            else:\n",
-    "                                line = triple(e52PplaceHolder, dayCoords.prefix, '\\\"01\\\"^^xsd:integer') + closeLine\n",
-    "                                output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52PplaceHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        line = triple(e52PplaceHolder, beginningCoords.prefix, '\\\"'+row['data_periodo_normalizzata_inizio']+'\\\"^^xsd:date') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        line = triple(e52PplaceHolder, endCoords.prefix, '\\\"'+row['data_periodo_normalizzata_inizio']+'\\\"^^xsd:date') + closeLine\n",
-    "                        output.write(line)\n",
-    "        if(row['data_periodo_normalizzata_fine'] != ''):\n",
-    "            e52AplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            if(row['data_periodo_normalizzata_inizio'] != 'Senza data'):\n",
-    "                if(len(row['data_periodo_normalizzata_inizio']) == 8):\n",
-    "                    year = row['data_periodo_normalizzata_fine'][0:4]\n",
-    "                    month = row['data_periodo_normalizzata_fine'][4:6]\n",
-    "                    day = row['data_periodo_normalizzata_fine'][6:8]\n",
-    "                    if(year != '****' and year != '9998'):\n",
-    "                        line = triple(e52AplaceHolder, yearCoords.prefix, '\\\"'+year+'\\\"^^xsd:integer') + closeLine\n",
-    "                        output.write(line)\n",
-    "                        if (month != '**' and month != '99'):\n",
-    "                            line = triple(e52AplaceHolder, monthCoords.prefix, '\\\"'+month+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52AplaceHolder, monthCoords.prefix, '\\\"12\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        if (day != '**' and day != '98'):\n",
-    "                            line = triple(e52AplaceHolder, dayCoords.prefix, '\\\"'+day+ '\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                        else:\n",
-    "                            line = triple(e52AplaceHolder, dayCoords.prefix, '\\\"31\\\"^^xsd:integer') + closeLine\n",
-    "                            output.write(line)\n",
-    "                    else:\n",
-    "                        line = triple(e52AplaceHolder, cidocCoords.prefix + 'P3_has_note', '\\\"Data incompleta\\\"') + closeLine\n",
-    "                        output.write(line)\n",
-    "\n",
-    "                    line = triple(e52AplaceHolder, endCoords.prefix, '\\\"'+row['data_periodo_normalizzata_fine']+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "                    line = triple(e52AplaceHolder, beginningCoords.prefix, '\\\"'+row['data_periodo_normalizzata_fine']+'\\\"^^xsd:date') + closeLine\n",
-    "                    output.write(line)\n",
-    "\n",
-    "        output.write('\\n')\n",
-    "            # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 252
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_event_creation.ipynb

@@ -1,252 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related info for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "# Added by FS\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "# Added by FS CIDOC properties for person\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + tookPlaceCoords.prefix + ' ' + tookPlaceCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine) \n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_creation.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        #Evento creazione\n",
-    "        if(row['tipologia'] != 'carteggio'):\n",
-    "            e22placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \">\"\n",
-    "            e73placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "            e65placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
-    "            e65FplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + creationCoords.code + \"F>\"\n",
-    "            line = triple(e22placeHolder, wasBroughtCoords.prefix, e65placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65placeHolder, hasTypeCoords.prefix, creationCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65placeHolder, labelCoords.prefix, '\\\"Inizio creazione di ' + row['titolo_aspo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65placeHolder, hasTypePCoords.prefix, '\\\"Inizio\\\"^^xsd:string') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e22placeHolder, wasBroughtCoords.prefix, e65FplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65FplaceHolder, hasTypeCoords.prefix, creationCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65FplaceHolder, labelCoords.prefix, '\\\"Fine creazione di ' + row['titolo_aspo'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e65FplaceHolder, hasTypePCoords.prefix, '\\\"Fine\\\"^^xsd:string') + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "            #Luogo creazione\n",
-    "            if(row['luogo_luogo'] != ''):\n",
-    "                e65placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + creationCoords.code + \">\"\n",
-    "                auth_partenza = row['luogo_luogo']\n",
-    "                authcode_partenza = re.sub('{luogo: .* ', '', auth_partenza)\n",
-    "                authcodeprefix_partenza = authcode_partenza.replace('IT-ASPO-GEO0001-', '').replace('\"', '').replace('}', '').strip()\n",
-    "                e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_partenza + \">\"\n",
-    "                line = triple(e65placeHolder, tookPlaceCoords.prefix, e53placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                geogname = row['luogo_luogo'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
-    "                place = re.sub('IT-ASPO-GEO0001-[0-9]*}', '', geogname).strip()\n",
-    "                line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place + '\\\"') + closeLine\n",
-    "                output.write(line)\n",
-    "        \n",
-    "            #Periodo creazione\n",
-    "            if(row['data_periodo'] != ''):\n",
-    "                e52placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + timeSpanCoords.code + \">\"\n",
-    "                line = triple(e65placeHolder, hasTimeSpanCoords.prefix, e52placeHolder) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52placeHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "                output.write(line)\n",
-    "                line = triple(e52placeHolder, labelCoords.prefix, '\\\"Periodo: ' + row['data_periodo'] + '\\\"') + closeLine\n",
-    "                output.write(line)  \n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.4"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 297
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_event_exchange.ipynb

@@ -1,297 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Send, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + subClassOfCoords.prefix + ' ' + subClassOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        \n",
-    "        el1placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        PC14splaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
-    "        PC14rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + pcarriedByCoords.code + \">\"   \n",
-    "        E55splaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + typeCoords.code + \">\"       \n",
-    "        E55rplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "    \n",
-    "        line = triple(exchangeLettersCoords.prefix, labelCoords.prefix, '\\\"Exchange of letters\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(sendLetterCoords.prefix, labelCoords.prefix, '\\\"Send letter\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(receiveLetterCoords.prefix, labelCoords.prefix, '\\\"Receive letter\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el2placeHolder, subClassOfCoords.prefix, el1placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el3placeHolder, subClassOfCoords.prefix, el1placeHolder) + closeLine\n",
-    "        output.write(line)    \n",
-    "        line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \">\", movedByCoords.prefix, el1placeHolder) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el1placeHolder, labelCoords.prefix, '\\\"' + row['tipologia'] + ': ' + row['titolo_aspo'] + '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el1placeHolder, hasTypeCoords.prefix, exchangeLettersCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el2placeHolder, labelCoords.prefix, '\\\"Invio\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el2placeHolder, hasTypeCoords.prefix, sendLetterCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el3placeHolder, labelCoords.prefix, '\\\"Ricezione\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(el3placeHolder, hasTypeCoords.prefix, receiveLetterCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "\n",
-    "        # If the 'persona_mittente' (sender) column is not empty for the given entry, write down the sender triples\n",
-    "        if(row['persona_mittente'] != ''):\n",
-    "            mittente = row['persona_mittente'].replace('{\"nome\":', '').replace(',', '').replace('\"authID\":', '')\n",
-    "            name_mittente = re.sub('IT-ASPO-AU00003-[0-9].*}', '', mittente).replace('\"', '').replace('}', '').strip()\n",
-    "            auth = row['persona_mittente']\n",
-    "            authcode = re.sub('{\"nome\": .* ', '', auth)\n",
-    "            authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            actorplaceHolder = personAuthCoords.prefix + authcodeprefix        \n",
-    "            line = triple(el2placeHolder, hasDomainCoords.prefix, PC14splaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, labelCoords.prefix, \"\\\"\" + name_mittente + \" nel ruolo di mittente\" + \"\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, roleOfCoords.prefix, E55splaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(E55splaceHolder, labelCoords.prefix, \"\\\"Mittente\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14splaceHolder, hasRangeCoords.prefix, actorplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "\n",
-    "        # If the 'persona_destinatario' (recipient) column is not empty for the given entry, write down the recipient triples\n",
-    "        if(row['persona_destinatario'] != ''):\n",
-    "            destinatario = row['persona_destinatario'].replace('{\"nome\":', '').replace(',', '').replace('\"authID\":', '')\n",
-    "            name_destinatario = re.sub('IT-ASPO-AU00003-[0-9].*}', '', destinatario).replace('\"', '').replace('}', '').strip()\n",
-    "            auth = row['persona_destinatario']\n",
-    "            authcode = re.sub('{\"nome\": .* ', '', auth)\n",
-    "            authcodeprefix= authcode.replace('IT-ASPO-AU00003-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            actorplaceHolder = personAuthCoords.prefix + authcodeprefix        \n",
-    "            line = triple(el3placeHolder, hasDomainCoords.prefix, PC14rplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, hasTypeCoords.prefix, pcarriedByCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, labelCoords.prefix, \"\\\"\" + name_destinatario + \" nel ruolo di destinatario\" + \"\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, roleOfCoords.prefix, E55rplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(E55rplaceHolder, labelCoords.prefix, \"\\\"Destinatario\\\"\" ) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(PC14rplaceHolder, hasRangeCoords.prefix, actorplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 276
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_event_exchange_date_place.ipynb

@@ -1,276 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "movedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P25i_moved_by>', 'mb:')\n",
-    "subClassOfCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#subClassOf>', 'so:')\n",
-    "tookPlaceCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P7_took_place_at>', 'tk:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "roleOfCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14.1_in_the_role_of>', 'ro:')\n",
-    "hasDomainCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P01_has_domain>', 'hd:')\n",
-    "hasRangeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P02_has_range>', 'hr:')\n",
-    "hasTimeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span>', 'hs:')\n",
-    "movedToCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P26_moved_to>', 'mt:')\n",
-    "movedFromCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P27_moved_from>', 'mf:')\n",
-    "\n",
-    "#CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "pcarriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/PC14_carried_out_by>', 'cy:', 'PC14')\n",
-    "timeSpanCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span>', 'ts:', 'E52')\n",
-    "placeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E53_Place>', 'pl:', 'E53')\n",
-    "\n",
-    "\n",
-    "# New classes (subclasses of E7 Activity) - Exchange, Send, Receive Letters\n",
-    "exchangeLettersCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL1_Exchange_Letters>', 'el:', 'EL1')\n",
-    "sendLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL2_Send_Letter>', 'sl:', 'EL2')\n",
-    "receiveLetterCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/EL3_Receive_Letter>', 'rl:', 'EL3')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + exchangeLettersCoords.prefix + ' ' + exchangeLettersCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + sendLetterCoords.prefix + ' ' + sendLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + receiveLetterCoords.prefix + ' ' + receiveLetterCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + subClassOfCoords.prefix + ' ' + subClassOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedByCoords.prefix + ' ' + movedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + roleOfCoords.prefix + ' ' + roleOfCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDomainCoords.prefix + ' ' + hasDomainCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + pcarriedByCoords.prefix + ' ' + pcarriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasRangeCoords.prefix + ' ' + hasRangeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + timeSpanCoords.prefix + ' ' + timeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + placeCoords.prefix + ' ' + placeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTimeSpanCoords.prefix + ' ' + hasTimeSpanCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedToCoords.prefix + ' ' + movedToCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + movedFromCoords.prefix + ' ' + movedFromCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_event_exchange_date_place.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        el1placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + exchangeLettersCoords.code + \">\"\n",
-    "        el2placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \">\"\n",
-    "        el3placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \">\"\n",
-    "        \n",
-    "        # Data invio\n",
-    "        if(row['data_inizio'] != ''):\n",
-    "            e52PplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + sendLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el2placeHolder, hasTimeSpanCoords.prefix, e52PplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52PplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52PplaceHolder, labelCoords.prefix, '\\\"'+ row['data_inizio'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "       \n",
-    "        if(row['data_fine'] != ''):\n",
-    "            e52AplaceHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + receiveLetterCoords.code + \"_\" + timeSpanCoords.code + \">\"\n",
-    "            line = triple(el3placeHolder, hasTimeSpanCoords.prefix, e52AplaceHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52AplaceHolder, hasTypeCoords.prefix, timeSpanCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e52AplaceHolder, labelCoords.prefix, '\\\"' + row['data_fine'] + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "     \n",
-    "        if(row['luogo_partenza'] != '' and row['luogo_arrivo'] != ''):\n",
-    "            partenza = row['luogo_partenza'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
-    "            place_partenza = re.sub('IT-ASPO-GEO0001-[0-9]*}', '', partenza).strip()\n",
-    "            arrivo = row['luogo_arrivo'].replace('{luogo:', '').replace(',', '').replace('authID:', '')\n",
-    "            place_arrivo = re.sub('IT-ASPO-GEO0001-[0-9]*}', '', arrivo).strip()\n",
-    "            auth_partenza = row['luogo_partenza']\n",
-    "            authcode_partenza = re.sub('{luogo: .* ', '', auth_partenza)\n",
-    "            authcodeprefix_partenza = authcode_partenza.replace('IT-ASPO-GEO0001-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            auth_arrivo = row['luogo_arrivo']\n",
-    "            authcode_arrivo = re.sub('{luogo: .* ', '', auth_arrivo)\n",
-    "            authcodeprefix_arrivo = authcode_arrivo.replace('IT-ASPO-GEO0001-', '').replace('\"', '').replace('}', '').strip()\n",
-    "            #Luogo partenza\n",
-    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_partenza + \">\"\n",
-    "            line = triple(el2placeHolder, movedFromCoords.prefix, e53placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place_partenza + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            #Luogo arrivo\n",
-    "            e53placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-GEO0001-\" + authcodeprefix_arrivo + \">\"\n",
-    "            line = triple(el3placeHolder, movedToCoords.prefix, e53placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53placeHolder, hasTypeCoords.prefix, placeCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e53placeHolder, labelCoords.prefix, '\\\"' + place_arrivo + '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 214
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_extent.ipynb

@@ -1,214 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasDimensionsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P43_has_dimension>', 'hd:')\n",
-    "dimensionsCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E54_Dimension>', 'dm:', 'E54')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "# Added by FS CIDOC entity\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasDimensionsCoords.prefix + ' ' + hasDimensionsCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + dimensionsCoords.prefix + ' ' + dimensionsCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_extent.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "\n",
-    "        if(row['numero'] != ''):\n",
-    "            e54placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + dimensionsCoords.code + \">\"\n",
-    "            line = triple(\"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \">\", hasDimensionsCoords.prefix, e54placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypeCoords.prefix, dimensionsCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, labelCoords.prefix, '\\\"' + row['numero'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e54placeHolder, hasTypePCoords.prefix, '\\\"Consistenza carte\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 222
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_item_note.ipynb

@@ -1,222 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "# Utilities to handle character encodings\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-MV001->', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasTypeNCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3.1_has_type>', 'tn:')\n",
-    "hasNoteCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P3_has_note>', 'no:')\n",
-    "\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')\n",
-    "stringCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E62_String>', 'sr:', 'E62')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeNCoords.prefix + ' ' + hasTypeNCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + stringCoords.prefix + ' ' + stringCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasNoteCoords.prefix + ' ' + hasNoteCoords.uri + closeLine)\n",
-    "    \n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '_note.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "        \n",
-    "        #Nota\n",
-    "        if(row['nota'] != ''):\n",
-    "            e73placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/\" + informationObjectCoords.code + \">\"\n",
-    "            e62placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/E73_\" + stringCoords.code + \">\"\n",
-    "            line = triple(e73placeHolder, hasNoteCoords.prefix, e62placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, labelCoords.prefix, '\\\"' + row['nota'].replace('\\\\','\\\\\\\\').replace('\"','\\\\\"')+ '\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix) + closeLine\n",
-    "            output.write(line)\n",
-    "            e55placeHolder = \"<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/\" + row['id'] + \"/E73_\" + stringCoords.code + \"_\" + typeCoords.code + \">\"\n",
-    "            line = triple(e62placeHolder, hasTypeNCoords.prefix, e55placeHolder) + closeLine\n",
-    "            output.write(line)\n",
-    "            line = triple(e55placeHolder, labelCoords.prefix, '\\\"Nota contenuto informativo\\\"') + closeLine\n",
-    "            output.write(line)\n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 205
FORMS/parsers/prebuilt_parsers/CSV_to_RDF_ASPO/marcovaldi/CSV_to_RDF_marcovaldi_type.ipynb

@@ -1,205 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Parser per estrarre tutte le tipologie di documenti ed associarle solo una volta ad ogni record"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Utilities to read/write csv files\n",
-    "import csv\n",
-    "import unicodedata\n",
-    "# Ordered Dicts\n",
-    "from collections import OrderedDict\n",
-    "import json\n",
-    "\n",
-    "\n",
-    "# OPTIONAL IMPORTS\n",
-    "\n",
-    "# For timestamping/simple speed tests\n",
-    "from datetime import datetime\n",
-    "# Random number generator\n",
-    "from random import *\n",
-    "# System & command line utilities\n",
-    "import sys\n",
-    "# Json for the dictionary\n",
-    "import json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/ASPO/marcovaldi/'\n",
-    "export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/ASPO/marcovaldi/'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
-    "\n",
-    "class RDFcoords:\n",
-    "    def __init__(self, uri, prefix, code = None):\n",
-    "        self.uri = uri\n",
-    "        self.prefix = prefix\n",
-    "        self.code = code\n",
-    "\n",
-    "\n",
-    "# Repositories\n",
-    "datiniCoords = RDFcoords('<http://datini.archiviodistato.prato.it/la-ricerca/scheda/>', 'dt:')\n",
-    "personAuthCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/IT-ASPO-AU00003->', 'pa:')\n",
-    "# W3/CIDOC Predicates\n",
-    "hasTypeCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>', 'tp:')\n",
-    "hasTypePCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P2_has_type>', 'te:')\n",
-    "carriesCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P128_carries>', 'ca:')\n",
-    "identifiedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>', 'ib:')\n",
-    "labelCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#label>', 'lb:')\n",
-    "wasBroughtCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P92i_was_brought_into_existence_by>', 'wb:')\n",
-    "carriedByCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P14_carried_out_by>', 'cb:')\n",
-    "hasAlternativeFormCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/P139_has_alternative_form>', 'af:')\n",
-    "\n",
-    "# CIDOC Objects\n",
-    "manMadeObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object>', 'mo:', 'E22')\n",
-    "informationObjectCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object>', 'io:', 'E73')\n",
-    "titleCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E35_Title>', 'ti:' ,'E35')\n",
-    "placeAppellationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E44_Place_appellation>', 'pa:', 'E44')\n",
-    "identifierCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E42_Identifier>', 'id:', 'E42')\n",
-    "typeCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E55_Type>', 'ty:', 'E55')\n",
-    "creationCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E65_Creation>', 'cr:', 'E65')\n",
-    "personCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/E21_Person>', 'ps:', 'E21')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Basic functions for triples / shortened triples in TTL format\n",
-    "\n",
-    "def triple(subject, predicate, object1):\n",
-    "    line = subject + ' ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def doublet(predicate, object1):\n",
-    "    line = '    ' + predicate + ' ' + object1\n",
-    "    return line\n",
-    "\n",
-    "def singlet(object1):\n",
-    "    line = '        ' + object1\n",
-    "    return line\n",
-    "\n",
-    "# Line endings in TTL format\n",
-    "continueLine1 = ' ;\\n'\n",
-    "continueLine2 = ' ,\\n'\n",
-    "closeLine = ' .\\n'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def writeTTLHeader(output):\n",
-    "    output.write('@prefix ' + datiniCoords.prefix + ' ' + datiniCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personAuthCoords.prefix + ' ' + personAuthCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypeCoords.prefix + ' ' + hasTypeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasTypePCoords.prefix + ' ' + hasTypePCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + manMadeObjectCoords.prefix + ' ' + manMadeObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriesCoords.prefix + ' ' + carriesCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + informationObjectCoords.prefix + ' ' + informationObjectCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifiedByCoords.prefix + ' ' + identifiedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + titleCoords.prefix + ' ' + titleCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + labelCoords.prefix + ' ' + labelCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + identifierCoords.prefix + ' ' + identifierCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + wasBroughtCoords.prefix + ' ' + wasBroughtCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + typeCoords.prefix + ' ' + typeCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + carriedByCoords.prefix + ' ' + carriedByCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)\n",
-    "    output.write('@prefix ' + hasAlternativeFormCoords.prefix + ' ' + hasAlternativeFormCoords.uri + closeLine)\n",
-    "    output.write('\\n')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "filePrefix = 'data_'\n",
-    "fileType = 'item_type'\n",
-    "max_entries = 1000000000\n",
-    "\n",
-    "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n",
-    "    reader = csv.DictReader(csv_file)\n",
-    "    writeTTLHeader(output)\n",
-    "    first = True\n",
-    "    ii = 0\n",
-    "    for row in reader:\n",
-    "        # The index ii is used to process a limited number of entries for testing purposes\n",
-    "        ii = ii+1\n",
-    "        # Skip the first line as it carries info we don't want to triplify\n",
-    "        if(first):\n",
-    "            first = False\n",
-    "            continue\n",
-    "\n",
-    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", hasTypeCoords.prefix, typeCoords.prefix) + closeLine\n",
-    "        output.write(line)\n",
-    "        line = triple(\"<http://archiviodistato.prato.it/\" + typeCoords.code + \"/\" + row['tipologia'].replace(\" \", \"\") + \">\", labelCoords.prefix,  '\\\"' + row['tipologia']+ '\\\"') + closeLine\n",
-    "        output.write(line)\n",
-    "        \n",
-    "        output.write('\\n')\n",
-    "        #\n",
-    "        #\n",
-    "        # Limit number of entries processed (if desired)\n",
-    "        if(ii>max_entries):\n",
-    "            break\n",
-    "        "
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.0 64-bit",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "metadata": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}