{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Utilities to read/write csv files\n",
    "import csv\n",
    "# Utilities to handle character encodings\n",
    "import unicodedata\n",
    "# Ordered Dicts\n",
    "from collections import OrderedDict\n",
    "import re\n",
    "import json\n",
    "\n",
    "\n",
    "# OPTIONAL IMPORTS\n",
    "\n",
    "# For timestamping/simple speed tests\n",
    "from datetime import datetime\n",
    "# Random number generator (module import; no name from it is used unqualified in this notebook)\n",
    "import random\n",
    "# System & command line utilities\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input (CSV) and output (Turtle) directories. File names are appended by plain\n",
    "# string concatenation, so both values must end with a slash.\n",
    "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/gettatelli/'\n",
    "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/gettatelli/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Custom class to store URIs + related infos for the ontologies/repositories\n",
    "\n",
    "class RDFcoords:\n",
    "    \"\"\"Coordinates of an ontology/repository: namespace URI, Turtle prefix and optional class code.\"\"\"\n",
    "\n",
    "    def __init__(self, uri, prefix, code = None):\n",
    "        self.uri = uri\n",
    "        self.prefix = prefix\n",
    "        self.code = code\n",
    "\n",
    "# NOTE(review): every uri below is the empty string in this notebook, so the\n",
    "# '@prefix' lines written by writeTTLHeader carry no IRI. Fill them in before\n",
    "# loading the generated file.\n",
    "\n",
    "# Repositories\n",
    "datiniCoords = RDFcoords('', 'dt:')\n",
    "# NOTE(review): 'pa:' is also the prefix of placeAppellationCoords below - confirm\n",
    "personAuthCoords = RDFcoords('', 'pa:')\n",
    "# W3/CIDOC Predicates\n",
    "hasTypeCoords = RDFcoords('', 'tp:')\n",
    "hasNoteCoords = RDFcoords('', 'no:')\n",
    "hasTypePCoords = RDFcoords('', 'te:')\n",
    "carriesCoords = RDFcoords('', 'ca:')\n",
    "identifiedByCoords = RDFcoords('', 'ib:')\n",
    "labelCoords = RDFcoords('', 'lb:')\n",
    "wasBroughtCoords = RDFcoords('', 'wb:')\n",
    "wasBornCoords = RDFcoords('', 'wbc:')\n",
    "diedCoords = RDFcoords('', 'di:')\n",
    "carriedByCoords = RDFcoords('', 'cb:')\n",
    "noteCoords = RDFcoords('', 'nt:')\n",
    "hasTimeSpanCoords = RDFcoords('', 'hs:')\n",
    "consistCoords = RDFcoords('', 'cf:')\n",
    "hasConditionCoords = RDFcoords('', 'hc:')\n",
    "hasCurrentPermanentLocationCoords = RDFcoords('', 'ap:')\n",
    "hasCurrentOwnerCoords = RDFcoords('', 'ow:')\n",
    "nsCoords = RDFcoords('', 'rdf:')\n",
    "\n",
    "# CIDOC Objects\n",
    "manMadeObjectCoords = RDFcoords('', 'mo:', 'E22')\n",
    "informationObjectCoords = RDFcoords('', 'io:', 'E73')\n",
    "titleCoords = RDFcoords('', 'ti:' ,'E35')\n",
    "placeAppellationCoords = RDFcoords('', 'pa:', 'E44')\n",
    "identifierCoords = RDFcoords('', 'id:', 'E42')\n",
    "typeCoords = RDFcoords('', 'ty:', 'E55')\n",
    "creationCoords = RDFcoords('', 'cr:', 'E65')\n",
    "personCoords = RDFcoords('', 'ps:', 'E21')\n",
    "stringCoords = RDFcoords('', 'st:', 'E62')\n",
    "birthCoords = RDFcoords('', 'th:', 'E67')\n",
    "deathCoords = RDFcoords('', 'dh:', 'E69')\n",
    "timeSpanCoords = RDFcoords('', 'ts:', 'E52')\n",
    "materialCoords = RDFcoords('', 'mt:', 'E57')\n",
    "conditionCoords = RDFcoords('', 'cs:', 'E3')\n",
    "entityCoords = RDFcoords('', 'ey:', 'E1')\n",
    "refersCoords = RDFcoords('', 'rt:')\n",
    "refersHasTypeCoords = RDFcoords('', 'rh:')\n",
    "placeCoords = RDFcoords('', 'pl:', 'E53')\n",
    "groupCoords = RDFcoords('', 'gp:', 'E74')\n",
    "eventCoords = RDFcoords('', 'event:', 'E5')\n",
    "wasPresentCoords = RDFcoords('', 'wp:')\n",
    "tookPlaceCoords = RDFcoords('', 'tk:')\n",
    "roleOfCoords = RDFcoords('', 'ro:')\n",
    "hasDomainCoords = RDFcoords('', 'hd:')\n",
    "hasRangeCoords = RDFcoords('', 'hr:')\n",
    "pcarriedByCoords = RDFcoords('', 'cy:', 'PC14')\n",
    "documentsCoords = RDFcoords('', 'doc:')\n",
    "fallsCoords = RDFcoords('', 'fw:')\n",
    "attributeCoords = RDFcoords('', 'att:', 'E13')\n",
    "hasAlternativeFormCoords = RDFcoords('', 'af:')\n",
    "assignedAttrCoords = RDFcoords('', 'ast:')\n",
    "assignedCoords = RDFcoords('', 'ass:')\n",
    "composedCoords = RDFcoords('', 'cmp:')\n",
    "foafCoords = RDFcoords('', 'foaf:')\n",
    "schemaCoords = RDFcoords('', 'schema:')\n",
    "cidocCoords = RDFcoords('', 'crm:')\n",
    "rdfsCoords = RDFcoords('', 'rdfs:')\n",
    "patrCoords = RDFcoords('', 'patr:')\n",
    "matrCoords = RDFcoords('', 'matr:')\n",
    "residenceCoords = RDFcoords('', 'res:')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic functions for triples / shortened triples in TTL format\n",
    "\n",
    "def triple(subject, predicate, object1):\n",
    "    \"\"\"Return 'subject predicate object' without a line terminator.\"\"\"\n",
    "    return subject + ' ' + predicate + ' ' + object1\n",
    "\n",
    "def doublet(predicate, object1):\n",
    "    \"\"\"Return ' predicate object', to continue the previous subject.\"\"\"\n",
    "    return ' ' + predicate + ' ' + object1\n",
    "\n",
    "def singlet(object1):\n",
    "    \"\"\"Return ' object', to continue the previous subject and predicate.\"\"\"\n",
    "    return ' ' + object1\n",
    "\n",
    "# Line endings in TTL format\n",
    "continueLine1 = ' ;\\n'\n",
    "continueLine2 = ' ,\\n'\n",
    "closeLine = ' .\\n'\n",
    "\n",
    "# Helpers shared by the row-processing code\n",
    "\n",
    "def literal(text):\n",
    "    \"\"\"Return text as a double-quoted Turtle literal, escaping backslash, double quote, LF and CR.\"\"\"\n",
    "    escaped = text.replace('\\\\', '\\\\\\\\').replace('\"', '\\\\\"')\n",
    "    escaped = escaped.replace('\\n', '\\\\n').replace('\\r', '\\\\r')\n",
    "    return '\"' + escaped + '\"'\n",
    "\n",
    "def flatten(text, filler=''):\n",
    "    \"\"\"Turn newlines into spaces, then replace every pair of whitespace characters with filler.\"\"\"\n",
    "    return re.sub(r'\\s\\s', filler, text.replace('\\n', ' '))\n",
    "\n",
    "def join_names(*parts):\n",
    "    \"\"\"Join the parts with single spaces, dropping blank parts and collapsing inner whitespace.\"\"\"\n",
    "    return ' '.join(' '.join(parts).split())\n",
    "\n",
    "def emit(output, subject, predicate, object1):\n",
    "    \"\"\"Write one complete triple, terminated by closeLine, to output.\"\"\"\n",
    "    output.write(triple(subject, predicate, object1) + closeLine)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Namespaces declared in the Turtle header, in the order they are written\n",
    "HEADER_COORDS = (\n",
    "    datiniCoords, personAuthCoords, hasTypeCoords, hasTypePCoords,\n",
    "    manMadeObjectCoords, carriesCoords, informationObjectCoords, identifiedByCoords,\n",
    "    titleCoords, labelCoords, identifierCoords, wasBroughtCoords,\n",
    "    typeCoords, carriedByCoords, personCoords, stringCoords,\n",
    "    noteCoords, wasBornCoords, diedCoords, birthCoords,\n",
    "    deathCoords, hasTimeSpanCoords, timeSpanCoords, materialCoords,\n",
    "    consistCoords, conditionCoords, hasConditionCoords, refersCoords,\n",
    "    entityCoords, refersHasTypeCoords, hasNoteCoords, creationCoords,\n",
    "    hasCurrentPermanentLocationCoords, placeCoords, hasCurrentOwnerCoords, groupCoords,\n",
    "    eventCoords, wasPresentCoords, nsCoords, roleOfCoords,\n",
    "    hasDomainCoords, pcarriedByCoords, hasRangeCoords, documentsCoords,\n",
    "    tookPlaceCoords, fallsCoords, attributeCoords, assignedAttrCoords,\n",
    "    assignedCoords, composedCoords, hasAlternativeFormCoords, foafCoords,\n",
    "    schemaCoords, cidocCoords, matrCoords, patrCoords,\n",
    "    rdfsCoords, residenceCoords,\n",
    ")\n",
    "\n",
    "def writeTTLHeader(output):\n",
    "    \"\"\"Write one '@prefix' line per entry of HEADER_COORDS, followed by a blank line.\n",
    "\n",
    "    output: writable text stream.\n",
    "    \"\"\"\n",
    "    for coords in HEADER_COORDS:\n",
    "        output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)\n",
    "    output.write('\\n')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "filePrefix = 'gettatelli_'\n",
    "fileType = 'newdataset'\n",
    "max_entries = 1000000000\n",
    "\n",
    "# CSV column names that are long or contain an apostrophe\n",
    "COL_ROLE = \"funzione nell'evento\"\n",
    "COL_FOUNDLING = 'nome gettatello tenuto o con cui si è in relazione ( trovato o consegnato)'\n",
    "\n",
    "# NOTE(review): every IRI placeholder below is the empty string in this notebook.\n",
    "# The relazionenoid and E13placeHolder assignments were unterminated string\n",
    "# literals (a syntax error); they are set to '' like the other placeholders.\n",
    "# Restore the real IRI templates (built from the local ids) before using the output.\n",
    "\n",
    "\n",
    "def write_spouse(output, row, e21placeHolder):\n",
    "    \"\"\"Emit the husband named in row and the E13 attribute assignment linking him to the wife.\n",
    "\n",
    "    output: writable text stream.\n",
    "    row: one csv.DictReader row.\n",
    "    e21placeHolder: subject term of the wife.\n",
    "    \"\"\"\n",
    "    # When the husband's surname is empty, the value of the 'cognome' column is used instead\n",
    "    surname = row['cognome del marito'] if row['cognome del marito'] != '' else row['cognome']\n",
    "    # Local id meant for the husband's IRI (unused while the IRI templates are empty)\n",
    "    idmarito = (row['1° nome proprio marito'] + row['2° nome proprio marito'] + row['patronimico del marito'] + surname).replace(' ', '').replace(\"'\", '') + row['CODICE REGISTRO'].replace(' ', '').replace('°', '')\n",
    "    relazionenoid = ''\n",
    "    E13placeHolder = ''\n",
    "    E55placeHolder = ''\n",
    "    # Name parts are joined with exactly one space; patronymic and surname used to run together\n",
    "    labelmarito = join_names(row['1° nome proprio marito'], row['2° nome proprio marito'], row['patronimico del marito'], surname).title()\n",
    "\n",
    "    emit(output, E13placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E13_Attribute_Assignment')\n",
    "    emit(output, E13placeHolder, cidocCoords.prefix + 'P141_assigned', e21placeHolder)\n",
    "    emit(output, E13placeHolder, rdfsCoords.prefix + 'label', literal(labelmarito + ' coniuge di ' + row['nome rilevato'].title()))\n",
    "    emit(output, relazionenoid, foafCoords.prefix + 'name', literal(labelmarito))\n",
    "    emit(output, relazionenoid, cidocCoords.prefix + 'P141_assigned', E13placeHolder)\n",
    "    emit(output, relazionenoid, rdfsCoords.prefix + 'label', literal(labelmarito))\n",
    "    emit(output, relazionenoid, nsCoords.prefix + 'type', personCoords.prefix)\n",
    "    emit(output, relazionenoid, nsCoords.prefix + 'type', personCoords.prefix + 'Person')\n",
    "    # NOTE(review): the FOAF vocabulary spells this class foaf:Person - confirm before changing\n",
    "    emit(output, relazionenoid, nsCoords.prefix + 'type', foafCoords.prefix + 'person')\n",
    "    emit(output, E13placeHolder, cidocCoords.prefix + 'P42_assigned', E55placeHolder)\n",
    "    emit(output, E55placeHolder, rdfsCoords.prefix + 'label', '\"Moglie\"')\n",
    "\n",
    "    emit(output, relazionenoid, foafCoords.prefix + 'givenName', literal(join_names(row['1° nome proprio marito'], row['2° nome proprio marito']).title()))\n",
    "    if row['cognome del marito'] != '':\n",
    "        emit(output, relazionenoid, foafCoords.prefix + 'familyName', literal(flatten(row['cognome del marito']).title()))\n",
    "    if row['patronimico del marito'] != '':\n",
    "        emit(output, relazionenoid, personCoords.prefix + 'patronymicName', literal(flatten(row['patronimico del marito']).title()))\n",
    "\n",
    "\n",
    "def write_person(output, row):\n",
    "    \"\"\"Emit every triple describing the person named in row['nome rilevato'].\n",
    "\n",
    "    output: writable text stream.\n",
    "    row: one csv.DictReader row.\n",
    "    \"\"\"\n",
    "    # Local id meant for the person's IRI (unused while the IRI templates are empty)\n",
    "    partecipanteevento = row['nome rilevato'].replace(' ', '').replace(\"'\", '')\n",
    "    e21placeHolder = ''\n",
    "    e62placeHolder = ''\n",
    "    # NOTE(review): replace(' ', '') drops every space from the displayed name - confirm this is intended\n",
    "    nome = literal(row['nome rilevato'].replace(' ', '').strip().title())\n",
    "\n",
    "    # PERSONA\n",
    "    emit(output, e21placeHolder, hasTypeCoords.prefix, personCoords.prefix)\n",
    "    emit(output, e21placeHolder, foafCoords.prefix + 'name', nome)\n",
    "    emit(output, e21placeHolder, labelCoords.prefix, nome)\n",
    "    emit(output, e21placeHolder, noteCoords.prefix, e62placeHolder)\n",
    "    emit(output, e62placeHolder, hasTypeCoords.prefix, stringCoords.prefix)\n",
    "    emit(output, e62placeHolder, hasTypePCoords.prefix, '\"Fonte\"')\n",
    "    emit(output, e62placeHolder, labelCoords.prefix, '\"Fonte: Archivio di Stato di Prato - Fondo Ospedale della Misericordia e Dolce\"')\n",
    "\n",
    "    # Note linking the person to the foundling, only when both columns are filled\n",
    "    if row[COL_ROLE] != '' and row[COL_FOUNDLING] != '':\n",
    "        e62placeHolder1 = ''\n",
    "        emit(output, e21placeHolder, noteCoords.prefix, e62placeHolder1)\n",
    "        emit(output, e62placeHolder1, hasTypeCoords.prefix, stringCoords.prefix)\n",
    "        emit(output, e62placeHolder1, hasTypePCoords.prefix, '\"Nota collegamento con gettatello\"')\n",
    "        emit(output, e62placeHolder1, labelCoords.prefix, literal(row[COL_ROLE].title() + ' di ' + row[COL_FOUNDLING].title()))\n",
    "\n",
    "    for column in ('titolo1', 'titolo2'):\n",
    "        if row[column] not in ('', ' '):\n",
    "            emit(output, e21placeHolder, schemaCoords.prefix + 'honorificPrefix', literal(flatten(row[column], ' ').strip()))\n",
    "\n",
    "    if row['soprannome'] not in ('', ' '):\n",
    "        emit(output, e21placeHolder, schemaCoords.prefix + 'alternateName', literal(flatten(row['soprannome']).title()))\n",
    "\n",
    "    if row['m/f'] != '':\n",
    "        emit(output, e21placeHolder, foafCoords.prefix + 'gender', literal(flatten(row['m/f'])))\n",
    "\n",
    "    if row['nome proprio'] not in ('', ' '):\n",
    "        given = join_names(row['nome proprio'], row['2° nome proprio'], row['3° nome proprio'], row['4° nome proprio'])\n",
    "        emit(output, e21placeHolder, foafCoords.prefix + 'givenName', literal(given.title()))\n",
    "\n",
    "    if row['cognome'] not in ('', ' '):\n",
    "        emit(output, e21placeHolder, foafCoords.prefix + 'familyName', literal(flatten(row['cognome']).strip().title()))\n",
    "\n",
    "    if row['patronimico'] not in ('', ' '):\n",
    "        emit(output, e21placeHolder, patrCoords.prefix, literal(flatten(row['patronimico']).title()))\n",
    "\n",
    "    # Matronymic: emitted only when at least one of the three columns is not blank\n",
    "    matronimico = join_names(row['matronimico 1° nome'], row['matronimico 2° nome'], row['patronimico della madre']).title()\n",
    "    if matronimico != '':\n",
    "        emit(output, e21placeHolder, matrCoords.prefix, literal(matronimico))\n",
    "\n",
    "    if row['1° nome proprio marito'] not in ('', ' '):\n",
    "        write_spouse(output, row, e21placeHolder)\n",
    "\n",
    "    # The ancestor relations ('avo 1' / 'avo 2') were disabled in this notebook and are not emitted\n",
    "    output.write('\\n')\n",
    "\n",
    "\n",
    "csv_path = import_dir + filePrefix + fileType + '.csv'\n",
    "ttl_path = export_dir + filePrefix + fileType + '_person_name.ttl'\n",
    "\n",
    "with open(csv_path, newline='', encoding='utf-8') as csv_file, open(ttl_path, 'w', encoding='utf-8') as output:\n",
    "    reader = csv.DictReader(csv_file)\n",
    "    writeTTLHeader(output)\n",
    "    # The index ii is used to process a limited number of entries for testing purposes\n",
    "    for ii, row in enumerate(reader, start=1):\n",
    "        # Skip the first row returned by the reader as it carries info we don't want to triplify\n",
    "        # (csv.DictReader has already consumed the header line as field names)\n",
    "        if ii == 1:\n",
    "            continue\n",
    "        if row['nome rilevato'] != '':\n",
    "            write_person(output, row)\n",
    "        # Limit number of entries processed (if desired)\n",
    "        if ii > max_entries:\n",
    "            break\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
  },
  "kernelspec": {
   "display_name": "Python 3.9.0 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  },
  "metadata": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}