Browse Source

Elimina 'Museo/CSV_to_RDF/CSV_to_RDF_mpp.py'

Alessia 2 years ago
parent
commit
4d66d8b41d
1 changed file with 0 additions and 346 deletions
  1. 0 346
      Museo/CSV_to_RDF/CSV_to_RDF_mpp.py

+ 0 - 346
Museo/CSV_to_RDF/CSV_to_RDF_mpp.py

@@ -1,346 +0,0 @@
-# Utilities to read/write csv files
-import csv
-# Utilities to handle character encodings
-import unicodedata
-# Ordered Dicts
-from collections import OrderedDict
-
-import json
-
-# OPTIONAL IMPORTS
-
-# For timestamping/simple speed tests
-from datetime import datetime
-# Random number generator
-from random import *
-# System & command line utilities
-import sys
-# Json for the dictionary
-import json
-
-import_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Ospedale/mod/'
-export_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Ospedale/mod/'
-
-
-# Custom class to store URIs + related info for the ontologies/repositories
-
-class RDFcoords:
-    def __init__(self, uri, prefix, code=None):
-        self.uri = uri
-        self.prefix = prefix
-        self.code = code
-
-
-# Repositories
-museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
-
-cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
-aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
-nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
-schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
-
-
-# Basic functions for triples / shortened triples in TTL format
-
-def triple(subject, predicate, object1):
-    line = subject + ' ' + predicate + ' ' + object1
-    return line
-
-
-def doublet(predicate, object1):
-    line = '    ' + predicate + ' ' + object1
-    return line
-
-
-def singlet(object1):
-    line = '        ' + object1
-    return line
-
-
-# Line endings in TTL format
-continueLine1 = ' ;\n'
-continueLine2 = ' ,\n'
-closeLine = ' .\n'
-
-
-def writeTTLHeader(output):
-    output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
-    output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
-    output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
-    output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
-    output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
-
-    output.write('\n')
-
-
-filePrefix = 'SR20OA_'
-fileType = 'Ospedale'
-max_entries = 1000000000
-
-with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
-        export_dir + filePrefix + fileType + '.ttl', 'w') as output:
-    reader = csv.DictReader(csv_file)
-    writeTTLHeader(output)
-    first = True
-    ii = 0
-    for row in reader:
-        # The index ii is used to process a limited number of entries for testing purposes
-        ii = ii + 1
-
-        if row['RVEL'] == '' or row['RVEL'] == '0':
-
-            sb = ''
-            subj = ''
-            pp = row['OGTD'] + ' (' + row['ACC'] + ') '
-            if row['SGTI'] != '':
-                sb = pp + row['SGTI']
-            if row['LDCN'] != '':
-                subj = sb + ' in ' + row['LDCN']
-            else:
-                subj = sb
-
-            # Triplify the 'codice' -- should exist for every entry
-            codice = ''
-            if (row['NCTR'] != '' and row['NCTN'] != ''):
-                codice = row['NCTR'] + row['NCTN']
-            '''if (row['RVEL'] != ' '):
-                codice = codice + "-" + row['RVEL']'''
-
-            url = row['URL']
-
-            # placeHolders
-            datplaceHolder = museoCoords.prefix + url
-            e42placeHolder = museoCoords.prefix + url + '_E42'
-            e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
-            e73placeHolder = museoCoords.prefix + url + '_E73'
-            e55placeHolder = museoCoords.prefix + url + '_E55'
-            e35placeHolder1 = museoCoords.prefix + url + '_E35'
-            e53placeHolder = museoCoords.prefix + url + '_E53'
-            e1placeHolder = museoCoords.prefix + url + '_E1'
-            e74placeHolder = museoCoords.prefix + url + '_E74'
-
-            if (codice != ''):
-                line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
-                output.write(line)
-                line = triple(e42placeHolder, nsCoords.prefix + 'type',
-                              cidocCoords.prefix + 'E42_Identifier') + closeLine
-                output.write(line)
-                '''line = triple(e42placeHolder, cidocCoords.prefix + 'P2_has_type',
-                              aatCoords.prefix + '300404626') + closeLine
-                output.write(line)
-                line = triple(aatCoords.prefix + '300404626', schemaCoords.prefix + 'label',
-                              "identifier") + closeLine
-                output.write(line)'''
-                line = triple(e42placeHolder, schemaCoords.prefix + 'label',
-                              '\"Codice univoco del bene: ' + codice + '\"') + closeLine
-                output.write(line)
-
-                '''
-                # AS
-                e55placeHolder = "<http://www.museodipalazzopretorio.it/" + codice + '/' + identifierCoords.code + ">"
-                line = triple(e42placeHolder, hasTypePCoords.prefix, e55placeHolder) + closeLine
-                output.write(line)
-                line = triple(e55placeHolder, hasTypeCoords.prefix, typeCoords.prefix) + closeLine
-                output.write(line)
-                line = triple(e55placeHolder, labelCoords.prefix, '\"Codice univoco del bene\"') + closeLine
-                output.write(line)
-                # Fine AS
-                '''
-
-            # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
-            line = triple(datplaceHolder, nsCoords.prefix + 'type',
-                          cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
-            output.write(line)
-            # Added by AS
-            line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
-            output.write(line)
-            # End AS
-            line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
-            output.write(line)
-            line = triple(e73placeHolder, nsCoords.prefix + 'type',
-                          cidocCoords.prefix + 'E73_Information_Object') + closeLine
-            output.write(line)
-
-            # AS
-            ss = ''
-            if row['SGTI'] != '':
-                ss = row['SGTI']
-            else:
-                ss = 'senza titolo'
-
-            line = triple(e73placeHolder, schemaCoords.prefix + 'label',
-                          '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
-            output.write(line)
-
-            # E73 - P2 - E55
-
-            tt = ''
-            typeLabel = ''
-
-            if row['OGTD'] == 'dipinto':
-                tt = aatCoords.prefix + "300033618"
-            elif row['OGTD'] == 'rilievo':
-                tt = aatCoords.prefix + "300047230"
-            elif row['OGTD'] == 'polittico':
-                tt = aatCoords.prefix + "300178235"
-            elif row['OGTD'] == 'predella':
-                tt = aatCoords.prefix + "300003745"
-            else:
-                tt = e55placeHolder
-
-            line = triple(e73placeHolder,
-                          cidocCoords.prefix + 'P2_has_type',
-                          tt) + closeLine
-            output.write(line)
-            line = triple(tt, schemaCoords.prefix + 'label',
-                          '\"' + row['OGTD'] + '\"') + closeLine
-            output.write(line)
-
-            # E73 - P1 - E35
-
-            if row['SGTT'] != '':
-                line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
-                output.write(line)
-                line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
-                output.write(line)
-                line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
-                output.write(line)
-
-                line = triple(e35placeHolder1, cidocCoords.prefix + 'P2_has_type',
-                              aatCoords.prefix + "300417193") + closeLine
-                output.write(line)
-                line = triple(aatCoords.prefix + "300417193", schemaCoords.prefix + 'label',
-                              '\"titolo\"') + closeLine
-                output.write(line)
-
-            # E22 - P62 - E1
-
-            if row['SGTI'] != '':
-                line = triple(datplaceHolder,
-                              cidocCoords.prefix + 'P62_depicts',
-                              e1placeHolder) + closeLine
-                output.write(line)
-                line = triple(e1placeHolder,
-                              nsCoords.prefix + 'type',
-                              cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
-                output.write(line)
-                line = triple(e1placeHolder,
-                              schemaCoords.prefix + 'label', '\"' +
-                              row['SGTI'] + '\"') + closeLine
-                output.write(line)
-
-            # Double title - if used, delete E73 - E35
-
-            '''if row['SGTI'] != 'None' and row['SGTI'] != ' ':
-
-                line = triple(e73placeHolder, identifiedByCoords.prefix, "<http://www.museodipalazzopretorio.it/" + codice + "/sgti>") + closeLine
-                output.write(line)
-                line = triple("<http://www.museodipalazzopretorio.it/" + codice + "/sgti>",
-                              hasTypePCoords.prefix,
-                              "<http://www.museodipalazzopretorio.it/" + codice + "/ico>") + closeLine
-                output.write(line)
-                line = triple("<http://www.museodipalazzopretorio.it/" + codice + "/sgti>",
-                              labelCoords.prefix,
-                              '\"' + row['SGTI'] + '\"') + closeLine
-                output.write(line)
-                line = triple("<http://www.museodipalazzopretorio.it/" + codice + "/ico>",
-                              labelCoords.prefix,
-                              '\"' + 'Identificazione Iconografica' + '\"') + closeLine
-                output.write(line)
-
-
-            if row['SGTT'] != ' ':
-
-                line = triple(e73placeHolder,
-                              identifiedByCoords.prefix,
-                              "<http://www.museodipalazzopretorio.it/" + codice + "/sgtt>") + closeLine
-                output.write(line)
-                line = triple("<http://www.museodipalazzopretorio.it/" + codice + "/sgtt>",
-                              hasTypePCoords.prefix,
-                              "<http://www.museodipalazzopretorio.it/" + codice + "/titolo>") + closeLine
-                output.write(line)
-                line = triple("<http://www.museodipalazzopretorio.it/" + codice + "/sgtt>",
-                              labelCoords.prefix,
-                              '\"' + row['SGTT'] + '\"') + closeLine
-                output.write(line)
-                line = triple("<http://www.museodipalazzopretorio.it/" + codice + "/titolo>",
-                              labelCoords.prefix,
-                              '\"' + 'Titolo' + '\"') + closeLine
-                output.write(line)'''
-
-            # End of double title
-
-            # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
-
-            if row['ESC'] == 'C100005':
-                line = triple(datplaceHolder,
-                              cidocCoords.prefix + 'P52_has_current_owner',
-                              e74placeHolder) + closeLine
-                output.write(line)
-                line = triple(e74placeHolder,
-                              nsCoords.prefix + 'type',
-                              cidocCoords.prefix + 'E74_Group') + closeLine
-                output.write(line)
-                line = triple(e74placeHolder,
-                              schemaCoords.prefix + 'label',
-                              '\"Museo di Palazzo Pretorio\"') + closeLine
-                output.write(line)
-
-                line = triple(e74placeHolder,
-                              cidocCoords.prefix + 'P1_is_identified_by',
-                              e42CplaceHolder) + closeLine
-                output.write(line)
-                line = triple(e42CplaceHolder,
-                              nsCoords.prefix + 'type',
-                              cidocCoords.prefix + 'E42_Identifier') + closeLine
-                output.write(line)
-                line = triple(e42CplaceHolder,
-                              schemaCoords.prefix + 'label',
-                              '\"' + row['ESC'] + '\"') + closeLine
-                output.write(line)
-
-                line = triple(e42CplaceHolder,
-                              cidocCoords.prefix + 'P2_has_type',
-                              aatCoords.prefix + '300404626') + closeLine
-                output.write(line)
-                line = triple(aatCoords.prefix + '300404626',
-                              schemaCoords.prefix + 'label',
-                              '\"identificatore numerico\"') + closeLine
-                output.write(line)
-
-
-            currentLocation = ''
-
-            # E22 - P54 - E53
-            if row['LDCN'] != '':
-                if row['LDCS'] != '':
-                    currentLocation = row['LDCS']
-                else:
-                    currentLocation = currentLocation
-                if row['LDCM'] != '':
-                    currentLocation = currentLocation + ', ' + row['LDCM']
-                else:
-                    currentLocation = currentLocation
-                if row['LDCN'] != '':
-                    currentLocation = currentLocation + ', ' + row['LDCN']
-                else:
-                    currentLocation = currentLocation
-
-                currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
-
-                line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
-                              e53placeHolder) + closeLine
-                output.write(line)
-                line = triple(e53placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E53_Place') + closeLine
-                output.write(line)
-                line = triple(e53placeHolder, schemaCoords.prefix + 'label',
-                              '\"' + currentLocation + '\"') + closeLine
-                output.write(line)
-            # End AS
-
-            output.write('\n')
-        #
-        #
-        # Limit number of entries processed (if desired)
-        if (ii > max_entries):
-            break