CSV_to_RDF_Luoghi.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. # Utilities to read/write csv files
  2. import csv
  3. # Utilities to handle character encodings
  4. import unicodedata
  5. # Ordered Dicts
  6. from collections import OrderedDict
  7. import json
  8. # OPTIONAL IMPORTS
  9. # For timestamping/simple speed tests
  10. from datetime import datetime
  11. # Random number generator
  12. from random import *
  13. # System & command line utilities
  14. import sys
  15. # Json for the dictionary
  16. import json
  # Directory holding the source CSV tables (absolute, machine-specific path).
  import_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/'
  # Directory that receives the generated TTL files.
  export_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Carica/'
  # Custom class to store URIs + related info for the ontologies/repositories
  class RDFcoords:
      """Namespace coordinates of one ontology or repository.

      Attributes:
          uri: Namespace IRI, already wrapped in angle brackets, as it is
              written after the prefix in an ``@prefix`` declaration.
          prefix: Prefix label including the trailing colon (e.g. ``'crm:'``);
              it is concatenated directly with local names.
          code: Optional extra value; defaults to ``None`` and is not read by
              the code shown in this script.
      """

      def __init__(self, uri, prefix, code=None):
          self.uri = uri
          self.prefix = prefix
          self.code = code

      def __repr__(self):
          return '{}({!r}, {!r}, {!r})'.format(
              type(self).__name__, self.uri, self.prefix, self.code)
  # Repositories
  # Each entry pairs a namespace IRI with the prefix used for it in the TTL
  # output; writeTTLHeader() emits one @prefix declaration per entry.
  museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
  tgnCoords = RDFcoords('<http://vocab.getty.edu/tgn/>', 'tgn:')
  placeCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'pl:')
  repettiCoords = RDFcoords('<http://193.205.4.99/repetti/tester.php?idx=>', 'rpt:')
  cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
  nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
  # Basic functions for triples / shortened triples in TTL format
  def triple(subject, predicate, object1):
      """Return ``subject predicate object1`` separated by single spaces.

      The statement terminator is not included; the caller appends it.
      """
      return ' '.join((subject, predicate, object1))
  def doublet(predicate, object1):
      """Return `` predicate object1`` with a leading space.

      Shortened form of a triple for continuing a statement whose subject
      has already been written; no terminator is appended.
      """
      return ' ' + ' '.join((predicate, object1))
  def singlet(object1):
      """Return ``object1`` prefixed with a single space.

      Shortened form of a triple for continuing a statement whose subject
      and predicate have already been written; no terminator is appended.
      """
      return ' ' + object1
  # Line endings in TTL format
  # ' ;' continues with a new predicate for the same subject.
  continueLine1 = ' ;\n'
  # ' ,' continues with a new object for the same subject and predicate.
  continueLine2 = ' ,\n'
  # ' .' terminates the statement.
  closeLine = ' .\n'
  def writeTTLHeader(output):
      """Write the ``@prefix`` declarations followed by a blank line.

      Args:
          output: Writable text stream; only its ``write`` method is used.
      """
      # Declaration order is kept exactly as in the original header.
      namespaces = (
          museoCoords,
          tgnCoords,
          placeCoords,
          repettiCoords,
          cidocCoords,
          aatCoords,
          schemaCoords,
          nsCoords,
          owlCoords,
      )
      for coords in namespaces:
          output.write('@prefix ' + coords.prefix + ' ' + coords.uri + closeLine)
      output.write('\n')
  # Base name shared by the input CSV and the output TTL file.
  file = "Luoghi"
  # Maximum number of CSV rows to convert; lower it for quick test runs.
  max_entries = 1000000000


  def _ttl_string(text):
      """Return *text* as a double-quoted Turtle string literal.

      Backslashes, double quotes and line breaks are escaped so that a place
      name containing them cannot break the statement it is written into.
      """
      escaped = (text.replace('\\', '\\\\')
                 .replace('"', '\\"')
                 .replace('\n', '\\n')
                 .replace('\r', '\\r'))
      return '"' + escaped + '"'


  def _place_triples(row):
      """Build the TTL statements for one row of the places CSV.

      Reads the columns 'Places', 'TGN', 'EAC-ID' and
      'Repetti (solo Toscana)'. The place gets a local ``mpp:`` placeholder
      typed as crm:E53_Place and labelled with its name; every identifier
      that is filled in is linked with owl:sameAs to the placeholder and to
      the other identifiers, in both directions.

      Returns:
          A list of complete statements, each ending with ``closeLine``.
      """
      label = _ttl_string(row['Places'])
      # The placeholder's local name is the place name with spaces removed.
      # NOTE(review): other characters that are not legal in a Turtle local
      # name (apostrophes, commas, ...) are passed through unchanged, so the
      # generated names stay identical to those produced before.
      placeholder = museoCoords.prefix + row['Places'].replace(' ', '')

      rdf_type = nsCoords.prefix + 'type'
      rdfs_label = schemaCoords.prefix + 'label'
      same_as = owlCoords.prefix + 'sameAs'
      e53_place = cidocCoords.prefix + 'E53_Place'

      # (prefix, cell value, whether the subject also receives an rdfs:label)
      # NOTE(review): the EAC-ID subject was never given a label, unlike the
      # TGN and Repetti subjects; kept as is - confirm whether intentional.
      candidates = (
          (tgnCoords.prefix, row['TGN'], True),
          (placeCoords.prefix, row['EAC-ID'], False),
          (repettiCoords.prefix, row['Repetti (solo Toscana)'], True),
      )
      # An empty cell (or a missing one, which DictReader reports as None)
      # means the place has no identifier in that repository.
      external = [(prefix + value, labelled)
                  for prefix, value, labelled in candidates if value]

      statements = [
          triple(placeholder, rdf_type, e53_place),
          triple(placeholder, rdfs_label, label),
      ]
      # Placeholder -> every known identifier. The Repetti link used to be
      # built here but never written to the output.
      statements.extend(triple(placeholder, same_as, uri)
                        for uri, _ in external)

      for uri, labelled in external:
          statements.append(triple(uri, same_as, placeholder))
          statements.append(triple(uri, rdf_type, e53_place))
          if labelled:
              statements.append(triple(uri, rdfs_label, label))
          # Cross-links to the other identifiers of the same place.
          statements.extend(triple(uri, same_as, other)
                            for other, _ in external if other != uri)

      return [statement + closeLine for statement in statements]


  # Turtle documents are UTF-8, so the output encoding is set explicitly; the
  # CSV is still read with the platform default encoding, as before.
  with open(import_dir + file + '.csv', newline="") as csv_file, open(
          export_dir + file + '.ttl', 'w', encoding='utf-8') as output:
      reader = csv.DictReader(csv_file)
      writeTTLHeader(output)
      for ii, row in enumerate(reader, start=1):
          # The index ii is used to process a limited number of entries for
          # testing purposes: stop once max_entries rows have been converted.
          if ii > max_entries:
              break
          output.writelines(_place_triples(row))