# CSV_to_RDF_Datini.py
  1. # Utilities to read/write csv files
  2. import csv
  3. # Utilities to handle character encodings
  4. import unicodedata
  5. # Ordered Dicts
  6. from collections import OrderedDict
  7. from urllib.request import urlopen
  8. from bs4 import BeautifulSoup
  9. import json
# OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
  19. import_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Datini/mod/'
  20. export_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Carica/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code=None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
  27. # Repositories
  28. museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
  29. autCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/opere/autori/>', 'aut:')
  30. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  31. aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
  32. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  33. schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  34. xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
  35. iconCoords = RDFcoords('<http://iconclass.org/>', 'ico:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
  46. # Line endings in TTL format
  47. continueLine1 = ' ;\n'
  48. continueLine2 = ' ,\n'
  49. closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
  52. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  53. output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
  54. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  55. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  56. output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)
  57. output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)
  58. output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)
  59. output.write('\n')
  60. filePrefix = 'SR20OA_'
  61. fileType = 'Datini'
  62. max_entries = 1000000000
  63. def get_aut_url(code):
  64. aut_file = open(import_dir + 'AR20AUT_' + fileType + '.csv', newline="")
  65. reader = csv.DictReader(aut_file)
  66. for row in reader:
  67. role = ''
  68. if row['AUTQ'] != '':
  69. role = row['AUTQ']
  70. else:
  71. role = ''
  72. if row['AUTH'] == code:
  73. return [row['URL'], role]
  74. def get_role(role):
  75. role_file = open('/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_RUOLI.csv', newline="")
  76. reader = csv.DictReader(role_file)
  77. for row in reader:
  78. if row['Label'] == role:
  79. return row['AAT']
  80. def get_elem(mtc):
  81. mtc_file = open('/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_MTC.csv', newline="")
  82. reader = csv.DictReader(mtc_file)
  83. for row in reader:
  84. if row['MTC'] == mtc:
  85. return [row['AAT'], row['Type']]
  86. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  87. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  88. reader = csv.DictReader(csv_file)
  89. writeTTLHeader(output)
  90. first = True
  91. ii = 0
  92. for row in reader:
  93. # The index ii is used to process a limited number of entries for testing purposes
  94. ii = ii + 1
  95. sb = ''
  96. subj = ''
  97. pp = row['OGTD'] + ' (' + row['ACC'] + ') '
  98. if row['SGTI'] != '':
  99. sb = pp + row['SGTI']
  100. if row['LDCN'] != '':
  101. subj = sb + ' in ' + row['LDCN']
  102. else:
  103. subj = sb
  104. # Triplify the 'codice' -- should exist for every entry
  105. codice = ''
  106. if (row['NCTR'] != '' and row['NCTN'] != ''):
  107. codice = row['NCTR'] + row['NCTN']
  108. codiceP = ''
  109. if (row['AUTH'] != ''):
  110. codiceP = row['AUTH']
  111. place = ''
  112. if (row['PRVC'] != ''):
  113. place = row['PRVC']
  114. columnName = list(row)
  115. url = row['URL']
  116. # placeHolders
  117. datplaceHolder = museoCoords.prefix + url
  118. e1placeHolder = museoCoords.prefix + url + '_E1'
  119. e3placeHolder = museoCoords.prefix + url + 'E3'
  120. e10placeHolder = museoCoords.prefix + url + '_E10'
  121. e12placeHolder = museoCoords.prefix + url + '_E12'
  122. e13placeHolder = museoCoords.prefix + url + '_E13'
  123. e21placeHolder = museoCoords.prefix + url + '_InE21'
  124. e25placeHolder = museoCoords.prefix + url + '_E25'
  125. e34placeHolder = museoCoords.prefix + url + '_E34'
  126. e35placeHolder1 = museoCoords.prefix + url + '_E35'
  127. e42placeHolder = museoCoords.prefix + url + '_E42'
  128. e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
  129. e65placeHolder = museoCoords.prefix + url + '_InE65'
  130. e73placeHolder = museoCoords.prefix + url + '_E73'
  131. e74placeHolder = museoCoords.prefix + url + '_E74'
  132. if (codice != ''):
  133. line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
  134. output.write(line)
  135. line = triple(e42placeHolder, nsCoords.prefix + 'type',
  136. cidocCoords.prefix + 'E42_Identifier') + closeLine
  137. output.write(line)
  138. line = triple(e42placeHolder,
  139. schemaCoords.prefix + 'label',
  140. '\"' + codice + '\"') + closeLine
  141. output.write(line)
  142. ###
  143. line = triple(e42placeHolder,
  144. cidocCoords.prefix + 'P2_has_type',
  145. '\"Codice univoco del bene (NCT)\"') + closeLine
  146. output.write(line)
  147. # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
  148. line = triple(datplaceHolder, nsCoords.prefix + 'type',
  149. cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
  150. output.write(line)
  151. # Added by AS
  152. line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
  153. output.write(line)
  154. # End AS
  155. line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
  156. output.write(line)
  157. line = triple(e73placeHolder, nsCoords.prefix + 'type',
  158. cidocCoords.prefix + 'E73_Information_Object') + closeLine
  159. output.write(line)
  160. # AS
  161. ss = ''
  162. if row['SGTI'] != '':
  163. ss = row['SGTI']
  164. else:
  165. ss = 'senza titolo'
  166. line = triple(e73placeHolder, schemaCoords.prefix + 'label',
  167. '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
  168. output.write(line)
  169. # E73 - P2 - E55
  170. tt = ''
  171. typeLabel = ''
  172. if row['OGTD'] == 'dipinto':
  173. tt = aatCoords.prefix + "300033618"
  174. elif row['OGTD'] == 'rilievo':
  175. tt = aatCoords.prefix + "300047230"
  176. elif row['OGTD'] == 'polittico':
  177. tt = aatCoords.prefix + "300178235"
  178. elif row['OGTD'] == 'predella':
  179. tt = aatCoords.prefix + "300003745"
  180. line = triple(e73placeHolder,
  181. cidocCoords.prefix + 'P2_has_type',
  182. tt) + closeLine
  183. output.write(line)
  184. line = triple(tt, schemaCoords.prefix + 'label',
  185. '\"' + row['OGTD'] + '\"') + closeLine
  186. output.write(line)
  187. # E73 - P1 - E35
  188. if row['SGTT'] != '':
  189. line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
  190. output.write(line)
  191. line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
  192. output.write(line)
  193. line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
  194. output.write(line)
  195. # E22 - P62 - E1
  196. if row['SGTI'] != '':
  197. line = triple(datplaceHolder,
  198. cidocCoords.prefix + 'P62_depicts',
  199. e1placeHolder) + closeLine
  200. output.write(line)
  201. line = triple(e1placeHolder,
  202. nsCoords.prefix + 'type',
  203. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  204. output.write(line)
  205. line = triple(e1placeHolder,
  206. schemaCoords.prefix + 'label', '\"' +
  207. row['SGTI'] + '\"') + closeLine
  208. output.write(line)
  209. line = triple(e1placeHolder,
  210. cidocCoords.prefix + 'P2_has_type',
  211. '\"Identificazione Iconografica\"') + closeLine
  212. output.write(line)
  213. # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
  214. if row['ESC'] == 'C100005':
  215. line = triple(datplaceHolder,
  216. cidocCoords.prefix + 'P52_has_current_owner',
  217. '<http://palazzopretorio.comune.prato.it/it/>') + closeLine
  218. output.write(line)
  219. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  220. nsCoords.prefix + 'type',
  221. cidocCoords.prefix + 'E74_Group') + closeLine
  222. output.write(line)
  223. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  224. schemaCoords.prefix + 'label',
  225. '\"Museo di Palazzo Pretorio\"') + closeLine
  226. output.write(line)
  227. currentLocation = ''
  228. # E22 - P54 - E53
  229. if row['LDCN'] != '':
  230. if row['LDCS'] != '':
  231. currentLocation = row['LDCS']
  232. else:
  233. currentLocation = currentLocation
  234. if row['LDCM'] != '':
  235. currentLocation = currentLocation + ', ' + row['LDCM']
  236. else:
  237. currentLocation = currentLocation
  238. if row['LDCN'] != '':
  239. currentLocation = currentLocation + ', ' + row['LDCN']
  240. else:
  241. currentLocation = currentLocation
  242. currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
  243. line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
  244. '\"' + currentLocation + '\"') + closeLine
  245. output.write(line)
  246. e12FplaceHolder = ''
  247. if row['DTSI'] != row['DTSF']:
  248. e12FplaceHolder = museoCoords.prefix + url + '_E12F'
  249. # Write E12 Production -- should exist for every entry?
  250. # E12 P108 E22
  251. line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  252. output.write(line)
  253. line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
  254. output.write(line)
  255. # E73 P108i E12
  256. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
  257. output.write(line)
  258. if e12FplaceHolder != '':
  259. line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  260. output.write(line)
  261. line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
  262. cidocCoords.prefix + 'E12_Production') + closeLine
  263. output.write(line)
  264. line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
  265. '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
  266. output.write(line)
  267. # E73 P108i E12
  268. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
  269. output.write(line)
  270. # E12 P140i E13
  271. line = triple(e12FplaceHolder,
  272. cidocCoords.prefix + 'P140i_was_attributed_by',
  273. e13placeHolder) + closeLine
  274. output.write(line)
  275. # E12 P2
  276. line = triple(e12FplaceHolder,
  277. cidocCoords.prefix + 'P2_has_type',
  278. '\"Fine\"^^xsd:string') + closeLine
  279. output.write(line)
  280. line = triple(e12placeHolder,
  281. cidocCoords.prefix + 'P2_has_type',
  282. '\"Inizio\"^^xsd:string') + closeLine
  283. output.write(line)
  284. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  285. '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
  286. output.write(line)
  287. else:
  288. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  289. '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
  290. output.write(line)
  291. tcl = []
  292. for name in columnName:
  293. if 'TCL' in name:
  294. tcl.append(name)
  295. # E12 - P7 - E53
  296. for el in tcl:
  297. i = 0
  298. if row[el] == 'luogo di produzione':
  299. pl = ''
  300. if i == 0:
  301. pl = row['PRVC']
  302. else:
  303. pl = row['PRVC' + i]
  304. line = triple(e12placeHolder,
  305. cidocCoords.prefix + 'P7_took_place_at',
  306. museoCoords.prefix + pl) + closeLine
  307. output.write(line)
  308. if e12FplaceHolder != '':
  309. line = triple(e12FplaceHolder,
  310. cidocCoords.prefix + 'P7_took_place_at',
  311. museoCoords.prefix + pl) + closeLine
  312. output.write(line)
  313. i = i + 1
  314. # E12 - PC14 - E21
  315. if row['AUTH'] != '':
  316. aut = get_aut_url(row['AUTH'])
  317. aut_url = aut[0]
  318. aut_role = aut[1]
  319. ll = row['AUTN'] + '_' + aut_role
  320. lab = ll.replace(' ', '')
  321. label = lab.replace(',', '')
  322. AuthorPlaceholder = autCoords.prefix + aut_url
  323. line = triple(museoCoords.prefix + '_' + label,
  324. cidocCoords.prefix + 'P01_has_domain',
  325. e12placeHolder) + closeLine
  326. output.write(line)
  327. if e12FplaceHolder != '':
  328. line = triple(museoCoords.prefix + '_' + label,
  329. cidocCoords.prefix + 'P01_has_domain',
  330. e12FplaceHolder) + closeLine
  331. output.write(line)
  332. if 'AUTH1' in columnName:
  333. if row['AUTH1'] != '':
  334. aut = get_aut_url(row['AUTH1'])
  335. aut_url = aut[0]
  336. aut_role = aut[1]
  337. ll = row['AUTN1'] + '_' + aut_role
  338. lab = ll.replace(' ', '')
  339. label = lab.replace(',', '')
  340. AuthorPlaceholder = autCoords.prefix + aut_url
  341. line = triple(museoCoords.prefix + '_' + label,
  342. cidocCoords.prefix + 'P01_has_domain',
  343. e12placeHolder) + closeLine
  344. output.write(line)
  345. if e12FplaceHolder != '':
  346. line = triple(museoCoords.prefix + '_' + label,
  347. cidocCoords.prefix + 'P01_has_domain',
  348. e12FplaceHolder) + closeLine
  349. output.write(line)
  350. # E12 - PC14 - E21
  351. if 'CMMN' in columnName:
  352. if row['CMMN'] != '':
  353. cc = row['CMMN']
  354. cm = cc.replace(' ', '')
  355. cmmn = cm.replace(',', '')
  356. cmmPlaceholder = museoCoords.prefix + '_' + cmmn
  357. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  358. cidocCoords.prefix + 'P01_has_domain',
  359. e12placeHolder) + closeLine
  360. output.write(line)
  361. if e12FplaceHolder != '':
  362. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  363. cidocCoords.prefix + 'P01_has_domain',
  364. e12FplaceHolder) + closeLine
  365. output.write(line)
  366. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  367. nsCoords.prefix + 'type',
  368. cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
  369. output.write(line)
  370. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  371. schemaCoords.prefix + 'label',
  372. '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
  373. output.write(line)
  374. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  375. cidocCoords.prefix + 'P02_has_range',
  376. cmmPlaceholder) + closeLine
  377. output.write(line)
  378. line = triple(cmmPlaceholder,
  379. nsCoords.prefix + 'type',
  380. cidocCoords.prefix + 'E39_Actor') + closeLine
  381. output.write(line)
  382. line = triple(cmmPlaceholder,
  383. schemaCoords.prefix + 'label',
  384. '\"' + row['CMMN'] + '\"') + closeLine
  385. output.write(line)
  386. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  387. cidocCoords.prefix + 'P14.1_in_the_role_of',
  388. museoCoords.prefix + '_client') + closeLine
  389. output.write(line)
  390. line = triple(museoCoords.prefix + '_client',
  391. nsCoords.prefix + 'type',
  392. cidocCoords.prefix + 'E55_Type') + closeLine
  393. output.write(line)
  394. line = triple(museoCoords.prefix + '_client',
  395. schemaCoords.prefix + 'label',
  396. '\"Committente\"') + closeLine
  397. output.write(line)
  398. # E12 - P4 - E52
  399. if row['DTSI'] != '':
  400. line = triple(e12placeHolder,
  401. cidocCoords.prefix + 'P4_has_time-span',
  402. museoCoords.prefix + row['DTSI']) + closeLine
  403. output.write(line)
  404. line = triple(museoCoords.prefix + row['DTSI'],
  405. nsCoords.prefix + 'type',
  406. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  407. output.write(line)
  408. line = triple(museoCoords.prefix + row['DTSI'],
  409. schemaCoords.prefix + 'label',
  410. '\"' + row['DTSI'] + '\"') + closeLine
  411. output.write(line)
  412. if e12FplaceHolder != '':
  413. line = triple(e12FplaceHolder,
  414. cidocCoords.prefix + 'P4_has_time-span',
  415. museoCoords.prefix + row['DTSF']) + closeLine
  416. output.write(line)
  417. line = triple(museoCoords.prefix + row['DTSF'],
  418. nsCoords.prefix + 'type',
  419. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  420. output.write(line)
  421. line = triple(museoCoords.prefix + row['DTSF'],
  422. schemaCoords.prefix + 'label',
  423. '\"' + row['DTSF'] + '\"') + closeLine
  424. output.write(line)
  425. tcl = []
  426. for name in columnName:
  427. if 'TCL' in name:
  428. tcl.append(name)
  429. j = 0
  430. for el in tcl:
  431. if row[el] != '':
  432. j = j + 1
  433. last = str(j - 1)
  434. n = len(tcl) - 1
  435. for i in range(n):
  436. k = str(i + 1)
  437. if i + 1 == 1:
  438. w = ''
  439. else:
  440. w = i
  441. f = str(w)
  442. if row['TCL' + k] != '':
  443. pastActor = ''
  444. newActor = ''
  445. pl = ''
  446. if row['PRCD' + k] != '':
  447. newActor = ' a ' + row['PRCD' + k]
  448. if row['PRCD' + f] != '':
  449. pastActor = ' da ' + row['PRCD' + f]
  450. pl = row['PRCD' + f].replace(' ', '')
  451. newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
  452. line = triple(newe10placeHolder,
  453. cidocCoords.prefix + 'P30_transferred_custody_of',
  454. datplaceHolder) + closeLine
  455. output.write(line)
  456. line = triple(newe10placeHolder,
  457. nsCoords.prefix + 'type',
  458. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  459. output.write(line)
  460. line = triple(newe10placeHolder,
  461. schemaCoords.prefix + 'label',
  462. '\"Passaggio di ' + row['SGTI'] + pastActor +
  463. newActor + '\"') + closeLine
  464. output.write(line)
  465. if row['PRDI' + f] != '':
  466. timespan = row['PRDI' + f]
  467. tt = timespan.replace(' ', '')
  468. tp = tt.replace('.', '')
  469. ts = tp.replace('/', '')
  470. timespanPlaceholder = museoCoords.prefix + '_' + ts
  471. # E10 P4 E52
  472. line = triple(newe10placeHolder,
  473. cidocCoords.prefix + 'P4_has_time-span',
  474. timespanPlaceholder) + closeLine
  475. output.write(line)
  476. line = triple(timespanPlaceholder,
  477. nsCoords.prefix + 'type',
  478. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  479. output.write(line)
  480. line = triple(timespanPlaceholder,
  481. schemaCoords.prefix + 'label',
  482. '\"' + timespan + '\"') + closeLine
  483. output.write(line)
  484. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  485. newLoc = row['PRCD' + k].replace(' ', '')
  486. newActorPlaceholder = museoCoords.prefix + '_' + newLoc
  487. # E10 P26 E74 (moved to)
  488. if newActorPlaceholder != '':
  489. line = triple(newe10placeHolder,
  490. cidocCoords.prefix + 'P29_custody_received_by',
  491. newActorPlaceholder) + closeLine
  492. output.write(line)
  493. # E10 P27 E74
  494. pastActorLabel = row['PRCD' + f]
  495. line = triple(newe10placeHolder,
  496. cidocCoords.prefix + 'P28_custody_surrendered_by',
  497. pastActorPlaceholder) + closeLine
  498. output.write(line)
  499. line = triple(pastActorPlaceholder,
  500. nsCoords.prefix + 'type',
  501. cidocCoords.prefix + 'E39_Actor') + closeLine
  502. output.write(line)
  503. line = triple(pastActorPlaceholder,
  504. schemaCoords.prefix + 'label',
  505. '\"' + pastActorLabel + '\"') + closeLine
  506. output.write(line)
  507. line = triple(datplaceHolder,
  508. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  509. pastActorPlaceholder) + closeLine
  510. output.write(line)
  511. # E74 P74 E53
  512. pastResidenceLabel = row['PRVC' + f]
  513. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
  514. line = triple(pastActorPlaceholder,
  515. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  516. pastResidencePlaceHolder) + closeLine
  517. output.write(line)
  518. ####
  519. pastActor = ''
  520. newActor = ''
  521. pl = ''
  522. if row['LDCN'] != '':
  523. newActor = ' a ' + row['LDCN']
  524. if row['PRCD' + last] != '':
  525. pastActor = ' da ' + row['PRCD' + last]
  526. pl = row['PRCD' + last].replace(' ', '')
  527. line = triple(e10placeHolder,
  528. cidocCoords.prefix + 'P30_transferred_custody_of',
  529. datplaceHolder) + closeLine
  530. output.write(line)
  531. line = triple(e10placeHolder,
  532. nsCoords.prefix + 'type',
  533. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  534. output.write(line)
  535. line = triple(e10placeHolder,
  536. schemaCoords.prefix + 'label',
  537. '\"Passaggio di ' + row['SGTI'] + pastActor +
  538. newActor + '\"') + closeLine
  539. output.write(line)
  540. if row['PRDU' + last] != '':
  541. timespan = row['PRDU' + last]
  542. tt = timespan.replace(' ', '')
  543. ts = tt.replace('/', '')
  544. timespanPlaceholder = museoCoords.prefix + '_' + ts
  545. # E10 P4 E52
  546. line = triple(e10placeHolder,
  547. cidocCoords.prefix + 'P4_has_time-span',
  548. timespanPlaceholder) + closeLine
  549. output.write(line)
  550. line = triple(timespanPlaceholder,
  551. nsCoords.prefix + 'type',
  552. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  553. output.write(line)
  554. line = triple(timespanPlaceholder,
  555. schemaCoords.prefix + 'label',
  556. '\"' + timespan + '\"') + closeLine
  557. output.write(line)
  558. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  559. newLocPlaceholder = e74placeHolder
  560. # E10 P26 E74 (moved to)
  561. if newLocPlaceholder != '':
  562. line = triple(e10placeHolder,
  563. cidocCoords.prefix + 'P29_custody_received_by',
  564. newLocPlaceholder) + closeLine
  565. output.write(line)
  566. # E10 P27 E74
  567. pastActorLabel = row['PRCD' + last]
  568. line = triple(e10placeHolder,
  569. cidocCoords.prefix + 'P28_custody_surrendered_by',
  570. pastActorPlaceholder) + closeLine
  571. output.write(line)
  572. line = triple(pastActorPlaceholder,
  573. nsCoords.prefix + 'type',
  574. cidocCoords.prefix + 'E39_Actor') + closeLine
  575. output.write(line)
  576. line = triple(pastActorPlaceholder,
  577. schemaCoords.prefix + 'label',
  578. '\"' + pastActorLabel + '\"') + closeLine
  579. output.write(line)
  580. line = triple(datplaceHolder,
  581. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  582. pastActorPlaceholder) + closeLine
  583. output.write(line)
  584. # E74 P74 E53
  585. pastResidenceLabel = row['PRVC' + last]
  586. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
  587. if row['PRVP' + last] != '':
  588. pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
  589. if row['PRVR' + last] != '':
  590. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
  591. if row['PRVS' + last] != '':
  592. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
  593. line = triple(pastActorPlaceholder,
  594. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  595. pastResidencePlaceHolder) + closeLine
  596. output.write(line)
  597. line = triple(pastResidencePlaceHolder,
  598. nsCoords.prefix + 'type',
  599. cidocCoords.prefix + 'E53_Place') + closeLine
  600. output.write(line)
  601. # E22 P44 E3
  602. if row['STCC'] != '':
  603. line = triple(datplaceHolder,
  604. cidocCoords.prefix + 'P44_has_condition',
  605. e3placeHolder) + closeLine
  606. output.write(line)
  607. line = triple(e3placeHolder,
  608. nsCoords.prefix + 'type',
  609. cidocCoords.prefix + 'E3_Condition_State') + closeLine
  610. output.write(line)
  611. line = triple(e3placeHolder,
  612. schemaCoords.prefix + 'label',
  613. '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
  614. output.write(line)
  615. line = triple(e3placeHolder,
  616. cidocCoords.prefix + 'P2_has_type',
  617. '\"' + row['STCC'] + '\"') + closeLine
  618. output.write(line)
  619. # E22 P65 E34
  620. if (row['ISRI'] != ''):
  621. line = triple(datplaceHolder,
  622. cidocCoords.prefix + 'P56_bears_feature',
  623. e25placeHolder) + closeLine
  624. output.write(line)
  625. line = triple(e25placeHolder,
  626. nsCoords.prefix + 'type',
  627. cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
  628. output.write(line)
  629. line = triple(e25placeHolder,
  630. schemaCoords.prefix + 'label',
  631. '\"Iscrizione su ' + subj + '\"') + closeLine
  632. output.write(line)
  633. line = triple(e25placeHolder,
  634. cidocCoords.prefix + 'P128_carries',
  635. e34placeHolder) + closeLine
  636. output.write(line)
  637. line = triple(e34placeHolder,
  638. nsCoords.prefix + 'type',
  639. cidocCoords.prefix + 'E34_Inscription') + closeLine
  640. output.write(line)
  641. line = triple(e34placeHolder,
  642. schemaCoords.prefix + 'label',
  643. '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
  644. output.write(line)
  645. pl = row['ISRI'].replace(' ', '-')
  646. pla = pl.replace('.', '')
  647. line = triple(e34placeHolder,
  648. cidocCoords.prefix + 'P3_has_note',
  649. '\"' + row['ISRI'] + '\"') + closeLine
  650. output.write(line)
  651. # E34 P2 E55
  652. if (row['ISRT'] != ''):
  653. rr = row['ISRT'].replace(' ', '')
  654. line = triple(e34placeHolder,
  655. cidocCoords.prefix + 'P2_has_type',
  656. '\"' + row['ISRT'] + '\"') + closeLine
  657. output.write(line)
  658. # E34 P72 E56
  659. if (row['ISRL'] != ''):
  660. line = triple(e34placeHolder,
  661. cidocCoords.prefix + 'P72_has_language',
  662. museoCoords.prefix + '_' + row['ISRL']) + closeLine
  663. output.write(line)
  664. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  665. nsCoords.prefix + 'type',
  666. cidocCoords.prefix + 'E56_Language') + closeLine
  667. output.write(line)
  668. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  669. schemaCoords.prefix + 'label',
  670. '\"' + row['ISRL'] + '\"') + closeLine
  671. output.write(line)
  672. if row['ISRS'] != '':
  673. line = triple(e34placeHolder,
  674. cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
  675. e65placeHolder) + closeLine
  676. output.write(line)
  677. line = triple(e65placeHolder,
  678. nsCoords.prefix + 'type',
  679. cidocCoords.prefix + 'E65_Creation') + closeLine
  680. output.write(line)
  681. line = triple(e65placeHolder,
  682. schemaCoords.prefix + 'label',
  683. '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
  684. output.write(line)
  685. if row['ISRS']:
  686. ss = row['ISRS'].replace(' ', '')
  687. tecPlaceholder = museoCoords.prefix + url + '_' + ss
  688. line = triple(e65placeHolder,
  689. cidocCoords.prefix + 'P32_used_general_technique',
  690. tecPlaceholder) + closeLine
  691. output.write(line)
  692. line = triple(tecPlaceholder,
  693. nsCoords.prefix + 'type',
  694. cidocCoords.prefix + 'E55_Type') + closeLine
  695. output.write(line)
  696. line = triple(tecPlaceholder,
  697. schemaCoords.prefix + 'label',
  698. '\"' + row['ISRS'] + '\"') + closeLine
  699. output.write(line)
  700. if row['ISRP'] != '':
  701. line = triple(e25placeHolder,
  702. cidocCoords.prefix + 'P3_has_note',
  703. '\"' + row['ISRP'] + '\"^^xsd:string') + closeLine
  704. output.write(line)
  705. unit = ''
  706. if (row['MISU'] != ''):
  707. unit = row['MISU']
  708. valueA = ''
  709. valueL = ''
  710. if (row['MISA'] != ''):
  711. value = row['MISA']
  712. valueA = value.replace(',', 'v')
  713. if (row['MISL'] != ''):
  714. value = row['MISL']
  715. valueL = value.replace(',', 'v')
  # Height ("Altezza", column MISA) and width ("Larghezza", column MISL) are
  # written with the same triple pattern, so both are driven from one table:
  # (row column, node suffix, opening of the label literal,
  #  AAT id of the dimension type, label literal of that type).
  dimension_specs = (
      ('MISA', '_Altezza', '\"Altezza: ', '300055644', '\"altezza\"'),
      ('MISL', '_Larghezza', '\"Larghezza: ', '300055647', '\"larghezza\"'),
  )
  for dim_column, dim_suffix, dim_label_open, dim_type_id, dim_type_label in dimension_specs:
      # A dimension is only described when its column is filled in.
      if row[dim_column] == '':
          continue
      dim_node = museoCoords.prefix + url + dim_suffix
      # E22 P43 E54
      output.write(triple(datplaceHolder,
                          cidocCoords.prefix + 'P43_has_dimension',
                          dim_node) + closeLine)
      output.write(triple(dim_node,
                          nsCoords.prefix + 'type',
                          cidocCoords.prefix + 'E54_Dimension') + closeLine)
      output.write(triple(dim_node,
                          schemaCoords.prefix + 'label',
                          dim_label_open + row[dim_column] + row['MISU'] + '\"') + closeLine)
      # E54 P90 E60
      # NOTE(review): the raw column text is typed as xsd:integer whatever it
      # contains - confirm the column never holds decimal values.
      output.write(triple(dim_node,
                          cidocCoords.prefix + 'P90_has_value',
                          '\"' + row[dim_column] + '\"^^xsd:integer') + closeLine)
      # E54 P2 E55
      dim_type_node = aatCoords.prefix + dim_type_id
      output.write(triple(dim_node,
                          cidocCoords.prefix + 'P2_has_type',
                          dim_type_node) + closeLine)
      output.write(triple(dim_type_node,
                          schemaCoords.prefix + 'label',
                          dim_type_label) + closeLine)
      # E54 P91 E58
      # The same AAT id is used for the unit node whatever MISU contains;
      # only its label carries the MISU text.
      if row['MISU'] != '':
          unit_node = aatCoords.prefix + '300379098'
          output.write(triple(dim_node,
                              cidocCoords.prefix + 'P91_has_unit',
                              unit_node) + closeLine)
          output.write(triple(unit_node,
                              nsCoords.prefix + 'type',
                              cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine)
          output.write(triple(unit_node,
                              schemaCoords.prefix + 'label',
                              '\"' + row['MISU'] + '\"') + closeLine)
  # Materials and techniques: MTC may hold several '/'-separated terms. Each
  # term is resolved with get_elem(); element 0 of the result is appended to
  # the AAT prefix and element 1 says whether the term is a material.
  # NOTE(review): block nesting was rebuilt from a flattened listing; the
  # type/label triples of the technique branch are assumed to be written
  # whether or not e12FplaceHolder is set.
  if row['MTC'] != '':
      # split() returns a one-element list when there is no '/' at all.
      mtcs = row['MTC'].split('/')
      for raw_term in mtcs:
          mtc = raw_term.lstrip()
          el = get_elem(mtc)
          is_material = el[1] == 'MTC/M'
          term_node = aatCoords.prefix + el[0]
          if is_material:
              output.write(triple(datplaceHolder,
                                  cidocCoords.prefix + 'P45_consists_of',
                                  term_node) + closeLine)
              term_class = 'E57_Material'
          else:
              # E12 Production - P32 used technique - E55 Type
              output.write(triple(e12placeHolder,
                                  cidocCoords.prefix + 'P32_used_general_technique',
                                  term_node) + closeLine)
              if e12FplaceHolder != '':
                  output.write(triple(e12FplaceHolder,
                                      cidocCoords.prefix + 'P32_used_general_technique',
                                      term_node) + closeLine)
              term_class = 'E55_Type'
          # Both branches finish by typing and labelling the AAT node.
          output.write(triple(term_node,
                              nsCoords.prefix + 'type',
                              cidocCoords.prefix + term_class) + closeLine)
          output.write(triple(term_node,
                              schemaCoords.prefix + 'label',
                              '\"' + mtc + '\"') + closeLine)
  # E12 P140i E13
  # When AUTM is filled in, e12placeHolder is tied to an attribute-assignment
  # node (e13placeHolder) that carries the AUTM text and points at the author
  # resolved from AUTH.
  if row['AUTM'] != '':
      mot = '_'.join(row['AUTM'].split(' '))
      # NOTE(review): e55placeHolder is built but not referenced by the
      # statements below; P2_has_type receives the AUTM text as a quoted
      # literal instead - confirm this is intended.
      e55placeHolder = museoCoords.prefix + url + '_' + mot
      output.write(triple(e12placeHolder,
                          cidocCoords.prefix + 'P140i_was_attributed_by',
                          e13placeHolder) + closeLine)
      output.write(triple(e13placeHolder,
                          nsCoords.prefix + 'type',
                          cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine)
      output.write(triple(e13placeHolder,
                          schemaCoords.prefix + 'label',
                          '\"Motivazione attribuzione\"') + closeLine)
      output.write(triple(e13placeHolder,
                          cidocCoords.prefix + 'P2_has_type',
                          '\"' + row['AUTM'] + '\"') + closeLine)
      # Only element 0 of the get_aut_url() result is used here.
      AuthorPlaceholder = autCoords.prefix + get_aut_url(row['AUTH'])[0]
      output.write(triple(e13placeHolder,
                          cidocCoords.prefix + 'P141_assigned',
                          AuthorPlaceholder) + closeLine)
  # E22 P44 E62
  # Free-text note (NSC) attached to datplaceHolder as an xsd:string literal.
  # Straight double quotes are rewritten as guillemets, presumably so that
  # they cannot end the quoted literal early: a quote that follows a space
  # opens («), every other quote closes (»).
  if row['NSC'] != '':
      note_text = row['NSC']
      # A quote opening the very first word has no space in front of it and
      # would otherwise be rendered as a closing guillemet.
      if note_text.startswith('"'):
          note_text = '«' + note_text[1:]
      phr = note_text.replace(' "', ' «').replace('"', '»')
      output.write(triple(datplaceHolder,
                          cidocCoords.prefix + 'P3_has_note',
                          '\"' + phr + '\"^^xsd:string') + closeLine)
  # Iconclass subjects: DESI holds one or more notations separated by ':'.
  # Spaces are stripped first; empty fragments (empty DESI, doubled or
  # trailing ':') are skipped so that no lookup or triple is produced for a
  # notation that does not exist.
  iconclass = row['DESI']
  list_icon = [part for part in iconclass.replace(' ', '').split(':') if part]
  for ic in list_icon:
      # The lookup address gets its own name: `url` is what the rest of the
      # row handling uses to build node identifiers and must not be replaced.
      icon_page_url = 'http://iconclass.org/rdk/' + str(ic)
      response = urlopen(icon_page_url)
      try:
          html = response.read()
      finally:
          # Release the connection even when reading fails.
          response.close()
      soup = BeautifulSoup(html, 'html.parser')
      current_entry = soup.find("div", {"id": "ic_current"})
      # The attribute filter has to be a dict; a set literal here would be
      # interpreted as a list of alternative class names.
      notation_link = current_entry.find("a", {"class": "ic_notation"})
      notation_text = notation_link.text
      # Everything after the first space is kept (the whole text when there
      # is no space at all).
      # NOTE(review): icon_label is not written by any statement visible in
      # this section - confirm whether a label triple was intended.
      icon_label = notation_text[notation_text.find(' ') + 1:]
      # Parentheses are percent-encoded in the identifier that is written out.
      icon_node = iconCoords.prefix + ic.replace("(", "%28").replace(")", "%29")
      output.write(triple(datplaceHolder,
                          cidocCoords.prefix + 'P62_depicts',
                          icon_node) + closeLine)
      output.write(triple(icon_node,
                          nsCoords.prefix + 'type',
                          cidocCoords.prefix + 'E1_CRM_Entity') + closeLine)
  909. output.write('\n')
  910. #
  911. #
  912. # Limit number of entries processed (if desired)
  913. if (ii > max_entries):
  914. break