CSV_to_RDF_Datini.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096
  1. # Utilities to read/write csv files
  2. import csv
  3. # Utilities to handle character encodings
  4. import unicodedata
  5. # Ordered Dicts
  6. from collections import OrderedDict
  7. from urllib.request import urlopen
  8. from bs4 import BeautifulSoup
  9. import json
  10. # OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
  19. import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/corretti/'
  20. export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/MPP/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code=None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
  27. # Repositories
  28. museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
  29. autCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/opere/autori/>', 'aut:')
  30. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  31. aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
  32. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  33. schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  34. xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
  35. iconCoords = RDFcoords('<http://iconclass.org/>', 'ico:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
  46. # Line endings in TTL format
  47. continueLine1 = ' ;\n'
  48. continueLine2 = ' ,\n'
  49. closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
  52. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  53. output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
  54. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  55. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  56. output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)
  57. output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)
  58. output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)
  59. output.write('\n')
  60. filePrefix = '00_SR20OA_'
  61. fileType = 'Datini'
  62. max_entries = 1000000000
  63. def get_aut_url(code):
  64. aut_file = open(import_dir + 'AR20AUT_' + fileType + '.csv', newline="")
  65. reader = csv.DictReader(aut_file)
  66. for row in reader:
  67. auth = int(row['AUTH'])
  68. cod = int(code)
  69. role = ''
  70. if row['AUTQ'] != '':
  71. role = row['AUTQ']
  72. else:
  73. role = ''
  74. if auth == cod:
  75. return [row['URL'], role]
  76. def get_role(role):
  77. role_file = open('/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_RUOLI.csv', newline="")
  78. reader = csv.DictReader(role_file)
  79. for row in reader:
  80. if row['Label'] == role:
  81. return row['AAT']
  82. def get_elem(mtc):
  83. mtc_file = open('/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_MTC.csv', newline="")
  84. reader = csv.DictReader(mtc_file)
  85. for row in reader:
  86. if row['MTC'] == mtc:
  87. return [row['AAT'], row['Type']]
  88. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  89. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  90. reader = csv.DictReader(csv_file)
  91. writeTTLHeader(output)
  92. first = True
  93. ii = 0
  94. for row in reader:
  95. # The index ii is used to process a limited number of entries for testing purposes
  96. ii = ii + 1
  97. sb = ''
  98. subj = ''
  99. pp = row['OGTD'] + ' (' + row['ACC'] + ') '
  100. if row['SGTI'] != '':
  101. sb = pp + row['SGTI']
  102. if row['LDCN'] != '':
  103. subj = sb + ' in ' + row['LDCN']
  104. else:
  105. subj = sb
  106. # Triplify the 'codice' -- should exist for every entry
  107. codice = ''
  108. if (row['NCTR'] != '' and row['NCTN'] != ''):
  109. codice = row['NCTR'] + row['NCTN']
  110. codiceP = ''
  111. if (row['AUTH'] != ''):
  112. codiceP = row['AUTH']
  113. place = ''
  114. if (row['PRVC'] != ''):
  115. place = row['PRVC']
  116. columnName = list(row)
  117. url = row['URL']
  118. # placeHolders
  119. datplaceHolder = museoCoords.prefix + url
  120. e1placeHolder = museoCoords.prefix + url + '_E1'
  121. e3placeHolder = museoCoords.prefix + url + 'E3'
  122. e10placeHolder = museoCoords.prefix + url + '_E10'
  123. e12placeHolder = museoCoords.prefix + url + '_E12'
  124. e13placeHolder = museoCoords.prefix + url + '_E13'
  125. e21placeHolder = museoCoords.prefix + url + '_InE21'
  126. e25placeHolder = museoCoords.prefix + url + '_E25'
  127. e34placeHolder = museoCoords.prefix + url + '_E34'
  128. e35placeHolder1 = museoCoords.prefix + url + '_E35'
  129. e42placeHolder = museoCoords.prefix + url + '_E42'
  130. e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
  131. e65placeHolder = museoCoords.prefix + url + '_InE65'
  132. e73placeHolder = museoCoords.prefix + url + '_E73'
  133. e74placeHolder = museoCoords.prefix + url + '_E74'
  134. if (codice != ''):
  135. line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
  136. output.write(line)
  137. line = triple(e42placeHolder, nsCoords.prefix + 'type',
  138. cidocCoords.prefix + 'E42_Identifier') + closeLine
  139. output.write(line)
  140. line = triple(e42placeHolder,
  141. schemaCoords.prefix + 'label',
  142. '\"' + codice + '\"') + closeLine
  143. output.write(line)
  144. ###
  145. line = triple(e42placeHolder,
  146. cidocCoords.prefix + 'P2_has_type',
  147. '\"Codice univoco del bene (NCT)\"') + closeLine
  148. output.write(line)
  149. # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
  150. line = triple(datplaceHolder, nsCoords.prefix + 'type',
  151. cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
  152. output.write(line)
  153. # Added by AS
  154. line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
  155. output.write(line)
  156. # End AS
  157. line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
  158. output.write(line)
  159. line = triple(e73placeHolder, nsCoords.prefix + 'type',
  160. cidocCoords.prefix + 'E73_Information_Object') + closeLine
  161. output.write(line)
  162. # AS
  163. ss = ''
  164. if row['SGTI'] != '':
  165. ss = row['SGTI']
  166. else:
  167. ss = 'senza titolo'
  168. line = triple(e73placeHolder, schemaCoords.prefix + 'label',
  169. '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
  170. output.write(line)
  171. # E73 - P2 - E55
  172. tt = ''
  173. typeLabel = ''
  174. if row['OGTD'] == 'dipinto':
  175. tt = aatCoords.prefix + "300033618"
  176. elif row['OGTD'] == 'rilievo':
  177. tt = aatCoords.prefix + "300047230"
  178. elif row['OGTD'] == 'polittico':
  179. tt = aatCoords.prefix + "300178235"
  180. elif row['OGTD'] == 'predella':
  181. tt = aatCoords.prefix + "300003745"
  182. line = triple(e73placeHolder,
  183. cidocCoords.prefix + 'P2_has_type',
  184. tt) + closeLine
  185. output.write(line)
  186. line = triple(tt, schemaCoords.prefix + 'label',
  187. '\"' + row['OGTD'] + '\"') + closeLine
  188. output.write(line)
  189. # E73 - P1 - E35
  190. if row['SGTT'] != '':
  191. line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
  192. output.write(line)
  193. line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
  194. output.write(line)
  195. line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
  196. output.write(line)
  197. # E22 - P62 - E1
  198. if row['SGTI'] != '':
  199. line = triple(datplaceHolder,
  200. cidocCoords.prefix + 'P62_depicts',
  201. e1placeHolder) + closeLine
  202. output.write(line)
  203. line = triple(e1placeHolder,
  204. nsCoords.prefix + 'type',
  205. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  206. output.write(line)
  207. line = triple(e1placeHolder,
  208. schemaCoords.prefix + 'label', '\"' +
  209. row['SGTI'] + '\"') + closeLine
  210. output.write(line)
  211. line = triple(e1placeHolder,
  212. cidocCoords.prefix + 'P2_has_type',
  213. '\"Identificazione Iconografica\"') + closeLine
  214. output.write(line)
  215. # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
  216. if row['ESC'] == 'C100005':
  217. line = triple(datplaceHolder,
  218. cidocCoords.prefix + 'P52_has_current_owner',
  219. '<http://palazzopretorio.comune.prato.it/it/>') + closeLine
  220. output.write(line)
  221. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  222. nsCoords.prefix + 'type',
  223. cidocCoords.prefix + 'E74_Group') + closeLine
  224. output.write(line)
  225. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  226. schemaCoords.prefix + 'label',
  227. '\"Museo di Palazzo Pretorio\"') + closeLine
  228. output.write(line)
  229. currentLocation = ''
  230. # E22 - P54 - E53
  231. if row['LDCN'] != '':
  232. if row['LDCS'] != '':
  233. currentLocation = row['LDCS']
  234. else:
  235. currentLocation = currentLocation
  236. if row['LDCM'] != '':
  237. currentLocation = currentLocation + ', ' + row['LDCM']
  238. else:
  239. currentLocation = currentLocation
  240. if row['LDCN'] != '':
  241. currentLocation = currentLocation + ', ' + row['LDCN']
  242. else:
  243. currentLocation = currentLocation
  244. currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
  245. line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
  246. '\"' + currentLocation + '\"') + closeLine
  247. output.write(line)
  248. e12FplaceHolder = ''
  249. if row['DTSI'] != row['DTSF']:
  250. e12FplaceHolder = museoCoords.prefix + url + '_E12F'
  251. # Write E12 Production -- should exist for every entry?
  252. # E12 P108 E22
  253. line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  254. output.write(line)
  255. line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
  256. output.write(line)
  257. # E73 P108i E12
  258. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
  259. output.write(line)
  260. if e12FplaceHolder != '':
  261. line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  262. output.write(line)
  263. line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
  264. cidocCoords.prefix + 'E12_Production') + closeLine
  265. output.write(line)
  266. line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
  267. '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
  268. output.write(line)
  269. # E73 P108i E12
  270. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
  271. output.write(line)
  272. # E12 P140i E13
  273. line = triple(e12FplaceHolder,
  274. cidocCoords.prefix + 'P140i_was_attributed_by',
  275. e13placeHolder) + closeLine
  276. output.write(line)
  277. # E12 P2
  278. line = triple(e12FplaceHolder,
  279. cidocCoords.prefix + 'P2_has_type',
  280. '\"Fine\"^^xsd:string') + closeLine
  281. output.write(line)
  282. line = triple(e12placeHolder,
  283. cidocCoords.prefix + 'P2_has_type',
  284. '\"Inizio\"^^xsd:string') + closeLine
  285. output.write(line)
  286. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  287. '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
  288. output.write(line)
  289. else:
  290. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  291. '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
  292. output.write(line)
  293. tcl = []
  294. for name in columnName:
  295. if 'TCL' in name:
  296. tcl.append(name)
  297. # E12 - P7 - E53
  298. for el in tcl:
  299. i = 0
  300. if row[el] == 'luogo di produzione':
  301. pl = ''
  302. if i == 0:
  303. pl = row['PRVC']
  304. else:
  305. pl = row['PRVC' + i]
  306. line = triple(e12placeHolder,
  307. cidocCoords.prefix + 'P7_took_place_at',
  308. museoCoords.prefix + pl) + closeLine
  309. output.write(line)
  310. if e12FplaceHolder != '':
  311. line = triple(e12FplaceHolder,
  312. cidocCoords.prefix + 'P7_took_place_at',
  313. museoCoords.prefix + pl) + closeLine
  314. output.write(line)
  315. i = i + 1
  316. # E12 - PC14 - E21
  317. if row['AUTH'] != '':
  318. aut = get_aut_url(row['AUTH'])
  319. aut_url = aut[0]
  320. aut_role = aut[1]
  321. ll = row['AUTN'] + '_' + aut_role
  322. lab = ll.replace(' ', '')
  323. label = lab.replace(',', '')
  324. AuthorPlaceholder = autCoords.prefix + aut_url
  325. line = triple(museoCoords.prefix + '_' + label,
  326. cidocCoords.prefix + 'P01_has_domain',
  327. e12placeHolder) + closeLine
  328. output.write(line)
  329. if e12FplaceHolder != '':
  330. line = triple(museoCoords.prefix + '_' + label,
  331. cidocCoords.prefix + 'P01_has_domain',
  332. e12FplaceHolder) + closeLine
  333. output.write(line)
  334. if 'AUTH1' in columnName:
  335. if row['AUTH1'] != '':
  336. aut = get_aut_url(row['AUTH1'])
  337. aut_url = aut[0]
  338. aut_role = aut[1]
  339. ll = row['AUTN1'] + '_' + aut_role
  340. lab = ll.replace(' ', '')
  341. label = lab.replace(',', '')
  342. AuthorPlaceholder = autCoords.prefix + aut_url
  343. line = triple(museoCoords.prefix + '_' + label,
  344. cidocCoords.prefix + 'P01_has_domain',
  345. e12placeHolder) + closeLine
  346. output.write(line)
  347. if e12FplaceHolder != '':
  348. line = triple(museoCoords.prefix + '_' + label,
  349. cidocCoords.prefix + 'P01_has_domain',
  350. e12FplaceHolder) + closeLine
  351. output.write(line)
  352. # E12 - PC14 - E21
  353. if 'CMMN' in columnName:
  354. if row['CMMN'] != '':
  355. cc = row['CMMN']
  356. cm = cc.replace(' ', '')
  357. cmmn = cm.replace(',', '')
  358. cmmPlaceholder = museoCoords.prefix + '_' + cmmn
  359. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  360. cidocCoords.prefix + 'P01_has_domain',
  361. e12placeHolder) + closeLine
  362. output.write(line)
  363. if e12FplaceHolder != '':
  364. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  365. cidocCoords.prefix + 'P01_has_domain',
  366. e12FplaceHolder) + closeLine
  367. output.write(line)
  368. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  369. nsCoords.prefix + 'type',
  370. cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
  371. output.write(line)
  372. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  373. schemaCoords.prefix + 'label',
  374. '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
  375. output.write(line)
  376. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  377. cidocCoords.prefix + 'P02_has_range',
  378. cmmPlaceholder) + closeLine
  379. output.write(line)
  380. line = triple(cmmPlaceholder,
  381. nsCoords.prefix + 'type',
  382. cidocCoords.prefix + 'E39_Actor') + closeLine
  383. output.write(line)
  384. line = triple(cmmPlaceholder,
  385. schemaCoords.prefix + 'label',
  386. '\"' + row['CMMN'] + '\"') + closeLine
  387. output.write(line)
  388. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  389. cidocCoords.prefix + 'P14.1_in_the_role_of',
  390. museoCoords.prefix + '_client') + closeLine
  391. output.write(line)
  392. line = triple(museoCoords.prefix + '_client',
  393. nsCoords.prefix + 'type',
  394. cidocCoords.prefix + 'E55_Type') + closeLine
  395. output.write(line)
  396. line = triple(museoCoords.prefix + '_client',
  397. schemaCoords.prefix + 'label',
  398. '\"Committente\"') + closeLine
  399. output.write(line)
  400. # E12 - P4 - E52
  401. if row['DTSI'] != '':
  402. line = triple(e12placeHolder,
  403. cidocCoords.prefix + 'P4_has_time-span',
  404. museoCoords.prefix + row['DTSI']) + closeLine
  405. output.write(line)
  406. line = triple(museoCoords.prefix + row['DTSI'],
  407. nsCoords.prefix + 'type',
  408. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  409. output.write(line)
  410. line = triple(museoCoords.prefix + row['DTSI'],
  411. schemaCoords.prefix + 'label',
  412. '\"' + row['DTSI'] + '\"') + closeLine
  413. output.write(line)
  414. if e12FplaceHolder != '':
  415. line = triple(e12FplaceHolder,
  416. cidocCoords.prefix + 'P4_has_time-span',
  417. museoCoords.prefix + row['DTSF']) + closeLine
  418. output.write(line)
  419. line = triple(museoCoords.prefix + row['DTSF'],
  420. nsCoords.prefix + 'type',
  421. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  422. output.write(line)
  423. line = triple(museoCoords.prefix + row['DTSF'],
  424. schemaCoords.prefix + 'label',
  425. '\"' + row['DTSF'] + '\"') + closeLine
  426. output.write(line)
  427. tcl = []
  428. for name in columnName:
  429. if 'TCL' in name:
  430. tcl.append(name)
  431. j = 0
  432. for el in tcl:
  433. if row[el] != '':
  434. j = j + 1
  435. last = str(j - 1)
  436. n = len(tcl) - 1
  437. for i in range(n):
  438. k = str(i + 1)
  439. if i + 1 == 1:
  440. w = ''
  441. else:
  442. w = i
  443. f = str(w)
  444. if row['TCL' + k] != '':
  445. pastActor = ''
  446. newActor = ''
  447. pl = ''
  448. if row['PRCD' + k] != '':
  449. newActor = ' a ' + row['PRCD' + k]
  450. if row['PRCD' + f] != '':
  451. pastActor = ' da ' + row['PRCD' + f]
  452. pl = row['PRCD' + f].replace(' ', '')
  453. newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
  454. line = triple(newe10placeHolder,
  455. cidocCoords.prefix + 'P30_transferred_custody_of',
  456. datplaceHolder) + closeLine
  457. output.write(line)
  458. line = triple(newe10placeHolder,
  459. nsCoords.prefix + 'type',
  460. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  461. output.write(line)
  462. line = triple(newe10placeHolder,
  463. schemaCoords.prefix + 'label',
  464. '\"Passaggio di ' + row['SGTI'] + pastActor +
  465. newActor + '\"') + closeLine
  466. output.write(line)
  467. if row['PRDI' + f] != '':
  468. timespan = row['PRDI' + f]
  469. tt = timespan.replace(' ', '')
  470. tp = tt.replace('.', '')
  471. ts = tp.replace('/', '')
  472. timespanPlaceholder = museoCoords.prefix + '_' + ts
  473. # E10 P4 E52
  474. line = triple(newe10placeHolder,
  475. cidocCoords.prefix + 'P4_has_time-span',
  476. timespanPlaceholder) + closeLine
  477. output.write(line)
  478. line = triple(timespanPlaceholder,
  479. nsCoords.prefix + 'type',
  480. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  481. output.write(line)
  482. line = triple(timespanPlaceholder,
  483. schemaCoords.prefix + 'label',
  484. '\"' + timespan + '\"') + closeLine
  485. output.write(line)
  486. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  487. newLoc = row['PRCD' + k].replace(' ', '')
  488. newActorPlaceholder = museoCoords.prefix + '_' + newLoc
  489. # E10 P26 E74 (moved to)
  490. if newActorPlaceholder != '':
  491. line = triple(newe10placeHolder,
  492. cidocCoords.prefix + 'P29_custody_received_by',
  493. newActorPlaceholder) + closeLine
  494. output.write(line)
  495. # E10 P27 E74
  496. pastActorLabel = row['PRCD' + f]
  497. line = triple(newe10placeHolder,
  498. cidocCoords.prefix + 'P28_custody_surrendered_by',
  499. pastActorPlaceholder) + closeLine
  500. output.write(line)
  501. line = triple(pastActorPlaceholder,
  502. nsCoords.prefix + 'type',
  503. cidocCoords.prefix + 'E39_Actor') + closeLine
  504. output.write(line)
  505. line = triple(pastActorPlaceholder,
  506. schemaCoords.prefix + 'label',
  507. '\"' + pastActorLabel + '\"') + closeLine
  508. output.write(line)
  509. line = triple(datplaceHolder,
  510. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  511. pastActorPlaceholder) + closeLine
  512. output.write(line)
  513. # E74 P74 E53
  514. pastResidenceLabel = row['PRVC' + f]
  515. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
  516. line = triple(pastActorPlaceholder,
  517. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  518. pastResidencePlaceHolder) + closeLine
  519. output.write(line)
  520. ####
  521. pastActor = ''
  522. newActor = ''
  523. pl = ''
  524. if row['LDCN'] != '':
  525. newActor = ' a ' + row['LDCN']
  526. if row['PRCD' + last] != '':
  527. pastActor = ' da ' + row['PRCD' + last]
  528. pl = row['PRCD' + last].replace(' ', '')
  529. line = triple(e10placeHolder,
  530. cidocCoords.prefix + 'P30_transferred_custody_of',
  531. datplaceHolder) + closeLine
  532. output.write(line)
  533. line = triple(e10placeHolder,
  534. nsCoords.prefix + 'type',
  535. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  536. output.write(line)
  537. line = triple(e10placeHolder,
  538. schemaCoords.prefix + 'label',
  539. '\"Passaggio di ' + row['SGTI'] + pastActor +
  540. newActor + '\"') + closeLine
  541. output.write(line)
  542. if row['PRDU' + last] != '':
  543. timespan = row['PRDU' + last]
  544. tt = timespan.replace(' ', '')
  545. ts = tt.replace('/', '')
  546. timespanPlaceholder = museoCoords.prefix + '_' + ts
  547. # E10 P4 E52
  548. line = triple(e10placeHolder,
  549. cidocCoords.prefix + 'P4_has_time-span',
  550. timespanPlaceholder) + closeLine
  551. output.write(line)
  552. line = triple(timespanPlaceholder,
  553. nsCoords.prefix + 'type',
  554. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  555. output.write(line)
  556. line = triple(timespanPlaceholder,
  557. schemaCoords.prefix + 'label',
  558. '\"' + timespan + '\"') + closeLine
  559. output.write(line)
  560. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  561. newLocPlaceholder = e74placeHolder
  562. # E10 P26 E74 (moved to)
  563. if newLocPlaceholder != '':
  564. line = triple(e10placeHolder,
  565. cidocCoords.prefix + 'P29_custody_received_by',
  566. newLocPlaceholder) + closeLine
  567. output.write(line)
  568. # E10 P27 E74
  569. pastActorLabel = row['PRCD' + last]
  570. line = triple(e10placeHolder,
  571. cidocCoords.prefix + 'P28_custody_surrendered_by',
  572. pastActorPlaceholder) + closeLine
  573. output.write(line)
  574. line = triple(pastActorPlaceholder,
  575. nsCoords.prefix + 'type',
  576. cidocCoords.prefix + 'E39_Actor') + closeLine
  577. output.write(line)
  578. line = triple(pastActorPlaceholder,
  579. schemaCoords.prefix + 'label',
  580. '\"' + pastActorLabel + '\"') + closeLine
  581. output.write(line)
  582. line = triple(datplaceHolder,
  583. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  584. pastActorPlaceholder) + closeLine
  585. output.write(line)
  586. # E74 P74 E53
  587. pastResidenceLabel = row['PRVC' + last]
  588. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
  589. if row['PRVP' + last] != '':
  590. pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
  591. if row['PRVR' + last] != '':
  592. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
  593. if row['PRVS' + last] != '':
  594. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
  595. line = triple(pastActorPlaceholder,
  596. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  597. pastResidencePlaceHolder) + closeLine
  598. output.write(line)
  599. line = triple(pastResidencePlaceHolder,
  600. nsCoords.prefix + 'type',
  601. cidocCoords.prefix + 'E53_Place') + closeLine
  602. output.write(line)
  603. # E22 P44 E3
  604. if row['STCC'] != '':
  605. line = triple(datplaceHolder,
  606. cidocCoords.prefix + 'P44_has_condition',
  607. e3placeHolder) + closeLine
  608. output.write(line)
  609. line = triple(e3placeHolder,
  610. nsCoords.prefix + 'type',
  611. cidocCoords.prefix + 'E3_Condition_State') + closeLine
  612. output.write(line)
  613. line = triple(e3placeHolder,
  614. schemaCoords.prefix + 'label',
  615. '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
  616. output.write(line)
  617. line = triple(e3placeHolder,
  618. cidocCoords.prefix + 'P2_has_type',
  619. '\"' + row['STCC'] + '\"') + closeLine
  620. output.write(line)
  621. # E22 P65 E34
  622. if (row['ISRI'] != ''):
  623. line = triple(datplaceHolder,
  624. cidocCoords.prefix + 'P56_bears_feature',
  625. e25placeHolder) + closeLine
  626. output.write(line)
  627. line = triple(e25placeHolder,
  628. nsCoords.prefix + 'type',
  629. cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
  630. output.write(line)
  631. line = triple(e25placeHolder,
  632. schemaCoords.prefix + 'label',
  633. '\"Iscrizione su ' + subj + '\"') + closeLine
  634. output.write(line)
  635. line = triple(e25placeHolder,
  636. cidocCoords.prefix + 'P128_carries',
  637. e34placeHolder) + closeLine
  638. output.write(line)
  639. line = triple(e34placeHolder,
  640. nsCoords.prefix + 'type',
  641. cidocCoords.prefix + 'E34_Inscription') + closeLine
  642. output.write(line)
  643. line = triple(e34placeHolder,
  644. schemaCoords.prefix + 'label',
  645. '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
  646. output.write(line)
  647. pl = row['ISRI'].replace(' ', '-')
  648. pla = pl.replace('.', '')
  649. line = triple(e34placeHolder,
  650. cidocCoords.prefix + 'P3_has_note',
  651. '\"' + row['ISRI'] + '\"') + closeLine
  652. output.write(line)
  653. # E34 P2 E55
  654. if (row['ISRT'] != ''):
  655. rr = row['ISRT'].replace(' ', '')
  656. line = triple(e34placeHolder,
  657. cidocCoords.prefix + 'P2_has_type',
  658. '\"' + row['ISRT'] + '\"') + closeLine
  659. output.write(line)
  660. # E34 P72 E56
  661. if (row['ISRL'] != ''):
  662. line = triple(e34placeHolder,
  663. cidocCoords.prefix + 'P72_has_language',
  664. museoCoords.prefix + '_' + row['ISRL']) + closeLine
  665. output.write(line)
  666. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  667. nsCoords.prefix + 'type',
  668. cidocCoords.prefix + 'E56_Language') + closeLine
  669. output.write(line)
  670. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  671. schemaCoords.prefix + 'label',
  672. '\"' + row['ISRL'] + '\"') + closeLine
  673. output.write(line)
  674. if row['ISRS'] != '':
  675. line = triple(e34placeHolder,
  676. cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
  677. e65placeHolder) + closeLine
  678. output.write(line)
  679. line = triple(e65placeHolder,
  680. nsCoords.prefix + 'type',
  681. cidocCoords.prefix + 'E65_Creation') + closeLine
  682. output.write(line)
  683. line = triple(e65placeHolder,
  684. schemaCoords.prefix + 'label',
  685. '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
  686. output.write(line)
  687. if row['ISRS']:
  688. ss = row['ISRS'].replace(' ', '')
  689. tecPlaceholder = museoCoords.prefix + url + '_' + ss
  690. line = triple(e65placeHolder,
  691. cidocCoords.prefix + 'P32_used_general_technique',
  692. tecPlaceholder) + closeLine
  693. output.write(line)
  694. line = triple(tecPlaceholder,
  695. nsCoords.prefix + 'type',
  696. cidocCoords.prefix + 'E55_Type') + closeLine
  697. output.write(line)
  698. line = triple(tecPlaceholder,
  699. schemaCoords.prefix + 'label',
  700. '\"' + row['ISRS'] + '\"') + closeLine
  701. output.write(line)
  702. if row['ISRP'] != '':
  703. line = triple(e25placeHolder,
  704. cidocCoords.prefix + 'P3_has_note',
  705. '\"' + row['ISRP'] + '\"^^xsd:string') + closeLine
  706. output.write(line)
  # Measurements: MISU holds the unit text, MISA the height and MISL the
  # width. Each non-empty measure becomes an E54_Dimension node attached to
  # datplaceHolder through P43_has_dimension.
  #
  # The three names below are not read anywhere in this section; they are
  # still assigned in case code outside it relies on them. Commas in the
  # values are replaced by 'v'.
  unit = row['MISU']
  valueA = row['MISA'].replace(',', 'v')
  valueL = row['MISL'].replace(',', 'v')
  # Height and width have exactly the same shape, so both are driven from
  # one table: (CSV column, IRI suffix, label caption, AAT id of the
  # dimension type, label written for that AAT concept).
  for column, suffix, caption, aat_id, aat_label in (
          ('MISA', '_Altezza', 'Altezza: ', '300055644', 'altezza'),
          ('MISL', '_Larghezza', 'Larghezza: ', '300055647', 'larghezza')):
      measure = row[column]
      if measure == '':
          continue
      dimension = museoCoords.prefix + url + suffix
      # E22 P43 E54
      output.write(
          triple(datplaceHolder,
                 cidocCoords.prefix + 'P43_has_dimension',
                 dimension)
          + closeLine)
      output.write(
          triple(dimension,
                 nsCoords.prefix + 'type',
                 cidocCoords.prefix + 'E54_Dimension')
          + closeLine)
      output.write(
          triple(dimension,
                 schemaCoords.prefix + 'label',
                 '\"' + caption + measure + row['MISU'] + '\"')
          + closeLine)
      # E54 P90 E60
      # A value with a decimal separator (e.g. "12,5") is not a valid
      # xsd:integer. Such values are written as xsd:decimal with a dot;
      # whole numbers keep the xsd:integer typing they always had.
      if ',' in measure or '.' in measure:
          value_literal = '\"' + measure.replace(',', '.') + '\"^^xsd:decimal'
      else:
          value_literal = '\"' + measure + '\"^^xsd:integer'
      output.write(
          triple(dimension,
                 cidocCoords.prefix + 'P90_has_value',
                 value_literal)
          + closeLine)
      # E54 P2 E55
      dimension_type = aatCoords.prefix + aat_id
      output.write(
          triple(dimension,
                 cidocCoords.prefix + 'P2_has_type',
                 dimension_type)
          + closeLine)
      output.write(
          triple(dimension_type,
                 schemaCoords.prefix + 'label',
                 '\"' + aat_label + '\"')
          + closeLine)
      # E54 P91 E58
      # NOTE(review): the unit IRI is always aat:300379098, whatever MISU
      # contains, and that IRI is labelled with the MISU text. Confirm that
      # every record uses the same unit.
      if row['MISU'] != '':
          unit_iri = aatCoords.prefix + '300379098'
          output.write(
              triple(dimension,
                     cidocCoords.prefix + 'P91_has_unit',
                     unit_iri)
              + closeLine)
          output.write(
              triple(unit_iri,
                     nsCoords.prefix + 'type',
                     cidocCoords.prefix + 'E58_Measurement_Unit')
              + closeLine)
          output.write(
              triple(unit_iri,
                     schemaCoords.prefix + 'label',
                     '\"' + row['MISU'] + '\"')
              + closeLine)
  # MTC field: a '/'-separated list of terms. get_elem() resolves each term
  # to a pair whose first item is appended to the AAT prefix and whose
  # second item says whether the term is a material ('MTC/M') or not.
  if row['MTC'] != '':
      # split() returns the whole text as a single item when there is no '/'.
      for raw_term in row['MTC'].split('/'):
          term = raw_term.lstrip()
          resolved = get_elem(term)
          if resolved[1] == 'MTC/M':
              # Material: the object itself consists of it.
              links = [(datplaceHolder, 'P45_consists_of')]
              term_class = 'E57_Material'
          else:
              # E12 Production - P32 used technique - E55 Type
              links = [(e12placeHolder, 'P32_used_general_technique')]
              if e12FplaceHolder != '':
                  # The second production placeholder, when set, gets the
                  # same technique.
                  links.append((e12FplaceHolder, 'P32_used_general_technique'))
              term_class = 'E55_Type'
          aat_term = aatCoords.prefix + resolved[0]
          for subject, prop in links:
              output.write(
                  triple(subject, cidocCoords.prefix + prop, aat_term)
                  + closeLine)
          # Every term is then typed and labelled with its own text.
          output.write(
              triple(aat_term,
                     nsCoords.prefix + 'type',
                     cidocCoords.prefix + term_class)
              + closeLine)
          output.write(
              triple(aat_term,
                     schemaCoords.prefix + 'label',
                     '\"' + term + '\"')
              + closeLine)
  # E12 P140i E13
  # AUTM field: when non-empty, the production event (e12placeHolder) is
  # linked to an E13_Attribute_Assignment node that carries the AUTM text
  # and points to the author resolved from AUTH.
  if row['AUTM'] != '':
      # Built from `url` and the AUTM text with spaces turned into '_'.
      # It is not used by the triples below.
      mot = row['AUTM'].replace(' ', '_')
      e55placeHolder = museoCoords.prefix + url + '_' + mot
      output.write(
          triple(e12placeHolder,
                 cidocCoords.prefix + 'P140i_was_attributed_by',
                 e13placeHolder)
          + closeLine)
      output.write(
          triple(e13placeHolder,
                 nsCoords.prefix + 'type',
                 cidocCoords.prefix + 'E13_Attribute_Assignment')
          + closeLine)
      output.write(
          triple(e13placeHolder,
                 schemaCoords.prefix + 'label',
                 '\"Motivazione attribuzione\"')
          + closeLine)
      # The AUTM text is written as a plain literal.
      output.write(
          triple(e13placeHolder,
                 cidocCoords.prefix + 'P2_has_type',
                 '\"' + row['AUTM'] + '\"')
          + closeLine)
      # The first item returned by get_aut_url() is appended to the author
      # prefix to form the assigned author IRI.
      aut = get_aut_url(row['AUTH'])
      aut_url = aut[0]
      AuthorPlaceholder = autCoords.prefix + aut_url
      output.write(
          triple(e13placeHolder,
                 cidocCoords.prefix + 'P141_assigned',
                 AuthorPlaceholder)
          + closeLine)
  # NSC field: when non-empty, its text is attached to datplaceHolder as a
  # P3_has_note literal typed xsd:string.
  if row['NSC'] != '':
      # Straight double quotes are replaced so they cannot clash with the
      # quotes that delimit the literal: a quote preceded by a space becomes
      # an opening guillemet, every remaining quote a closing one.
      phr = row['NSC'].replace(' "', ' «').replace('"', '»')
      output.write(
          triple(datplaceHolder,
                 cidocCoords.prefix + 'P3_has_note',
                 '\"' + phr + '\"^^xsd:string')
          + closeLine)
  # DESI field: one or more Iconclass notations separated by ':'. Each
  # notation is looked up on iconclass.org and the object (datplaceHolder)
  # is declared to depict it.
  iconclass = row['DESI']
  icon = iconclass.replace(' ', '')
  # split() returns the whole text as a single item when there is no ':'.
  list_icon = icon.split(':')
  for ic in list_icon:
      if not ic:
          # Empty DESI (or a stray ':'): there is no notation to look up,
          # and fetching the bare base URL would be meaningless.
          continue
      # A dedicated name is used so that `url`, which earlier sections use
      # to build the record's own IRIs, is not overwritten here.
      icon_url = 'http://iconclass.org/rdk/' + str(ic)
      response = urlopen(icon_url)
      try:
          html = response.read()
      finally:
          # Always release the connection, even if the read fails.
          response.close()
      soup = BeautifulSoup(html, 'html.parser')
      ff = soup.find("div", {"id": "ic_current"})
      # The attribute filter must be a dict; the previous set literal
      # {"class", "ic_notation"} only matched by accident.
      dd = ff.find("a", {"class": "ic_notation"})
      ss = dd.text
      # The label is everything after the first space of the link text.
      x = ss.find(' ')
      icon_label = ss[x + 1:]
      # NOTE(review): icon_label is not written to the output in this
      # section. Confirm whether a label triple was intended.
      # Parentheses are percent-encoded for use inside the IRI.
      urr = ic.replace("(", "%28").replace(")", "%29")
      output.write(
          triple(datplaceHolder,
                 cidocCoords.prefix + 'P62_depicts',
                 iconCoords.prefix + urr)
          + closeLine)
      output.write(
          triple(iconCoords.prefix + urr,
                 nsCoords.prefix + 'type',
                 cidocCoords.prefix + 'E1_CRM_Entity')
          + closeLine)
  911. # P2 Opera d'arte
  912. line = triple(datplaceHolder,
  913. cidocCoords.prefix + 'P2_has_type',
  914. '\"Opera d\'Arte\"') + closeLine
  915. output.write(line)
  916. output.write('\n')
  917. #
  918. #
  919. # Limit number of entries processed (if desired)
  920. if (ii > max_entries):
  921. break