# CSV_to_RDF_Martini.py
  1. # Utilities to read/write csv files
  2. import csv
  3. # Utilities to handle character encodings
  4. import unicodedata
  5. # Ordered Dicts
  6. from collections import OrderedDict
  7. from urllib.request import urlopen
  8. #from bs4 import BeautifulSoup
  9. import json
  10. # OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
# Input/output folders. Both are hard-coded absolute paths and both end with a
# slash because the file names are appended by plain string concatenation.
# Folder holding the source CSV files.
import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/corretti/'
# Folder receiving the generated Turtle (.ttl) files.
export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/RDF/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code=None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
# Repositories: one RDFcoords per namespace. Each holds the URI already wrapped
# in angle brackets and the prefix with its trailing colon, so both can be
# concatenated directly into Turtle text.
# Museum objects (palazzopretorio.prato.it, "le-opere/alcuni-capolavori").
museoCoords = RDFcoords('<https://palazzopretorio.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
# Authors (palazzopretorio.prato.it, "opere/autori").
autCoords = RDFcoords('<https://palazzopretorio.prato.it/it/opere/autori/>', 'aut:')
# CIDOC-CRM ontology.
cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
# Getty AAT vocabulary.
aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
# RDF syntax namespace (rdf:), used below for rdf:type.
nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
# RDF Schema namespace (rdfs:), used below for rdfs:label -- despite the
# variable name this is not schema.org.
schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
# XML Schema datatypes.
xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
# Iconclass.
iconCoords = RDFcoords('<http://iconclass.org/>', 'ico:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
# Line endings in TTL (Turtle) format. Each starts with a space so it can be
# appended directly to the text produced by triple()/doublet()/singlet().
# ';' -- the next line keeps the same subject (predicate list).
continueLine1 = ' ;\n'
# ',' -- the next line keeps the same subject and predicate (object list).
continueLine2 = ' ,\n'
# '.' -- ends the statement.
closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
  52. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  53. output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
  54. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  55. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  56. output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)
  57. output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)
  58. output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)
  59. output.write('\n')
# The main CSV and the generated TTL are both named filePrefix + fileType
# (with '.csv' / '.ttl' appended); fileType also selects the author
# authority file read by get_aut_url().
filePrefix = '00_SR20OA_'
fileType = 'Martini'
# NOTE(review): presumably an upper bound on the number of rows processed
# during testing; it is not referenced in the visible part of the script --
# TODO confirm.
max_entries = 1000000000
  63. def get_aut_url(code):
  64. aut_file = open(import_dir + 'AR20AUT_' + fileType + '.csv', newline="")
  65. reader = csv.DictReader(aut_file)
  66. for row in reader:
  67. auth = int(row['AUTH'])
  68. cod = int(code)
  69. role = ''
  70. if row['AUTQ'] != '':
  71. role = row['AUTQ']
  72. else:
  73. role = ''
  74. if auth == cod:
  75. return [row['URL'], role]
  76. def get_role(role):
  77. role_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/AAT_RUOLI.csv', newline="")
  78. reader = csv.DictReader(role_file)
  79. for row in reader:
  80. if row['Label'] == role:
  81. return row['AAT']
  82. def get_elem(mtc):
  83. mtc_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/AAT_MTC.csv', newline="")
  84. reader = csv.DictReader(mtc_file)
  85. for row in reader:
  86. if row['MTC'] == mtc:
  87. return [row['AAT'], row['Type']]
  88. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  89. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  90. reader = csv.DictReader(csv_file)
  91. writeTTLHeader(output)
  92. first = True
  93. ii = 0
  94. for row in reader:
  95. # The index ii is used to process a limited number of entries for testing purposes
  96. ii = ii + 1
  97. if row['RVEL'] == '' or row['RVEL'] == '0':
  98. sb = ''
  99. subj = ''
  100. #pp = row['OGTD'] + ' (' + row['ACC'] + ') '
  101. if row['SGTI'] != '':
  102. sb = row['SGTI']
  103. # Triplify the 'codice' -- should exist for every entry
  104. codice = ''
  105. if (row['NCTR'] != '' and row['NCTN'] != ''):
  106. codice = row['NCTR'] + row['NCTN']
  107. codiceP = ''
  108. if (row['AUTH'] != ''):
  109. codiceP = row['AUTH']
  110. place = ''
  111. if (row['PRVC'] != ''):
  112. place = row['PRVC']
  113. columnName = list(row)
  114. url = row['URL']
  115. # placeHolders
  116. datplaceHolder = museoCoords.prefix + url
  117. e1placeHolder = museoCoords.prefix + url + '_E1'
  118. e3placeHolder = museoCoords.prefix + url + 'E3'
  119. e10placeHolder = museoCoords.prefix + url + '_E10'
  120. e12placeHolder = museoCoords.prefix + url + '_E12'
  121. e13placeHolder = museoCoords.prefix + url + '_E13'
  122. e21placeHolder = museoCoords.prefix + url + '_InE21'
  123. e25placeHolder = museoCoords.prefix + url + '_E25'
  124. e34placeHolder = museoCoords.prefix + url + '_E34'
  125. e35placeHolder1 = museoCoords.prefix + url + '_E35'
  126. e42placeHolder = museoCoords.prefix + url + '_E42'
  127. e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
  128. e65placeHolder = museoCoords.prefix + url + '_InE65'
  129. e73placeHolder = museoCoords.prefix + url + '_E73'
  130. e74placeHolder = museoCoords.prefix + url + '_E74'
  131. if (codice != ''):
  132. line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
  133. output.write(line)
  134. line = triple(e42placeHolder, nsCoords.prefix + 'type',
  135. cidocCoords.prefix + 'E42_Identifier') + closeLine
  136. output.write(line)
  137. line = triple(e42placeHolder,
  138. schemaCoords.prefix + 'label',
  139. '\"0' + codice + '\"') + closeLine
  140. output.write(line)
  141. ###
  142. line = triple(e42placeHolder,
  143. cidocCoords.prefix + 'P2_has_type',
  144. '\"Codice univoco del bene (NCT)\"') + closeLine
  145. output.write(line)
  146. # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
  147. line = triple(datplaceHolder, nsCoords.prefix + 'type',
  148. cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
  149. output.write(line)
  150. # Added by AS
  151. line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + sb + '\"') + closeLine
  152. output.write(line)
  153. # End AS
  154. line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
  155. output.write(line)
  156. line = triple(e73placeHolder, nsCoords.prefix + 'type',
  157. cidocCoords.prefix + 'E73_Information_Object') + closeLine
  158. output.write(line)
  159. # AS
  160. ss = ''
  161. if row['SGTI'] != '':
  162. ss = row['SGTI']
  163. else:
  164. ss = 'senza titolo'
  165. line = triple(e73placeHolder, schemaCoords.prefix + 'label',
  166. '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
  167. output.write(line)
  168. # E73 - P2 - E55
  169. tt = ''
  170. typeLabel = ''
  171. if row['OGTD'] == 'dipinto':
  172. tt = aatCoords.prefix + "300033618"
  173. elif row['OGTD'] == 'rilievo':
  174. tt = aatCoords.prefix + "300047230"
  175. elif row['OGTD'] == 'polittico':
  176. tt = aatCoords.prefix + "300178235"
  177. elif row['OGTD'] == 'predella':
  178. tt = aatCoords.prefix + "300003745"
  179. line = triple(e73placeHolder,
  180. cidocCoords.prefix + 'P2_has_type',
  181. tt) + closeLine
  182. output.write(line)
  183. line = triple(tt, schemaCoords.prefix + 'label',
  184. '\"' + row['OGTD'] + '\"') + closeLine
  185. output.write(line)
  186. # E73 - P1 - E35
  187. if row['SGTT'] != '':
  188. line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
  189. output.write(line)
  190. line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
  191. output.write(line)
  192. line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
  193. output.write(line)
  194. # E22 - P62 - E1
  195. if row['SGTI'] != '':
  196. line = triple(datplaceHolder,
  197. cidocCoords.prefix + 'P62_depicts',
  198. e1placeHolder) + closeLine
  199. output.write(line)
  200. line = triple(e1placeHolder,
  201. nsCoords.prefix + 'type',
  202. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  203. output.write(line)
  204. line = triple(e1placeHolder,
  205. schemaCoords.prefix + 'label', '\"' +
  206. row['SGTI'] + '\"') + closeLine
  207. output.write(line)
  208. line = triple(e1placeHolder,
  209. cidocCoords.prefix + 'P2_has_type',
  210. '\"Identificazione Iconografica\"') + closeLine
  211. output.write(line)
  212. # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
  213. if row['ESC'] == 'C100005':
  214. line = triple(datplaceHolder,
  215. cidocCoords.prefix + 'P52_has_current_owner',
  216. '<https://palazzopretorio.prato.it/it/>') + closeLine
  217. output.write(line)
  218. line = triple('<https://palazzopretorio.prato.it/it/>',
  219. nsCoords.prefix + 'type',
  220. cidocCoords.prefix + 'E74_Group') + closeLine
  221. output.write(line)
  222. line = triple('<https://palazzopretorio.prato.it/it/>',
  223. schemaCoords.prefix + 'label',
  224. '\"Museo di Palazzo Pretorio\"') + closeLine
  225. output.write(line)
  226. currentLocation = ''
  227. # E22 - P54 - E53
  228. if row['LDCN'] != '':
  229. if row['LDCS'] != '':
  230. currentLocation = row['LDCS']
  231. else:
  232. currentLocation = currentLocation
  233. if row['LDCM'] != '':
  234. currentLocation = currentLocation + ', ' + row['LDCM']
  235. else:
  236. currentLocation = currentLocation
  237. if row['LDCN'] != '':
  238. currentLocation = currentLocation + ', ' + row['LDCN']
  239. else:
  240. currentLocation = currentLocation
  241. currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
  242. line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
  243. '\"' + currentLocation + '\"') + closeLine
  244. output.write(line)
  245. e12FplaceHolder = ''
  246. if row['DTSI'] != row['DTSF']:
  247. e12FplaceHolder = museoCoords.prefix + url + '_E12F'
  248. # Write E12 Production -- should exist for every entry?
  249. # E12 P108 E22
  250. line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  251. output.write(line)
  252. line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
  253. output.write(line)
  254. # E73 P108i E12
  255. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
  256. output.write(line)
  257. if e12FplaceHolder != '':
  258. line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  259. output.write(line)
  260. line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
  261. cidocCoords.prefix + 'E12_Production') + closeLine
  262. output.write(line)
  263. line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
  264. '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
  265. output.write(line)
  266. # E73 P108i E12
  267. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
  268. output.write(line)
  269. # E12 P140i E13
  270. line = triple(e12FplaceHolder,
  271. cidocCoords.prefix + 'P140i_was_attributed_by',
  272. e13placeHolder) + closeLine
  273. output.write(line)
  274. # E12 P2
  275. line = triple(e12FplaceHolder,
  276. cidocCoords.prefix + 'P2_has_type',
  277. '\"Fine\"^^xsd:string') + closeLine
  278. output.write(line)
  279. line = triple(e12placeHolder,
  280. cidocCoords.prefix + 'P2_has_type',
  281. '\"Inizio\"^^xsd:string') + closeLine
  282. output.write(line)
  283. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  284. '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
  285. output.write(line)
  286. else:
  287. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  288. '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
  289. output.write(line)
  290. tcl = []
  291. for name in columnName:
  292. if 'TCL' in name:
  293. tcl.append(name)
  294. # E12 - P7 - E53
  295. for el in tcl:
  296. i = 0
  297. if row[el] == 'luogo di produzione':
  298. pl = ''
  299. if i == 0:
  300. pl = row['PRVC']
  301. else:
  302. pl = row['PRVC' + i]
  303. line = triple(e12placeHolder,
  304. cidocCoords.prefix + 'P7_took_place_at',
  305. museoCoords.prefix + pl) + closeLine
  306. output.write(line)
  307. if e12FplaceHolder != '':
  308. line = triple(e12FplaceHolder,
  309. cidocCoords.prefix + 'P7_took_place_at',
  310. museoCoords.prefix + pl) + closeLine
  311. output.write(line)
  312. i = i + 1
  313. # E12 - PC14 - E21
  314. if row['AUTH'] != '':
  315. aut = get_aut_url(row['AUTH'])
  316. aut_url = aut[0]
  317. aut_role = aut[1]
  318. ll = row['AUTN'] + '_' + aut_role
  319. lab = ll.replace(' ', '')
  320. label = lab.replace(',', '')
  321. AuthorPlaceholder = autCoords.prefix + aut_url
  322. line = triple(museoCoords.prefix + '_' + label,
  323. cidocCoords.prefix + 'P01_has_domain',
  324. e12placeHolder) + closeLine
  325. output.write(line)
  326. if e12FplaceHolder != '':
  327. line = triple(museoCoords.prefix + '_' + label,
  328. cidocCoords.prefix + 'P01_has_domain',
  329. e12FplaceHolder) + closeLine
  330. output.write(line)
  331. if 'AUTH1' in columnName:
  332. if row['AUTH1'] != '':
  333. aut = get_aut_url(row['AUTH1'])
  334. aut_url = aut[0]
  335. aut_role = aut[1]
  336. ll = row['AUTN1'] + '_' + aut_role
  337. lab = ll.replace(' ', '')
  338. label = lab.replace(',', '')
  339. AuthorPlaceholder = autCoords.prefix + aut_url
  340. line = triple(museoCoords.prefix + '_' + label,
  341. cidocCoords.prefix + 'P01_has_domain',
  342. e12placeHolder) + closeLine
  343. output.write(line)
  344. if e12FplaceHolder != '':
  345. line = triple(museoCoords.prefix + '_' + label,
  346. cidocCoords.prefix + 'P01_has_domain',
  347. e12FplaceHolder) + closeLine
  348. output.write(line)
  349. # E12 - PC14 - E21
  350. if 'CMMN' in columnName:
  351. if row['CMMN'] != '':
  352. cc = row['CMMN']
  353. cm = cc.replace(' ', '')
  354. cmmn = cm.replace(',', '')
  355. cmmPlaceholder = museoCoords.prefix + '_' + cmmn
  356. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  357. cidocCoords.prefix + 'P01_has_domain',
  358. e12placeHolder) + closeLine
  359. output.write(line)
  360. if e12FplaceHolder != '':
  361. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  362. cidocCoords.prefix + 'P01_has_domain',
  363. e12FplaceHolder) + closeLine
  364. output.write(line)
  365. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  366. nsCoords.prefix + 'type',
  367. cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
  368. output.write(line)
  369. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  370. schemaCoords.prefix + 'label',
  371. '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
  372. output.write(line)
  373. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  374. cidocCoords.prefix + 'P02_has_range',
  375. cmmPlaceholder) + closeLine
  376. output.write(line)
  377. line = triple(cmmPlaceholder,
  378. nsCoords.prefix + 'type',
  379. cidocCoords.prefix + 'E39_Actor') + closeLine
  380. output.write(line)
  381. line = triple(cmmPlaceholder,
  382. schemaCoords.prefix + 'label',
  383. '\"' + row['CMMN'] + '\"') + closeLine
  384. output.write(line)
  385. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  386. cidocCoords.prefix + 'P14.1_in_the_role_of',
  387. museoCoords.prefix + '_client') + closeLine
  388. output.write(line)
  389. line = triple(museoCoords.prefix + '_client',
  390. nsCoords.prefix + 'type',
  391. cidocCoords.prefix + 'E55_Type') + closeLine
  392. output.write(line)
  393. line = triple(museoCoords.prefix + '_client',
  394. schemaCoords.prefix + 'label',
  395. '\"Committente\"') + closeLine
  396. output.write(line)
  397. # E12 - P4 - E52
  398. if row['DTSI'] != '':
  399. line = triple(e12placeHolder,
  400. cidocCoords.prefix + 'P4_has_time-span',
  401. museoCoords.prefix + row['DTSI']) + closeLine
  402. output.write(line)
  403. line = triple(museoCoords.prefix + row['DTSI'],
  404. nsCoords.prefix + 'type',
  405. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  406. output.write(line)
  407. line = triple(museoCoords.prefix + row['DTSI'],
  408. schemaCoords.prefix + 'label',
  409. '\"' + row['DTSI'] + '\"') + closeLine
  410. output.write(line)
  411. if e12FplaceHolder != '':
  412. line = triple(e12FplaceHolder,
  413. cidocCoords.prefix + 'P4_has_time-span',
  414. museoCoords.prefix + row['DTSF']) + closeLine
  415. output.write(line)
  416. line = triple(museoCoords.prefix + row['DTSF'],
  417. nsCoords.prefix + 'type',
  418. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  419. output.write(line)
  420. line = triple(museoCoords.prefix + row['DTSF'],
  421. schemaCoords.prefix + 'label',
  422. '\"' + row['DTSF'] + '\"') + closeLine
  423. output.write(line)
  424. tcl = []
  425. for name in columnName:
  426. if 'TCL' in name:
  427. tcl.append(name)
  428. j = 0
  429. for el in tcl:
  430. if row[el] != '':
  431. j = j + 1
  432. last = str(j - 1)
  433. n = len(tcl) - 1
  434. for i in range(n):
  435. k = str(i + 1)
  436. if i + 1 == 1:
  437. w = ''
  438. else:
  439. w = i
  440. f = str(w)
  441. if row['TCL' + k] != '':
  442. pastActor = ''
  443. newActor = ''
  444. pl = ''
  445. if row['PRCD' + k] != '':
  446. newActor = ' a ' + row['PRCD' + k]
  447. if row['PRCD' + f] != '':
  448. pastActor = ' da ' + row['PRCD' + f]
  449. pl = row['PRCD' + f].replace(' ', '')
  450. newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
  451. line = triple(newe10placeHolder,
  452. cidocCoords.prefix + 'P30_transferred_custody_of',
  453. datplaceHolder) + closeLine
  454. output.write(line)
  455. line = triple(newe10placeHolder,
  456. nsCoords.prefix + 'type',
  457. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  458. output.write(line)
  459. line = triple(newe10placeHolder,
  460. schemaCoords.prefix + 'label',
  461. '\"Passaggio di ' + row['SGTI'] + pastActor +
  462. newActor + '\"') + closeLine
  463. output.write(line)
  464. if row['PRDI' + f] != '':
  465. timespan = row['PRDI' + f]
  466. tt = timespan.replace(' ', '')
  467. tp = tt.replace('.', '')
  468. ts = tp.replace('/', '')
  469. timespanPlaceholder = museoCoords.prefix + '_' + ts
  470. # E10 P4 E52
  471. line = triple(newe10placeHolder,
  472. cidocCoords.prefix + 'P4_has_time-span',
  473. timespanPlaceholder) + closeLine
  474. output.write(line)
  475. line = triple(timespanPlaceholder,
  476. nsCoords.prefix + 'type',
  477. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  478. output.write(line)
  479. line = triple(timespanPlaceholder,
  480. schemaCoords.prefix + 'label',
  481. '\"' + timespan + '\"') + closeLine
  482. output.write(line)
  483. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  484. newLoc = row['PRCD' + k].replace(' ', '')
  485. newActorPlaceholder = museoCoords.prefix + '_' + newLoc
  486. # E10 P26 E74 (moved to)
  487. if newActorPlaceholder != '':
  488. line = triple(newe10placeHolder,
  489. cidocCoords.prefix + 'P29_custody_received_by',
  490. newActorPlaceholder) + closeLine
  491. output.write(line)
  492. # E10 P27 E74
  493. pastActorLabel = row['PRCD' + f]
  494. line = triple(newe10placeHolder,
  495. cidocCoords.prefix + 'P28_custody_surrendered_by',
  496. pastActorPlaceholder) + closeLine
  497. output.write(line)
  498. line = triple(pastActorPlaceholder,
  499. nsCoords.prefix + 'type',
  500. cidocCoords.prefix + 'E39_Actor') + closeLine
  501. output.write(line)
  502. line = triple(pastActorPlaceholder,
  503. schemaCoords.prefix + 'label',
  504. '\"' + pastActorLabel + '\"') + closeLine
  505. output.write(line)
  506. line = triple(datplaceHolder,
  507. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  508. pastActorPlaceholder) + closeLine
  509. output.write(line)
  510. # E74 P74 E53
  511. pastResidenceLabel = row['PRVC' + f]
  512. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
  513. line = triple(pastActorPlaceholder,
  514. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  515. pastResidencePlaceHolder) + closeLine
  516. output.write(line)
  517. ####
  518. pastActor = ''
  519. newActor = ''
  520. pl = ''
  521. if row['LDCN'] != '':
  522. newActor = ' a ' + row['LDCN']
  523. if row['PRCD' + last] != '':
  524. pastActor = ' da ' + row['PRCD' + last]
  525. pl = row['PRCD' + last].replace(' ', '')
  526. line = triple(e10placeHolder,
  527. cidocCoords.prefix + 'P30_transferred_custody_of',
  528. datplaceHolder) + closeLine
  529. output.write(line)
  530. line = triple(e10placeHolder,
  531. nsCoords.prefix + 'type',
  532. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  533. output.write(line)
  534. line = triple(e10placeHolder,
  535. schemaCoords.prefix + 'label',
  536. '\"Passaggio di ' + row['SGTI'] + pastActor +
  537. newActor + '\"') + closeLine
  538. output.write(line)
  539. if row['PRDU' + last] != '':
  540. timespan = row['PRDU' + last]
  541. tt = timespan.replace(' ', '')
  542. ts = tt.replace('/', '')
  543. timespanPlaceholder = museoCoords.prefix + '_' + ts
  544. # E10 P4 E52
  545. line = triple(e10placeHolder,
  546. cidocCoords.prefix + 'P4_has_time-span',
  547. timespanPlaceholder) + closeLine
  548. output.write(line)
  549. line = triple(timespanPlaceholder,
  550. nsCoords.prefix + 'type',
  551. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  552. output.write(line)
  553. line = triple(timespanPlaceholder,
  554. schemaCoords.prefix + 'label',
  555. '\"' + timespan + '\"') + closeLine
  556. output.write(line)
  557. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  558. newLocPlaceholder = e74placeHolder
  559. # E10 P26 E74 (moved to)
  560. if newLocPlaceholder != '':
  561. line = triple(e10placeHolder,
  562. cidocCoords.prefix + 'P29_custody_received_by',
  563. newLocPlaceholder) + closeLine
  564. output.write(line)
  565. # E10 P27 E74
  566. pastActorLabel = row['PRCD' + last]
  567. line = triple(e10placeHolder,
  568. cidocCoords.prefix + 'P28_custody_surrendered_by',
  569. pastActorPlaceholder) + closeLine
  570. output.write(line)
  571. line = triple(pastActorPlaceholder,
  572. nsCoords.prefix + 'type',
  573. cidocCoords.prefix + 'E39_Actor') + closeLine
  574. output.write(line)
  575. line = triple(pastActorPlaceholder,
  576. schemaCoords.prefix + 'label',
  577. '\"' + pastActorLabel + '\"') + closeLine
  578. output.write(line)
  579. line = triple(datplaceHolder,
  580. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  581. pastActorPlaceholder) + closeLine
  582. output.write(line)
  583. # E74 P74 E53
  584. pastResidenceLabel = row['PRVC' + last]
  585. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
  586. if row['PRVP' + last] != '':
  587. pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
  588. if row['PRVR' + last] != '':
  589. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
  590. if row['PRVS' + last] != '':
  591. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
  592. line = triple(pastActorPlaceholder,
  593. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  594. pastResidencePlaceHolder) + closeLine
  595. output.write(line)
  596. line = triple(pastResidencePlaceHolder,
  597. nsCoords.prefix + 'type',
  598. cidocCoords.prefix + 'E53_Place') + closeLine
  599. output.write(line)
  600. # E22 P44 E3
  601. if row['STCC'] != '':
  602. line = triple(datplaceHolder,
  603. cidocCoords.prefix + 'P44_has_condition',
  604. e3placeHolder) + closeLine
  605. output.write(line)
  606. line = triple(e3placeHolder,
  607. nsCoords.prefix + 'type',
  608. cidocCoords.prefix + 'E3_Condition_State') + closeLine
  609. output.write(line)
  610. line = triple(e3placeHolder,
  611. schemaCoords.prefix + 'label',
  612. '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
  613. output.write(line)
  614. line = triple(e3placeHolder,
  615. cidocCoords.prefix + 'P2_has_type',
  616. '\"' + row['STCC'] + '\"') + closeLine
  617. output.write(line)
  618. # E22 P65 E34
  619. if (row['ISRI'] != ''):
  620. line = triple(datplaceHolder,
  621. cidocCoords.prefix + 'P56_bears_feature',
  622. e25placeHolder) + closeLine
  623. output.write(line)
  624. line = triple(e25placeHolder,
  625. nsCoords.prefix + 'type',
  626. cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
  627. output.write(line)
  628. line = triple(e25placeHolder,
  629. schemaCoords.prefix + 'label',
  630. '\"Iscrizione su ' + subj + '\"') + closeLine
  631. output.write(line)
  632. line = triple(e25placeHolder,
  633. cidocCoords.prefix + 'P128_carries',
  634. e34placeHolder) + closeLine
  635. output.write(line)
  636. line = triple(e34placeHolder,
  637. nsCoords.prefix + 'type',
  638. cidocCoords.prefix + 'E34_Inscription') + closeLine
  639. output.write(line)
  640. line = triple(e34placeHolder,
  641. schemaCoords.prefix + 'label',
  642. '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
  643. output.write(line)
  644. pl = row['ISRI'].replace(' ', '-')
  645. pla = pl.replace('.', '')
  646. line = triple(e34placeHolder,
  647. cidocCoords.prefix + 'P3_has_note',
  648. '\"' + row['ISRI'] + '\"') + closeLine
  649. output.write(line)
  650. # E34 P2 E55
  651. if (row['ISRT'] != ''):
  652. rr = row['ISRT'].replace(' ', '')
  653. line = triple(e34placeHolder,
  654. cidocCoords.prefix + 'P2_has_type',
  655. '\"' + row['ISRT'] + '\"') + closeLine
  656. output.write(line)
  657. # E34 P72 E56
  658. if (row['ISRL'] != ''):
  659. line = triple(e34placeHolder,
  660. cidocCoords.prefix + 'P72_has_language',
  661. museoCoords.prefix + '_' + row['ISRL']) + closeLine
  662. output.write(line)
  663. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  664. nsCoords.prefix + 'type',
  665. cidocCoords.prefix + 'E56_Language') + closeLine
  666. output.write(line)
  667. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  668. schemaCoords.prefix + 'label',
  669. '\"' + row['ISRL'] + '\"') + closeLine
  670. output.write(line)
  671. if (row['ISRA'] != '') or (row['ISRS'] != ''):
  672. line = triple(e34placeHolder,
  673. cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
  674. e65placeHolder) + closeLine
  675. output.write(line)
  676. line = triple(e65placeHolder,
  677. nsCoords.prefix + 'type',
  678. cidocCoords.prefix + 'E65_Creation') + closeLine
  679. output.write(line)
  680. line = triple(e65placeHolder,
  681. schemaCoords.prefix + 'label',
  682. '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
  683. output.write(line)
  684. if row['ISRA'] != '':
  685. line = triple(e65placeHolder,
  686. cidocCoords.prefix + 'P14_carried_out_by',
  687. e21placeHolder) + closeLine
  688. output.write(line)
  689. line = triple(e21placeHolder,
  690. nsCoords.prefix + 'type',
  691. cidocCoords.prefix + 'E21_Person') + closeLine
  692. output.write(line)
  693. line = triple(e21placeHolder,
  694. schemaCoords.prefix + 'label',
  695. '\"' + row['ISRA'] + '\"') + closeLine
  696. output.write(line)
  697. if row['ISRS']:
  698. ss = row['ISRS'].replace(' ', '')
  699. tecPlaceholder = museoCoords.prefix + url + '_' + ss
  700. line = triple(e65placeHolder,
  701. cidocCoords.prefix + 'P32_used_general_technique',
  702. tecPlaceholder) + closeLine
  703. output.write(line)
  704. line = triple(tecPlaceholder,
  705. nsCoords.prefix + 'type',
  706. cidocCoords.prefix + 'E55_Type') + closeLine
  707. output.write(line)
  708. line = triple(tecPlaceholder,
  709. schemaCoords.prefix + 'label',
  710. '\"' + row['ISRS'] + '\"') + closeLine
  711. output.write(line)
  # Non-empty ISRP text is attached to e25placeHolder as a P3_has_note
  # literal typed xsd:string.
  isrp_text = row['ISRP']
  if isrp_text != '':
      note_literal = '\"' + isrp_text + '\"^^xsd:string'
      line = triple(e25placeHolder,
                    cidocCoords.prefix + 'P3_has_note',
                    note_literal) + closeLine
      output.write(line)
  # Measurements: height (MISA) and width (MISL), with the unit text in MISU.
  # For every non-empty dimension, datplaceHolder gets a P43_has_dimension
  # link to an E54_Dimension node that carries a label, a value (P90), an AAT
  # type (P2) and, when MISU is set, a measurement unit (P91).
  #
  # NOTE(review): `unit`, `valueA` and `valueL` are not read anywhere in the
  # visible part of the file; they are still assigned (same values as before)
  # in case later code relies on them -- confirm and delete if unused.
  unit = row['MISU']
  valueA = row['MISA'].replace(',', 'v')
  valueL = row['MISL'].replace(',', 'v')

  type_predicate = nsCoords.prefix + 'type'
  label_predicate = schemaCoords.prefix + 'label'
  # The unit node is always this AAT concept; its label is the raw MISU text.
  unit_node = aatCoords.prefix + '300379098'

  # (CSV column, node suffix, label caption, AAT concept id, AAT label)
  dimension_specs = (
      ('MISA', '_Altezza', 'Altezza', '300055644', 'altezza'),
      ('MISL', '_Larghezza', 'Larghezza', '300055647', 'larghezza'),
  )
  for column, suffix, caption, aat_id, aat_label in dimension_specs:
      raw_value = row[column]
      if raw_value == '':
          continue

      dimension_node = museoCoords.prefix + url + suffix
      dimension_type = aatCoords.prefix + aat_id

      # xsd:integer has no fractional part, so a value written with a decimal
      # comma or point (e.g. "12,5") is not a valid integer literal. Such
      # values are normalised to a dot and typed xsd:decimal; every other
      # value is emitted exactly as before.
      normalised = raw_value.strip().replace(',', '.')
      whole, separator, fraction = normalised.partition('.')
      if separator and whole.lstrip('+-').isdigit() and fraction.isdigit():
          value_literal = '\"' + normalised + '\"^^xsd:decimal'
      else:
          value_literal = '\"' + raw_value + '\"^^xsd:integer'

      dimension_triples = [
          # E22 P43 E54
          (datplaceHolder,
           cidocCoords.prefix + 'P43_has_dimension',
           dimension_node),
          (dimension_node,
           type_predicate,
           cidocCoords.prefix + 'E54_Dimension'),
          (dimension_node,
           label_predicate,
           '\"' + caption + ': ' + raw_value + row['MISU'] + '\"'),
          # E54 P90 E60
          (dimension_node,
           cidocCoords.prefix + 'P90_has_value',
           value_literal),
          # E54 P2 E55
          (dimension_node,
           cidocCoords.prefix + 'P2_has_type',
           dimension_type),
          (dimension_type,
           label_predicate,
           '\"' + aat_label + '\"'),
      ]
      # E54 P91 E58
      if row['MISU'] != '':
          dimension_triples += [
              (dimension_node,
               cidocCoords.prefix + 'P91_has_unit',
               unit_node),
              (unit_node,
               type_predicate,
               cidocCoords.prefix + 'E58_Measurement_Unit'),
              (unit_node,
               label_predicate,
               '\"' + row['MISU'] + '\"'),
          ]

      for subject, predicate, obj in dimension_triples:
          line = triple(subject, predicate, obj) + closeLine
          output.write(line)
  # Materials and techniques (MTC): a '/'-separated list of terms. Each term
  # is left-stripped and looked up with get_elem(); element 0 of the result
  # is appended to the AAT prefix and element 1 selects the branch
  # ('MTC/M' -> material, anything else -> technique).
  if row['MTC'] != '':
      type_predicate = nsCoords.prefix + 'type'
      label_predicate = schemaCoords.prefix + 'label'
      technique_predicate = cidocCoords.prefix + 'P32_used_general_technique'
      # split() yields a one-element list when the value contains no '/'.
      for raw_term in row['MTC'].split('/'):
          term = raw_term.lstrip()
          elem = get_elem(term)
          concept = aatCoords.prefix + elem[0]
          if elem[1] == 'MTC/M':
              statements = [
                  (datplaceHolder,
                   cidocCoords.prefix + 'P45_consists_of',
                   concept),
                  (concept,
                   type_predicate,
                   cidocCoords.prefix + 'E57_Material'),
              ]
          else:
              # E12 Production - P32 used technique - E55 Type
              statements = [(e12placeHolder, technique_predicate, concept)]
              if e12FplaceHolder != '':
                  statements.append(
                      (e12FplaceHolder, technique_predicate, concept))
              # NOTE(review): the source listing has lost its indentation;
              # the type triple is taken to belong to the technique branch
              # as a whole, not only to the e12FplaceHolder case -- confirm.
              statements.append(
                  (concept, type_predicate, cidocCoords.prefix + 'E55_Type'))
          # Both branches finish by labelling the concept with the term.
          statements.append((concept, label_predicate, '\"' + term + '\"'))
          for subject, predicate, obj in statements:
              line = triple(subject, predicate, obj) + closeLine
              output.write(line)
  # E12 P140i E13
  # When a motivation (AUTM) is recorded, e12placeHolder is linked to an
  # E13_Attribute_Assignment that carries the motivation text and assigns
  # (P141) the author node built from get_aut_url(row['AUTH']).
  motivation = row['AUTM']
  if motivation != '':
      # NOTE(review): e55placeHolder is built but not referenced in the
      # visible code; it is kept exactly as before.
      mot = motivation.replace(' ', '_')
      e55placeHolder = museoCoords.prefix + url + '_' + mot

      attribution_triples = (
          (e12placeHolder,
           cidocCoords.prefix + 'P140i_was_attributed_by',
           e13placeHolder),
          (e13placeHolder,
           nsCoords.prefix + 'type',
           cidocCoords.prefix + 'E13_Attribute_Assignment'),
          (e13placeHolder,
           schemaCoords.prefix + 'label',
           '\"Motivazione attribuzione\"'),
          (e13placeHolder,
           cidocCoords.prefix + 'P2_has_type',
           '\"' + motivation + '\"'),
      )
      for subject, predicate, obj in attribution_triples:
          line = triple(subject, predicate, obj) + closeLine
          output.write(line)

      # The author lookup happens only after the triples above are written.
      aut = get_aut_url(row['AUTH'])
      AuthorPlaceholder = autCoords.prefix + aut[0]
      line = triple(e13placeHolder,
                    cidocCoords.prefix + 'P141_assigned',
                    AuthorPlaceholder) + closeLine
      output.write(line)
  # Non-empty NSC text is attached to datplaceHolder as a P3_has_note
  # literal typed xsd:string. Straight double quotes are first turned into
  # guillemets: a quote preceded by a space becomes an opening one, every
  # remaining quote a closing one (presumably so the text cannot end the
  # quoted literal early).
  nsc_text = row['NSC']
  if nsc_text != '':
      phr = nsc_text.replace(' "', ' «').replace('"', '»')
      note_literal = '\"' + phr + '\"^^xsd:string'
      line = triple(datplaceHolder,
                    cidocCoords.prefix + 'P3_has_note',
                    note_literal) + closeLine
      output.write(line)
  # Iconclass subjects (DESI): the value has its spaces removed and is split
  # on ':' into individual notations. For each notation, datplaceHolder gets
  # a P62_depicts link to <http://iconclass.org/rdk/NOTATION>, which is typed
  # E1_CRM_Entity and labelled "Sigla Iconclass: NOTATION".
  #
  # Labels are built from the notation only; nothing is fetched from
  # iconclass.org.
  for notation in row['DESI'].replace(' ', '').split(':'):
      # An empty DESI (or a stray ':') would otherwise produce triples about
      # the bare <http://iconclass.org/rdk/> address, so skip empty pieces.
      if not notation:
          continue
      # Percent-encoded parentheses are written back as literal ones.
      notation = notation.replace('%28', '(').replace('%29', ')')
      iconclass_node = '<http://iconclass.org/rdk/' + notation + '>'
      iconclass_triples = (
          (datplaceHolder,
           cidocCoords.prefix + 'P62_depicts',
           iconclass_node),
          (iconclass_node,
           nsCoords.prefix + 'type',
           cidocCoords.prefix + 'E1_CRM_Entity'),
          (iconclass_node,
           schemaCoords.prefix + 'label',
           '\"Sigla Iconclass: ' + notation + '\"'),
      )
      for subject, predicate, obj in iconclass_triples:
          line = triple(subject, predicate, obj) + closeLine
          output.write(line)
  # P2: give datplaceHolder the literal type "Opera d'Arte", then write a
  # blank line to the output.
  artwork_type = '\"Opera d\'Arte\"'
  type_predicate = cidocCoords.prefix + 'P2_has_type'
  line = triple(datplaceHolder, type_predicate, artwork_type) + closeLine
  output.write(line)
  output.write('\n')
  933. #
  934. #
  935. # Limit number of entries processed (if desired)
  936. if (ii > max_entries):
  937. break