CSV_to_RDF_Martini.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103
  1. # Utilities to read/write csv files
  2. import csv
  3. # Utilities to handle character encodings
  4. import unicodedata
  5. # Ordered Dicts
  6. from collections import OrderedDict
  7. from urllib.request import urlopen
  8. from bs4 import BeautifulSoup
  9. import json
  10. # OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
  19. import_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Martini/mod/'
  20. export_dir = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/Carica/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code=None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
  27. # Repositories
  28. museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
  29. autCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/opere/autori/>', 'aut:')
  30. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  31. aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
  32. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  33. schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  34. xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
  35. iconCoords = RDFcoords('<http://iconclass.org/>', 'ico:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
  46. # Line endings in TTL format
  47. continueLine1 = ' ;\n'
  48. continueLine2 = ' ,\n'
  49. closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
  52. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  53. output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
  54. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  55. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  56. output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)
  57. output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)
  58. output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)
  59. output.write('\n')
  60. filePrefix = 'SR20OA_'
  61. fileType = 'Martini'
  62. max_entries = 1000000000
  63. def get_aut_url(code):
  64. aut_file = open(import_dir + 'AR20AUT_' + fileType + '.csv', newline="")
  65. reader = csv.DictReader(aut_file)
  66. for row in reader:
  67. role = ''
  68. if row['AUTQ'] != '':
  69. role = row['AUTQ']
  70. else:
  71. role = ''
  72. if row['AUTH'] == code:
  73. return [row['URL'], role]
  74. def get_role(role):
  75. role_file = open('/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_RUOLI.csv', newline="")
  76. reader = csv.DictReader(role_file)
  77. for row in reader:
  78. if row['Label'] == role:
  79. return row['AAT']
  80. def get_elem(mtc):
  81. mtc_file = open('/Users/alessiaspadi/Documents/RESTORE/temp_MPP/tabelle/AAT_MTC.csv', newline="")
  82. reader = csv.DictReader(mtc_file)
  83. for row in reader:
  84. if row['MTC'] == mtc:
  85. return [row['AAT'], row['Type']]
  86. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  87. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  88. reader = csv.DictReader(csv_file)
  89. writeTTLHeader(output)
  90. first = True
  91. ii = 0
  92. for row in reader:
  93. # The index ii is used to process a limited number of entries for testing purposes
  94. ii = ii + 1
  95. if row['RVEL'] == '' or row['RVEL'] == '0':
  96. sb = ''
  97. subj = ''
  98. pp = row['OGTD'] + ' (' + row['ACC'] + ') '
  99. if row['SGTI'] != '':
  100. sb = pp + row['SGTI']
  101. if row['LDCN'] != '':
  102. subj = sb + ' in ' + row['LDCN']
  103. else:
  104. subj = sb
  105. # Triplify the 'codice' -- should exist for every entry
  106. codice = ''
  107. if (row['NCTR'] != '' and row['NCTN'] != ''):
  108. codice = row['NCTR'] + row['NCTN']
  109. codiceP = ''
  110. if (row['AUTH'] != ''):
  111. codiceP = row['AUTH']
  112. place = ''
  113. if (row['PRVC'] != ''):
  114. place = row['PRVC']
  115. columnName = list(row)
  116. url = row['URL']
  117. # placeHolders
  118. datplaceHolder = museoCoords.prefix + url
  119. e1placeHolder = museoCoords.prefix + url + '_E1'
  120. e3placeHolder = museoCoords.prefix + url + 'E3'
  121. e10placeHolder = museoCoords.prefix + url + '_E10'
  122. e12placeHolder = museoCoords.prefix + url + '_E12'
  123. e13placeHolder = museoCoords.prefix + url + '_E13'
  124. e21placeHolder = museoCoords.prefix + url + '_InE21'
  125. e25placeHolder = museoCoords.prefix + url + '_E25'
  126. e34placeHolder = museoCoords.prefix + url + '_E34'
  127. e35placeHolder1 = museoCoords.prefix + url + '_E35'
  128. e42placeHolder = museoCoords.prefix + url + '_E42'
  129. e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
  130. e65placeHolder = museoCoords.prefix + url + '_InE65'
  131. e73placeHolder = museoCoords.prefix + url + '_E73'
  132. e74placeHolder = museoCoords.prefix + url + '_E74'
  133. if (codice != ''):
  134. line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
  135. output.write(line)
  136. line = triple(e42placeHolder, nsCoords.prefix + 'type',
  137. cidocCoords.prefix + 'E42_Identifier') + closeLine
  138. output.write(line)
  139. line = triple(e42placeHolder,
  140. schemaCoords.prefix + 'label',
  141. '\"' + codice + '\"') + closeLine
  142. output.write(line)
  143. ###
  144. line = triple(e42placeHolder,
  145. cidocCoords.prefix + 'P2_has_type',
  146. '\"Codice univoco del bene (NCT)\"') + closeLine
  147. output.write(line)
  148. # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
  149. line = triple(datplaceHolder, nsCoords.prefix + 'type',
  150. cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
  151. output.write(line)
  152. # Added by AS
  153. line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
  154. output.write(line)
  155. # End AS
  156. line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
  157. output.write(line)
  158. line = triple(e73placeHolder, nsCoords.prefix + 'type',
  159. cidocCoords.prefix + 'E73_Information_Object') + closeLine
  160. output.write(line)
  161. # AS
  162. ss = ''
  163. if row['SGTI'] != '':
  164. ss = row['SGTI']
  165. else:
  166. ss = 'senza titolo'
  167. line = triple(e73placeHolder, schemaCoords.prefix + 'label',
  168. '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
  169. output.write(line)
  170. # E73 - P2 - E55
  171. tt = ''
  172. typeLabel = ''
  173. if row['OGTD'] == 'dipinto':
  174. tt = aatCoords.prefix + "300033618"
  175. elif row['OGTD'] == 'rilievo':
  176. tt = aatCoords.prefix + "300047230"
  177. elif row['OGTD'] == 'polittico':
  178. tt = aatCoords.prefix + "300178235"
  179. elif row['OGTD'] == 'predella':
  180. tt = aatCoords.prefix + "300003745"
  181. line = triple(e73placeHolder,
  182. cidocCoords.prefix + 'P2_has_type',
  183. tt) + closeLine
  184. output.write(line)
  185. line = triple(tt, schemaCoords.prefix + 'label',
  186. '\"' + row['OGTD'] + '\"') + closeLine
  187. output.write(line)
  188. # E73 - P1 - E35
  189. if row['SGTT'] != '':
  190. line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
  191. output.write(line)
  192. line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
  193. output.write(line)
  194. line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
  195. output.write(line)
  196. # E22 - P62 - E1
  197. if row['SGTI'] != '':
  198. line = triple(datplaceHolder,
  199. cidocCoords.prefix + 'P62_depicts',
  200. e1placeHolder) + closeLine
  201. output.write(line)
  202. line = triple(e1placeHolder,
  203. nsCoords.prefix + 'type',
  204. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  205. output.write(line)
  206. line = triple(e1placeHolder,
  207. schemaCoords.prefix + 'label', '\"' +
  208. row['SGTI'] + '\"') + closeLine
  209. output.write(line)
  210. line = triple(e1placeHolder,
  211. cidocCoords.prefix + 'P2_has_type',
  212. '\"Identificazione Iconografica\"') + closeLine
  213. output.write(line)
  214. # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
  215. if row['ESC'] == 'C100005':
  216. line = triple(datplaceHolder,
  217. cidocCoords.prefix + 'P52_has_current_owner',
  218. '<http://palazzopretorio.comune.prato.it/it/>') + closeLine
  219. output.write(line)
  220. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  221. nsCoords.prefix + 'type',
  222. cidocCoords.prefix + 'E74_Group') + closeLine
  223. output.write(line)
  224. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  225. schemaCoords.prefix + 'label',
  226. '\"Museo di Palazzo Pretorio\"') + closeLine
  227. output.write(line)
  228. currentLocation = ''
  229. # E22 - P54 - E53
  230. if row['LDCN'] != '':
  231. if row['LDCS'] != '':
  232. currentLocation = row['LDCS']
  233. else:
  234. currentLocation = currentLocation
  235. if row['LDCM'] != '':
  236. currentLocation = currentLocation + ', ' + row['LDCM']
  237. else:
  238. currentLocation = currentLocation
  239. if row['LDCN'] != '':
  240. currentLocation = currentLocation + ', ' + row['LDCN']
  241. else:
  242. currentLocation = currentLocation
  243. currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
  244. line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
  245. '\"' + currentLocation + '\"') + closeLine
  246. output.write(line)
  247. e12FplaceHolder = ''
  248. if row['DTSI'] != row['DTSF']:
  249. e12FplaceHolder = museoCoords.prefix + url + '_E12F'
  250. # Write E12 Production -- should exist for every entry?
  251. # E12 P108 E22
  252. line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  253. output.write(line)
  254. line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
  255. output.write(line)
  256. # E73 P108i E12
  257. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
  258. output.write(line)
  259. if e12FplaceHolder != '':
  260. line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  261. output.write(line)
  262. line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
  263. cidocCoords.prefix + 'E12_Production') + closeLine
  264. output.write(line)
  265. line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
  266. '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
  267. output.write(line)
  268. # E73 P108i E12
  269. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
  270. output.write(line)
  271. # E12 P140i E13
  272. line = triple(e12FplaceHolder,
  273. cidocCoords.prefix + 'P140i_was_attributed_by',
  274. e13placeHolder) + closeLine
  275. output.write(line)
  276. # E12 P2
  277. line = triple(e12FplaceHolder,
  278. cidocCoords.prefix + 'P2_has_type',
  279. '\"Fine\"^^xsd:string') + closeLine
  280. output.write(line)
  281. line = triple(e12placeHolder,
  282. cidocCoords.prefix + 'P2_has_type',
  283. '\"Inizio\"^^xsd:string') + closeLine
  284. output.write(line)
  285. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  286. '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
  287. output.write(line)
  288. else:
  289. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  290. '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
  291. output.write(line)
  292. tcl = []
  293. for name in columnName:
  294. if 'TCL' in name:
  295. tcl.append(name)
  296. # E12 - P7 - E53
  297. for el in tcl:
  298. i = 0
  299. if row[el] == 'luogo di produzione':
  300. pl = ''
  301. if i == 0:
  302. pl = row['PRVC']
  303. else:
  304. pl = row['PRVC' + i]
  305. line = triple(e12placeHolder,
  306. cidocCoords.prefix + 'P7_took_place_at',
  307. museoCoords.prefix + pl) + closeLine
  308. output.write(line)
  309. if e12FplaceHolder != '':
  310. line = triple(e12FplaceHolder,
  311. cidocCoords.prefix + 'P7_took_place_at',
  312. museoCoords.prefix + pl) + closeLine
  313. output.write(line)
  314. i = i + 1
  315. # E12 - PC14 - E21
  316. if row['AUTH'] != '':
  317. aut = get_aut_url(row['AUTH'])
  318. aut_url = aut[0]
  319. aut_role = aut[1]
  320. ll = row['AUTN'] + '_' + aut_role
  321. lab = ll.replace(' ', '')
  322. label = lab.replace(',', '')
  323. AuthorPlaceholder = autCoords.prefix + aut_url
  324. line = triple(museoCoords.prefix + '_' + label,
  325. cidocCoords.prefix + 'P01_has_domain',
  326. e12placeHolder) + closeLine
  327. output.write(line)
  328. if e12FplaceHolder != '':
  329. line = triple(museoCoords.prefix + '_' + label,
  330. cidocCoords.prefix + 'P01_has_domain',
  331. e12FplaceHolder) + closeLine
  332. output.write(line)
  333. if 'AUTH1' in columnName:
  334. if row['AUTH1'] != '':
  335. aut = get_aut_url(row['AUTH1'])
  336. aut_url = aut[0]
  337. aut_role = aut[1]
  338. ll = row['AUTN1'] + '_' + aut_role
  339. lab = ll.replace(' ', '')
  340. label = lab.replace(',', '')
  341. AuthorPlaceholder = autCoords.prefix + aut_url
  342. line = triple(museoCoords.prefix + '_' + label,
  343. cidocCoords.prefix + 'P01_has_domain',
  344. e12placeHolder) + closeLine
  345. output.write(line)
  346. if e12FplaceHolder != '':
  347. line = triple(museoCoords.prefix + '_' + label,
  348. cidocCoords.prefix + 'P01_has_domain',
  349. e12FplaceHolder) + closeLine
  350. output.write(line)
  351. # E12 - PC14 - E21
  352. if 'CMMN' in columnName:
  353. if row['CMMN'] != '':
  354. cc = row['CMMN']
  355. cm = cc.replace(' ', '')
  356. cmmn = cm.replace(',', '')
  357. cmmPlaceholder = museoCoords.prefix + '_' + cmmn
  358. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  359. cidocCoords.prefix + 'P01_has_domain',
  360. e12placeHolder) + closeLine
  361. output.write(line)
  362. if e12FplaceHolder != '':
  363. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  364. cidocCoords.prefix + 'P01_has_domain',
  365. e12FplaceHolder) + closeLine
  366. output.write(line)
  367. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  368. nsCoords.prefix + 'type',
  369. cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
  370. output.write(line)
  371. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  372. schemaCoords.prefix + 'label',
  373. '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
  374. output.write(line)
  375. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  376. cidocCoords.prefix + 'P02_has_range',
  377. cmmPlaceholder) + closeLine
  378. output.write(line)
  379. line = triple(cmmPlaceholder,
  380. nsCoords.prefix + 'type',
  381. cidocCoords.prefix + 'E39_Actor') + closeLine
  382. output.write(line)
  383. line = triple(cmmPlaceholder,
  384. schemaCoords.prefix + 'label',
  385. '\"' + row['CMMN'] + '\"') + closeLine
  386. output.write(line)
  387. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  388. cidocCoords.prefix + 'P14.1_in_the_role_of',
  389. museoCoords.prefix + '_client') + closeLine
  390. output.write(line)
  391. line = triple(museoCoords.prefix + '_client',
  392. nsCoords.prefix + 'type',
  393. cidocCoords.prefix + 'E55_Type') + closeLine
  394. output.write(line)
  395. line = triple(museoCoords.prefix + '_client',
  396. schemaCoords.prefix + 'label',
  397. '\"Committente\"') + closeLine
  398. output.write(line)
  399. # E12 - P4 - E52
  400. if row['DTSI'] != '':
  401. line = triple(e12placeHolder,
  402. cidocCoords.prefix + 'P4_has_time-span',
  403. museoCoords.prefix + row['DTSI']) + closeLine
  404. output.write(line)
  405. line = triple(museoCoords.prefix + row['DTSI'],
  406. nsCoords.prefix + 'type',
  407. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  408. output.write(line)
  409. line = triple(museoCoords.prefix + row['DTSI'],
  410. schemaCoords.prefix + 'label',
  411. '\"' + row['DTSI'] + '\"') + closeLine
  412. output.write(line)
  413. if e12FplaceHolder != '':
  414. line = triple(e12FplaceHolder,
  415. cidocCoords.prefix + 'P4_has_time-span',
  416. museoCoords.prefix + row['DTSF']) + closeLine
  417. output.write(line)
  418. line = triple(museoCoords.prefix + row['DTSF'],
  419. nsCoords.prefix + 'type',
  420. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  421. output.write(line)
  422. line = triple(museoCoords.prefix + row['DTSF'],
  423. schemaCoords.prefix + 'label',
  424. '\"' + row['DTSF'] + '\"') + closeLine
  425. output.write(line)
  426. tcl = []
  427. for name in columnName:
  428. if 'TCL' in name:
  429. tcl.append(name)
  430. j = 0
  431. for el in tcl:
  432. if row[el] != '':
  433. j = j + 1
  434. last = str(j - 1)
  435. n = len(tcl) - 1
  436. for i in range(n):
  437. k = str(i + 1)
  438. if i + 1 == 1:
  439. w = ''
  440. else:
  441. w = i
  442. f = str(w)
  443. if row['TCL' + k] != '':
  444. pastActor = ''
  445. newActor = ''
  446. pl = ''
  447. if row['PRCD' + k] != '':
  448. newActor = ' a ' + row['PRCD' + k]
  449. if row['PRCD' + f] != '':
  450. pastActor = ' da ' + row['PRCD' + f]
  451. pl = row['PRCD' + f].replace(' ', '')
  452. newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
  453. line = triple(newe10placeHolder,
  454. cidocCoords.prefix + 'P30_transferred_custody_of',
  455. datplaceHolder) + closeLine
  456. output.write(line)
  457. line = triple(newe10placeHolder,
  458. nsCoords.prefix + 'type',
  459. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  460. output.write(line)
  461. line = triple(newe10placeHolder,
  462. schemaCoords.prefix + 'label',
  463. '\"Passaggio di ' + row['SGTI'] + pastActor +
  464. newActor + '\"') + closeLine
  465. output.write(line)
  466. if row['PRDI' + f] != '':
  467. timespan = row['PRDI' + f]
  468. tt = timespan.replace(' ', '')
  469. tp = tt.replace('.', '')
  470. ts = tp.replace('/', '')
  471. timespanPlaceholder = museoCoords.prefix + '_' + ts
  472. # E10 P4 E52
  473. line = triple(newe10placeHolder,
  474. cidocCoords.prefix + 'P4_has_time-span',
  475. timespanPlaceholder) + closeLine
  476. output.write(line)
  477. line = triple(timespanPlaceholder,
  478. nsCoords.prefix + 'type',
  479. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  480. output.write(line)
  481. line = triple(timespanPlaceholder,
  482. schemaCoords.prefix + 'label',
  483. '\"' + timespan + '\"') + closeLine
  484. output.write(line)
  485. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  486. newLoc = row['PRCD' + k].replace(' ', '')
  487. newActorPlaceholder = museoCoords.prefix + '_' + newLoc
  488. # E10 P26 E74 (moved to)
  489. if newActorPlaceholder != '':
  490. line = triple(newe10placeHolder,
  491. cidocCoords.prefix + 'P29_custody_received_by',
  492. newActorPlaceholder) + closeLine
  493. output.write(line)
  494. # E10 P27 E74
  495. pastActorLabel = row['PRCD' + f]
  496. line = triple(newe10placeHolder,
  497. cidocCoords.prefix + 'P28_custody_surrendered_by',
  498. pastActorPlaceholder) + closeLine
  499. output.write(line)
  500. line = triple(pastActorPlaceholder,
  501. nsCoords.prefix + 'type',
  502. cidocCoords.prefix + 'E39_Actor') + closeLine
  503. output.write(line)
  504. line = triple(pastActorPlaceholder,
  505. schemaCoords.prefix + 'label',
  506. '\"' + pastActorLabel + '\"') + closeLine
  507. output.write(line)
  508. line = triple(datplaceHolder,
  509. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  510. pastActorPlaceholder) + closeLine
  511. output.write(line)
  512. # E74 P74 E53
  513. pastResidenceLabel = row['PRVC' + f]
  514. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
  515. line = triple(pastActorPlaceholder,
  516. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  517. pastResidencePlaceHolder) + closeLine
  518. output.write(line)
  519. ####
  520. pastActor = ''
  521. newActor = ''
  522. pl = ''
  523. if row['LDCN'] != '':
  524. newActor = ' a ' + row['LDCN']
  525. if row['PRCD' + last] != '':
  526. pastActor = ' da ' + row['PRCD' + last]
  527. pl = row['PRCD' + last].replace(' ', '')
  528. line = triple(e10placeHolder,
  529. cidocCoords.prefix + 'P30_transferred_custody_of',
  530. datplaceHolder) + closeLine
  531. output.write(line)
  532. line = triple(e10placeHolder,
  533. nsCoords.prefix + 'type',
  534. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  535. output.write(line)
  536. line = triple(e10placeHolder,
  537. schemaCoords.prefix + 'label',
  538. '\"Passaggio di ' + row['SGTI'] + pastActor +
  539. newActor + '\"') + closeLine
  540. output.write(line)
  541. if row['PRDU' + last] != '':
  542. timespan = row['PRDU' + last]
  543. tt = timespan.replace(' ', '')
  544. ts = tt.replace('/', '')
  545. timespanPlaceholder = museoCoords.prefix + '_' + ts
  546. # E10 P4 E52
  547. line = triple(e10placeHolder,
  548. cidocCoords.prefix + 'P4_has_time-span',
  549. timespanPlaceholder) + closeLine
  550. output.write(line)
  551. line = triple(timespanPlaceholder,
  552. nsCoords.prefix + 'type',
  553. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  554. output.write(line)
  555. line = triple(timespanPlaceholder,
  556. schemaCoords.prefix + 'label',
  557. '\"' + timespan + '\"') + closeLine
  558. output.write(line)
  559. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  560. newLocPlaceholder = e74placeHolder
  561. # E10 P26 E74 (moved to)
  562. if newLocPlaceholder != '':
  563. line = triple(e10placeHolder,
  564. cidocCoords.prefix + 'P29_custody_received_by',
  565. newLocPlaceholder) + closeLine
  566. output.write(line)
  567. # E10 P27 E74
  568. pastActorLabel = row['PRCD' + last]
  569. line = triple(e10placeHolder,
  570. cidocCoords.prefix + 'P28_custody_surrendered_by',
  571. pastActorPlaceholder) + closeLine
  572. output.write(line)
  573. line = triple(pastActorPlaceholder,
  574. nsCoords.prefix + 'type',
  575. cidocCoords.prefix + 'E39_Actor') + closeLine
  576. output.write(line)
  577. line = triple(pastActorPlaceholder,
  578. schemaCoords.prefix + 'label',
  579. '\"' + pastActorLabel + '\"') + closeLine
  580. output.write(line)
  581. line = triple(datplaceHolder,
  582. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  583. pastActorPlaceholder) + closeLine
  584. output.write(line)
  585. # E74 P74 E53
  586. pastResidenceLabel = row['PRVC' + last]
  587. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
  588. if row['PRVP' + last] != '':
  589. pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
  590. if row['PRVR' + last] != '':
  591. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
  592. if row['PRVS' + last] != '':
  593. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
  594. line = triple(pastActorPlaceholder,
  595. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  596. pastResidencePlaceHolder) + closeLine
  597. output.write(line)
  598. line = triple(pastResidencePlaceHolder,
  599. nsCoords.prefix + 'type',
  600. cidocCoords.prefix + 'E53_Place') + closeLine
  601. output.write(line)
  602. # E22 P44 E3
  603. if row['STCC'] != '':
  604. line = triple(datplaceHolder,
  605. cidocCoords.prefix + 'P44_has_condition',
  606. e3placeHolder) + closeLine
  607. output.write(line)
  608. line = triple(e3placeHolder,
  609. nsCoords.prefix + 'type',
  610. cidocCoords.prefix + 'E3_Condition_State') + closeLine
  611. output.write(line)
  612. line = triple(e3placeHolder,
  613. schemaCoords.prefix + 'label',
  614. '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
  615. output.write(line)
  616. line = triple(e3placeHolder,
  617. cidocCoords.prefix + 'P2_has_type',
  618. '\"' + row['STCC'] + '\"') + closeLine
  619. output.write(line)
  620. # E22 P65 E34
  621. if (row['ISRI'] != ''):
  622. line = triple(datplaceHolder,
  623. cidocCoords.prefix + 'P56_bears_feature',
  624. e25placeHolder) + closeLine
  625. output.write(line)
  626. line = triple(e25placeHolder,
  627. nsCoords.prefix + 'type',
  628. cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
  629. output.write(line)
  630. line = triple(e25placeHolder,
  631. schemaCoords.prefix + 'label',
  632. '\"Iscrizione su ' + subj + '\"') + closeLine
  633. output.write(line)
  634. line = triple(e25placeHolder,
  635. cidocCoords.prefix + 'P128_carries',
  636. e34placeHolder) + closeLine
  637. output.write(line)
  638. line = triple(e34placeHolder,
  639. nsCoords.prefix + 'type',
  640. cidocCoords.prefix + 'E34_Inscription') + closeLine
  641. output.write(line)
  642. line = triple(e34placeHolder,
  643. schemaCoords.prefix + 'label',
  644. '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
  645. output.write(line)
  646. pl = row['ISRI'].replace(' ', '-')
  647. pla = pl.replace('.', '')
  648. line = triple(e34placeHolder,
  649. cidocCoords.prefix + 'P3_has_note',
  650. '\"' + row['ISRI'] + '\"') + closeLine
  651. output.write(line)
  652. # E34 P2 E55
  653. if (row['ISRT'] != ''):
  654. rr = row['ISRT'].replace(' ', '')
  655. line = triple(e34placeHolder,
  656. cidocCoords.prefix + 'P2_has_type',
  657. '\"' + row['ISRT'] + '\"') + closeLine
  658. output.write(line)
  659. # E34 P72 E56
  660. if (row['ISRL'] != ''):
  661. line = triple(e34placeHolder,
  662. cidocCoords.prefix + 'P72_has_language',
  663. museoCoords.prefix + '_' + row['ISRL']) + closeLine
  664. output.write(line)
  665. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  666. nsCoords.prefix + 'type',
  667. cidocCoords.prefix + 'E56_Language') + closeLine
  668. output.write(line)
  669. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  670. schemaCoords.prefix + 'label',
  671. '\"' + row['ISRL'] + '\"') + closeLine
  672. output.write(line)
  673. if (row['ISRA'] != '') or (row['ISRS'] != ''):
  674. line = triple(e34placeHolder,
  675. cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
  676. e65placeHolder) + closeLine
  677. output.write(line)
  678. line = triple(e65placeHolder,
  679. nsCoords.prefix + 'type',
  680. cidocCoords.prefix + 'E65_Creation') + closeLine
  681. output.write(line)
  682. line = triple(e65placeHolder,
  683. schemaCoords.prefix + 'label',
  684. '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
  685. output.write(line)
  686. if row['ISRA'] != '':
  687. line = triple(e65placeHolder,
  688. cidocCoords.prefix + 'P14_carried_out_by',
  689. e21placeHolder) + closeLine
  690. output.write(line)
  691. line = triple(e21placeHolder,
  692. nsCoords.prefix + 'type',
  693. cidocCoords.prefix + 'E21_Person') + closeLine
  694. output.write(line)
  695. line = triple(e21placeHolder,
  696. schemaCoords.prefix + 'label',
  697. '\"' + row['ISRA'] + '\"') + closeLine
  698. output.write(line)
  699. if row['ISRS']:
  700. ss = row['ISRS'].replace(' ', '')
  701. tecPlaceholder = museoCoords.prefix + url + '_' + ss
  702. line = triple(e65placeHolder,
  703. cidocCoords.prefix + 'P32_used_general_technique',
  704. tecPlaceholder) + closeLine
  705. output.write(line)
  706. line = triple(tecPlaceholder,
  707. nsCoords.prefix + 'type',
  708. cidocCoords.prefix + 'E55_Type') + closeLine
  709. output.write(line)
  710. line = triple(tecPlaceholder,
  711. schemaCoords.prefix + 'label',
  712. '\"' + row['ISRS'] + '\"') + closeLine
  713. output.write(line)
  # Attach the ISRP text, when present, to the E25 node as an xsd:string note.
  isrp_text = row['ISRP']
  if isrp_text != '':
      output.write(triple(e25placeHolder,
                          cidocCoords.prefix + 'P3_has_note',
                          '\"' + isrp_text + '\"^^xsd:string') + closeLine)
  # Dimensions of the object (E54): height from MISA and width from MISL,
  # both sharing the unit given in MISU. Each entry below is
  # (raw value, node suffix / label word, AAT type id, AAT type label).
  for dim_raw, dim_name, dim_aat_id, dim_aat_label in (
          (row['MISA'], 'Altezza', '300055644', 'altezza'),
          (row['MISL'], 'Larghezza', '300055647', 'larghezza')):
      if dim_raw == '':
          continue
      dim_node = museoCoords.prefix + url + '_' + dim_name
      # E22 P43 E54
      output.write(triple(datplaceHolder,
                          cidocCoords.prefix + 'P43_has_dimension',
                          dim_node) + closeLine)
      output.write(triple(dim_node,
                          nsCoords.prefix + 'type',
                          cidocCoords.prefix + 'E54_Dimension') + closeLine)
      output.write(triple(dim_node,
                          schemaCoords.prefix + 'label',
                          '\"' + dim_name + ': ' + dim_raw + row['MISU'] + '\"') + closeLine)
      # E54 P90 E60
      # A value written with a decimal comma ("12,5") is not a valid
      # xsd:integer. Such values are normalised to a decimal point and typed
      # as xsd:decimal. Whole numbers, and anything that is not a plain
      # number, are emitted exactly as before.
      dim_normalized = dim_raw.strip().replace(',', '.')
      if '.' in dim_normalized and dim_normalized.replace('.', '', 1).isdigit():
          dim_literal = '\"' + dim_normalized + '\"^^xsd:decimal'
      else:
          dim_literal = '\"' + dim_raw + '\"^^xsd:integer'
      output.write(triple(dim_node,
                          cidocCoords.prefix + 'P90_has_value',
                          dim_literal) + closeLine)
      # E54 P2 E55
      output.write(triple(dim_node,
                          cidocCoords.prefix + 'P2_has_type',
                          aatCoords.prefix + dim_aat_id) + closeLine)
      output.write(triple(aatCoords.prefix + dim_aat_id,
                          schemaCoords.prefix + 'label',
                          '\"' + dim_aat_label + '\"') + closeLine)
      # E54 P91 E58 (only when a unit is recorded)
      if row['MISU'] != '':
          output.write(triple(dim_node,
                              cidocCoords.prefix + 'P91_has_unit',
                              aatCoords.prefix + '300379098') + closeLine)
          output.write(triple(aatCoords.prefix + '300379098',
                              nsCoords.prefix + 'type',
                              cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine)
          output.write(triple(aatCoords.prefix + '300379098',
                              schemaCoords.prefix + 'label',
                              '\"' + row['MISU'] + '\"') + closeLine)
  # Materials and techniques (MTC): a '/'-separated list of terms. Each term
  # is resolved with get_elem(); el[0] is used as the AAT identifier and
  # el[1] tells materials ('MTC/M') apart from techniques.
  if row['MTC'] != '':
      # split() already returns a one-element list when there is no '/'.
      for mtc_raw in row['MTC'].split('/'):
          # Strip both sides: "olio / tela" must yield "olio", not "olio ".
          mtc = mtc_raw.strip()
          if mtc == '':
              # Stray or doubled separators carry no term to look up.
              continue
          el = get_elem(mtc)
          mtc_node = aatCoords.prefix + el[0]
          if el[1] == 'MTC/M':
              # E22 P45 E57: the object consists of this material.
              output.write(triple(datplaceHolder,
                                  cidocCoords.prefix + 'P45_consists_of',
                                  mtc_node) + closeLine)
              output.write(triple(mtc_node,
                                  nsCoords.prefix + 'type',
                                  cidocCoords.prefix + 'E57_Material') + closeLine)
              output.write(triple(mtc_node,
                                  schemaCoords.prefix + 'label',
                                  '\"' + mtc + '\"') + closeLine)
          else:
              # E12 Production - P32 used technique - E55 Type
              output.write(triple(e12placeHolder,
                                  cidocCoords.prefix + 'P32_used_general_technique',
                                  mtc_node) + closeLine)
              # The second production placeholder gets the same technique
              # when it is set.
              if e12FplaceHolder != '':
                  output.write(triple(e12FplaceHolder,
                                      cidocCoords.prefix + 'P32_used_general_technique',
                                      mtc_node) + closeLine)
              output.write(triple(mtc_node,
                                  nsCoords.prefix + 'type',
                                  cidocCoords.prefix + 'E55_Type') + closeLine)
              output.write(triple(mtc_node,
                                  schemaCoords.prefix + 'label',
                                  '\"' + mtc + '\"') + closeLine)
  # E12 P140i E13: when an attribution motivation (AUTM) is recorded, link the
  # production to an E13 attribute assignment and to the author it assigns.
  attribution_reason = row['AUTM']
  if attribution_reason != '':
      # Node name derived from the motivation text (blanks become '_').
      # It is not written in this section.
      e55placeHolder = (museoCoords.prefix + url + '_'
                        + attribution_reason.replace(' ', '_'))
      output.write(triple(e12placeHolder,
                          cidocCoords.prefix + 'P140i_was_attributed_by',
                          e13placeHolder) + closeLine)
      output.write(triple(e13placeHolder,
                          nsCoords.prefix + 'type',
                          cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine)
      output.write(triple(e13placeHolder,
                          schemaCoords.prefix + 'label',
                          '\"Motivazione attribuzione\"') + closeLine)
      output.write(triple(e13placeHolder,
                          cidocCoords.prefix + 'P2_has_type',
                          '\"' + attribution_reason + '\"') + closeLine)
      # E13 P141: the first element returned by get_aut_url() names the author.
      AuthorPlaceholder = autCoords.prefix + get_aut_url(row['AUTH'])[0]
      output.write(triple(e13placeHolder,
                          cidocCoords.prefix + 'P141_assigned',
                          AuthorPlaceholder) + closeLine)
  # NSC text becomes an xsd:string note on the object. Double quotes are
  # replaced by guillemets (an opening one after a blank, a closing one
  # elsewhere), presumably so they cannot end the quoted literal early.
  if row['NSC'] != '':
      nsc_text = row['NSC'].replace(' "', ' «').replace('"', '»')
      output.write(triple(datplaceHolder,
                          cidocCoords.prefix + 'P3_has_note',
                          '\"' + nsc_text + '\"^^xsd:string') + closeLine)
  # Iconclass subjects (DESI): a ':'-separated list of notations (blanks are
  # ignored). Each notation is linked to the object with P62_depicts.
  for ic in row['DESI'].replace(' ', '').split(':'):
      if ic == '':
          # An empty DESI (or a stray ':') would otherwise request the bare
          # Iconclass base URL and emit a triple with an empty notation.
          continue
      # Use a dedicated name here: `url` is also used above to build the
      # museoCoords node names and must not be overwritten by the page URL.
      icon_page_url = 'http://iconclass.org/rdk/' + str(ic)
      icon_response = urlopen(icon_page_url)
      try:
          html = icon_response.read()
      finally:
          # Release the connection even if reading fails.
          icon_response.close()
      soup = BeautifulSoup(html, 'html.parser')
      # The label is whatever follows the first blank in the text of the
      # "ic_notation" link inside the "ic_current" div. It is not written to
      # the output in this section. A page without that structure yields an
      # empty label instead of an AttributeError.
      icon_label = ''
      current_div = soup.find("div", {"id": "ic_current"})
      if current_div is not None:
          notation_link = current_div.find("a", {"class": "ic_notation"})
          if notation_link is not None:
              notation_text = notation_link.text
              icon_label = notation_text[notation_text.find(' ') + 1:]
      # Percent-encode the parentheses so the notation can be used in a name.
      urr = ic.replace("(", "%28").replace(")", "%29")
      output.write(triple(datplaceHolder,
                          cidocCoords.prefix + 'P62_depicts',
                          iconCoords.prefix + urr) + closeLine)
      output.write(triple(iconCoords.prefix + urr,
                          nsCoords.prefix + 'type',
                          cidocCoords.prefix + 'E1_CRM_Entity') + closeLine)
  923. output.write('\n')
  924. #
  925. #
  926. # Limit number of entries processed (if desired)
  927. if (ii > max_entries):
  928. break