# CSV_to_RDF_Datini.py
  1. # Utilities to read/write csv files
  2. import csv
  3. from urllib.request import urlopen
  4. from bs4 import BeautifulSoup
  5. # OPTIONAL IMPORTS
  6. # Random number generator
  7. from random import *
  8. # System & command line utilities
  9. import sys
  10. # Json for the dictionary
  11. import json
  12. import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/corretti/'
  13. export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/MPP/'
  14. def stronzo():
  15. print('casso')
  16. # Custom class to store URIs + related infos for the ontologies/repositories
  17. class RDFcoords:
  18. def __init__(self, uri, prefix, code=None):
  19. self.uri = uri
  20. self.prefix = prefix
  21. self.code = code
  22. # Repositories
  23. museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
  24. autCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/opere/autori/>', 'aut:')
  25. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  26. aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
  27. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  28. schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  29. xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
  30. iconCoords = RDFcoords('<http://iconclass.org/>', 'ico:')
  31. # Basic functions for triples / shortened triples in TTL format
  32. def triple(subject, predicate, object1):
  33. line = subject + ' ' + predicate + ' ' + object1
  34. return line
  35. def doublet(predicate, object1):
  36. line = ' ' + predicate + ' ' + object1
  37. return line
  38. def singlet(object1):
  39. line = ' ' + object1
  40. return line
  41. # Line endings in TTL format
  42. continueLine1 = ' ;\n'
  43. continueLine2 = ' ,\n'
  44. closeLine = ' .\n'
  45. def writeTTLHeader(output):
  46. output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
  47. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  48. output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
  49. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  50. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  51. output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)
  52. output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)
  53. output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)
  54. output.write('\n')
  55. filePrefix = '00_SR20OA_'
  56. fileType = 'Datini'
  57. max_entries = 1000000000
  58. """ def get_aut_url(code):
  59. aut_file = open(import_dir + 'AR20AUT_' + fileType + '.csv', newline="")
  60. reader = csv.DictReader(aut_file)
  61. for row in reader:
  62. auth = int(row['AUTH'])
  63. cod = int(code)
  64. role = ''
  65. if row['AUTQ'] != '':
  66. role = row['AUTQ']
  67. else:
  68. role = ''
  69. if auth == cod:
  70. return [row['URL'], role]
  71. def get_role(role):
  72. role_file = open('/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_RUOLI.csv', newline="")
  73. reader = csv.DictReader(role_file)
  74. for row in reader:
  75. if row['Label'] == role:
  76. return row['AAT']
  77. def get_elem(mtc):
  78. mtc_file = open('/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_MTC.csv', newline="")
  79. reader = csv.DictReader(mtc_file)
  80. for row in reader:
  81. if row['MTC'] == mtc:
  82. return [row['AAT'], row['Type']] """
  83. """ with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  84. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  85. reader = csv.DictReader(csv_file)
  86. writeTTLHeader(output)
  87. first = True
  88. ii = 0
  89. for row in reader:
  90. # The index ii is used to process a limited number of entries for testing purposes
  91. ii = ii + 1
  92. sb = ''
  93. subj = ''
  94. pp = row['OGTD'] + ' (' + row['ACC'] + ') '
  95. if row['SGTI'] != '':
  96. sb = pp + row['SGTI']
  97. if row['LDCN'] != '':
  98. subj = sb + ' in ' + row['LDCN']
  99. else:
  100. subj = sb
  101. # Triplify the 'codice' -- should exist for every entry
  102. codice = ''
  103. if (row['NCTR'] != '' and row['NCTN'] != ''):
  104. codice = row['NCTR'] + row['NCTN']
  105. codiceP = ''
  106. if (row['AUTH'] != ''):
  107. codiceP = row['AUTH']
  108. place = ''
  109. if (row['PRVC'] != ''):
  110. place = row['PRVC']
  111. columnName = list(row)
  112. url = row['URL']
  113. # placeHolders
  114. datplaceHolder = museoCoords.prefix + url
  115. e1placeHolder = museoCoords.prefix + url + '_E1'
  116. e3placeHolder = museoCoords.prefix + url + 'E3'
  117. e10placeHolder = museoCoords.prefix + url + '_E10'
  118. e12placeHolder = museoCoords.prefix + url + '_E12'
  119. e13placeHolder = museoCoords.prefix + url + '_E13'
  120. e21placeHolder = museoCoords.prefix + url + '_InE21'
  121. e25placeHolder = museoCoords.prefix + url + '_E25'
  122. e34placeHolder = museoCoords.prefix + url + '_E34'
  123. e35placeHolder1 = museoCoords.prefix + url + '_E35'
  124. e42placeHolder = museoCoords.prefix + url + '_E42'
  125. e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
  126. e65placeHolder = museoCoords.prefix + url + '_InE65'
  127. e73placeHolder = museoCoords.prefix + url + '_E73'
  128. e74placeHolder = museoCoords.prefix + url + '_E74'
  129. if (codice != ''):
  130. line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
  131. output.write(line)
  132. line = triple(e42placeHolder, nsCoords.prefix + 'type',
  133. cidocCoords.prefix + 'E42_Identifier') + closeLine
  134. output.write(line)
  135. line = triple(e42placeHolder,
  136. schemaCoords.prefix + 'label',
  137. '\"' + codice + '\"') + closeLine
  138. output.write(line)
  139. ###
  140. line = triple(e42placeHolder,
  141. cidocCoords.prefix + 'P2_has_type',
  142. '\"Codice univoco del bene (NCT)\"') + closeLine
  143. output.write(line)
  144. # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
  145. line = triple(datplaceHolder, nsCoords.prefix + 'type',
  146. cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
  147. output.write(line)
  148. # Added by AS
  149. line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
  150. output.write(line)
  151. # End AS
  152. line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
  153. output.write(line)
  154. line = triple(e73placeHolder, nsCoords.prefix + 'type',
  155. cidocCoords.prefix + 'E73_Information_Object') + closeLine
  156. output.write(line)
  157. # AS
  158. ss = ''
  159. if row['SGTI'] != '':
  160. ss = row['SGTI']
  161. else:
  162. ss = 'senza titolo'
  163. line = triple(e73placeHolder, schemaCoords.prefix + 'label',
  164. '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
  165. output.write(line)
  166. # E73 - P2 - E55
  167. tt = ''
  168. typeLabel = ''
  169. if row['OGTD'] == 'dipinto':
  170. tt = aatCoords.prefix + "300033618"
  171. elif row['OGTD'] == 'rilievo':
  172. tt = aatCoords.prefix + "300047230"
  173. elif row['OGTD'] == 'polittico':
  174. tt = aatCoords.prefix + "300178235"
  175. elif row['OGTD'] == 'predella':
  176. tt = aatCoords.prefix + "300003745"
  177. line = triple(e73placeHolder,
  178. cidocCoords.prefix + 'P2_has_type',
  179. tt) + closeLine
  180. output.write(line)
  181. line = triple(tt, schemaCoords.prefix + 'label',
  182. '\"' + row['OGTD'] + '\"') + closeLine
  183. output.write(line)
  184. # E73 - P1 - E35
  185. if row['SGTT'] != '':
  186. line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
  187. output.write(line)
  188. line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
  189. output.write(line)
  190. line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
  191. output.write(line)
  192. # E22 - P62 - E1
  193. if row['SGTI'] != '':
  194. line = triple(datplaceHolder,
  195. cidocCoords.prefix + 'P62_depicts',
  196. e1placeHolder) + closeLine
  197. output.write(line)
  198. line = triple(e1placeHolder,
  199. nsCoords.prefix + 'type',
  200. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  201. output.write(line)
  202. line = triple(e1placeHolder,
  203. schemaCoords.prefix + 'label', '\"' +
  204. row['SGTI'] + '\"') + closeLine
  205. output.write(line)
  206. line = triple(e1placeHolder,
  207. cidocCoords.prefix + 'P2_has_type',
  208. '\"Identificazione Iconografica\"') + closeLine
  209. output.write(line)
  210. # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
  211. if row['ESC'] == 'C100005':
  212. line = triple(datplaceHolder,
  213. cidocCoords.prefix + 'P52_has_current_owner',
  214. '<http://palazzopretorio.comune.prato.it/it/>') + closeLine
  215. output.write(line)
  216. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  217. nsCoords.prefix + 'type',
  218. cidocCoords.prefix + 'E74_Group') + closeLine
  219. output.write(line)
  220. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  221. schemaCoords.prefix + 'label',
  222. '\"Museo di Palazzo Pretorio\"') + closeLine
  223. output.write(line)
  224. currentLocation = ''
  225. # E22 - P54 - E53
  226. if row['LDCN'] != '':
  227. if row['LDCS'] != '':
  228. currentLocation = row['LDCS']
  229. else:
  230. currentLocation = currentLocation
  231. if row['LDCM'] != '':
  232. currentLocation = currentLocation + ', ' + row['LDCM']
  233. else:
  234. currentLocation = currentLocation
  235. if row['LDCN'] != '':
  236. currentLocation = currentLocation + ', ' + row['LDCN']
  237. else:
  238. currentLocation = currentLocation
  239. currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
  240. line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
  241. '\"' + currentLocation + '\"') + closeLine
  242. output.write(line)
  243. e12FplaceHolder = ''
  244. if row['DTSI'] != row['DTSF']:
  245. e12FplaceHolder = museoCoords.prefix + url + '_E12F'
  246. # Write E12 Production -- should exist for every entry?
  247. # E12 P108 E22
  248. line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  249. output.write(line)
  250. line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
  251. output.write(line)
  252. # E73 P108i E12
  253. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
  254. output.write(line)
  255. if e12FplaceHolder != '':
  256. line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  257. output.write(line)
  258. line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
  259. cidocCoords.prefix + 'E12_Production') + closeLine
  260. output.write(line)
  261. line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
  262. '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
  263. output.write(line)
  264. # E73 P108i E12
  265. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
  266. output.write(line)
  267. # E12 P140i E13
  268. line = triple(e12FplaceHolder,
  269. cidocCoords.prefix + 'P140i_was_attributed_by',
  270. e13placeHolder) + closeLine
  271. output.write(line)
  272. # E12 P2
  273. line = triple(e12FplaceHolder,
  274. cidocCoords.prefix + 'P2_has_type',
  275. '\"Fine\"^^xsd:string') + closeLine
  276. output.write(line)
  277. line = triple(e12placeHolder,
  278. cidocCoords.prefix + 'P2_has_type',
  279. '\"Inizio\"^^xsd:string') + closeLine
  280. output.write(line)
  281. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  282. '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
  283. output.write(line)
  284. else:
  285. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  286. '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
  287. output.write(line)
  288. tcl = []
  289. for name in columnName:
  290. if 'TCL' in name:
  291. tcl.append(name)
  292. # E12 - P7 - E53
  293. for el in tcl:
  294. i = 0
  295. if row[el] == 'luogo di produzione':
  296. pl = ''
  297. if i == 0:
  298. pl = row['PRVC']
  299. else:
  300. pl = row['PRVC' + i]
  301. line = triple(e12placeHolder,
  302. cidocCoords.prefix + 'P7_took_place_at',
  303. museoCoords.prefix + pl) + closeLine
  304. output.write(line)
  305. if e12FplaceHolder != '':
  306. line = triple(e12FplaceHolder,
  307. cidocCoords.prefix + 'P7_took_place_at',
  308. museoCoords.prefix + pl) + closeLine
  309. output.write(line)
  310. i = i + 1
  311. # E12 - PC14 - E21
  312. if row['AUTH'] != '':
  313. aut = get_aut_url(row['AUTH'])
  314. aut_url = aut[0]
  315. aut_role = aut[1]
  316. ll = row['AUTN'] + '_' + aut_role
  317. lab = ll.replace(' ', '')
  318. label = lab.replace(',', '')
  319. AuthorPlaceholder = autCoords.prefix + aut_url
  320. line = triple(museoCoords.prefix + '_' + label,
  321. cidocCoords.prefix + 'P01_has_domain',
  322. e12placeHolder) + closeLine
  323. output.write(line)
  324. if e12FplaceHolder != '':
  325. line = triple(museoCoords.prefix + '_' + label,
  326. cidocCoords.prefix + 'P01_has_domain',
  327. e12FplaceHolder) + closeLine
  328. output.write(line)
  329. if 'AUTH1' in columnName:
  330. if row['AUTH1'] != '':
  331. aut = get_aut_url(row['AUTH1'])
  332. aut_url = aut[0]
  333. aut_role = aut[1]
  334. ll = row['AUTN1'] + '_' + aut_role
  335. lab = ll.replace(' ', '')
  336. label = lab.replace(',', '')
  337. AuthorPlaceholder = autCoords.prefix + aut_url
  338. line = triple(museoCoords.prefix + '_' + label,
  339. cidocCoords.prefix + 'P01_has_domain',
  340. e12placeHolder) + closeLine
  341. output.write(line)
  342. if e12FplaceHolder != '':
  343. line = triple(museoCoords.prefix + '_' + label,
  344. cidocCoords.prefix + 'P01_has_domain',
  345. e12FplaceHolder) + closeLine
  346. output.write(line)
  347. # E12 - PC14 - E21
  348. if 'CMMN' in columnName:
  349. if row['CMMN'] != '':
  350. cc = row['CMMN']
  351. cm = cc.replace(' ', '')
  352. cmmn = cm.replace(',', '')
  353. cmmPlaceholder = museoCoords.prefix + '_' + cmmn
  354. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  355. cidocCoords.prefix + 'P01_has_domain',
  356. e12placeHolder) + closeLine
  357. output.write(line)
  358. if e12FplaceHolder != '':
  359. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  360. cidocCoords.prefix + 'P01_has_domain',
  361. e12FplaceHolder) + closeLine
  362. output.write(line)
  363. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  364. nsCoords.prefix + 'type',
  365. cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
  366. output.write(line)
  367. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  368. schemaCoords.prefix + 'label',
  369. '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
  370. output.write(line)
  371. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  372. cidocCoords.prefix + 'P02_has_range',
  373. cmmPlaceholder) + closeLine
  374. output.write(line)
  375. line = triple(cmmPlaceholder,
  376. nsCoords.prefix + 'type',
  377. cidocCoords.prefix + 'E39_Actor') + closeLine
  378. output.write(line)
  379. line = triple(cmmPlaceholder,
  380. schemaCoords.prefix + 'label',
  381. '\"' + row['CMMN'] + '\"') + closeLine
  382. output.write(line)
  383. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  384. cidocCoords.prefix + 'P14.1_in_the_role_of',
  385. museoCoords.prefix + '_client') + closeLine
  386. output.write(line)
  387. line = triple(museoCoords.prefix + '_client',
  388. nsCoords.prefix + 'type',
  389. cidocCoords.prefix + 'E55_Type') + closeLine
  390. output.write(line)
  391. line = triple(museoCoords.prefix + '_client',
  392. schemaCoords.prefix + 'label',
  393. '\"Committente\"') + closeLine
  394. output.write(line)
  395. # E12 - P4 - E52
  396. if row['DTSI'] != '':
  397. line = triple(e12placeHolder,
  398. cidocCoords.prefix + 'P4_has_time-span',
  399. museoCoords.prefix + row['DTSI']) + closeLine
  400. output.write(line)
  401. line = triple(museoCoords.prefix + row['DTSI'],
  402. nsCoords.prefix + 'type',
  403. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  404. output.write(line)
  405. line = triple(museoCoords.prefix + row['DTSI'],
  406. schemaCoords.prefix + 'label',
  407. '\"' + row['DTSI'] + '\"') + closeLine
  408. output.write(line)
  409. if e12FplaceHolder != '':
  410. line = triple(e12FplaceHolder,
  411. cidocCoords.prefix + 'P4_has_time-span',
  412. museoCoords.prefix + row['DTSF']) + closeLine
  413. output.write(line)
  414. line = triple(museoCoords.prefix + row['DTSF'],
  415. nsCoords.prefix + 'type',
  416. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  417. output.write(line)
  418. line = triple(museoCoords.prefix + row['DTSF'],
  419. schemaCoords.prefix + 'label',
  420. '\"' + row['DTSF'] + '\"') + closeLine
  421. output.write(line)
  422. tcl = []
  423. for name in columnName:
  424. if 'TCL' in name:
  425. tcl.append(name)
  426. j = 0
  427. for el in tcl:
  428. if row[el] != '':
  429. j = j + 1
  430. last = str(j - 1)
  431. n = len(tcl) - 1
  432. for i in range(n):
  433. k = str(i + 1)
  434. if i + 1 == 1:
  435. w = ''
  436. else:
  437. w = i
  438. f = str(w)
  439. if row['TCL' + k] != '':
  440. pastActor = ''
  441. newActor = ''
  442. pl = ''
  443. if row['PRCD' + k] != '':
  444. newActor = ' a ' + row['PRCD' + k]
  445. if row['PRCD' + f] != '':
  446. pastActor = ' da ' + row['PRCD' + f]
  447. pl = row['PRCD' + f].replace(' ', '')
  448. newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
  449. line = triple(newe10placeHolder,
  450. cidocCoords.prefix + 'P30_transferred_custody_of',
  451. datplaceHolder) + closeLine
  452. output.write(line)
  453. line = triple(newe10placeHolder,
  454. nsCoords.prefix + 'type',
  455. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  456. output.write(line)
  457. line = triple(newe10placeHolder,
  458. schemaCoords.prefix + 'label',
  459. '\"Passaggio di ' + row['SGTI'] + pastActor +
  460. newActor + '\"') + closeLine
  461. output.write(line)
  462. if row['PRDI' + f] != '':
  463. timespan = row['PRDI' + f]
  464. tt = timespan.replace(' ', '')
  465. tp = tt.replace('.', '')
  466. ts = tp.replace('/', '')
  467. timespanPlaceholder = museoCoords.prefix + '_' + ts
  468. # E10 P4 E52
  469. line = triple(newe10placeHolder,
  470. cidocCoords.prefix + 'P4_has_time-span',
  471. timespanPlaceholder) + closeLine
  472. output.write(line)
  473. line = triple(timespanPlaceholder,
  474. nsCoords.prefix + 'type',
  475. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  476. output.write(line)
  477. line = triple(timespanPlaceholder,
  478. schemaCoords.prefix + 'label',
  479. '\"' + timespan + '\"') + closeLine
  480. output.write(line)
  481. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  482. newLoc = row['PRCD' + k].replace(' ', '')
  483. newActorPlaceholder = museoCoords.prefix + '_' + newLoc
  484. # E10 P26 E74 (moved to)
  485. if newActorPlaceholder != '':
  486. line = triple(newe10placeHolder,
  487. cidocCoords.prefix + 'P29_custody_received_by',
  488. newActorPlaceholder) + closeLine
  489. output.write(line)
  490. # E10 P27 E74
  491. pastActorLabel = row['PRCD' + f]
  492. line = triple(newe10placeHolder,
  493. cidocCoords.prefix + 'P28_custody_surrendered_by',
  494. pastActorPlaceholder) + closeLine
  495. output.write(line)
  496. line = triple(pastActorPlaceholder,
  497. nsCoords.prefix + 'type',
  498. cidocCoords.prefix + 'E39_Actor') + closeLine
  499. output.write(line)
  500. line = triple(pastActorPlaceholder,
  501. schemaCoords.prefix + 'label',
  502. '\"' + pastActorLabel + '\"') + closeLine
  503. output.write(line)
  504. line = triple(datplaceHolder,
  505. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  506. pastActorPlaceholder) + closeLine
  507. output.write(line)
  508. # E74 P74 E53
  509. pastResidenceLabel = row['PRVC' + f]
  510. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
  511. line = triple(pastActorPlaceholder,
  512. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  513. pastResidencePlaceHolder) + closeLine
  514. output.write(line)
  515. ####
  516. pastActor = ''
  517. newActor = ''
  518. pl = ''
  519. if row['LDCN'] != '':
  520. newActor = ' a ' + row['LDCN']
  521. if row['PRCD' + last] != '':
  522. pastActor = ' da ' + row['PRCD' + last]
  523. pl = row['PRCD' + last].replace(' ', '')
  524. line = triple(e10placeHolder,
  525. cidocCoords.prefix + 'P30_transferred_custody_of',
  526. datplaceHolder) + closeLine
  527. output.write(line)
  528. line = triple(e10placeHolder,
  529. nsCoords.prefix + 'type',
  530. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  531. output.write(line)
  532. line = triple(e10placeHolder,
  533. schemaCoords.prefix + 'label',
  534. '\"Passaggio di ' + row['SGTI'] + pastActor +
  535. newActor + '\"') + closeLine
  536. output.write(line)
  537. if row['PRDU' + last] != '':
  538. timespan = row['PRDU' + last]
  539. tt = timespan.replace(' ', '')
  540. ts = tt.replace('/', '')
  541. timespanPlaceholder = museoCoords.prefix + '_' + ts
  542. # E10 P4 E52
  543. line = triple(e10placeHolder,
  544. cidocCoords.prefix + 'P4_has_time-span',
  545. timespanPlaceholder) + closeLine
  546. output.write(line)
  547. line = triple(timespanPlaceholder,
  548. nsCoords.prefix + 'type',
  549. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  550. output.write(line)
  551. line = triple(timespanPlaceholder,
  552. schemaCoords.prefix + 'label',
  553. '\"' + timespan + '\"') + closeLine
  554. output.write(line)
  555. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  556. newLocPlaceholder = e74placeHolder
  557. # E10 P26 E74 (moved to)
  558. if newLocPlaceholder != '':
  559. line = triple(e10placeHolder,
  560. cidocCoords.prefix + 'P29_custody_received_by',
  561. newLocPlaceholder) + closeLine
  562. output.write(line)
  563. # E10 P27 E74
  564. pastActorLabel = row['PRCD' + last]
  565. line = triple(e10placeHolder,
  566. cidocCoords.prefix + 'P28_custody_surrendered_by',
  567. pastActorPlaceholder) + closeLine
  568. output.write(line)
  569. line = triple(pastActorPlaceholder,
  570. nsCoords.prefix + 'type',
  571. cidocCoords.prefix + 'E39_Actor') + closeLine
  572. output.write(line)
  573. line = triple(pastActorPlaceholder,
  574. schemaCoords.prefix + 'label',
  575. '\"' + pastActorLabel + '\"') + closeLine
  576. output.write(line)
  577. line = triple(datplaceHolder,
  578. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  579. pastActorPlaceholder) + closeLine
  580. output.write(line)
  581. # E74 P74 E53
  582. pastResidenceLabel = row['PRVC' + last]
  583. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
  584. if row['PRVP' + last] != '':
  585. pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
  586. if row['PRVR' + last] != '':
  587. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
  588. if row['PRVS' + last] != '':
  589. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
  590. line = triple(pastActorPlaceholder,
  591. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  592. pastResidencePlaceHolder) + closeLine
  593. output.write(line)
  594. line = triple(pastResidencePlaceHolder,
  595. nsCoords.prefix + 'type',
  596. cidocCoords.prefix + 'E53_Place') + closeLine
  597. output.write(line)
  598. # E22 P44 E3
  599. if row['STCC'] != '':
  600. line = triple(datplaceHolder,
  601. cidocCoords.prefix + 'P44_has_condition',
  602. e3placeHolder) + closeLine
  603. output.write(line)
  604. line = triple(e3placeHolder,
  605. nsCoords.prefix + 'type',
  606. cidocCoords.prefix + 'E3_Condition_State') + closeLine
  607. output.write(line)
  608. line = triple(e3placeHolder,
  609. schemaCoords.prefix + 'label',
  610. '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
  611. output.write(line)
  612. line = triple(e3placeHolder,
  613. cidocCoords.prefix + 'P2_has_type',
  614. '\"' + row['STCC'] + '\"') + closeLine
  615. output.write(line)
  616. # E22 P65 E34
  617. if (row['ISRI'] != ''):
  618. line = triple(datplaceHolder,
  619. cidocCoords.prefix + 'P56_bears_feature',
  620. e25placeHolder) + closeLine
  621. output.write(line)
  622. line = triple(e25placeHolder,
  623. nsCoords.prefix + 'type',
  624. cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
  625. output.write(line)
  626. line = triple(e25placeHolder,
  627. schemaCoords.prefix + 'label',
  628. '\"Iscrizione su ' + subj + '\"') + closeLine
  629. output.write(line)
  630. line = triple(e25placeHolder,
  631. cidocCoords.prefix + 'P128_carries',
  632. e34placeHolder) + closeLine
  633. output.write(line)
  634. line = triple(e34placeHolder,
  635. nsCoords.prefix + 'type',
  636. cidocCoords.prefix + 'E34_Inscription') + closeLine
  637. output.write(line)
  638. line = triple(e34placeHolder,
  639. schemaCoords.prefix + 'label',
  640. '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
  641. output.write(line)
  642. pl = row['ISRI'].replace(' ', '-')
  643. pla = pl.replace('.', '')
  644. line = triple(e34placeHolder,
  645. cidocCoords.prefix + 'P3_has_note',
  646. '\"' + row['ISRI'] + '\"') + closeLine
  647. output.write(line)
  648. # E34 P2 E55
  649. if (row['ISRT'] != ''):
  650. rr = row['ISRT'].replace(' ', '')
  651. line = triple(e34placeHolder,
  652. cidocCoords.prefix + 'P2_has_type',
  653. '\"' + row['ISRT'] + '\"') + closeLine
  654. output.write(line)
  655. # E34 P72 E56
  656. if (row['ISRL'] != ''):
  657. line = triple(e34placeHolder,
  658. cidocCoords.prefix + 'P72_has_language',
  659. museoCoords.prefix + '_' + row['ISRL']) + closeLine
  660. output.write(line)
  661. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  662. nsCoords.prefix + 'type',
  663. cidocCoords.prefix + 'E56_Language') + closeLine
  664. output.write(line)
  665. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  666. schemaCoords.prefix + 'label',
  667. '\"' + row['ISRL'] + '\"') + closeLine
  668. output.write(line)
  669. if row['ISRS'] != '':
  670. line = triple(e34placeHolder,
  671. cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
  672. e65placeHolder) + closeLine
  673. output.write(line)
  674. line = triple(e65placeHolder,
  675. nsCoords.prefix + 'type',
  676. cidocCoords.prefix + 'E65_Creation') + closeLine
  677. output.write(line)
  678. line = triple(e65placeHolder,
  679. schemaCoords.prefix + 'label',
  680. '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
  681. output.write(line)
  682. if row['ISRS']:
  683. ss = row['ISRS'].replace(' ', '')
  684. tecPlaceholder = museoCoords.prefix + url + '_' + ss
  685. line = triple(e65placeHolder,
  686. cidocCoords.prefix + 'P32_used_general_technique',
  687. tecPlaceholder) + closeLine
  688. output.write(line)
  689. line = triple(tecPlaceholder,
  690. nsCoords.prefix + 'type',
  691. cidocCoords.prefix + 'E55_Type') + closeLine
  692. output.write(line)
  693. line = triple(tecPlaceholder,
  694. schemaCoords.prefix + 'label',
  695. '\"' + row['ISRS'] + '\"') + closeLine
  696. output.write(line)
  697. if row['ISRP'] != '':
  698. line = triple(e25placeHolder,
  699. cidocCoords.prefix + 'P3_has_note',
  700. '\"' + row['ISRP'] + '\"^^xsd:string') + closeLine
  701. output.write(line)
  702. unit = ''
  703. if (row['MISU'] != ''):
  704. unit = row['MISU']
  705. valueA = ''
  706. valueL = ''
  707. if (row['MISA'] != ''):
  708. value = row['MISA']
  709. valueA = value.replace(',', 'v')
  710. if (row['MISL'] != ''):
  711. value = row['MISL']
  712. valueL = value.replace(',', 'v')
  713. # Altezza
  714. # E22 P43 E54
  715. if (row['MISA'] != ''):
  716. line = triple(datplaceHolder,
  717. cidocCoords.prefix + 'P43_has_dimension',
  718. museoCoords.prefix + url + '_Altezza') + closeLine
  719. output.write(line)
  720. line = triple(museoCoords.prefix + url + '_Altezza',
  721. nsCoords.prefix + 'type',
  722. cidocCoords.prefix + 'E54_Dimension') + closeLine
  723. output.write(line)
  724. line = triple(museoCoords.prefix + url + '_Altezza',
  725. schemaCoords.prefix + 'label',
  726. '\"Altezza: ' + row['MISA'] + row['MISU'] + '\"') + closeLine
  727. output.write(line)
  728. # E54 P90 E60
  729. line = triple(museoCoords.prefix + url + '_Altezza',
  730. cidocCoords.prefix + 'P90_has_value',
  731. '\"' + row['MISA'] + '\"^^xsd:integer') + closeLine
  732. output.write(line)
  733. # E54 P2 E55
  734. line = triple(museoCoords.prefix + url + '_Altezza',
  735. cidocCoords.prefix + 'P2_has_type',
  736. aatCoords.prefix + '300055644') + closeLine
  737. output.write(line)
  738. line = triple(aatCoords.prefix + '300055644',
  739. schemaCoords.prefix + 'label',
  740. '\"altezza\"') + closeLine
  741. output.write(line)
  742. # E54 P91 E58
  743. if (row['MISU'] != ''):
  744. line = triple(museoCoords.prefix + url + '_Altezza',
  745. cidocCoords.prefix + 'P91_has_unit',
  746. aatCoords.prefix + '300379098') + closeLine
  747. output.write(line)
  748. line = triple(aatCoords.prefix + '300379098',
  749. nsCoords.prefix + 'type',
  750. cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine
  751. output.write(line)
  752. line = triple(aatCoords.prefix + '300379098',
  753. schemaCoords.prefix + 'label',
  754. '\"' + row['MISU'] + '\"') + closeLine
  755. output.write(line)
  756. # Larghezza
  757. # E22 P43 E54
  758. if (row['MISL'] != ''):
  759. line = triple(datplaceHolder,
  760. cidocCoords.prefix + 'P43_has_dimension',
  761. museoCoords.prefix + url + '_Larghezza') + closeLine
  762. output.write(line)
  763. line = triple(museoCoords.prefix + url + '_Larghezza',
  764. nsCoords.prefix + 'type',
  765. cidocCoords.prefix + 'E54_Dimension') + closeLine
  766. output.write(line)
  767. line = triple(museoCoords.prefix + url + '_Larghezza',
  768. schemaCoords.prefix + 'label',
  769. '\"Larghezza: ' + row['MISL'] + row['MISU'] + '\"') + closeLine
  770. output.write(line)
  771. # E54 P90 E60
  772. line = triple(museoCoords.prefix + url + '_Larghezza',
  773. cidocCoords.prefix + 'P90_has_value',
  774. '\"' + row['MISL'] + '\"^^xsd:integer') + closeLine
  775. output.write(line)
  776. # E54 P2 E55
  777. line = triple(museoCoords.prefix + url + '_Larghezza',
  778. cidocCoords.prefix + 'P2_has_type',
  779. aatCoords.prefix + '300055647') + closeLine
  780. output.write(line)
  781. line = triple(aatCoords.prefix + '300055647',
  782. schemaCoords.prefix + 'label',
  783. '\"larghezza\"') + closeLine
  784. output.write(line)
  785. # E54 P91 E58
  786. if (row['MISU'] != ''):
  787. line = triple(museoCoords.prefix + url + '_Larghezza',
  788. cidocCoords.prefix + 'P91_has_unit',
  789. aatCoords.prefix + '300379098') + closeLine
  790. output.write(line)
  791. line = triple(aatCoords.prefix + '300379098',
  792. nsCoords.prefix + 'type',
  793. cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine
  794. output.write(line)
  795. line = triple(aatCoords.prefix + '300379098',
  796. schemaCoords.prefix + 'label',
  797. '\"' + row['MISU'] + '\"') + closeLine
  798. output.write(line)
  799. if row['MTC'] != '':
  800. mtcs = []
  801. if '/' in row['MTC']:
  802. mtcs = row['MTC'].split('/')
  803. else:
  804. mtcs.append(row['MTC'])
  805. for tc in mtcs:
  806. mtc = tc.lstrip()
  807. el = get_elem(mtc)
  808. if el[1] == 'MTC/M':
  809. line = triple(datplaceHolder,
  810. cidocCoords.prefix + 'P45_consists_of',
  811. aatCoords.prefix + el[0]) + closeLine
  812. output.write(line)
  813. line = triple(aatCoords.prefix + el[0],
  814. nsCoords.prefix + 'type',
  815. cidocCoords.prefix + 'E57_Material') + closeLine
  816. output.write(line)
  817. line = triple(aatCoords.prefix + el[0],
  818. schemaCoords.prefix + 'label',
  819. '\"' + mtc + '\"') + closeLine
  820. output.write(line)
  821. else: #E12 Production - P32 used technique - E55 Type
  822. line = triple(e12placeHolder,
  823. cidocCoords.prefix + 'P32_used_general_technique',
  824. aatCoords.prefix + el[0]) + closeLine
  825. output.write(line)
  826. if e12FplaceHolder != '':
  827. line = triple(e12FplaceHolder,
  828. cidocCoords.prefix + 'P32_used_general_technique',
  829. aatCoords.prefix + el[0]) + closeLine
  830. output.write(line)
  831. line = triple(aatCoords.prefix + el[0],
  832. nsCoords.prefix + 'type',
  833. cidocCoords.prefix + 'E55_Type') + closeLine
  834. output.write(line)
  835. line = triple(aatCoords.prefix + el[0],
  836. schemaCoords.prefix + 'label',
  837. '\"' + mtc + '\"') + closeLine
  838. output.write(line)
  839. # E12 P140i E13
  840. if row['AUTM'] != '':
  841. mot = row['AUTM'].replace(' ', '_')
  842. e55placeHolder = museoCoords.prefix + url + '_' + mot
  843. line = triple(e12placeHolder,
  844. cidocCoords.prefix + 'P140i_was_attributed_by',
  845. e13placeHolder) + closeLine
  846. output.write(line)
  847. line = triple(e13placeHolder,
  848. nsCoords.prefix + 'type',
  849. cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine
  850. output.write(line)
  851. line = triple(e13placeHolder,
  852. schemaCoords.prefix + 'label',
  853. '\"Motivazione attribuzione\"') + closeLine
  854. output.write(line)
  855. line = triple(e13placeHolder,
  856. cidocCoords.prefix + 'P2_has_type',
  857. '\"' + row['AUTM'] + '\"') + closeLine
  858. output.write(line)
  859. aut = get_aut_url(row['AUTH'])
  860. aut_url = aut[0]
  861. AuthorPlaceholder = autCoords.prefix + aut_url
  862. line = triple(e13placeHolder,
  863. cidocCoords.prefix + 'P141_assigned',
  864. AuthorPlaceholder) + closeLine
  865. output.write(line)
  866. # E22 P44 E62
  867. if row['NSC'] != '':
  868. ph = row['NSC'].replace(' "', ' «')
  869. phr = ph.replace('"', '»')
  870. line = triple(datplaceHolder,
  871. cidocCoords.prefix + 'P3_has_note',
  872. '\"' + phr + '\"^^xsd:string') + closeLine
  873. output.write(line)
  874. iconclass = row['DESI']
  875. icon = iconclass.replace(' ', '')
  876. list_icon = []
  877. if ':' in icon:
  878. list_icon = icon.split(':')
  879. else:
  880. list_icon.append(icon)
  881. for ic in list_icon:
  882. url = 'http://iconclass.org/rdk/' + str(ic)
  883. html = urlopen(url).read()
  884. soup = BeautifulSoup(html, 'html.parser')
  885. # kill all script and style elements
  886. for script in soup(["script", "style"]):
  887. script.extract() # rip it out
  888. # get text
  889. text = soup.get_text()
  890. pretty = soup.prettify()
  891. ff = soup.find("div", {"id": "ic_current"})
  892. dd = ff.find("a", {"class", "ic_notation"})
  893. ss = dd.text
  894. x = ss.find(' ')
  895. icon_label = ss[x + 1:]
  896. ur = ic.replace("(", "%28")
  897. urr = ur.replace(")", "%29")
  898. line = triple(datplaceHolder,
  899. cidocCoords.prefix + 'P62_depicts',
  900. iconCoords.prefix + urr) + closeLine
  901. output.write(line)
  902. line = triple(iconCoords.prefix + urr,
  903. nsCoords.prefix + 'type',
  904. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  905. output.write(line)
  906. # P2 Opera d'arte
  907. line = triple(datplaceHolder,
  908. cidocCoords.prefix + 'P2_has_type',
  909. '\"Opera d\'Arte\"') + closeLine
  910. output.write(line)
  911. output.write('\n')
  912. #
  913. #
  914. # Limit number of entries processed (if desired)
  915. if (ii > max_entries):
  916. break """