CSV_to_RDF_Martini.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113
  1. # Utilities to read/write csv files
  2. import csv
  3. # Utilities to handle character encodings
  4. import unicodedata
  5. # Ordered Dicts
  6. from collections import OrderedDict
  7. from urllib.request import urlopen
  8. from bs4 import BeautifulSoup
  9. import json
  10. # OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
  19. import_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/corretti/'
  20. export_dir = '/Users/federicaspinelli/Google Drive/OVI-CNR/RDF/MPP/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code=None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
  27. # Repositories
  28. museoCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
  29. autCoords = RDFcoords('<http://palazzopretorio.comune.prato.it/it/opere/autori/>', 'aut:')
  30. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  31. aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
  32. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  33. schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  34. xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
  35. iconCoords = RDFcoords('<http://iconclass.org/>', 'ico:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
  46. # Line endings in TTL format
  47. continueLine1 = ' ;\n'
  48. continueLine2 = ' ,\n'
  49. closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
  52. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  53. output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
  54. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  55. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  56. output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)
  57. output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)
  58. output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)
  59. output.write('\n')
  60. filePrefix = '00_SR20OA_'
  61. fileType = 'Martini'
  62. max_entries = 1000000000
  63. def get_aut_url(code):
  64. aut_file = open(import_dir + 'AR20AUT_' + fileType + '.csv', newline="")
  65. reader = csv.DictReader(aut_file)
  66. for row in reader:
  67. auth = int(row['AUTH'])
  68. cod = int(code)
  69. role = ''
  70. if row['AUTQ'] != '':
  71. role = row['AUTQ']
  72. else:
  73. role = ''
  74. if auth == cod:
  75. return [row['URL'], role]
  76. def get_role(role):
  77. role_file = open('/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_RUOLI.csv', newline="")
  78. reader = csv.DictReader(role_file)
  79. for row in reader:
  80. if row['Label'] == role:
  81. return row['AAT']
  82. def get_elem(mtc):
  83. mtc_file = open('/Users/federicaspinelli/Google Drive/OVI-CNR/CSV/MPP/AAT_MTC.csv', newline="")
  84. reader = csv.DictReader(mtc_file)
  85. for row in reader:
  86. if row['MTC'] == mtc:
  87. return [row['AAT'], row['Type']]
  88. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  89. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  90. reader = csv.DictReader(csv_file)
  91. writeTTLHeader(output)
  92. first = True
  93. ii = 0
  94. for row in reader:
  95. # The index ii is used to process a limited number of entries for testing purposes
  96. ii = ii + 1
  97. if row['RVEL'] == '' or row['RVEL'] == '0':
  98. sb = ''
  99. subj = ''
  100. pp = row['OGTD'] + ' (' + row['ACC'] + ') '
  101. if row['SGTI'] != '':
  102. sb = pp + row['SGTI']
  103. if row['LDCN'] != '':
  104. subj = sb + ' in ' + row['LDCN']
  105. else:
  106. subj = sb
  107. # Triplify the 'codice' -- should exist for every entry
  108. codice = ''
  109. if (row['NCTR'] != '' and row['NCTN'] != ''):
  110. codice = row['NCTR'] + row['NCTN']
  111. codiceP = ''
  112. if (row['AUTH'] != ''):
  113. codiceP = row['AUTH']
  114. place = ''
  115. if (row['PRVC'] != ''):
  116. place = row['PRVC']
  117. columnName = list(row)
  118. url = row['URL']
  119. # placeHolders
  120. datplaceHolder = museoCoords.prefix + url
  121. e1placeHolder = museoCoords.prefix + url + '_E1'
  122. e3placeHolder = museoCoords.prefix + url + 'E3'
  123. e10placeHolder = museoCoords.prefix + url + '_E10'
  124. e12placeHolder = museoCoords.prefix + url + '_E12'
  125. e13placeHolder = museoCoords.prefix + url + '_E13'
  126. e21placeHolder = museoCoords.prefix + url + '_InE21'
  127. e25placeHolder = museoCoords.prefix + url + '_E25'
  128. e34placeHolder = museoCoords.prefix + url + '_E34'
  129. e35placeHolder1 = museoCoords.prefix + url + '_E35'
  130. e42placeHolder = museoCoords.prefix + url + '_E42'
  131. e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
  132. e65placeHolder = museoCoords.prefix + url + '_InE65'
  133. e73placeHolder = museoCoords.prefix + url + '_E73'
  134. e74placeHolder = museoCoords.prefix + url + '_E74'
  135. if (codice != ''):
  136. line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
  137. output.write(line)
  138. line = triple(e42placeHolder, nsCoords.prefix + 'type',
  139. cidocCoords.prefix + 'E42_Identifier') + closeLine
  140. output.write(line)
  141. line = triple(e42placeHolder,
  142. schemaCoords.prefix + 'label',
  143. '\"' + codice + '\"') + closeLine
  144. output.write(line)
  145. ###
  146. line = triple(e42placeHolder,
  147. cidocCoords.prefix + 'P2_has_type',
  148. '\"Codice univoco del bene (NCT)\"') + closeLine
  149. output.write(line)
  150. # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
  151. line = triple(datplaceHolder, nsCoords.prefix + 'type',
  152. cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
  153. output.write(line)
  154. # Added by AS
  155. line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + subj + '\"') + closeLine
  156. output.write(line)
  157. # End AS
  158. line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
  159. output.write(line)
  160. line = triple(e73placeHolder, nsCoords.prefix + 'type',
  161. cidocCoords.prefix + 'E73_Information_Object') + closeLine
  162. output.write(line)
  163. # AS
  164. ss = ''
  165. if row['SGTI'] != '':
  166. ss = row['SGTI']
  167. else:
  168. ss = 'senza titolo'
  169. line = triple(e73placeHolder, schemaCoords.prefix + 'label',
  170. '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
  171. output.write(line)
  172. # E73 - P2 - E55
  173. tt = ''
  174. typeLabel = ''
  175. if row['OGTD'] == 'dipinto':
  176. tt = aatCoords.prefix + "300033618"
  177. elif row['OGTD'] == 'rilievo':
  178. tt = aatCoords.prefix + "300047230"
  179. elif row['OGTD'] == 'polittico':
  180. tt = aatCoords.prefix + "300178235"
  181. elif row['OGTD'] == 'predella':
  182. tt = aatCoords.prefix + "300003745"
  183. line = triple(e73placeHolder,
  184. cidocCoords.prefix + 'P2_has_type',
  185. tt) + closeLine
  186. output.write(line)
  187. line = triple(tt, schemaCoords.prefix + 'label',
  188. '\"' + row['OGTD'] + '\"') + closeLine
  189. output.write(line)
  190. # E73 - P1 - E35
  191. if row['SGTT'] != '':
  192. line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
  193. output.write(line)
  194. line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
  195. output.write(line)
  196. line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
  197. output.write(line)
  198. # E22 - P62 - E1
  199. if row['SGTI'] != '':
  200. line = triple(datplaceHolder,
  201. cidocCoords.prefix + 'P62_depicts',
  202. e1placeHolder) + closeLine
  203. output.write(line)
  204. line = triple(e1placeHolder,
  205. nsCoords.prefix + 'type',
  206. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  207. output.write(line)
  208. line = triple(e1placeHolder,
  209. schemaCoords.prefix + 'label', '\"' +
  210. row['SGTI'] + '\"') + closeLine
  211. output.write(line)
  212. line = triple(e1placeHolder,
  213. cidocCoords.prefix + 'P2_has_type',
  214. '\"Identificazione Iconografica\"') + closeLine
  215. output.write(line)
  216. # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
  217. if row['ESC'] == 'C100005':
  218. line = triple(datplaceHolder,
  219. cidocCoords.prefix + 'P52_has_current_owner',
  220. '<http://palazzopretorio.comune.prato.it/it/>') + closeLine
  221. output.write(line)
  222. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  223. nsCoords.prefix + 'type',
  224. cidocCoords.prefix + 'E74_Group') + closeLine
  225. output.write(line)
  226. line = triple('<http://palazzopretorio.comune.prato.it/it/>',
  227. schemaCoords.prefix + 'label',
  228. '\"Museo di Palazzo Pretorio\"') + closeLine
  229. output.write(line)
  230. currentLocation = ''
  231. # E22 - P54 - E53
  232. if row['LDCN'] != '':
  233. if row['LDCS'] != '':
  234. currentLocation = row['LDCS']
  235. else:
  236. currentLocation = currentLocation
  237. if row['LDCM'] != '':
  238. currentLocation = currentLocation + ', ' + row['LDCM']
  239. else:
  240. currentLocation = currentLocation
  241. if row['LDCN'] != '':
  242. currentLocation = currentLocation + ', ' + row['LDCN']
  243. else:
  244. currentLocation = currentLocation
  245. currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
  246. line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
  247. '\"' + currentLocation + '\"') + closeLine
  248. output.write(line)
  249. e12FplaceHolder = ''
  250. if row['DTSI'] != row['DTSF']:
  251. e12FplaceHolder = museoCoords.prefix + url + '_E12F'
  252. # Write E12 Production -- should exist for every entry?
  253. # E12 P108 E22
  254. line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  255. output.write(line)
  256. line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
  257. output.write(line)
  258. # E73 P108i E12
  259. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
  260. output.write(line)
  261. if e12FplaceHolder != '':
  262. line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  263. output.write(line)
  264. line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
  265. cidocCoords.prefix + 'E12_Production') + closeLine
  266. output.write(line)
  267. line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
  268. '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
  269. output.write(line)
  270. # E73 P108i E12
  271. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
  272. output.write(line)
  273. # E12 P140i E13
  274. line = triple(e12FplaceHolder,
  275. cidocCoords.prefix + 'P140i_was_attributed_by',
  276. e13placeHolder) + closeLine
  277. output.write(line)
  278. # E12 P2
  279. line = triple(e12FplaceHolder,
  280. cidocCoords.prefix + 'P2_has_type',
  281. '\"Fine\"^^xsd:string') + closeLine
  282. output.write(line)
  283. line = triple(e12placeHolder,
  284. cidocCoords.prefix + 'P2_has_type',
  285. '\"Inizio\"^^xsd:string') + closeLine
  286. output.write(line)
  287. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  288. '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
  289. output.write(line)
  290. else:
  291. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  292. '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
  293. output.write(line)
  294. tcl = []
  295. for name in columnName:
  296. if 'TCL' in name:
  297. tcl.append(name)
  298. # E12 - P7 - E53
  299. for el in tcl:
  300. i = 0
  301. if row[el] == 'luogo di produzione':
  302. pl = ''
  303. if i == 0:
  304. pl = row['PRVC']
  305. else:
  306. pl = row['PRVC' + i]
  307. line = triple(e12placeHolder,
  308. cidocCoords.prefix + 'P7_took_place_at',
  309. museoCoords.prefix + pl) + closeLine
  310. output.write(line)
  311. if e12FplaceHolder != '':
  312. line = triple(e12FplaceHolder,
  313. cidocCoords.prefix + 'P7_took_place_at',
  314. museoCoords.prefix + pl) + closeLine
  315. output.write(line)
  316. i = i + 1
  317. # E12 - PC14 - E21
  318. if row['AUTH'] != '':
  319. aut = get_aut_url(row['AUTH'])
  320. aut_url = aut[0]
  321. aut_role = aut[1]
  322. ll = row['AUTN'] + '_' + aut_role
  323. lab = ll.replace(' ', '')
  324. label = lab.replace(',', '')
  325. AuthorPlaceholder = autCoords.prefix + aut_url
  326. line = triple(museoCoords.prefix + '_' + label,
  327. cidocCoords.prefix + 'P01_has_domain',
  328. e12placeHolder) + closeLine
  329. output.write(line)
  330. if e12FplaceHolder != '':
  331. line = triple(museoCoords.prefix + '_' + label,
  332. cidocCoords.prefix + 'P01_has_domain',
  333. e12FplaceHolder) + closeLine
  334. output.write(line)
  335. if 'AUTH1' in columnName:
  336. if row['AUTH1'] != '':
  337. aut = get_aut_url(row['AUTH1'])
  338. aut_url = aut[0]
  339. aut_role = aut[1]
  340. ll = row['AUTN1'] + '_' + aut_role
  341. lab = ll.replace(' ', '')
  342. label = lab.replace(',', '')
  343. AuthorPlaceholder = autCoords.prefix + aut_url
  344. line = triple(museoCoords.prefix + '_' + label,
  345. cidocCoords.prefix + 'P01_has_domain',
  346. e12placeHolder) + closeLine
  347. output.write(line)
  348. if e12FplaceHolder != '':
  349. line = triple(museoCoords.prefix + '_' + label,
  350. cidocCoords.prefix + 'P01_has_domain',
  351. e12FplaceHolder) + closeLine
  352. output.write(line)
  353. # E12 - PC14 - E21
  354. if 'CMMN' in columnName:
  355. if row['CMMN'] != '':
  356. cc = row['CMMN']
  357. cm = cc.replace(' ', '')
  358. cmmn = cm.replace(',', '')
  359. cmmPlaceholder = museoCoords.prefix + '_' + cmmn
  360. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  361. cidocCoords.prefix + 'P01_has_domain',
  362. e12placeHolder) + closeLine
  363. output.write(line)
  364. if e12FplaceHolder != '':
  365. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  366. cidocCoords.prefix + 'P01_has_domain',
  367. e12FplaceHolder) + closeLine
  368. output.write(line)
  369. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  370. nsCoords.prefix + 'type',
  371. cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
  372. output.write(line)
  373. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  374. schemaCoords.prefix + 'label',
  375. '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
  376. output.write(line)
  377. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  378. cidocCoords.prefix + 'P02_has_range',
  379. cmmPlaceholder) + closeLine
  380. output.write(line)
  381. line = triple(cmmPlaceholder,
  382. nsCoords.prefix + 'type',
  383. cidocCoords.prefix + 'E39_Actor') + closeLine
  384. output.write(line)
  385. line = triple(cmmPlaceholder,
  386. schemaCoords.prefix + 'label',
  387. '\"' + row['CMMN'] + '\"') + closeLine
  388. output.write(line)
  389. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  390. cidocCoords.prefix + 'P14.1_in_the_role_of',
  391. museoCoords.prefix + '_client') + closeLine
  392. output.write(line)
  393. line = triple(museoCoords.prefix + '_client',
  394. nsCoords.prefix + 'type',
  395. cidocCoords.prefix + 'E55_Type') + closeLine
  396. output.write(line)
  397. line = triple(museoCoords.prefix + '_client',
  398. schemaCoords.prefix + 'label',
  399. '\"Committente\"') + closeLine
  400. output.write(line)
  401. # E12 - P4 - E52
  402. if row['DTSI'] != '':
  403. line = triple(e12placeHolder,
  404. cidocCoords.prefix + 'P4_has_time-span',
  405. museoCoords.prefix + row['DTSI']) + closeLine
  406. output.write(line)
  407. line = triple(museoCoords.prefix + row['DTSI'],
  408. nsCoords.prefix + 'type',
  409. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  410. output.write(line)
  411. line = triple(museoCoords.prefix + row['DTSI'],
  412. schemaCoords.prefix + 'label',
  413. '\"' + row['DTSI'] + '\"') + closeLine
  414. output.write(line)
  415. if e12FplaceHolder != '':
  416. line = triple(e12FplaceHolder,
  417. cidocCoords.prefix + 'P4_has_time-span',
  418. museoCoords.prefix + row['DTSF']) + closeLine
  419. output.write(line)
  420. line = triple(museoCoords.prefix + row['DTSF'],
  421. nsCoords.prefix + 'type',
  422. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  423. output.write(line)
  424. line = triple(museoCoords.prefix + row['DTSF'],
  425. schemaCoords.prefix + 'label',
  426. '\"' + row['DTSF'] + '\"') + closeLine
  427. output.write(line)
  428. tcl = []
  429. for name in columnName:
  430. if 'TCL' in name:
  431. tcl.append(name)
  432. j = 0
  433. for el in tcl:
  434. if row[el] != '':
  435. j = j + 1
  436. last = str(j - 1)
  437. n = len(tcl) - 1
  438. for i in range(n):
  439. k = str(i + 1)
  440. if i + 1 == 1:
  441. w = ''
  442. else:
  443. w = i
  444. f = str(w)
  445. if row['TCL' + k] != '':
  446. pastActor = ''
  447. newActor = ''
  448. pl = ''
  449. if row['PRCD' + k] != '':
  450. newActor = ' a ' + row['PRCD' + k]
  451. if row['PRCD' + f] != '':
  452. pastActor = ' da ' + row['PRCD' + f]
  453. pl = row['PRCD' + f].replace(' ', '')
  454. newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
  455. line = triple(newe10placeHolder,
  456. cidocCoords.prefix + 'P30_transferred_custody_of',
  457. datplaceHolder) + closeLine
  458. output.write(line)
  459. line = triple(newe10placeHolder,
  460. nsCoords.prefix + 'type',
  461. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  462. output.write(line)
  463. line = triple(newe10placeHolder,
  464. schemaCoords.prefix + 'label',
  465. '\"Passaggio di ' + row['SGTI'] + pastActor +
  466. newActor + '\"') + closeLine
  467. output.write(line)
  468. if row['PRDI' + f] != '':
  469. timespan = row['PRDI' + f]
  470. tt = timespan.replace(' ', '')
  471. tp = tt.replace('.', '')
  472. ts = tp.replace('/', '')
  473. timespanPlaceholder = museoCoords.prefix + '_' + ts
  474. # E10 P4 E52
  475. line = triple(newe10placeHolder,
  476. cidocCoords.prefix + 'P4_has_time-span',
  477. timespanPlaceholder) + closeLine
  478. output.write(line)
  479. line = triple(timespanPlaceholder,
  480. nsCoords.prefix + 'type',
  481. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  482. output.write(line)
  483. line = triple(timespanPlaceholder,
  484. schemaCoords.prefix + 'label',
  485. '\"' + timespan + '\"') + closeLine
  486. output.write(line)
  487. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  488. newLoc = row['PRCD' + k].replace(' ', '')
  489. newActorPlaceholder = museoCoords.prefix + '_' + newLoc
  490. # E10 P26 E74 (moved to)
  491. if newActorPlaceholder != '':
  492. line = triple(newe10placeHolder,
  493. cidocCoords.prefix + 'P29_custody_received_by',
  494. newActorPlaceholder) + closeLine
  495. output.write(line)
  496. # E10 P27 E74
  497. pastActorLabel = row['PRCD' + f]
  498. line = triple(newe10placeHolder,
  499. cidocCoords.prefix + 'P28_custody_surrendered_by',
  500. pastActorPlaceholder) + closeLine
  501. output.write(line)
  502. line = triple(pastActorPlaceholder,
  503. nsCoords.prefix + 'type',
  504. cidocCoords.prefix + 'E39_Actor') + closeLine
  505. output.write(line)
  506. line = triple(pastActorPlaceholder,
  507. schemaCoords.prefix + 'label',
  508. '\"' + pastActorLabel + '\"') + closeLine
  509. output.write(line)
  510. line = triple(datplaceHolder,
  511. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  512. pastActorPlaceholder) + closeLine
  513. output.write(line)
  514. # E74 P74 E53
  515. pastResidenceLabel = row['PRVC' + f]
  516. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
  517. line = triple(pastActorPlaceholder,
  518. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  519. pastResidencePlaceHolder) + closeLine
  520. output.write(line)
  521. ####
  522. pastActor = ''
  523. newActor = ''
  524. pl = ''
  525. if row['LDCN'] != '':
  526. newActor = ' a ' + row['LDCN']
  527. if row['PRCD' + last] != '':
  528. pastActor = ' da ' + row['PRCD' + last]
  529. pl = row['PRCD' + last].replace(' ', '')
  530. line = triple(e10placeHolder,
  531. cidocCoords.prefix + 'P30_transferred_custody_of',
  532. datplaceHolder) + closeLine
  533. output.write(line)
  534. line = triple(e10placeHolder,
  535. nsCoords.prefix + 'type',
  536. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  537. output.write(line)
  538. line = triple(e10placeHolder,
  539. schemaCoords.prefix + 'label',
  540. '\"Passaggio di ' + row['SGTI'] + pastActor +
  541. newActor + '\"') + closeLine
  542. output.write(line)
  543. if row['PRDU' + last] != '':
  544. timespan = row['PRDU' + last]
  545. tt = timespan.replace(' ', '')
  546. ts = tt.replace('/', '')
  547. timespanPlaceholder = museoCoords.prefix + '_' + ts
  548. # E10 P4 E52
  549. line = triple(e10placeHolder,
  550. cidocCoords.prefix + 'P4_has_time-span',
  551. timespanPlaceholder) + closeLine
  552. output.write(line)
  553. line = triple(timespanPlaceholder,
  554. nsCoords.prefix + 'type',
  555. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  556. output.write(line)
  557. line = triple(timespanPlaceholder,
  558. schemaCoords.prefix + 'label',
  559. '\"' + timespan + '\"') + closeLine
  560. output.write(line)
  561. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  562. newLocPlaceholder = e74placeHolder
  563. # E10 P26 E74 (moved to)
  564. if newLocPlaceholder != '':
  565. line = triple(e10placeHolder,
  566. cidocCoords.prefix + 'P29_custody_received_by',
  567. newLocPlaceholder) + closeLine
  568. output.write(line)
  569. # E10 P27 E74
  570. pastActorLabel = row['PRCD' + last]
  571. line = triple(e10placeHolder,
  572. cidocCoords.prefix + 'P28_custody_surrendered_by',
  573. pastActorPlaceholder) + closeLine
  574. output.write(line)
  575. line = triple(pastActorPlaceholder,
  576. nsCoords.prefix + 'type',
  577. cidocCoords.prefix + 'E39_Actor') + closeLine
  578. output.write(line)
  579. line = triple(pastActorPlaceholder,
  580. schemaCoords.prefix + 'label',
  581. '\"' + pastActorLabel + '\"') + closeLine
  582. output.write(line)
  583. line = triple(datplaceHolder,
  584. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  585. pastActorPlaceholder) + closeLine
  586. output.write(line)
  587. # E74 P74 E53
  588. pastResidenceLabel = row['PRVC' + last]
  589. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
  590. if row['PRVP' + last] != '':
  591. pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
  592. if row['PRVR' + last] != '':
  593. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
  594. if row['PRVS' + last] != '':
  595. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
  596. line = triple(pastActorPlaceholder,
  597. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  598. pastResidencePlaceHolder) + closeLine
  599. output.write(line)
  600. line = triple(pastResidencePlaceHolder,
  601. nsCoords.prefix + 'type',
  602. cidocCoords.prefix + 'E53_Place') + closeLine
  603. output.write(line)
  604. # E22 P44 E3
  605. if row['STCC'] != '':
  606. line = triple(datplaceHolder,
  607. cidocCoords.prefix + 'P44_has_condition',
  608. e3placeHolder) + closeLine
  609. output.write(line)
  610. line = triple(e3placeHolder,
  611. nsCoords.prefix + 'type',
  612. cidocCoords.prefix + 'E3_Condition_State') + closeLine
  613. output.write(line)
  614. line = triple(e3placeHolder,
  615. schemaCoords.prefix + 'label',
  616. '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
  617. output.write(line)
  618. line = triple(e3placeHolder,
  619. cidocCoords.prefix + 'P2_has_type',
  620. '\"' + row['STCC'] + '\"') + closeLine
  621. output.write(line)
  622. # E22 P65 E34
  623. if (row['ISRI'] != ''):
  624. line = triple(datplaceHolder,
  625. cidocCoords.prefix + 'P56_bears_feature',
  626. e25placeHolder) + closeLine
  627. output.write(line)
  628. line = triple(e25placeHolder,
  629. nsCoords.prefix + 'type',
  630. cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
  631. output.write(line)
  632. line = triple(e25placeHolder,
  633. schemaCoords.prefix + 'label',
  634. '\"Iscrizione su ' + subj + '\"') + closeLine
  635. output.write(line)
  636. line = triple(e25placeHolder,
  637. cidocCoords.prefix + 'P128_carries',
  638. e34placeHolder) + closeLine
  639. output.write(line)
  640. line = triple(e34placeHolder,
  641. nsCoords.prefix + 'type',
  642. cidocCoords.prefix + 'E34_Inscription') + closeLine
  643. output.write(line)
  644. line = triple(e34placeHolder,
  645. schemaCoords.prefix + 'label',
  646. '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
  647. output.write(line)
  648. pl = row['ISRI'].replace(' ', '-')
  649. pla = pl.replace('.', '')
  650. line = triple(e34placeHolder,
  651. cidocCoords.prefix + 'P3_has_note',
  652. '\"' + row['ISRI'] + '\"') + closeLine
  653. output.write(line)
  654. # E34 P2 E55
  655. if (row['ISRT'] != ''):
  656. rr = row['ISRT'].replace(' ', '')
  657. line = triple(e34placeHolder,
  658. cidocCoords.prefix + 'P2_has_type',
  659. '\"' + row['ISRT'] + '\"') + closeLine
  660. output.write(line)
  661. # E34 P72 E56
  662. if (row['ISRL'] != ''):
  663. line = triple(e34placeHolder,
  664. cidocCoords.prefix + 'P72_has_language',
  665. museoCoords.prefix + '_' + row['ISRL']) + closeLine
  666. output.write(line)
  667. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  668. nsCoords.prefix + 'type',
  669. cidocCoords.prefix + 'E56_Language') + closeLine
  670. output.write(line)
  671. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  672. schemaCoords.prefix + 'label',
  673. '\"' + row['ISRL'] + '\"') + closeLine
  674. output.write(line)
  675. if (row['ISRA'] != '') or (row['ISRS'] != ''):
  676. line = triple(e34placeHolder,
  677. cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
  678. e65placeHolder) + closeLine
  679. output.write(line)
  680. line = triple(e65placeHolder,
  681. nsCoords.prefix + 'type',
  682. cidocCoords.prefix + 'E65_Creation') + closeLine
  683. output.write(line)
  684. line = triple(e65placeHolder,
  685. schemaCoords.prefix + 'label',
  686. '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
  687. output.write(line)
  688. if row['ISRA'] != '':
  689. line = triple(e65placeHolder,
  690. cidocCoords.prefix + 'P14_carried_out_by',
  691. e21placeHolder) + closeLine
  692. output.write(line)
  693. line = triple(e21placeHolder,
  694. nsCoords.prefix + 'type',
  695. cidocCoords.prefix + 'E21_Person') + closeLine
  696. output.write(line)
  697. line = triple(e21placeHolder,
  698. schemaCoords.prefix + 'label',
  699. '\"' + row['ISRA'] + '\"') + closeLine
  700. output.write(line)
  701. if row['ISRS']:
  702. ss = row['ISRS'].replace(' ', '')
  703. tecPlaceholder = museoCoords.prefix + url + '_' + ss
  704. line = triple(e65placeHolder,
  705. cidocCoords.prefix + 'P32_used_general_technique',
  706. tecPlaceholder) + closeLine
  707. output.write(line)
  708. line = triple(tecPlaceholder,
  709. nsCoords.prefix + 'type',
  710. cidocCoords.prefix + 'E55_Type') + closeLine
  711. output.write(line)
  712. line = triple(tecPlaceholder,
  713. schemaCoords.prefix + 'label',
  714. '\"' + row['ISRS'] + '\"') + closeLine
  715. output.write(line)
  # A non-empty ISRP column becomes an xsd:string P3_has_note on
  # e25placeHolder.
  isrp_text = row['ISRP']
  if isrp_text != '':
      note_literal = '\"' + isrp_text + '\"^^xsd:string'
      line = triple(e25placeHolder,
                    cidocCoords.prefix + 'P3_has_note',
                    note_literal) + closeLine
      output.write(line)
  # Measurements: MISA is labelled "Altezza" (height), MISL "Larghezza"
  # (width); MISU is the unit text shared by both.
  # NOTE(review): unit, valueA and valueL are not read anywhere in this
  # section; they are kept in case code elsewhere relies on them - confirm.
  unit = row['MISU'] if row['MISU'] != '' else ''
  valueA = ''
  valueL = ''
  if row['MISA'] != '':
      value = row['MISA']
      valueA = value.replace(',', 'v')
  if row['MISL'] != '':
      value = row['MISL']
      valueL = value.replace(',', 'v')

  # One entry per dimension:
  # (column, node suffix, start of the label literal, AAT type id, AAT type label)
  dimension_specs = (
      ('MISA', '_Altezza', '\"Altezza: ', '300055644', '\"altezza\"'),
      ('MISL', '_Larghezza', '\"Larghezza: ', '300055647', '\"larghezza\"'),
  )
  for column, suffix, label_start, type_id, type_label in dimension_specs:
      if row[column] != '':
          dimension_node = museoCoords.prefix + url + suffix
          type_node = aatCoords.prefix + type_id
          dimension_statements = [
              # E22 P43 E54
              (datplaceHolder,
               cidocCoords.prefix + 'P43_has_dimension',
               dimension_node),
              (dimension_node,
               nsCoords.prefix + 'type',
               cidocCoords.prefix + 'E54_Dimension'),
              (dimension_node,
               schemaCoords.prefix + 'label',
               label_start + row[column] + row['MISU'] + '\"'),
              # E54 P90 E60
              # NOTE(review): the raw cell text is typed xsd:integer; a
              # value with a decimal comma would not be a valid integer
              # literal - confirm the expected input format.
              (dimension_node,
               cidocCoords.prefix + 'P90_has_value',
               '\"' + row[column] + '\"^^xsd:integer'),
              # E54 P2 E55
              (dimension_node,
               cidocCoords.prefix + 'P2_has_type',
               type_node),
              (type_node,
               schemaCoords.prefix + 'label',
               type_label),
          ]
          if row['MISU'] != '':
              # E54 P91 E58
              # The unit node is always AAT 300379098, whatever text MISU
              # holds; only its label follows the cell.
              unit_node = aatCoords.prefix + '300379098'
              dimension_statements.extend([
                  (dimension_node,
                   cidocCoords.prefix + 'P91_has_unit',
                   unit_node),
                  (unit_node,
                   nsCoords.prefix + 'type',
                   cidocCoords.prefix + 'E58_Measurement_Unit'),
                  (unit_node,
                   schemaCoords.prefix + 'label',
                   '\"' + row['MISU'] + '\"'),
              ])
          for subject, predicate, obj in dimension_statements:
              line = triple(subject, predicate, obj) + closeLine
              output.write(line)
  # Materials and techniques: MTC holds one or more '/'-separated terms.
  # get_elem() resolves each term to a pair whose first item is the AAT id
  # and whose second item tags it as a material ('MTC/M') or not.
  if row['MTC'] != '':
      # str.split returns the whole string as a single item when no '/'
      # is present, so both the one-term and the many-term case are covered.
      mtcs = row['MTC'].split('/')
      for tc in mtcs:
          mtc = tc.lstrip()
          el = get_elem(mtc)
          aat_node = aatCoords.prefix + el[0]
          if el[1] == 'MTC/M':
              # E22 P45 E57: the object consists of this material.
              term_statements = [
                  (datplaceHolder,
                   cidocCoords.prefix + 'P45_consists_of',
                   aat_node),
              ]
              node_class = 'E57_Material'
          else:
              # E12 Production - P32 used technique - E55 Type
              term_statements = [
                  (e12placeHolder,
                   cidocCoords.prefix + 'P32_used_general_technique',
                   aat_node),
              ]
              if e12FplaceHolder != '':
                  # A second production node, when set, gets the same link.
                  term_statements.append(
                      (e12FplaceHolder,
                       cidocCoords.prefix + 'P32_used_general_technique',
                       aat_node))
              node_class = 'E55_Type'
          # Both branches finish by typing and labelling the AAT node.
          term_statements.extend([
              (aat_node,
               nsCoords.prefix + 'type',
               cidocCoords.prefix + node_class),
              (aat_node,
               schemaCoords.prefix + 'label',
               '\"' + mtc + '\"'),
          ])
          for subject, predicate, obj in term_statements:
              line = triple(subject, predicate, obj) + closeLine
              output.write(line)
  # E12 P140i E13
  # When AUTM (labelled "Motivazione attribuzione") is filled in, attach an
  # E13_Attribute_Assignment to e12placeHolder and let it assign the author
  # node resolved through get_aut_url(row['AUTH']).
  if row['AUTM'] != '':
      mot = row['AUTM'].replace(' ', '_')
      # NOTE(review): e55placeHolder is built but not used in this section;
      # P2_has_type below receives the AUTM text as a plain literal - confirm.
      e55placeHolder = museoCoords.prefix + url + '_' + mot
      attribution_statements = (
          (e12placeHolder,
           cidocCoords.prefix + 'P140i_was_attributed_by',
           e13placeHolder),
          (e13placeHolder,
           nsCoords.prefix + 'type',
           cidocCoords.prefix + 'E13_Attribute_Assignment'),
          (e13placeHolder,
           schemaCoords.prefix + 'label',
           '\"Motivazione attribuzione\"'),
          (e13placeHolder,
           cidocCoords.prefix + 'P2_has_type',
           '\"' + row['AUTM'] + '\"'),
      )
      for subject, predicate, obj in attribution_statements:
          line = triple(subject, predicate, obj) + closeLine
          output.write(line)
      # The author lookup runs only after the statements above are written.
      aut = get_aut_url(row['AUTH'])
      aut_url = aut[0]
      AuthorPlaceholder = autCoords.prefix + aut_url
      line = triple(e13placeHolder,
                    cidocCoords.prefix + 'P141_assigned',
                    AuthorPlaceholder) + closeLine
      output.write(line)
  # A non-empty NSC column becomes an xsd:string P3_has_note on
  # datplaceHolder. Straight double quotes would end the literal early, so a
  # quote preceded by a space becomes an opening guillemet and every
  # remaining quote a closing one.
  if row['NSC'] != '':
      phr = row['NSC'].replace(' "', ' «').replace('"', '»')
      note_literal = '\"' + phr + '\"^^xsd:string'
      line = triple(datplaceHolder,
                    cidocCoords.prefix + 'P3_has_note',
                    note_literal) + closeLine
      output.write(line)
  # Iconclass subjects: DESI holds one or more notations separated by ':'
  # (blanks are ignored). Each notation is looked up on iconclass.org and
  # linked to datplaceHolder through P62_depicts.
  iconclass = row['DESI']
  icon = iconclass.replace(' ', '')
  # Drop empty pieces (empty cell, trailing ':') so that no request is made
  # for the bare base URL and no triple points at the namespace itself.
  list_icon = [notation for notation in icon.split(':') if notation]
  for ic in list_icon:
      # A dedicated name keeps the record-level `url`, which other sections
      # use to build node IRIs, from being overwritten here.
      icon_url = 'http://iconclass.org/rdk/' + str(ic)
      response = urlopen(icon_url)
      try:
          html = response.read()
      finally:
          # Release the connection even when reading fails.
          response.close()
      soup = BeautifulSoup(html, 'html.parser')
      ff = soup.find("div", {"id": "ic_current"})
      # The attribute filter must be a dict (attribute name -> value).
      dd = ff.find("a", {"class": "ic_notation"})
      ss = dd.text
      # The label is whatever follows the first blank in the anchor text.
      x = ss.find(' ')
      # NOTE(review): icon_label is not used by the triples below; confirm
      # whether a label statement is missing or the lookup can be dropped.
      icon_label = ss[x + 1:]
      # Parentheses are percent-encoded in the IRI written to the output.
      urr = ic.replace("(", "%28").replace(")", "%29")
      line = triple(datplaceHolder,
                    cidocCoords.prefix + 'P62_depicts',
                    iconCoords.prefix + urr) + closeLine
      output.write(line)
      line = triple(iconCoords.prefix + urr,
                    nsCoords.prefix + 'type',
                    cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
      output.write(line)
  925. # P2 Opera d'arte
  926. line = triple(datplaceHolder,
  927. cidocCoords.prefix + 'P2_has_type',
  928. '\"Opera d\'Arte\"') + closeLine
  929. output.write(line)
  930. output.write('\n')
  931. #
  932. #
  933. # Limit number of entries processed (if desired)
  934. if (ii > max_entries):
  935. break