# CSV_to_RDF_Ospedale.py (48 KB)
  1. # Utilities to read/write csv files
  2. import csv
  3. # Utilities to handle character encodings
  4. import unicodedata
  5. # Ordered Dicts
  6. from collections import OrderedDict
  7. from urllib.request import urlopen
  8. from bs4 import BeautifulSoup
  9. import json
  10. # OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
  19. import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/corretti/'
  20. export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/RDF/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code=None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
  27. # Repositories
  28. museoCoords = RDFcoords('<https://palazzopretorio.prato.it/it/le-opere/alcuni-capolavori/>', 'mpp:')
  29. autCoords = RDFcoords('<https://palazzopretorio.prato.it/it/opere/autori/>', 'aut:')
  30. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  31. aatCoords = RDFcoords('<http://vocab.getty.edu/aat/>', 'aat:')
  32. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  33. schemaCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  34. xsdCoords = RDFcoords('<http://www.w3.org/2001/XMLSchema#>', 'xsd:')
  35. iconCoords = RDFcoords('<https://iconclass.org/>', 'ico:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
  46. # Line endings in TTL format
  47. continueLine1 = ' ;\n'
  48. continueLine2 = ' ,\n'
  49. closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)
  52. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  53. output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)
  54. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  55. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  56. output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)
  57. output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)
  58. output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)
  59. output.write('\n')
  60. filePrefix = 'OA_Data'
  61. fileType = '_Ospedale'
  62. max_entries = 1000000000
  63. def get_aut_url(code):
  64. aut_file = open(import_dir + 'AR20AUT' + fileType + '.csv', newline="")
  65. reader = csv.DictReader(aut_file)
  66. for row in reader:
  67. auth = int(row['AUTH'])
  68. cod = int(code)
  69. role = ''
  70. if row['AUTQ'] != '':
  71. role = row['AUTQ']
  72. else:
  73. role = ''
  74. if auth == cod:
  75. return [row['URL'], role]
  76. def get_role(role):
  77. role_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/AAT_RUOLI.csv', newline="")
  78. reader = csv.DictReader(role_file)
  79. for row in reader:
  80. if row['Label'] == role:
  81. return row['AAT']
  82. def get_elem(mtc):
  83. mtc_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/AAT_MTC.csv', newline="")
  84. reader = csv.DictReader(mtc_file)
  85. for row in reader:
  86. if row['MTC'] == mtc:
  87. return [row['AAT'], row['Type']]
  88. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  89. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  90. reader = csv.DictReader(csv_file)
  91. writeTTLHeader(output)
  92. first = True
  93. ii = 0
  94. for row in reader:
  95. # The index ii is used to process a limited number of entries for testing purposes
  96. ii = ii + 1
  97. if row['RVEL'] == '' or row['RVEL'] == '0':
  98. sb = ''
  99. subj = ''
  100. #pp = row['OGTD'] + ' (' + row['ACC'] + ') '
  101. if row['SGTI'] != '':
  102. sb = row['SGTI']
  103. # Triplify the 'codice' -- should exist for every entry
  104. codice = ''
  105. if (row['NCTR'] != '' and row['NCTN'] != ''):
  106. codice = row['NCTR'] + '0000' + row['NCTN']
  107. codiceP = ''
  108. if (row['AUTH'] != ''):
  109. codiceP = row['AUTH']
  110. place = ''
  111. if (row['PRVC'] != ''):
  112. place = row['PRVC']
  113. columnName = list(row)
  114. url = row['URL']
  115. # placeHolders
  116. datplaceHolder = museoCoords.prefix + url
  117. e1placeHolder = museoCoords.prefix + url + '_E1'
  118. e3placeHolder = museoCoords.prefix + url + 'E3'
  119. e10placeHolder = museoCoords.prefix + url + '_E10'
  120. e12placeHolder = museoCoords.prefix + url + '_E12'
  121. e13placeHolder = museoCoords.prefix + url + '_E13'
  122. e21placeHolder = museoCoords.prefix + url + '_InE21'
  123. e25placeHolder = museoCoords.prefix + url + '_E25'
  124. e34placeHolder = museoCoords.prefix + url + '_E34'
  125. e35placeHolder1 = museoCoords.prefix + url + '_E35'
  126. e42placeHolder = museoCoords.prefix + url + '_E42'
  127. e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'
  128. e65placeHolder = museoCoords.prefix + url + '_InE65'
  129. e73placeHolder = museoCoords.prefix + url + '_E73'
  130. e74placeHolder = museoCoords.prefix + url + '_E74'
  131. if (codice != ''):
  132. line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine
  133. output.write(line)
  134. line = triple(e42placeHolder, nsCoords.prefix + 'type',
  135. cidocCoords.prefix + 'E42_Identifier') + closeLine
  136. output.write(line)
  137. line = triple(e42placeHolder,
  138. schemaCoords.prefix + 'label',
  139. '\"0' + codice + '\"') + closeLine
  140. output.write(line)
  141. ###
  142. line = triple(e42placeHolder,
  143. cidocCoords.prefix + 'P2_has_type',
  144. '\"Codice univoco del bene (NCT)\"') + closeLine
  145. output.write(line)
  146. # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?
  147. line = triple(datplaceHolder, nsCoords.prefix + 'type',
  148. cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine
  149. output.write(line)
  150. # Added by AS
  151. line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\"' + sb + '\"') + closeLine
  152. output.write(line)
  153. # End AS
  154. line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine
  155. output.write(line)
  156. line = triple(e73placeHolder, nsCoords.prefix + 'type',
  157. cidocCoords.prefix + 'E73_Information_Object') + closeLine
  158. output.write(line)
  159. # AS
  160. ss = ''
  161. if row['SGTI'] != '':
  162. ss = row['SGTI']
  163. else:
  164. ss = 'senza titolo'
  165. line = triple(e73placeHolder, schemaCoords.prefix + 'label',
  166. '\"Opera d\'arte raffigurante ' + ss + '\"') + closeLine
  167. output.write(line)
  168. # E73 - P2 - E55
  169. tt = ''
  170. typeLabel = ''
  171. if row['OGTD'] == 'dipinto':
  172. tt = aatCoords.prefix + "300033618"
  173. elif row['OGTD'] == 'rilievo':
  174. tt = aatCoords.prefix + "300047230"
  175. elif row['OGTD'] == 'polittico':
  176. tt = aatCoords.prefix + "300178235"
  177. elif row['OGTD'] == 'predella':
  178. tt = aatCoords.prefix + "300003745"
  179. line = triple(e73placeHolder,
  180. cidocCoords.prefix + 'P2_has_type',
  181. tt) + closeLine
  182. output.write(line)
  183. line = triple(tt, schemaCoords.prefix + 'label',
  184. '\"' + row['OGTD'] + '\"') + closeLine
  185. output.write(line)
  186. # E73 - P1 - E35
  187. if row['SGTT'] != '':
  188. line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine
  189. output.write(line)
  190. line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine
  191. output.write(line)
  192. line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\"' + row['SGTT'] + '\"') + closeLine
  193. output.write(line)
  194. # E22 - P62 - E1
  195. if row['SGTI'] != '':
  196. line = triple(datplaceHolder,
  197. cidocCoords.prefix + 'P62_depicts',
  198. e1placeHolder) + closeLine
  199. output.write(line)
  200. line = triple(e1placeHolder,
  201. nsCoords.prefix + 'type',
  202. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  203. output.write(line)
  204. line = triple(e1placeHolder,
  205. schemaCoords.prefix + 'label', '\"' +
  206. row['SGTI'] + '\"') + closeLine
  207. output.write(line)
  208. line = triple(e1placeHolder,
  209. cidocCoords.prefix + 'P2_has_type',
  210. '\"Identificazione Iconografica\"') + closeLine
  211. output.write(line)
  212. # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio
  213. if row['ESC'] == 'C100005':
  214. line = triple(datplaceHolder,
  215. cidocCoords.prefix + 'P52_has_current_owner',
  216. '<https://palazzopretorio.prato.it/it/>') + closeLine
  217. output.write(line)
  218. line = triple('<https://palazzopretorio.prato.it/it/>',
  219. nsCoords.prefix + 'type',
  220. cidocCoords.prefix + 'E74_Group') + closeLine
  221. output.write(line)
  222. line = triple('<https://palazzopretorio.prato.it/it/>',
  223. schemaCoords.prefix + 'label',
  224. '\"Museo di Palazzo Pretorio\"') + closeLine
  225. output.write(line)
  226. currentLocation = ''
  227. # E22 - P54 - E53
  228. if row['LDCN'] != '':
  229. if row['LDCS'] != '':
  230. currentLocation = row['LDCS']
  231. else:
  232. currentLocation = currentLocation
  233. if row['LDCM'] != '':
  234. currentLocation = currentLocation + ', ' + row['LDCM']
  235. else:
  236. currentLocation = currentLocation
  237. if row['LDCN'] != '':
  238. currentLocation = currentLocation + ', ' + row['LDCN']
  239. else:
  240. currentLocation = currentLocation
  241. currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'
  242. line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',
  243. '\"' + currentLocation + '\"') + closeLine
  244. output.write(line)
  245. e12FplaceHolder = ''
  246. if row['DTSI'] != row['DTSF']:
  247. e12FplaceHolder = museoCoords.prefix + url + '_E12F'
  248. # Write E12 Production -- should exist for every entry?
  249. # E12 P108 E22
  250. line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  251. output.write(line)
  252. line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine
  253. output.write(line)
  254. # E73 P108i E12
  255. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine
  256. output.write(line)
  257. if e12FplaceHolder != '':
  258. line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine
  259. output.write(line)
  260. line = triple(e12FplaceHolder, nsCoords.prefix + 'type',
  261. cidocCoords.prefix + 'E12_Production') + closeLine
  262. output.write(line)
  263. line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',
  264. '\"Fine produzione di ' + row['SGTI'] + '\"') + closeLine
  265. output.write(line)
  266. # E73 P108i E12
  267. line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine
  268. output.write(line)
  269. # E12 P140i E13
  270. line = triple(e12FplaceHolder,
  271. cidocCoords.prefix + 'P140i_was_attributed_by',
  272. e13placeHolder) + closeLine
  273. output.write(line)
  274. # E12 P2
  275. line = triple(e12FplaceHolder,
  276. cidocCoords.prefix + 'P2_has_type',
  277. '\"Fine\"^^xsd:string') + closeLine
  278. output.write(line)
  279. line = triple(e12placeHolder,
  280. cidocCoords.prefix + 'P2_has_type',
  281. '\"Inizio\"^^xsd:string') + closeLine
  282. output.write(line)
  283. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  284. '\"Inizio produzione di ' + row['SGTI'] + '\"') + closeLine
  285. output.write(line)
  286. else:
  287. line = triple(e12placeHolder, schemaCoords.prefix + 'label',
  288. '\"Produzione di ' + row['SGTI'] + '\"') + closeLine
  289. output.write(line)
  290. tcl = []
  291. for name in columnName:
  292. if 'TCL' in name:
  293. tcl.append(name)
  294. # E12 - P7 - E53
  295. for el in tcl:
  296. i = 0
  297. if row[el] == 'luogo di produzione':
  298. pl = ''
  299. if i == 0:
  300. pl = row['PRVC']
  301. else:
  302. pl = row['PRVC' + i]
  303. line = triple(e12placeHolder,
  304. cidocCoords.prefix + 'P7_took_place_at',
  305. museoCoords.prefix + pl) + closeLine
  306. output.write(line)
  307. if e12FplaceHolder != '':
  308. line = triple(e12FplaceHolder,
  309. cidocCoords.prefix + 'P7_took_place_at',
  310. museoCoords.prefix + pl) + closeLine
  311. output.write(line)
  312. i = i + 1
  313. # E12 - PC14 - E21
  314. if row['AUTH'] != '':
  315. aut = get_aut_url(row['AUTH'])
  316. aut_url = aut[0]
  317. aut_role = aut[1]
  318. if row['AUTN'] != '':
  319. ll = row['AUTH'] + '_' + aut_role
  320. lab = ll.replace(' ', '')
  321. label = lab.replace(',', '')
  322. AuthorPlaceholder = autCoords.prefix + aut_url
  323. line = triple(museoCoords.prefix + '_' + label,
  324. cidocCoords.prefix + 'P01_has_domain',
  325. e12placeHolder) + closeLine
  326. output.write(line)
  327. if e12FplaceHolder != '':
  328. line = triple(museoCoords.prefix + '_' + label,
  329. cidocCoords.prefix + 'P01_has_domain',
  330. e12FplaceHolder) + closeLine
  331. output.write(line)
  332. if 'AUTH1' in columnName:
  333. if row['AUTH1'] != '':
  334. aut = get_aut_url(row['AUTH1'])
  335. aut_url = aut[0]
  336. aut_role = aut[1]
  337. ll = row['AUTN1'] + '_' + aut_role
  338. lab = ll.replace(' ', '')
  339. label = lab.replace(',', '')
  340. AuthorPlaceholder = autCoords.prefix + aut_url
  341. line = triple(museoCoords.prefix + '_' + label,
  342. cidocCoords.prefix + 'P01_has_domain',
  343. e12placeHolder) + closeLine
  344. output.write(line)
  345. if e12FplaceHolder != '':
  346. line = triple(museoCoords.prefix + '_' + label,
  347. cidocCoords.prefix + 'P01_has_domain',
  348. e12FplaceHolder) + closeLine
  349. output.write(line)
  350. # E12 - PC14 - E21
  351. if 'CMMN' in columnName:
  352. if row['CMMN'] != '':
  353. cc = row['CMMN']
  354. cm = cc.replace(' ', '')
  355. cmmn = cm.replace(',', '')
  356. cmmPlaceholder = museoCoords.prefix + '_' + cmmn
  357. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  358. cidocCoords.prefix + 'P01_has_domain',
  359. e12placeHolder) + closeLine
  360. output.write(line)
  361. if e12FplaceHolder != '':
  362. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  363. cidocCoords.prefix + 'P01_has_domain',
  364. e12FplaceHolder) + closeLine
  365. output.write(line)
  366. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  367. nsCoords.prefix + 'type',
  368. cidocCoords.prefix + 'PC14_carried_out_by') + closeLine
  369. output.write(line)
  370. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  371. schemaCoords.prefix + 'label',
  372. '\"' + row['CMMN'] + ' nel ruolo di committente\"') + closeLine
  373. output.write(line)
  374. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  375. cidocCoords.prefix + 'P02_has_range',
  376. cmmPlaceholder) + closeLine
  377. output.write(line)
  378. line = triple(cmmPlaceholder,
  379. nsCoords.prefix + 'type',
  380. cidocCoords.prefix + 'E39_Actor') + closeLine
  381. output.write(line)
  382. line = triple(cmmPlaceholder,
  383. schemaCoords.prefix + 'label',
  384. '\"' + row['CMMN'] + '\"') + closeLine
  385. output.write(line)
  386. line = triple(museoCoords.prefix + '_commit_' + cmmn,
  387. cidocCoords.prefix + 'P14.1_in_the_role_of',
  388. museoCoords.prefix + '_client') + closeLine
  389. output.write(line)
  390. line = triple(museoCoords.prefix + '_client',
  391. nsCoords.prefix + 'type',
  392. cidocCoords.prefix + 'E55_Type') + closeLine
  393. output.write(line)
  394. line = triple(museoCoords.prefix + '_client',
  395. schemaCoords.prefix + 'label',
  396. '\"Committente\"') + closeLine
  397. output.write(line)
  398. # E12 - P4 - E52
  399. if row['DTSI'] != '':
  400. line = triple(e12placeHolder,
  401. cidocCoords.prefix + 'P4_has_time-span',
  402. museoCoords.prefix + row['DTSI']) + closeLine
  403. output.write(line)
  404. line = triple(museoCoords.prefix + row['DTSI'],
  405. nsCoords.prefix + 'type',
  406. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  407. output.write(line)
  408. line = triple(museoCoords.prefix + row['DTSI'],
  409. schemaCoords.prefix + 'label',
  410. '\"' + row['DTSI'] + '\"') + closeLine
  411. output.write(line)
  412. if e12FplaceHolder != '':
  413. line = triple(e12FplaceHolder,
  414. cidocCoords.prefix + 'P4_has_time-span',
  415. museoCoords.prefix + row['DTSF']) + closeLine
  416. output.write(line)
  417. line = triple(museoCoords.prefix + row['DTSF'],
  418. nsCoords.prefix + 'type',
  419. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  420. output.write(line)
  421. line = triple(museoCoords.prefix + row['DTSF'],
  422. schemaCoords.prefix + 'label',
  423. '\"' + row['DTSF'] + '\"') + closeLine
  424. output.write(line)
  425. tcl = []
  426. for name in columnName:
  427. if 'TCL' in name:
  428. tcl.append(name)
  429. j = 0
  430. for el in tcl:
  431. if row[el] != '':
  432. j = j + 1
  433. last = str(j - 1)
  434. n = len(tcl) - 1
  435. for i in range(n):
  436. k = str(i + 1)
  437. if i + 1 == 1:
  438. w = ''
  439. else:
  440. w = i
  441. f = str(w)
  442. if row['TCL' + k] != '':
  443. pastActor = ''
  444. newActor = ''
  445. pl = ''
  446. if row['PRCD' + k] != '':
  447. newActor = ' a ' + row['PRCD' + k]
  448. if row['PRCD' + f] != '':
  449. pastActor = ' da ' + row['PRCD' + f]
  450. pl = row['PRCD' + f].replace(' ', '')
  451. newe10placeHolder = museoCoords.prefix + url + "_E10_" + k
  452. line = triple(newe10placeHolder,
  453. cidocCoords.prefix + 'P30_transferred_custody_of',
  454. datplaceHolder) + closeLine
  455. output.write(line)
  456. line = triple(newe10placeHolder,
  457. nsCoords.prefix + 'type',
  458. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  459. output.write(line)
  460. line = triple(newe10placeHolder,
  461. schemaCoords.prefix + 'label',
  462. '\"Passaggio di ' + row['SGTI'] + pastActor +
  463. newActor + '\"') + closeLine
  464. output.write(line)
  465. if row['PRDI' + f] != '':
  466. timespan = row['PRDI' + f]
  467. tt = timespan.replace(' ', '')
  468. tp = tt.replace('.', '')
  469. ts = tp.replace('/', '')
  470. timespanPlaceholder = museoCoords.prefix + '_' + ts
  471. # E10 P4 E52
  472. line = triple(newe10placeHolder,
  473. cidocCoords.prefix + 'P4_has_time-span',
  474. timespanPlaceholder) + closeLine
  475. output.write(line)
  476. line = triple(timespanPlaceholder,
  477. nsCoords.prefix + 'type',
  478. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  479. output.write(line)
  480. line = triple(timespanPlaceholder,
  481. schemaCoords.prefix + 'label',
  482. '\"' + timespan + '\"') + closeLine
  483. output.write(line)
  484. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  485. newLoc = row['PRCD' + k].replace(' ', '')
  486. newActorPlaceholder = museoCoords.prefix + '_' + newLoc
  487. # E10 P26 E74 (moved to)
  488. if newActorPlaceholder != '':
  489. line = triple(newe10placeHolder,
  490. cidocCoords.prefix + 'P29_custody_received_by',
  491. newActorPlaceholder) + closeLine
  492. output.write(line)
  493. # E10 P27 E74
  494. pastActorLabel = row['PRCD' + f]
  495. line = triple(newe10placeHolder,
  496. cidocCoords.prefix + 'P28_custody_surrendered_by',
  497. pastActorPlaceholder) + closeLine
  498. output.write(line)
  499. line = triple(pastActorPlaceholder,
  500. nsCoords.prefix + 'type',
  501. cidocCoords.prefix + 'E39_Actor') + closeLine
  502. output.write(line)
  503. line = triple(pastActorPlaceholder,
  504. schemaCoords.prefix + 'label',
  505. '\"' + pastActorLabel + '\"') + closeLine
  506. output.write(line)
  507. line = triple(datplaceHolder,
  508. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  509. pastActorPlaceholder) + closeLine
  510. output.write(line)
  511. # E74 P74 E53
  512. pastResidenceLabel = row['PRVC' + f]
  513. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]
  514. line = triple(pastActorPlaceholder,
  515. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  516. pastResidencePlaceHolder) + closeLine
  517. output.write(line)
  518. ####
  519. pastActor = ''
  520. newActor = ''
  521. pl = ''
  522. if row['LDCN'] != '':
  523. newActor = ' a ' + row['LDCN']
  524. if row['PRCD' + last] != '':
  525. pastActor = ' da ' + row['PRCD' + last]
  526. pl = row['PRCD' + last].replace(' ', '')
  527. line = triple(e10placeHolder,
  528. cidocCoords.prefix + 'P30_transferred_custody_of',
  529. datplaceHolder) + closeLine
  530. output.write(line)
  531. line = triple(e10placeHolder,
  532. nsCoords.prefix + 'type',
  533. cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine
  534. output.write(line)
  535. line = triple(e10placeHolder,
  536. schemaCoords.prefix + 'label',
  537. '\"Passaggio di ' + row['SGTI'] + pastActor +
  538. newActor + '\"') + closeLine
  539. output.write(line)
  540. if row['PRDU' + last] != '':
  541. timespan = row['PRDU' + last]
  542. tt = timespan.replace(' ', '')
  543. ts = tt.replace('/', '')
  544. timespanPlaceholder = museoCoords.prefix + '_' + ts
  545. # E10 P4 E52
  546. line = triple(e10placeHolder,
  547. cidocCoords.prefix + 'P4_has_time-span',
  548. timespanPlaceholder) + closeLine
  549. output.write(line)
  550. line = triple(timespanPlaceholder,
  551. nsCoords.prefix + 'type',
  552. cidocCoords.prefix + 'E52_Time-Span') + closeLine
  553. output.write(line)
  554. line = triple(timespanPlaceholder,
  555. schemaCoords.prefix + 'label',
  556. '\"' + timespan + '\"') + closeLine
  557. output.write(line)
  558. pastActorPlaceholder = museoCoords.prefix + '_' + pl
  559. newLocPlaceholder = e74placeHolder
  560. # E10 P26 E74 (moved to)
  561. if newLocPlaceholder != '':
  562. line = triple(e10placeHolder,
  563. cidocCoords.prefix + 'P29_custody_received_by',
  564. newLocPlaceholder) + closeLine
  565. output.write(line)
  566. # E10 P27 E74
  567. pastActorLabel = row['PRCD' + last]
  568. line = triple(e10placeHolder,
  569. cidocCoords.prefix + 'P28_custody_surrendered_by',
  570. pastActorPlaceholder) + closeLine
  571. output.write(line)
  572. line = triple(pastActorPlaceholder,
  573. nsCoords.prefix + 'type',
  574. cidocCoords.prefix + 'E39_Actor') + closeLine
  575. output.write(line)
  576. line = triple(pastActorPlaceholder,
  577. schemaCoords.prefix + 'label',
  578. '\"' + pastActorLabel + '\"') + closeLine
  579. output.write(line)
  580. line = triple(datplaceHolder,
  581. cidocCoords.prefix + 'P49_has_former_or_current_keeper',
  582. pastActorPlaceholder) + closeLine
  583. output.write(line)
  584. # E74 P74 E53
  585. pastResidenceLabel = row['PRVC' + last]
  586. pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]
  587. if row['PRVP' + last] != '':
  588. pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'
  589. if row['PRVR' + last] != '':
  590. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]
  591. if row['PRVS' + last] != '':
  592. pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]
  593. line = triple(pastActorPlaceholder,
  594. cidocCoords.prefix + 'P74_has_current_or_former_residence',
  595. pastResidencePlaceHolder) + closeLine
  596. output.write(line)
  597. line = triple(pastResidencePlaceHolder,
  598. nsCoords.prefix + 'type',
  599. cidocCoords.prefix + 'E53_Place') + closeLine
  600. output.write(line)
  601. # E22 P44 E3
  602. if row['STCC'] != '':
  603. line = triple(datplaceHolder,
  604. cidocCoords.prefix + 'P44_has_condition',
  605. e3placeHolder) + closeLine
  606. output.write(line)
  607. line = triple(e3placeHolder,
  608. nsCoords.prefix + 'type',
  609. cidocCoords.prefix + 'E3_Condition_State') + closeLine
  610. output.write(line)
  611. line = triple(e3placeHolder,
  612. schemaCoords.prefix + 'label',
  613. '\"Condizione di: ' + row['SGTI'] + '\"') + closeLine
  614. output.write(line)
  615. line = triple(e3placeHolder,
  616. cidocCoords.prefix + 'P2_has_type',
  617. '\"' + row['STCC'] + '\"') + closeLine
  618. output.write(line)
  619. # E22 P65 E34
  620. if (row['ISRI'] != ''):
  621. line = triple(datplaceHolder,
  622. cidocCoords.prefix + 'P56_bears_feature',
  623. e25placeHolder) + closeLine
  624. output.write(line)
  625. line = triple(e25placeHolder,
  626. nsCoords.prefix + 'type',
  627. cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine
  628. output.write(line)
  629. line = triple(e25placeHolder,
  630. schemaCoords.prefix + 'label',
  631. '\"Iscrizione su ' + subj + '\"') + closeLine
  632. output.write(line)
  633. line = triple(e25placeHolder,
  634. cidocCoords.prefix + 'P128_carries',
  635. e34placeHolder) + closeLine
  636. output.write(line)
  637. line = triple(e34placeHolder,
  638. nsCoords.prefix + 'type',
  639. cidocCoords.prefix + 'E34_Inscription') + closeLine
  640. output.write(line)
  641. line = triple(e34placeHolder,
  642. schemaCoords.prefix + 'label',
  643. '\"Iscrizione: ' + row['ISRI'] + '\"') + closeLine
  644. output.write(line)
  645. pl = row['ISRI'].replace(' ', '-')
  646. pla = pl.replace('.', '')
  647. line = triple(e34placeHolder,
  648. cidocCoords.prefix + 'P3_has_note',
  649. '\"' + row['ISRI'] + '\"') + closeLine
  650. output.write(line)
  651. # E34 P2 E55
  652. if (row['ISRT'] != ''):
  653. rr = row['ISRT'].replace(' ', '')
  654. line = triple(e34placeHolder,
  655. cidocCoords.prefix + 'P2_has_type',
  656. '\"' + row['ISRT'] + '\"') + closeLine
  657. output.write(line)
  658. # E34 P72 E56
  659. if (row['ISRL'] != ''):
  660. line = triple(e34placeHolder,
  661. cidocCoords.prefix + 'P72_has_language',
  662. museoCoords.prefix + '_' + row['ISRL']) + closeLine
  663. output.write(line)
  664. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  665. nsCoords.prefix + 'type',
  666. cidocCoords.prefix + 'E56_Language') + closeLine
  667. output.write(line)
  668. line = triple(museoCoords.prefix + '_' + row['ISRL'],
  669. schemaCoords.prefix + 'label',
  670. '\"' + row['ISRL'] + '\"') + closeLine
  671. output.write(line)
  672. if (row['ISRA'] != '') or (row['ISRS'] != ''):
  673. line = triple(e34placeHolder,
  674. cidocCoords.prefix + 'P92i_was_brought_into_existence_by',
  675. e65placeHolder) + closeLine
  676. output.write(line)
  677. line = triple(e65placeHolder,
  678. nsCoords.prefix + 'type',
  679. cidocCoords.prefix + 'E65_Creation') + closeLine
  680. output.write(line)
  681. line = triple(e65placeHolder,
  682. schemaCoords.prefix + 'label',
  683. '\"Creazione dell\'Iscrizione ' + row['ISRI'] + '\"') + closeLine
  684. output.write(line)
  685. if row['ISRA'] != '':
  686. line = triple(e65placeHolder,
  687. cidocCoords.prefix + 'P14_carried_out_by',
  688. e21placeHolder) + closeLine
  689. output.write(line)
  690. line = triple(e21placeHolder,
  691. nsCoords.prefix + 'type',
  692. cidocCoords.prefix + 'E21_Person') + closeLine
  693. output.write(line)
  694. line = triple(e21placeHolder,
  695. schemaCoords.prefix + 'label',
  696. '\"' + row['ISRA'] + '\"') + closeLine
  697. output.write(line)
  698. if row['ISRS']:
  699. ss = row['ISRS'].replace(' ', '')
  700. tecPlaceholder = museoCoords.prefix + url + '_' + ss
  701. line = triple(e65placeHolder,
  702. cidocCoords.prefix + 'P32_used_general_technique',
  703. tecPlaceholder) + closeLine
  704. output.write(line)
  705. line = triple(tecPlaceholder,
  706. nsCoords.prefix + 'type',
  707. cidocCoords.prefix + 'E55_Type') + closeLine
  708. output.write(line)
  709. line = triple(tecPlaceholder,
  710. schemaCoords.prefix + 'label',
  711. '\"' + row['ISRS'] + '\"') + closeLine
  712. output.write(line)
  # Free text from the ISRP column, attached to e25placeHolder as a
  # string-typed P3 note.
  isrp_note = row['ISRP']
  if isrp_note != '':
      output.write(triple(e25placeHolder,
                          cidocCoords.prefix + 'P3_has_note',
                          '\"' + isrp_note + '\"^^xsd:string') + closeLine)
  # Dimensions of the object (E22 P43 E54): height comes from MISA, width
  # from MISL; MISU holds the unit text shared by both.
  # Each spec is (CSV column, node suffix, label prefix, AAT id, AAT label).
  # NOTE(review): the unit node is always AAT 300379098, whatever MISU
  # contains - confirm that every record really uses that unit.
  measurement_unit = row['MISU']
  unit_node = aatCoords.prefix + '300379098'
  for dim_column, dim_suffix, dim_title, dim_aat_id, dim_aat_label in (
          ('MISA', '_Altezza', 'Altezza', '300055644', 'altezza'),
          ('MISL', '_Larghezza', 'Larghezza', '300055647', 'larghezza'),
  ):
      dim_raw = row[dim_column]
      if dim_raw == '':
          # No measurement recorded for this dimension.
          continue

      dim_node = museoCoords.prefix + url + dim_suffix
      dim_type_node = aatCoords.prefix + dim_aat_id

      # A value written with a decimal comma (e.g. "12,5") is not a valid
      # xsd:integer lexical form. Digit-only values keep the integer typing
      # unchanged; anything else is written with a dot and typed xsd:decimal.
      if dim_raw.strip().isdigit():
          dim_literal = '\"' + dim_raw + '\"^^xsd:integer'
      else:
          dim_literal = '\"' + dim_raw.replace(',', '.') + '\"^^xsd:decimal'

      dim_statements = [
          # E22 P43 E54
          (datplaceHolder,
           cidocCoords.prefix + 'P43_has_dimension',
           dim_node),
          (dim_node,
           nsCoords.prefix + 'type',
           cidocCoords.prefix + 'E54_Dimension'),
          (dim_node,
           schemaCoords.prefix + 'label',
           '\"' + dim_title + ': ' + dim_raw + measurement_unit + '\"'),
          # E54 P90 E60
          (dim_node,
           cidocCoords.prefix + 'P90_has_value',
           dim_literal),
          # E54 P2 E55
          (dim_node,
           cidocCoords.prefix + 'P2_has_type',
           dim_type_node),
          (dim_type_node,
           schemaCoords.prefix + 'label',
           '\"' + dim_aat_label + '\"'),
      ]
      if measurement_unit != '':
          # E54 P91 E58
          dim_statements += [
              (dim_node,
               cidocCoords.prefix + 'P91_has_unit',
               unit_node),
              (unit_node,
               nsCoords.prefix + 'type',
               cidocCoords.prefix + 'E58_Measurement_Unit'),
              (unit_node,
               schemaCoords.prefix + 'label',
               '\"' + measurement_unit + '\"'),
          ]

      for dim_subject, dim_predicate, dim_object in dim_statements:
          output.write(triple(dim_subject,
                              dim_predicate,
                              dim_object) + closeLine)
  # Materials and techniques: MTC may list several terms separated by '/'.
  # Each term is looked up with get_elem(); item 0 of the result is used as
  # the AAT identifier and item 1 as the category. Terms categorised 'MTC/M'
  # are attached to the object as E57 materials (P45); every other term is
  # attached to the production event(s) as an E55 technique (P32).
  if row['MTC'] != '':
      # split() hands back the whole string as a single item when there is
      # no '/' in it, so one call covers both the single and the multi case.
      for raw_term in row['MTC'].split('/'):
          term = raw_term.lstrip()
          lookup = get_elem(term)
          is_material = lookup[1] == 'MTC/M'
          aat_term = aatCoords.prefix + lookup[0]

          if is_material:
              term_statements = [
                  (datplaceHolder,
                   cidocCoords.prefix + 'P45_consists_of',
                   aat_term),
                  (aat_term,
                   nsCoords.prefix + 'type',
                   cidocCoords.prefix + 'E57_Material'),
              ]
          else:
              # E12 Production - P32 used technique - E55 Type
              term_statements = [
                  (e12placeHolder,
                   cidocCoords.prefix + 'P32_used_general_technique',
                   aat_term),
              ]
              if e12FplaceHolder != '':
                  # A second production placeholder is set: link it as well.
                  term_statements.append(
                      (e12FplaceHolder,
                       cidocCoords.prefix + 'P32_used_general_technique',
                       aat_term))
              term_statements.append(
                  (aat_term,
                   nsCoords.prefix + 'type',
                   cidocCoords.prefix + 'E55_Type'))

          # Both kinds of term end with the same label triple.
          term_statements.append(
              (aat_term,
               schemaCoords.prefix + 'label',
               '\"' + term + '\"'))

          for term_subject, term_predicate, term_object in term_statements:
              output.write(triple(term_subject,
                                  term_predicate,
                                  term_object) + closeLine)
  # E12 P140i E13: when AUTM is filled in, the production placeholder is
  # linked to an attribute assignment (E13) that carries the AUTM text as its
  # type and points (P141) at the author looked up from AUTH.
  if row['AUTM'] != '':
      # NOTE(review): mot and e55placeHolder are not read again in the
      # visible remainder of the loop body - confirm whether they are needed.
      mot = row['AUTM'].replace(' ', '_')
      e55placeHolder = museoCoords.prefix + url + '_' + mot

      for attribution_subject, attribution_predicate, attribution_object in (
              (e12placeHolder,
               cidocCoords.prefix + 'P140i_was_attributed_by',
               e13placeHolder),
              (e13placeHolder,
               nsCoords.prefix + 'type',
               cidocCoords.prefix + 'E13_Attribute_Assignment'),
              (e13placeHolder,
               schemaCoords.prefix + 'label',
               '\"Motivazione attribuzione\"'),
              (e13placeHolder,
               cidocCoords.prefix + 'P2_has_type',
               '\"' + row['AUTM'] + '\"'),
      ):
          output.write(triple(attribution_subject,
                              attribution_predicate,
                              attribution_object) + closeLine)

      # Only the first item returned by get_aut_url() is used; it becomes the
      # local part of the author node.
      AuthorPlaceholder = autCoords.prefix + get_aut_url(row['AUTH'])[0]
      output.write(triple(e13placeHolder,
                          cidocCoords.prefix + 'P141_assigned',
                          AuthorPlaceholder) + closeLine)
  # Free text from the NSC column, attached to the object (datplaceHolder) as
  # a string-typed P3 note.
  if row['NSC'] != '':
      # Double quotes are turned into guillemets before the text is wrapped
      # in a quoted literal: a quote that follows a blank becomes an opening
      # guillemet, every remaining quote a closing one.
      nsc_text = row['NSC'].replace(' "', ' «').replace('"', '»')
      output.write(triple(datplaceHolder,
                          cidocCoords.prefix + 'P3_has_note',
                          '\"' + nsc_text + '\"^^xsd:string') + closeLine)
  # Iconclass subjects: DESI may hold several notations separated by ':'
  # (blanks are dropped first). Each notation becomes an iconclass.org IRI
  # that the object depicts (P62), typed as E1 and labelled with the notation.
  iconclass_field = row['DESI'].replace(' ', '')
  for iconclass_code in iconclass_field.split(':'):
      if iconclass_code == '':
          # An empty DESI or a stray ':' would otherwise produce the bare
          # <https://iconclass.org/> IRI with an empty label.
          continue

      # Notations may carry percent-encoded parentheses; restore them.
      iconclass_notation = (iconclass_code
                            .replace('%28', '(')
                            .replace('%29', ')'))
      iconclass_node = '<https://iconclass.org/' + iconclass_notation + '>'

      for icon_subject, icon_predicate, icon_object in (
              (datplaceHolder,
               cidocCoords.prefix + 'P62_depicts',
               iconclass_node),
              (iconclass_node,
               nsCoords.prefix + 'type',
               cidocCoords.prefix + 'E1_CRM_Entity'),
              (iconclass_node,
               schemaCoords.prefix + 'label',
               '\"' + iconclass_notation + '\"'),
              (iconclass_node,
               cidocCoords.prefix + 'P2_has_type',
               '\"Sigla Iconclass\"'),
      ):
          output.write(triple(icon_subject,
                              icon_predicate,
                              icon_object) + closeLine)
  932. # P2 Opera d'arte
  933. line = triple(datplaceHolder,
  934. cidocCoords.prefix + 'P2_has_type',
  935. '\"Opera d\'Arte\"') + closeLine
  936. output.write(line)
  937. output.write('\n')
  938. #
  939. #
  940. # Limit number of entries processed (if desired)
  941. if (ii > max_entries):
  942. break