# CSV_to_RDF_onomastica_ospedale_person_occupation.py
  1. #Parser to convert the Datini onomastics CSV file into TTL format
  2. # Utilities to read/write csv files
  3. import csv
  4. # Utilities to handle character encodings
  5. import unicodedata
  6. # Ordered Dicts
  7. from collections import OrderedDict
  8. import json
  9. import re
# OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
  19. import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/ospedale/'
  20. export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/ospedale/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code = None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
  27. # Repositories
  28. aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')
  29. foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
  30. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  31. schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
  32. personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
  33. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  34. rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  35. owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
  46. # Line endings in TTL format
  47. continueLine1 = ' ;\n'
  48. continueLine2 = ' ,\n'
  49. closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + aspoCoords.prefix + ' ' + aspoCoords.uri + closeLine)
  52. output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)
  53. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  54. output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)
  55. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  56. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  57. output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)
  58. output.write('@prefix ' + owlCoords.prefix + ' ' + owlCoords.uri + closeLine)
  59. output.write('\n')
  60. filePrefix = 'OSPEDALE-onomastica'
  61. fileType = '-persone-singole-occupation'
  62. max_entries = 10000000000000
  63. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  64. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  65. reader = csv.DictReader(csv_file)
  66. writeTTLHeader(output)
  67. first = True
  68. ii = 0
  69. for row in reader:
  70. # The index ii is used to process a limited number of entries for testing purposes
  71. ii = ii + 1
  72. id_aspo = row['recordid']
  73. #placeHolders
  74. aspoPlaceHolder = aspoCoords.prefix + id_aspo
  75. if row['occupation_1'] != '' and row['occupation_1'] != ' ' :
  76. occupazioni = []
  77. pipe = "|"
  78. if pipe in row['occupation_1']:
  79. occupazioni = row['occupation_1'].split('|')
  80. for occupazione in occupazioni:
  81. #Remove all white-space characters:
  82. txt = occupazione
  83. x = re.sub("\n", " ", txt)
  84. y = re.sub("\s\s", "", x)
  85. occ = re.sub(r'[^A-Za-z]','', y)
  86. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  87. line = triple(aspoPlaceHolder,
  88. schemaCoords.prefix + 'hasOccupation',
  89. occupationPlaceHolder) + closeLine
  90. output.write(line)
  91. line = triple(occupationPlaceHolder,
  92. nsCoords.prefix + 'type',
  93. schemaCoords.prefix + 'Occupation') + closeLine
  94. output.write(line)
  95. line = triple(occupationPlaceHolder,
  96. rdfsCoords.prefix + 'label',
  97. '\"' + y + '\"') + closeLine
  98. output.write(line)
  99. if row['ID_ente_1'] != '':
  100. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_1'] + '>'
  101. line = triple(aspoPlaceHolder,
  102. schemaCoords.prefix + 'workLocation',
  103. placeoccupationPlaceHolder) + closeLine
  104. output.write(line)
  105. line = triple(occupationPlaceHolder,
  106. schemaCoords.prefix + 'occupationLocation',
  107. placeoccupationPlaceHolder) + closeLine
  108. output.write(line)
  109. elif row['ID_ente_1'] == '' and row['place occupation 1 ENTE'] != '':
  110. placeoccupazioni = []
  111. pipe = "|"
  112. if pipe in row['place occupation 1 ENTE']:
  113. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  114. for placeoccupazione in placeoccupazioni:
  115. #Remove all white-space characters:
  116. txt = row['place occupation 1 ENTE']
  117. x = re.sub("\n", " ", txt)
  118. y = re.sub("\s\s", "", x)
  119. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  120. line = triple(aspoPlaceHolder,
  121. schemaCoords.prefix + 'workLocation',
  122. placeoccupationPlaceHolder) + closeLine
  123. output.write(line)
  124. line = triple(occupationPlaceHolder,
  125. schemaCoords.prefix + 'occupationLocation',
  126. placeoccupationPlaceHolder) + closeLine
  127. output.write(line)
  128. line = triple(placeoccupationPlaceHolder,
  129. rdfsCoords.prefix + 'label',
  130. '\"' + y + '\"') + closeLine
  131. output.write(line)
  132. line = triple(placeoccupationPlaceHolder,
  133. nsCoords.prefix + 'type',
  134. cidocCoords.prefix + 'E53_Place') + closeLine
  135. output.write(line)
  136. line = triple(aspoPlaceHolder,
  137. schemaCoords.prefix + 'jobTitle',
  138. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  139. output.write(line)
  140. else:
  141. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  142. txt = row['place occupation 1 ENTE']
  143. x = re.sub("\n", " ", txt)
  144. y = re.sub("\s\s", "", x)
  145. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  146. line = triple(aspoPlaceHolder,
  147. schemaCoords.prefix + 'workLocation',
  148. placeoccupationPlaceHolder) + closeLine
  149. output.write(line)
  150. line = triple(occupationPlaceHolder,
  151. schemaCoords.prefix + 'occupationLocation',
  152. placeoccupationPlaceHolder) + closeLine
  153. output.write(line)
  154. line = triple(placeoccupationPlaceHolder,
  155. rdfsCoords.prefix + 'label',
  156. '\"' + y + '\"') + closeLine
  157. output.write(line)
  158. line = triple(placeoccupationPlaceHolder,
  159. nsCoords.prefix + 'type',
  160. cidocCoords.prefix + 'E53_Place') + closeLine
  161. output.write(line)
  162. line = triple(aspoPlaceHolder,
  163. schemaCoords.prefix + 'jobTitle',
  164. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  165. output.write(line)
  166. else:
  167. #Remove all white-space characters:
  168. txt = row['occupation_1']
  169. x = re.sub("\n", " ", txt)
  170. y = re.sub("\s\s", "", x)
  171. occ = re.sub(r'[^A-Za-z]','', y)
  172. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  173. line = triple(aspoPlaceHolder,
  174. schemaCoords.prefix + 'hasOccupation',
  175. occupationPlaceHolder) + closeLine
  176. output.write(line)
  177. line = triple(occupationPlaceHolder,
  178. nsCoords.prefix + 'type',
  179. schemaCoords.prefix + 'Occupation') + closeLine
  180. output.write(line)
  181. line = triple(occupationPlaceHolder,
  182. rdfsCoords.prefix + 'label',
  183. '\"' + y + '\"') + closeLine
  184. output.write(line)
  185. if row['ID_ente_1'] != '':
  186. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_1'] + '>'
  187. line = triple(aspoPlaceHolder,
  188. schemaCoords.prefix + 'workLocation',
  189. placeoccupationPlaceHolder) + closeLine
  190. output.write(line)
  191. line = triple(occupationPlaceHolder,
  192. schemaCoords.prefix + 'occupationLocation',
  193. placeoccupationPlaceHolder) + closeLine
  194. output.write(line)
  195. elif row['ID_ente_1'] == '' and row['place occupation 1 ENTE'] != '':
  196. placeoccupazioni = []
  197. pipe = "|"
  198. if pipe in row['place occupation 1 ENTE']:
  199. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  200. for placeoccupazione in placeoccupazioni:
  201. #Remove all white-space characters:
  202. txt = row['place occupation 1 ENTE']
  203. x = re.sub("\n", " ", txt)
  204. y = re.sub("\s\s", "", x)
  205. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  206. line = triple(aspoPlaceHolder,
  207. schemaCoords.prefix + 'workLocation',
  208. placeoccupationPlaceHolder) + closeLine
  209. output.write(line)
  210. line = triple(occupationPlaceHolder,
  211. schemaCoords.prefix + 'occupationLocation',
  212. placeoccupationPlaceHolder) + closeLine
  213. output.write(line)
  214. line = triple(placeoccupationPlaceHolder,
  215. rdfsCoords.prefix + 'label',
  216. '\"' + y + '\"') + closeLine
  217. output.write(line)
  218. line = triple(placeoccupationPlaceHolder,
  219. nsCoords.prefix + 'type',
  220. cidocCoords.prefix + 'E53_Place') + closeLine
  221. output.write(line)
  222. line = triple(aspoPlaceHolder,
  223. schemaCoords.prefix + 'jobTitle',
  224. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  225. output.write(line)
  226. else:
  227. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  228. txt = row['place occupation 1 ENTE']
  229. x = re.sub("\n", " ", txt)
  230. y = re.sub("\s\s", "", x)
  231. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  232. line = triple(aspoPlaceHolder,
  233. schemaCoords.prefix + 'workLocation',
  234. placeoccupationPlaceHolder) + closeLine
  235. output.write(line)
  236. line = triple(occupationPlaceHolder,
  237. schemaCoords.prefix + 'occupationLocation',
  238. placeoccupationPlaceHolder) + closeLine
  239. output.write(line)
  240. line = triple(placeoccupationPlaceHolder,
  241. rdfsCoords.prefix + 'label',
  242. '\"' + y + '\"') + closeLine
  243. output.write(line)
  244. line = triple(placeoccupationPlaceHolder,
  245. nsCoords.prefix + 'type',
  246. cidocCoords.prefix + 'E53_Place') + closeLine
  247. output.write(line)
  248. line = triple(aspoPlaceHolder,
  249. schemaCoords.prefix + 'jobTitle',
  250. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  251. output.write(line)
  252. if row['occupation_2'] != '' and row['occupation_2'] != ' ' :
  253. occupazioni = []
  254. pipe = "|"
  255. if pipe in row['occupation_2']:
  256. occupazioni = row['occupation_2'].split('|')
  257. for occupazione in occupazioni:
  258. #Remove all white-space characters:
  259. txt = occupazione
  260. x = re.sub("\n", " ", txt)
  261. y = re.sub("\s\s", "", x)
  262. occ = re.sub(r'[^A-Za-z]','', y)
  263. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  264. line = triple(aspoPlaceHolder,
  265. schemaCoords.prefix + 'hasOccupation',
  266. occupationPlaceHolder) + closeLine
  267. output.write(line)
  268. line = triple(occupationPlaceHolder,
  269. nsCoords.prefix + 'type',
  270. schemaCoords.prefix + 'Occupation') + closeLine
  271. output.write(line)
  272. line = triple(occupationPlaceHolder,
  273. rdfsCoords.prefix + 'label',
  274. '\"' + y + '\"') + closeLine
  275. output.write(line)
  276. if row['ID_ente_2'] != '':
  277. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_2'] + '>'
  278. line = triple(aspoPlaceHolder,
  279. schemaCoords.prefix + 'workLocation',
  280. placeoccupationPlaceHolder) + closeLine
  281. output.write(line)
  282. line = triple(occupationPlaceHolder,
  283. schemaCoords.prefix + 'occupationLocation',
  284. placeoccupationPlaceHolder) + closeLine
  285. output.write(line)
  286. elif row['place occupation 2 ENTE'] != '':
  287. placeoccupazioni = []
  288. pipe = "|"
  289. if pipe in row['place occupation 2 ENTE']:
  290. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  291. for placeoccupazione in placeoccupazioni:
  292. #Remove all white-space characters:
  293. txt = row['place occupation 2 ENTE']
  294. x = re.sub("\n", " ", txt)
  295. y = re.sub("\s\s", "", x)
  296. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  297. line = triple(aspoPlaceHolder,
  298. schemaCoords.prefix + 'workLocation',
  299. placeoccupationPlaceHolder) + closeLine
  300. output.write(line)
  301. line = triple(occupationPlaceHolder,
  302. schemaCoords.prefix + 'occupationLocation',
  303. placeoccupationPlaceHolder) + closeLine
  304. output.write(line)
  305. line = triple(placeoccupationPlaceHolder,
  306. rdfsCoords.prefix + 'label',
  307. '\"' + y + '\"') + closeLine
  308. output.write(line)
  309. line = triple(placeoccupationPlaceHolder,
  310. nsCoords.prefix + 'type',
  311. cidocCoords.prefix + 'E53_Place') + closeLine
  312. output.write(line)
  313. line = triple(aspoPlaceHolder,
  314. schemaCoords.prefix + 'jobTitle',
  315. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  316. output.write(line)
  317. else:
  318. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  319. txt = row['place occupation 2 ENTE']
  320. x = re.sub("\n", " ", txt)
  321. y = re.sub("\s\s", "", x)
  322. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  323. line = triple(aspoPlaceHolder,
  324. schemaCoords.prefix + 'workLocation',
  325. placeoccupationPlaceHolder) + closeLine
  326. output.write(line)
  327. line = triple(occupationPlaceHolder,
  328. schemaCoords.prefix + 'occupationLocation',
  329. placeoccupationPlaceHolder) + closeLine
  330. output.write(line)
  331. line = triple(placeoccupationPlaceHolder,
  332. rdfsCoords.prefix + 'label',
  333. '\"' + y + '\"') + closeLine
  334. output.write(line)
  335. line = triple(placeoccupationPlaceHolder,
  336. nsCoords.prefix + 'type',
  337. cidocCoords.prefix + 'E53_Place') + closeLine
  338. output.write(line)
  339. line = triple(aspoPlaceHolder,
  340. schemaCoords.prefix + 'jobTitle',
  341. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  342. output.write(line)
  343. else:
  344. #Remove all white-space characters:
  345. txt = row['occupation_2']
  346. x = re.sub("\n", " ", txt)
  347. y = re.sub("\s\s", "", x)
  348. occ = re.sub(r'[^A-Za-z]','', y)
  349. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  350. line = triple(aspoPlaceHolder,
  351. schemaCoords.prefix + 'hasOccupation',
  352. occupationPlaceHolder) + closeLine
  353. output.write(line)
  354. line = triple(occupationPlaceHolder,
  355. nsCoords.prefix + 'type',
  356. schemaCoords.prefix + 'Occupation') + closeLine
  357. output.write(line)
  358. line = triple(occupationPlaceHolder,
  359. rdfsCoords.prefix + 'label',
  360. '\"' + y + '\"') + closeLine
  361. output.write(line)
  362. if row['ID_ente_2'] != '':
  363. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_2'] + '>'
  364. line = triple(aspoPlaceHolder,
  365. schemaCoords.prefix + 'workLocation',
  366. placeoccupationPlaceHolder) + closeLine
  367. output.write(line)
  368. line = triple(occupationPlaceHolder,
  369. schemaCoords.prefix + 'occupationLocation',
  370. placeoccupationPlaceHolder) + closeLine
  371. output.write(line)
  372. elif row['ID_ente_2'] == '' and row['place occupation 2 ENTE'] != '':
  373. placeoccupazioni = []
  374. pipe = "|"
  375. if pipe in row['place occupation 2 ENTE']:
  376. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  377. for placeoccupazione in placeoccupazioni:
  378. #Remove all white-space characters:
  379. txt = row['place occupation 2 ENTE']
  380. x = re.sub("\n", " ", txt)
  381. y = re.sub("\s\s", "", x)
  382. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  383. line = triple(aspoPlaceHolder,
  384. schemaCoords.prefix + 'workLocation',
  385. placeoccupationPlaceHolder) + closeLine
  386. output.write(line)
  387. line = triple(occupationPlaceHolder,
  388. schemaCoords.prefix + 'occupationLocation',
  389. placeoccupationPlaceHolder) + closeLine
  390. output.write(line)
  391. line = triple(placeoccupationPlaceHolder,
  392. rdfsCoords.prefix + 'label',
  393. '\"' + y + '\"') + closeLine
  394. output.write(line)
  395. line = triple(placeoccupationPlaceHolder,
  396. nsCoords.prefix + 'type',
  397. cidocCoords.prefix + 'E53_Place') + closeLine
  398. output.write(line)
  399. line = triple(aspoPlaceHolder,
  400. schemaCoords.prefix + 'jobTitle',
  401. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  402. output.write(line)
  403. else:
  404. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  405. txt = row['place occupation 2 ENTE']
  406. x = re.sub("\n", " ", txt)
  407. y = re.sub("\s\s", "", x)
  408. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  409. line = triple(aspoPlaceHolder,
  410. schemaCoords.prefix + 'workLocation',
  411. placeoccupationPlaceHolder) + closeLine
  412. output.write(line)
  413. line = triple(occupationPlaceHolder,
  414. schemaCoords.prefix + 'occupationLocation',
  415. placeoccupationPlaceHolder) + closeLine
  416. output.write(line)
  417. line = triple(placeoccupationPlaceHolder,
  418. rdfsCoords.prefix + 'label',
  419. '\"' + y + '\"') + closeLine
  420. output.write(line)
  421. line = triple(placeoccupationPlaceHolder,
  422. nsCoords.prefix + 'type',
  423. cidocCoords.prefix + 'E53_Place') + closeLine
  424. output.write(line)
  425. line = triple(aspoPlaceHolder,
  426. schemaCoords.prefix + 'jobTitle',
  427. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  428. output.write(line)
  429. if row['occupation_3'] != '' and row['occupation_3'] != ' ' :
  430. occupazioni = []
  431. pipe = "|"
  432. if pipe in row['occupation_3']:
  433. occupazioni = row['occupation_3'].split('|')
  434. for occupazione in occupazioni:
  435. #Remove all white-space characters:
  436. txt = occupazione
  437. x = re.sub("\n", " ", txt)
  438. y = re.sub("\s\s", "", x)
  439. occ = re.sub(r'[^A-Za-z]','', y)
  440. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  441. line = triple(aspoPlaceHolder,
  442. schemaCoords.prefix + 'hasOccupation',
  443. occupationPlaceHolder) + closeLine
  444. output.write(line)
  445. line = triple(occupationPlaceHolder,
  446. nsCoords.prefix + 'type',
  447. schemaCoords.prefix + 'Occupation') + closeLine
  448. output.write(line)
  449. line = triple(occupationPlaceHolder,
  450. rdfsCoords.prefix + 'label',
  451. '\"' + y + '\"') + closeLine
  452. output.write(line)
  453. if row['ID_ente_3'] != '':
  454. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_3'] + '>'
  455. line = triple(aspoPlaceHolder,
  456. schemaCoords.prefix + 'workLocation',
  457. placeoccupationPlaceHolder) + closeLine
  458. output.write(line)
  459. line = triple(occupationPlaceHolder,
  460. schemaCoords.prefix + 'occupationLocation',
  461. placeoccupationPlaceHolder) + closeLine
  462. output.write(line)
  463. elif row['ID_ente_3'] == '' and row['place occupation 3 ENTE'] != '':
  464. placeoccupazioni = []
  465. pipe = "|"
  466. if pipe in row['place occupation 3 ENTE']:
  467. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  468. for placeoccupazione in placeoccupazioni:
  469. #Remove all white-space characters:
  470. txt = row['place occupation 3 ENTE']
  471. x = re.sub("\n", " ", txt)
  472. y = re.sub("\s\s", "", x)
  473. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  474. line = triple(aspoPlaceHolder,
  475. schemaCoords.prefix + 'workLocation',
  476. placeoccupationPlaceHolder) + closeLine
  477. output.write(line)
  478. line = triple(occupationPlaceHolder,
  479. schemaCoords.prefix + 'occupationLocation',
  480. placeoccupationPlaceHolder) + closeLine
  481. output.write(line)
  482. line = triple(placeoccupationPlaceHolder,
  483. rdfsCoords.prefix + 'label',
  484. '\"' + y + '\"') + closeLine
  485. output.write(line)
  486. line = triple(placeoccupationPlaceHolder,
  487. nsCoords.prefix + 'type',
  488. cidocCoords.prefix + 'E53_Place') + closeLine
  489. output.write(line)
  490. line = triple(aspoPlaceHolder,
  491. schemaCoords.prefix + 'jobTitle',
  492. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  493. output.write(line)
  494. else:
  495. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  496. txt = row['place occupation 3 ENTE']
  497. x = re.sub("\n", " ", txt)
  498. y = re.sub("\s\s", "", x)
  499. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  500. line = triple(aspoPlaceHolder,
  501. schemaCoords.prefix + 'workLocation',
  502. placeoccupationPlaceHolder) + closeLine
  503. output.write(line)
  504. line = triple(occupationPlaceHolder,
  505. schemaCoords.prefix + 'occupationLocation',
  506. placeoccupationPlaceHolder) + closeLine
  507. output.write(line)
  508. line = triple(placeoccupationPlaceHolder,
  509. rdfsCoords.prefix + 'label',
  510. '\"' + y + '\"') + closeLine
  511. output.write(line)
  512. line = triple(placeoccupationPlaceHolder,
  513. nsCoords.prefix + 'type',
  514. cidocCoords.prefix + 'E53_Place') + closeLine
  515. output.write(line)
  516. line = triple(aspoPlaceHolder,
  517. schemaCoords.prefix + 'jobTitle',
  518. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  519. output.write(line)
  520. else:
  521. #Remove all white-space characters:
  522. txt = row['occupation_3']
  523. x = re.sub("\n", " ", txt)
  524. y = re.sub("\s\s", "", x)
  525. occ = re.sub(r'[^A-Za-z]','', y)
  526. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  527. line = triple(aspoPlaceHolder,
  528. schemaCoords.prefix + 'hasOccupation',
  529. occupationPlaceHolder) + closeLine
  530. output.write(line)
  531. line = triple(occupationPlaceHolder,
  532. nsCoords.prefix + 'type',
  533. schemaCoords.prefix + 'Occupation') + closeLine
  534. output.write(line)
  535. line = triple(occupationPlaceHolder,
  536. rdfsCoords.prefix + 'label',
  537. '\"' + y + '\"') + closeLine
  538. output.write(line)
  539. if row['ID_ente_3'] != '':
  540. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_3'] + '>'
  541. line = triple(aspoPlaceHolder,
  542. schemaCoords.prefix + 'workLocation',
  543. placeoccupationPlaceHolder) + closeLine
  544. output.write(line)
  545. line = triple(occupationPlaceHolder,
  546. schemaCoords.prefix + 'occupationLocation',
  547. placeoccupationPlaceHolder) + closeLine
  548. output.write(line)
  549. elif row['ID_ente_3'] == '' and row['place occupation 3 ENTE'] != '':
  550. placeoccupazioni = []
  551. pipe = "|"
  552. if pipe in row['place occupation 3 ENTE']:
  553. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  554. for placeoccupazione in placeoccupazioni:
  555. #Remove all white-space characters:
  556. txt = row['place occupation 3 ENTE']
  557. x = re.sub("\n", " ", txt)
  558. y = re.sub("\s\s", "", x)
  559. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  560. line = triple(aspoPlaceHolder,
  561. schemaCoords.prefix + 'workLocation',
  562. placeoccupationPlaceHolder) + closeLine
  563. output.write(line)
  564. line = triple(occupationPlaceHolder,
  565. schemaCoords.prefix + 'occupationLocation',
  566. placeoccupationPlaceHolder) + closeLine
  567. output.write(line)
  568. line = triple(placeoccupationPlaceHolder,
  569. rdfsCoords.prefix + 'label',
  570. '\"' + y + '\"') + closeLine
  571. output.write(line)
  572. line = triple(placeoccupationPlaceHolder,
  573. nsCoords.prefix + 'type',
  574. cidocCoords.prefix + 'E53_Place') + closeLine
  575. output.write(line)
  576. line = triple(aspoPlaceHolder,
  577. schemaCoords.prefix + 'jobTitle',
  578. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  579. output.write(line)
  580. else:
  581. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  582. txt = row['place occupation 3 ENTE']
  583. x = re.sub("\n", " ", txt)
  584. y = re.sub("\s\s", "", x)
  585. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  586. line = triple(aspoPlaceHolder,
  587. schemaCoords.prefix + 'workLocation',
  588. placeoccupationPlaceHolder) + closeLine
  589. output.write(line)
  590. line = triple(occupationPlaceHolder,
  591. schemaCoords.prefix + 'occupationLocation',
  592. placeoccupationPlaceHolder) + closeLine
  593. output.write(line)
  594. line = triple(placeoccupationPlaceHolder,
  595. rdfsCoords.prefix + 'label',
  596. '\"' + y + '\"') + closeLine
  597. output.write(line)
  598. line = triple(placeoccupationPlaceHolder,
  599. nsCoords.prefix + 'type',
  600. cidocCoords.prefix + 'E53_Place') + closeLine
  601. output.write(line)
  602. line = triple(aspoPlaceHolder,
  603. schemaCoords.prefix + 'jobTitle',
  604. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  605. output.write(line)
  606. output.write('\n')
  607. #
  608. #
  609. # Limit number of entries processed (if desired)
  610. if (ii > max_entries):
  611. break