CSV_to_RDF_onomastica_ospedale_person_occupation.py 40 KB


  1. #Parser to convert the Datini onomastics CSV file into TTL format
  2. # Utilities to read/write csv files
  3. import csv
  4. # Utilities to handle character encodings
  5. import unicodedata
  6. # Ordered Dicts
  7. from collections import OrderedDict
  8. import json
  9. import re
  10. # OPTIONAL IMPORTS
  11. # For timestamping/simple speed tests
  12. from datetime import datetime
  13. # Random number generator
  14. from random import *
  15. # System & command line utilities
  16. import sys
  17. # Json for the dictionary
  18. import json
  19. import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/CSV/ospedale/'
  20. export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/ASPO/RDF/ospedale/'
  21. # Custom class to store URIs + related infos for the ontologies/repositories
  22. class RDFcoords:
  23. def __init__(self, uri, prefix, code = None):
  24. self.uri = uri
  25. self.prefix = prefix
  26. self.code = code
  27. # Repositories
  28. aspoCoords = RDFcoords('<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/>', 'aspo:')
  29. foafCoords = RDFcoords('<http://xmlns.com/foaf/0.1/>', 'foaf:')
  30. cidocCoords = RDFcoords('<http://www.cidoc-crm.org/cidoc-crm/>', 'crm:')
  31. schemaCoords = RDFcoords('<http://schema.org/>', 'schema:')
  32. personCoords = RDFcoords('<http://www.w3.org/ns/person#>', 'person:')
  33. nsCoords = RDFcoords('<http://www.w3.org/1999/02/22-rdf-syntax-ns#>', 'rdf:')
  34. rdfsCoords = RDFcoords('<http://www.w3.org/2000/01/rdf-schema#>', 'rdfs:')
  35. owlCoords = RDFcoords('<http://www.w3.org/2002/07/owl#>', 'owl:')
  36. # Basic functions for triples / shortened triples in TTL format
  37. def triple(subject, predicate, object1):
  38. line = subject + ' ' + predicate + ' ' + object1
  39. return line
  40. def doublet(predicate, object1):
  41. line = ' ' + predicate + ' ' + object1
  42. return line
  43. def singlet(object1):
  44. line = ' ' + object1
  45. return line
  46. # Line endings in TTL format
  47. continueLine1 = ' ;\n'
  48. continueLine2 = ' ,\n'
  49. closeLine = ' .\n'
  50. def writeTTLHeader(output):
  51. output.write('@prefix ' + aspoCoords.prefix + ' ' + aspoCoords.uri + closeLine)
  52. output.write('@prefix ' + foafCoords.prefix + ' ' + foafCoords.uri + closeLine)
  53. output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)
  54. output.write('@prefix ' + personCoords.prefix + ' ' + personCoords.uri + closeLine)
  55. output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)
  56. output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)
  57. output.write('@prefix ' + rdfsCoords.prefix + ' ' + rdfsCoords.uri + closeLine)
  58. output.write('@prefix ' + owlCoords.prefix + ' ' + owlCoords.uri + closeLine)
  59. output.write('\n')
  60. filePrefix = 'OSPEDALE-onomastica'
  61. fileType = '-persone-singole-occupation'
  62. max_entries = 10000000000000
  63. with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file, open(
  64. export_dir + filePrefix + fileType + '.ttl', 'w') as output:
  65. reader = csv.DictReader(csv_file)
  66. writeTTLHeader(output)
  67. first = True
  68. ii = 0
  69. for row in reader:
  70. # The index ii is used to process a limited number of entries for testing purposes
  71. ii = ii + 1
  72. id_aspo = row['recordid']
  73. #placeHolders
  74. aspoPlaceHolder = aspoCoords.prefix + id_aspo
  75. if row['occupation_1'] != '' and row['occupation_1'] != ' ' :
  76. occupazioni = []
  77. pipe = "|"
  78. if pipe in row['occupation_1']:
  79. occupazioni = row['occupation_1'].split('|')
  80. for occupazione in occupazioni:
  81. #Remove all white-space characters:
  82. txt = occupazione
  83. x = re.sub("\n", " ", txt)
  84. y = re.sub("\s\s", "", x)
  85. occ = re.sub(r'[^A-Za-z]','', y)
  86. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  87. line = triple(aspoPlaceHolder,
  88. schemaCoords.prefix + 'hasOccupation',
  89. occupationPlaceHolder) + closeLine
  90. output.write(line)
  91. line = triple(occupationPlaceHolder,
  92. nsCoords.prefix + 'type',
  93. schemaCoords.prefix + 'Occupation') + closeLine
  94. output.write(line)
  95. line = triple(occupationPlaceHolder,
  96. rdfsCoords.prefix + 'label',
  97. '\"' + y + '\"') + closeLine
  98. output.write(line)
  99. if row['ID_ente_1'] != '':
  100. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_1'] + '>'
  101. line = triple(aspoPlaceHolder,
  102. schemaCoords.prefix + 'workLocation',
  103. placeoccupationPlaceHolder) + closeLine
  104. output.write(line)
  105. line = triple(occupationPlaceHolder,
  106. schemaCoords.prefix + 'occupationLocation',
  107. placeoccupationPlaceHolder) + closeLine
  108. output.write(line)
  109. elif row['ID_ente_1'] == '' and row['place occupation 1 ENTE'] != '':
  110. placeoccupazioni = []
  111. pipe = "|"
  112. if pipe in row['place occupation 1 ENTE']:
  113. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  114. for placeoccupazione in placeoccupazioni:
  115. #Remove all white-space characters:
  116. txt = row['place occupation 1 ENTE']
  117. x = re.sub("\n", " ", txt)
  118. y = re.sub("\s\s", "", x)
  119. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  120. line = triple(aspoPlaceHolder,
  121. schemaCoords.prefix + 'workLocation',
  122. placeoccupationPlaceHolder) + closeLine
  123. output.write(line)
  124. line = triple(occupationPlaceHolder,
  125. schemaCoords.prefix + 'occupationLocation',
  126. placeoccupationPlaceHolder) + closeLine
  127. output.write(line)
  128. line = triple(placeoccupationPlaceHolder,
  129. rdfsCoords.prefix + 'label',
  130. '\"' + y + '\"') + closeLine
  131. output.write(line)
  132. line = triple(placeoccupationPlaceHolder,
  133. nsCoords.prefix + 'type',
  134. cidocCoords.prefix + 'E53_Place') + closeLine
  135. output.write(line)
  136. line = triple(aspoPlaceHolder,
  137. schemaCoords.prefix + 'jobTitle',
  138. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  139. output.write(line)
  140. else:
  141. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  142. txt = row['place occupation 1 ENTE']
  143. x = re.sub("\n", " ", txt)
  144. y = re.sub("\s\s", "", x)
  145. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  146. line = triple(aspoPlaceHolder,
  147. schemaCoords.prefix + 'workLocation',
  148. placeoccupationPlaceHolder) + closeLine
  149. output.write(line)
  150. line = triple(occupationPlaceHolder,
  151. schemaCoords.prefix + 'occupationLocation',
  152. placeoccupationPlaceHolder) + closeLine
  153. output.write(line)
  154. line = triple(placeoccupationPlaceHolder,
  155. rdfsCoords.prefix + 'label',
  156. '\"' + y + '\"') + closeLine
  157. output.write(line)
  158. line = triple(placeoccupationPlaceHolder,
  159. nsCoords.prefix + 'type',
  160. cidocCoords.prefix + 'E53_Place') + closeLine
  161. output.write(line)
  162. line = triple(aspoPlaceHolder,
  163. schemaCoords.prefix + 'jobTitle',
  164. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  165. output.write(line)
  166. else:
  167. #Remove all white-space characters:
  168. txt = row['occupation_1']
  169. x = re.sub("\n", " ", txt)
  170. y = re.sub("\s\s", "", x)
  171. occ = re.sub(r'[^A-Za-z]','', y)
  172. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  173. line = triple(aspoPlaceHolder,
  174. schemaCoords.prefix + 'hasOccupation',
  175. occupationPlaceHolder) + closeLine
  176. output.write(line)
  177. line = triple(occupationPlaceHolder,
  178. nsCoords.prefix + 'type',
  179. schemaCoords.prefix + 'Occupation') + closeLine
  180. output.write(line)
  181. line = triple(occupationPlaceHolder,
  182. rdfsCoords.prefix + 'label',
  183. '\"' + y + '\"') + closeLine
  184. output.write(line)
  185. if row['ID_ente_1'] != '':
  186. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_1'] + '>'
  187. line = triple(aspoPlaceHolder,
  188. schemaCoords.prefix + 'workLocation',
  189. placeoccupationPlaceHolder) + closeLine
  190. output.write(line)
  191. line = triple(occupationPlaceHolder,
  192. schemaCoords.prefix + 'occupationLocation',
  193. placeoccupationPlaceHolder) + closeLine
  194. output.write(line)
  195. elif row['ID_ente_1'] == '' and row['place occupation 1 ENTE'] != '':
  196. placeoccupazioni = []
  197. pipe = "|"
  198. if pipe in row['place occupation 1 ENTE']:
  199. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  200. for placeoccupazione in placeoccupazioni:
  201. #Remove all white-space characters:
  202. txt = row['place occupation 1 ENTE']
  203. x = re.sub("\n", " ", txt)
  204. y = re.sub("\s\s", "", x)
  205. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  206. line = triple(aspoPlaceHolder,
  207. schemaCoords.prefix + 'workLocation',
  208. placeoccupationPlaceHolder) + closeLine
  209. output.write(line)
  210. line = triple(occupationPlaceHolder,
  211. schemaCoords.prefix + 'occupationLocation',
  212. placeoccupationPlaceHolder) + closeLine
  213. output.write(line)
  214. line = triple(placeoccupationPlaceHolder,
  215. rdfsCoords.prefix + 'label',
  216. '\"' + y + '\"') + closeLine
  217. output.write(line)
  218. line = triple(placeoccupationPlaceHolder,
  219. nsCoords.prefix + 'type',
  220. cidocCoords.prefix + 'E53_Place') + closeLine
  221. output.write(line)
  222. line = triple(aspoPlaceHolder,
  223. schemaCoords.prefix + 'jobTitle',
  224. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  225. output.write(line)
  226. else:
  227. placeoccupazioni = row['place occupation 1 ENTE'].split('|')
  228. txt = row['place occupation 1 ENTE']
  229. x = re.sub("\n", " ", txt)
  230. y = re.sub("\s\s", "", x)
  231. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  232. line = triple(aspoPlaceHolder,
  233. schemaCoords.prefix + 'workLocation',
  234. placeoccupationPlaceHolder) + closeLine
  235. output.write(line)
  236. line = triple(occupationPlaceHolder,
  237. schemaCoords.prefix + 'occupationLocation',
  238. placeoccupationPlaceHolder) + closeLine
  239. output.write(line)
  240. line = triple(placeoccupationPlaceHolder,
  241. rdfsCoords.prefix + 'label',
  242. '\"' + y + '\"') + closeLine
  243. output.write(line)
  244. line = triple(placeoccupationPlaceHolder,
  245. nsCoords.prefix + 'type',
  246. cidocCoords.prefix + 'E53_Place') + closeLine
  247. output.write(line)
  248. line = triple(aspoPlaceHolder,
  249. schemaCoords.prefix + 'jobTitle',
  250. '\"' + row['occupation_1'].lower() + ' presso ' + row['place occupation 1 ENTE'].lower() + '\"') + closeLine
  251. output.write(line)
  252. if row['occupation_2'] != '' and row['occupation_2'] != ' ' :
  253. occupazioni = []
  254. pipe = "|"
  255. if pipe in row['occupation_2']:
  256. occupazioni = row['occupation_2'].split('|')
  257. for occupazione in occupazioni:
  258. #Remove all white-space characters:
  259. txt = occupazione
  260. x = re.sub("\n", " ", txt)
  261. y = re.sub("\s\s", "", x)
  262. occ = re.sub(r'[^A-Za-z]','', y)
  263. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  264. line = triple(aspoPlaceHolder,
  265. schemaCoords.prefix + 'hasOccupation',
  266. occupationPlaceHolder) + closeLine
  267. output.write(line)
  268. line = triple(occupationPlaceHolder,
  269. nsCoords.prefix + 'type',
  270. schemaCoords.prefix + 'Occupation') + closeLine
  271. output.write(line)
  272. line = triple(occupationPlaceHolder,
  273. rdfsCoords.prefix + 'label',
  274. '\"' + y + '\"') + closeLine
  275. output.write(line)
  276. if row['ID_ente_2'] != '':
  277. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_2'] + '>'
  278. line = triple(aspoPlaceHolder,
  279. schemaCoords.prefix + 'workLocation',
  280. placeoccupationPlaceHolder) + closeLine
  281. output.write(line)
  282. line = triple(occupationPlaceHolder,
  283. schemaCoords.prefix + 'occupationLocation',
  284. placeoccupationPlaceHolder) + closeLine
  285. output.write(line)
  286. elif row['place occupation 2 ENTE'] != '':
  287. placeoccupazioni = []
  288. pipe = "|"
  289. if pipe in row['place occupation 2 ENTE']:
  290. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  291. for placeoccupazione in placeoccupazioni:
  292. #Remove all white-space characters:
  293. txt = row['place occupation 2 ENTE']
  294. x = re.sub("\n", " ", txt)
  295. y = re.sub("\s\s", "", x)
  296. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  297. line = triple(aspoPlaceHolder,
  298. schemaCoords.prefix + 'workLocation',
  299. placeoccupationPlaceHolder) + closeLine
  300. output.write(line)
  301. line = triple(occupationPlaceHolder,
  302. schemaCoords.prefix + 'occupationLocation',
  303. placeoccupationPlaceHolder) + closeLine
  304. output.write(line)
  305. line = triple(placeoccupationPlaceHolder,
  306. rdfsCoords.prefix + 'label',
  307. '\"' + y + '\"') + closeLine
  308. output.write(line)
  309. line = triple(placeoccupationPlaceHolder,
  310. nsCoords.prefix + 'type',
  311. cidocCoords.prefix + 'E53_Place') + closeLine
  312. output.write(line)
  313. line = triple(aspoPlaceHolder,
  314. schemaCoords.prefix + 'jobTitle',
  315. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  316. output.write(line)
  317. else:
  318. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  319. txt = row['place occupation 2 ENTE']
  320. x = re.sub("\n", " ", txt)
  321. y = re.sub("\s\s", "", x)
  322. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  323. line = triple(aspoPlaceHolder,
  324. schemaCoords.prefix + 'workLocation',
  325. placeoccupationPlaceHolder) + closeLine
  326. output.write(line)
  327. line = triple(occupationPlaceHolder,
  328. schemaCoords.prefix + 'occupationLocation',
  329. placeoccupationPlaceHolder) + closeLine
  330. output.write(line)
  331. line = triple(placeoccupationPlaceHolder,
  332. rdfsCoords.prefix + 'label',
  333. '\"' + y + '\"') + closeLine
  334. output.write(line)
  335. line = triple(placeoccupationPlaceHolder,
  336. nsCoords.prefix + 'type',
  337. cidocCoords.prefix + 'E53_Place') + closeLine
  338. output.write(line)
  339. line = triple(aspoPlaceHolder,
  340. schemaCoords.prefix + 'jobTitle',
  341. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  342. output.write(line)
  343. else:
  344. #Remove all white-space characters:
  345. txt = row['occupation_2']
  346. x = re.sub("\n", " ", txt)
  347. y = re.sub("\s\s", "", x)
  348. occ = re.sub(r'[^A-Za-z]','', y)
  349. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  350. line = triple(aspoPlaceHolder,
  351. schemaCoords.prefix + 'hasOccupation',
  352. occupationPlaceHolder) + closeLine
  353. output.write(line)
  354. line = triple(occupationPlaceHolder,
  355. nsCoords.prefix + 'type',
  356. schemaCoords.prefix + 'Occupation') + closeLine
  357. output.write(line)
  358. line = triple(occupationPlaceHolder,
  359. rdfsCoords.prefix + 'label',
  360. '\"' + y + '\"') + closeLine
  361. output.write(line)
  362. if row['ID_ente_2'] != '':
  363. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_2'] + '>'
  364. line = triple(aspoPlaceHolder,
  365. schemaCoords.prefix + 'workLocation',
  366. placeoccupationPlaceHolder) + closeLine
  367. output.write(line)
  368. line = triple(occupationPlaceHolder,
  369. schemaCoords.prefix + 'occupationLocation',
  370. placeoccupationPlaceHolder) + closeLine
  371. output.write(line)
  372. elif row['ID_ente_2'] == '' and row['place occupation 2 ENTE'] != '':
  373. placeoccupazioni = []
  374. pipe = "|"
  375. if pipe in row['place occupation 2 ENTE']:
  376. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  377. for placeoccupazione in placeoccupazioni:
  378. #Remove all white-space characters:
  379. txt = row['place occupation 2 ENTE']
  380. x = re.sub("\n", " ", txt)
  381. y = re.sub("\s\s", "", x)
  382. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  383. line = triple(aspoPlaceHolder,
  384. schemaCoords.prefix + 'workLocation',
  385. placeoccupationPlaceHolder) + closeLine
  386. output.write(line)
  387. line = triple(occupationPlaceHolder,
  388. schemaCoords.prefix + 'occupationLocation',
  389. placeoccupationPlaceHolder) + closeLine
  390. output.write(line)
  391. line = triple(placeoccupationPlaceHolder,
  392. rdfsCoords.prefix + 'label',
  393. '\"' + y + '\"') + closeLine
  394. output.write(line)
  395. line = triple(placeoccupationPlaceHolder,
  396. nsCoords.prefix + 'type',
  397. cidocCoords.prefix + 'E53_Place') + closeLine
  398. output.write(line)
  399. line = triple(aspoPlaceHolder,
  400. schemaCoords.prefix + 'jobTitle',
  401. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  402. output.write(line)
  403. else:
  404. placeoccupazioni = row['place occupation 2 ENTE'].split('|')
  405. txt = row['place occupation 2 ENTE']
  406. x = re.sub("\n", " ", txt)
  407. y = re.sub("\s\s", "", x)
  408. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  409. line = triple(aspoPlaceHolder,
  410. schemaCoords.prefix + 'workLocation',
  411. placeoccupationPlaceHolder) + closeLine
  412. output.write(line)
  413. line = triple(occupationPlaceHolder,
  414. schemaCoords.prefix + 'occupationLocation',
  415. placeoccupationPlaceHolder) + closeLine
  416. output.write(line)
  417. line = triple(placeoccupationPlaceHolder,
  418. rdfsCoords.prefix + 'label',
  419. '\"' + y + '\"') + closeLine
  420. output.write(line)
  421. line = triple(placeoccupationPlaceHolder,
  422. nsCoords.prefix + 'type',
  423. cidocCoords.prefix + 'E53_Place') + closeLine
  424. output.write(line)
  425. line = triple(aspoPlaceHolder,
  426. schemaCoords.prefix + 'jobTitle',
  427. '\"' + row['occupation_2'].lower() + ' presso ' + row['place occupation 2 ENTE'].lower() + '\"') + closeLine
  428. output.write(line)
  429. if row['occupation_3'] != '' and row['occupation_3'] != ' ' :
  430. occupazioni = []
  431. pipe = "|"
  432. if pipe in row['occupation_3']:
  433. occupazioni = row['occupation_3'].split('|')
  434. for occupazione in occupazioni:
  435. #Remove all white-space characters:
  436. txt = occupazione
  437. x = re.sub("\n", " ", txt)
  438. y = re.sub("\s\s", "", x)
  439. occ = re.sub(r'[^A-Za-z]','', y)
  440. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  441. line = triple(aspoPlaceHolder,
  442. schemaCoords.prefix + 'hasOccupation',
  443. occupationPlaceHolder) + closeLine
  444. output.write(line)
  445. line = triple(occupationPlaceHolder,
  446. nsCoords.prefix + 'type',
  447. schemaCoords.prefix + 'Occupation') + closeLine
  448. output.write(line)
  449. line = triple(occupationPlaceHolder,
  450. rdfsCoords.prefix + 'label',
  451. '\"' + y + '\"') + closeLine
  452. output.write(line)
  453. if row['ID_ente_3'] != '':
  454. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_3'] + '>'
  455. line = triple(aspoPlaceHolder,
  456. schemaCoords.prefix + 'workLocation',
  457. placeoccupationPlaceHolder) + closeLine
  458. output.write(line)
  459. line = triple(occupationPlaceHolder,
  460. schemaCoords.prefix + 'occupationLocation',
  461. placeoccupationPlaceHolder) + closeLine
  462. output.write(line)
  463. elif row['ID_ente_3'] == '' and row['place occupation 3 ENTE'] != '':
  464. placeoccupazioni = []
  465. pipe = "|"
  466. if pipe in row['place occupation 3 ENTE']:
  467. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  468. for placeoccupazione in placeoccupazioni:
  469. #Remove all white-space characters:
  470. txt = row['place occupation 3 ENTE']
  471. x = re.sub("\n", " ", txt)
  472. y = re.sub("\s\s", "", x)
  473. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  474. line = triple(aspoPlaceHolder,
  475. schemaCoords.prefix + 'workLocation',
  476. placeoccupationPlaceHolder) + closeLine
  477. output.write(line)
  478. line = triple(occupationPlaceHolder,
  479. schemaCoords.prefix + 'occupationLocation',
  480. placeoccupationPlaceHolder) + closeLine
  481. output.write(line)
  482. line = triple(placeoccupationPlaceHolder,
  483. rdfsCoords.prefix + 'label',
  484. '\"' + y + '\"') + closeLine
  485. output.write(line)
  486. line = triple(placeoccupationPlaceHolder,
  487. nsCoords.prefix + 'type',
  488. cidocCoords.prefix + 'E53_Place') + closeLine
  489. output.write(line)
  490. line = triple(aspoPlaceHolder,
  491. schemaCoords.prefix + 'jobTitle',
  492. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  493. output.write(line)
  494. else:
  495. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  496. txt = row['place occupation 3 ENTE']
  497. x = re.sub("\n", " ", txt)
  498. y = re.sub("\s\s", "", x)
  499. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  500. line = triple(aspoPlaceHolder,
  501. schemaCoords.prefix + 'workLocation',
  502. placeoccupationPlaceHolder) + closeLine
  503. output.write(line)
  504. line = triple(occupationPlaceHolder,
  505. schemaCoords.prefix + 'occupationLocation',
  506. placeoccupationPlaceHolder) + closeLine
  507. output.write(line)
  508. line = triple(placeoccupationPlaceHolder,
  509. rdfsCoords.prefix + 'label',
  510. '\"' + y + '\"') + closeLine
  511. output.write(line)
  512. line = triple(placeoccupationPlaceHolder,
  513. nsCoords.prefix + 'type',
  514. cidocCoords.prefix + 'E53_Place') + closeLine
  515. output.write(line)
  516. line = triple(aspoPlaceHolder,
  517. schemaCoords.prefix + 'jobTitle',
  518. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  519. output.write(line)
  520. else:
  521. #Remove all white-space characters:
  522. txt = row['occupation_3']
  523. x = re.sub("\n", " ", txt)
  524. y = re.sub("\s\s", "", x)
  525. occ = re.sub(r'[^A-Za-z]','', y)
  526. occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
  527. line = triple(aspoPlaceHolder,
  528. schemaCoords.prefix + 'hasOccupation',
  529. occupationPlaceHolder) + closeLine
  530. output.write(line)
  531. line = triple(occupationPlaceHolder,
  532. nsCoords.prefix + 'type',
  533. schemaCoords.prefix + 'Occupation') + closeLine
  534. output.write(line)
  535. line = triple(occupationPlaceHolder,
  536. rdfsCoords.prefix + 'label',
  537. '\"' + y + '\"') + closeLine
  538. output.write(line)
  539. if row['ID_ente_3'] != '':
  540. placeoccupationPlaceHolder = '<http://dev.restore.ovi.cnr.it/vocabularies/places/' + row['ID_ente_3'] + '>'
  541. line = triple(aspoPlaceHolder,
  542. schemaCoords.prefix + 'workLocation',
  543. placeoccupationPlaceHolder) + closeLine
  544. output.write(line)
  545. line = triple(occupationPlaceHolder,
  546. schemaCoords.prefix + 'occupationLocation',
  547. placeoccupationPlaceHolder) + closeLine
  548. output.write(line)
  549. elif row['ID_ente_3'] == '' and row['place occupation 3 ENTE'] != '':
  550. placeoccupazioni = []
  551. pipe = "|"
  552. if pipe in row['place occupation 3 ENTE']:
  553. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  554. for placeoccupazione in placeoccupazioni:
  555. #Remove all white-space characters:
  556. txt = row['place occupation 3 ENTE']
  557. x = re.sub("\n", " ", txt)
  558. y = re.sub("\s\s", "", x)
  559. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  560. line = triple(aspoPlaceHolder,
  561. schemaCoords.prefix + 'workLocation',
  562. placeoccupationPlaceHolder) + closeLine
  563. output.write(line)
  564. line = triple(occupationPlaceHolder,
  565. schemaCoords.prefix + 'occupationLocation',
  566. placeoccupationPlaceHolder) + closeLine
  567. output.write(line)
  568. line = triple(placeoccupationPlaceHolder,
  569. rdfsCoords.prefix + 'label',
  570. '\"' + y + '\"') + closeLine
  571. output.write(line)
  572. line = triple(placeoccupationPlaceHolder,
  573. nsCoords.prefix + 'type',
  574. cidocCoords.prefix + 'E53_Place') + closeLine
  575. output.write(line)
  576. line = triple(aspoPlaceHolder,
  577. schemaCoords.prefix + 'jobTitle',
  578. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  579. output.write(line)
  580. else:
  581. placeoccupazioni = row['place occupation 3 ENTE'].split('|')
  582. txt = row['place occupation 3 ENTE']
  583. x = re.sub("\n", " ", txt)
  584. y = re.sub("\s\s", "", x)
  585. placeoccupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + y.replace(" ","_").replace("'","").replace('\\','\\\\').replace('"','\\"') + '>'
  586. line = triple(aspoPlaceHolder,
  587. schemaCoords.prefix + 'workLocation',
  588. placeoccupationPlaceHolder) + closeLine
  589. output.write(line)
  590. line = triple(occupationPlaceHolder,
  591. schemaCoords.prefix + 'occupationLocation',
  592. placeoccupationPlaceHolder) + closeLine
  593. output.write(line)
  594. line = triple(placeoccupationPlaceHolder,
  595. rdfsCoords.prefix + 'label',
  596. '\"' + y + '\"') + closeLine
  597. output.write(line)
  598. line = triple(placeoccupationPlaceHolder,
  599. nsCoords.prefix + 'type',
  600. cidocCoords.prefix + 'E53_Place') + closeLine
  601. output.write(line)
  602. line = triple(aspoPlaceHolder,
  603. schemaCoords.prefix + 'jobTitle',
  604. '\"' + row['occupation_3'].lower() + ' presso ' + row['place occupation 3 ENTE'].lower() + '\"') + closeLine
  605. output.write(line)
  606. output.write('\n')
  607. #
  608. #
  609. # Limit number of entries processed (if desired)
  610. if (ii > max_entries):
  611. break