|
@@ -88,9 +88,7 @@ with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file,
|
|
|
# The index ii is used to process a limited number of entries for testing purposes
|
|
|
ii = ii + 1
|
|
|
if row['entityType'] == 'person':
|
|
|
-
|
|
|
id_aspo = row['recordId']
|
|
|
- #placeHolders
|
|
|
aspoPlaceHolder = aspoCoords.prefix + id_aspo
|
|
|
line = triple(aspoPlaceHolder,
|
|
|
nsCoords.prefix + 'type',
|
|
@@ -183,24 +181,48 @@ with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file,
|
|
|
output.write(line)
|
|
|
|
|
|
if row['occupation'] != '' and row['occupation'] != ' ' :
|
|
|
- #Remove all white-space characters:
|
|
|
- txt = row['occupation']
|
|
|
- x = re.sub("\n", " ", txt)
|
|
|
- y = re.sub("\s\s", "", x)
|
|
|
- occ = re.sub(r'[^A-Za-z]','', y)
|
|
|
- occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
|
|
|
- line = triple(aspoPlaceHolder,
|
|
|
- schemaCoords.prefix + 'hasOccupation',
|
|
|
- occupationPlaceHolder) + closeLine
|
|
|
- output.write(line)
|
|
|
- line = triple(occupationPlaceHolder,
|
|
|
- nsCoords.prefix + 'type',
|
|
|
- schemaCoords.prefix + 'Occupation') + closeLine
|
|
|
- output.write(line)
|
|
|
- line = triple(occupationPlaceHolder,
|
|
|
- rdfsCoords.prefix + 'label',
|
|
|
- '\"' + y + '\"') + closeLine
|
|
|
- output.write(line)
|
|
|
+ occupazioni = []
|
|
|
+ pipe = "|"
|
|
|
+ if pipe in row['occupation']:
|
|
|
+ occupazioni = row['occupation'].split('|')
|
|
|
+ for occupazione in occupazioni:
|
|
|
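+ #Normalize white-space (newlines become spaces, each pair of consecutive white-space characters is removed); the URI slug then keeps only ASCII letters: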
|
|
|
+ txt = occupazione
|
|
|
+ x = re.sub("\n", " ", txt)
|
|
|
+ y = re.sub("\s\s", "", x)
|
|
|
+ occ = re.sub(r'[^A-Za-z]','', y)
|
|
|
+ occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
|
|
|
+ line = triple(aspoPlaceHolder,
|
|
|
+ schemaCoords.prefix + 'hasOccupation',
|
|
|
+ occupationPlaceHolder) + closeLine
|
|
|
+ output.write(line)
|
|
|
+ line = triple(occupationPlaceHolder,
|
|
|
+ nsCoords.prefix + 'type',
|
|
|
+ schemaCoords.prefix + 'Occupation') + closeLine
|
|
|
+ output.write(line)
|
|
|
+ line = triple(occupationPlaceHolder,
|
|
|
+ rdfsCoords.prefix + 'label',
|
|
|
+ '\"' + y + '\"') + closeLine
|
|
|
+ output.write(line)
|
|
|
+ else:
|
|
|
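+ #Normalize white-space (newlines become spaces, each pair of consecutive white-space characters is removed); the URI slug then keeps only ASCII letters: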
|
|
|
+ txt = row['occupation']
|
|
|
+ x = re.sub("\n", " ", txt)
|
|
|
+ y = re.sub("\s\s", "", x)
|
|
|
+ occ = re.sub(r'[^A-Za-z]','', y)
|
|
|
+ occupationPlaceHolder = '<http://www.archiviodistato.prato.it/' + occ.replace(" ","_") + '>'
|
|
|
+ line = triple(aspoPlaceHolder,
|
|
|
+ schemaCoords.prefix + 'hasOccupation',
|
|
|
+ occupationPlaceHolder) + closeLine
|
|
|
+ output.write(line)
|
|
|
+ line = triple(occupationPlaceHolder,
|
|
|
+ nsCoords.prefix + 'type',
|
|
|
+ schemaCoords.prefix + 'Occupation') + closeLine
|
|
|
+ output.write(line)
|
|
|
+ line = triple(occupationPlaceHolder,
|
|
|
+ rdfsCoords.prefix + 'label',
|
|
|
+ '\"' + y + '\"') + closeLine
|
|
|
+ output.write(line)
|
|
|
|
|
|
if row['avo 1'] != '':
|
|
|
avo1 = '<http://www.archiviodistato.prato.it/accedi-e-consulta/aspoMV001/scheda/' + id_aspo + "/avo1>"
|
|
@@ -233,25 +255,35 @@ with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file,
|
|
|
output.write(line)
|
|
|
|
|
|
if row['Qualifica'] != '':
|
|
|
+ qualifiche = []
|
|
|
+ pipe = "|"
|
|
|
+ if pipe in row['Qualifica']:
|
|
|
+ qualifiche = row['Qualifica'].split('|')
|
|
|
+ for qualifica in qualifiche:
|
|
|
+ #Normalize white-space: newlines and each pair of consecutive white-space characters become a single space:
|
|
|
+ txt = qualifica
|
|
|
+ x = re.sub("\n", " ", txt)
|
|
|
+ y = re.sub("\s\s", " ", x)
|
|
|
+ line = triple(aspoPlaceHolder, schemaCoords.prefix + 'honorificPrefix', '\"' + str(y) + '\"') + closeLine
|
|
|
+ output.write(line)
|
|
|
+ else:
|
|
|
#Remove all white-space characters:
|
|
|
- txt = row['Qualifica']
|
|
|
+ txt = row['Qualifica']
|
|
|
+ x = re.sub("\n", " ", txt)
|
|
|
+ y = re.sub("\s\s", " ", x)
|
|
|
+ line = triple(aspoPlaceHolder, schemaCoords.prefix + 'honorificPrefix', '\"' + y + '\"') + closeLine
|
|
|
+ output.write(line)
|
|
|
+
|
|
|
+ if row['place_occupation_Qualifica'] != '':
|
|
|
+ #Normalize white-space into y (NOTE: the triple below emits the raw, escaped column value rather than y):
|
|
|
+ txt = row['place_occupation_Qualifica']
|
|
|
x = re.sub("\n", " ", txt)
|
|
|
- y = re.sub("\s\s", " ", x)
|
|
|
+ y = re.sub("\s\s", "", x)
|
|
|
line = triple(aspoPlaceHolder,
|
|
|
- schemaCoords.prefix + 'honorificPrefix',
|
|
|
- '\"' + y + '\"') + closeLine
|
|
|
+ schemaCoords.prefix + 'workLocation',
|
|
|
+ '\"' + row['place_occupation_Qualifica'].replace('\\','\\\\').replace('"','\\"') + '\"') + closeLine
|
|
|
output.write(line)
|
|
|
|
|
|
- #if row['place_occupation_Qualifica'] != '':
|
|
|
- #Remove all white-space characters:
|
|
|
- # txt = row['place_occupation_Qualifica']
|
|
|
- # x = re.sub("\n", " ", txt)
|
|
|
- # y = re.sub("\s\s", "", x)
|
|
|
- # line = triple(aspoPlaceHolder,
|
|
|
- # schemaCoords.prefix + 'workLocation',
|
|
|
- # '\"' + row['place_occupation_Qualifica'].replace('\\','\\\\').replace('"','\\"') + '\"') + closeLine
|
|
|
- # output.write(line)
|
|
|
-
|
|
|
if row['biogHist p'] != '':
|
|
|
#Remove all white-space characters:
|
|
|
txt = row['biogHist p']
|
|
@@ -278,8 +310,8 @@ with open(import_dir + filePrefix + fileType + '.csv', newline="") as csv_file,
|
|
|
owlCoords.prefix + 'sameAs',
|
|
|
aspoCoords.prefix + row['Variante']) + closeLine
|
|
|
output.write(line)
|
|
|
- output.write('\n')
|
|
|
- #
|
|
|
+
|
|
|
+ output.write('\n')
|
|
|
#
|
|
|
# Limit number of entries processed (if desired)
|
|
|
if (ii > max_entries):
|