|
@@ -1,204 +0,0 @@
|
|
-# %%
|
|
|
|
-import json
|
|
|
|
-import csv
|
|
|
|
-
|
|
|
|
# IMPORT MASTER FILE
# Load the master JSON description plus the two auxiliary CSV tables.
# The encoding is pinned because the data uses non-ASCII keys (e.g. 'Entità');
# without it open() falls back to the platform locale encoding.
# NOTE(review): assumes all three inputs are stored as UTF-8 - confirm for the
# hand-edited CSV files.
with open('.dat/man_draft.json', 'r', encoding='utf-8') as in_file:
    data = json.load(in_file)

# newline='' is what the csv module expects for files it reads, so that
# newlines embedded in quoted fields are interpreted correctly.
with open('entities_in.csv', 'r', encoding='utf-8', newline='') as csv_in_1:
    reader = csv.DictReader(csv_in_1)
    ent_csv_in = list(reader)

with open('relations_in.csv', 'r', encoding='utf-8', newline='') as csv_in_2:
    reader = csv.DictReader(csv_in_2)
    rel_csv_in = list(reader)
|
|
|
|
-# %%
|
|
|
|
# Consistency check: every entity named by a relation must also be declared
# under data['Entità']; offending names are printed as a set.

# Names used on either side of any relation.
entity_rels = {
    rel[side]
    for rel in data['Relazioni']
    for side in ('Entità 1', 'Entità 2')
}

# Declared entity names, plus the '#any' wildcard (kept for compatibility).
entities = {'#any', *data['Entità']}

undeclared = entity_rels - entities
if undeclared:
    print(undeclared)
|
|
|
|
-# %%
|
|
|
|
# USE A DIRTY SHORTCUT: paste entity/relation info on a precompiled rdf template file.

# Load template
# The encoding is pinned so the template is decoded the same way on every
# platform instead of depending on the locale default.
# NOTE(review): assumes template_2.rdf is stored as UTF-8 - confirm.
with open('template_2.rdf', 'r', encoding='utf-8') as in_file:
    raw_rdf = in_file.read()
|
|
|
|
-
|
|
|
|
-# Define rdf snippets; entity/relation info will replace the placeholder tags (uppercase names between '#')
|
|
|
|
-entity_template = '''
|
|
|
|
- <!-- http://www.h2iosc.it/onto##NAME# -->
|
|
|
|
-
|
|
|
|
- <owl:Class rdf:about="&h2iosc;#NAME#">
|
|
|
|
- <rdfs:label>#LABEL#</rdfs:label>
|
|
|
|
- <rdfs:subClassOf>#PARENT#</rdfs:subClassOf>
|
|
|
|
- <rdfs:isDefinedBy rdf:resource="#URI#"/>
|
|
|
|
- </owl:Class>
|
|
|
|
-'''
|
|
|
|
-subclass_string = " <rdfs:subClassOf>#PARENT#</rdfs:subClassOf>\n"
|
|
|
|
-class_defined_string = ' <rdfs:isDefinedBy rdf:resource="#URI#"/>\n'
|
|
|
|
-
|
|
|
|
-object_property_template = '''
|
|
|
|
- <!-- http://www.h2iosc.it/onto##NAME# -->
|
|
|
|
-
|
|
|
|
- <owl:ObjectProperty rdf:about="&h2iosc;#NAME#">
|
|
|
|
- <rdfs:label>#LABEL#</rdfs:label>
|
|
|
|
- <rdfs:range rdf:resource="&h2iosc;#RANGE#"/>
|
|
|
|
- <rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
|
|
|
|
- <rdfs:isDefinedBy rdf:resource="#URI#"/>
|
|
|
|
- </owl:ObjectProperty>
|
|
|
|
-'''
|
|
|
|
-
|
|
|
|
-object_property_inverse_template = '''
|
|
|
|
- <!-- http://www.h2iosc.it/onto##NAME# -->
|
|
|
|
-
|
|
|
|
- <owl:ObjectProperty rdf:about="&h2iosc;#NAME#">
|
|
|
|
- <rdfs:label>#LABEL#</rdfs:label>
|
|
|
|
- <owl:inverseOf rdf:resource="&h2iosc;#INV#"/>
|
|
|
|
- <rdfs:range rdf:resource="&h2iosc;#RANGE#"/>
|
|
|
|
- <rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
|
|
|
|
- <rdfs:isDefinedBy rdf:resource="#URI#"/>
|
|
|
|
- </owl:ObjectProperty>
|
|
|
|
-'''
|
|
|
|
-object_defined_string = ' <rdfs:isDefinedBy rdf:resource="#URI#"/>\n'
|
|
|
|
-
|
|
|
|
-datatype_property_template = '''
|
|
|
|
- <!-- http://www.h2iosc.it/onto##NAME# -->
|
|
|
|
-
|
|
|
|
- <owl:DatatypeProperty rdf:about="&h2iosc;#NAME#" rdf:isDefinedBy="#URI#">
|
|
|
|
- <rdfs:label>#LABEL#</rdfs:label>
|
|
|
|
- <rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
|
|
|
|
- <rdfs:isDefinedBy rdf:resource="#URI#"/>
|
|
|
|
- </owl:DatatypeProperty>
|
|
|
|
-'''
|
|
|
|
-datatype_defined_string = ' <rdfs:isDefinedBy rdf:resource="#URI#"/>\n'
|
|
|
|
-
|
|
|
|
# Define a normalization function for rdf labels for easier portability.
# The translation table is built once; it maps a space to '_' and the
# lowercase Italian accented vowels to their unaccented form.
_LABEL_TO_NAME_TABLE = str.maketrans({
    ' ': '_',
    'à': 'a',
    'è': 'e',
    'é': 'e',
    'ì': 'i',
    'ò': 'o',
    'ù': 'u',
})


def label_to_name(label):
    """Return *label* turned into an identifier-like rdf name.

    Every space becomes an underscore and each of 'à', 'è', 'é', 'ì', 'ò',
    'ù' is replaced by its unaccented letter, in a single pass over the
    string. Any other character (including uppercase accented letters) is
    left untouched.

    Args:
        label: Human-readable label (a ``str``).

    Returns:
        The normalized name as a new ``str``.
    """
    return label.translate(_LABEL_TO_NAME_TABLE)
|
|
|
|
-
|
|
|
|
-# Generic ('proprietary') datatypes to standard xsd datatypes mapping
|
|
|
|
-datatype_xsd = {
|
|
|
|
- "#string": 'string',
|
|
|
|
- '#uri': '#uri',
|
|
|
|
- '#number': 'decimal',
|
|
|
|
- '#date': 'date',
|
|
|
|
- '#coordinates': '#coordinates'
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-# %%
|
|
|
|
# Map entity info to dedicated lists
def _cidoc_link_for(rows, column, value):
    """Return the 'CIDOC-LINK' cell of the first row whose *column* equals *value*.

    Returns None when no row matches (or when the matching row has no such cell).
    """
    match = next((row for row in rows if row[column] == value), None)
    return None if match is None else match.get('CIDOC-LINK')


entities_rdf_list = []              # one rendered owl:Class snippet per entity
entities_csv = []                   # rows for the human-readable entities csv
datatype_properties_rdf_list = []   # one rendered owl:DatatypeProperty snippet per attribute
same_as = list(data['Same_as'].keys())

for label, ent in data['Entità'].items():

    entity_name = label_to_name(label)
    entity_rdf = entity_template.replace('#LABEL#', label).replace('#NAME#', entity_name)

    # Look the entity up in the extra csv; if it carries CIDOC info, map it,
    # otherwise drop the isDefinedBy line from the snippet.
    cidoc_class = _cidoc_link_for(ent_csv_in, 'ENTITÀ', label)
    if cidoc_class:
        entity_rdf = entity_rdf.replace('#URI#', cidoc_class)
    else:
        entity_rdf = entity_rdf.replace(class_defined_string, '')

    # Subclasses: also recorded as an explicit relation, so the relation
    # pass (which iterates data['Relazioni']) picks them up.
    if 'Sottoclasse di' in ent:
        parent = ent['Sottoclasse di']
        data['Relazioni'].append({"Entità 1": label,
                                  "Entità 2": parent,
                                  "Etichetta": "is_subclass_of", "Inversa": "is_superclass_of"})
        entity_rdf = entity_rdf.replace('#PARENT#', label_to_name(parent))
    else:
        entity_rdf = entity_rdf.replace(subclass_string, '')

    entities_rdf_list.append(entity_rdf)

    # Entity row of the csv: label, empty attribute cell, comma-joined aliases.
    if label in data['Same_as']:
        entities_csv.append([label, "", ', '.join(data['Same_as'][label])])
    else:
        entities_csv.append([label, "", ""])

    for datatype_label, datatype_val in ent.items():
        # Only string values starting with '#' are treated as literal attributes.
        if not isinstance(datatype_val, str) or not datatype_val.startswith('#'):
            continue
        entities_csv.append(["", datatype_label, ""])
        datatype_name = label_to_name(datatype_label)
        datatype_rdf = (
            datatype_property_template
            .replace('#LABEL#', datatype_label)
            .replace('#NAME#', datatype_name)
            .replace('#DOMAIN#', entity_name)
        )
        # Look the attribute up in the extra csv; if it carries CIDOC info, map it.
        cidoc_class = _cidoc_link_for(ent_csv_in, 'ATTRIBUTO (LITERAL)', datatype_label)
        if cidoc_class:
            # NOTE(review): this also fills the rdf:isDefinedBy attribute on the
            # opening tag of the template - confirm that attribute is wanted.
            datatype_rdf = datatype_rdf.replace('#URI#', cidoc_class)
        else:
            datatype_rdf = datatype_rdf.replace(datatype_defined_string, '')
            # The datatype template repeats the placeholder as an attribute on
            # its opening tag; remove that too so that no literal '#URI#'
            # ends up in the generated rdf.
            datatype_rdf = datatype_rdf.replace(' rdf:isDefinedBy="#URI#"', '')

        datatype_properties_rdf_list.append(datatype_rdf)
|
|
|
|
-
|
|
|
|
-
|
|
|
|
# Map relation info to dedicated lists
relations_rdf_list = []   # rendered direct + inverse owl:ObjectProperty snippets
relations_csv = []        # rows for the human-readable relations csv
for rel in data['Relazioni']:
    label = rel['Etichetta']
    inverse_label = rel['Inversa']
    entity_1 = rel['Entità 1']
    entity_2 = rel['Entità 2']
    domain = label_to_name(entity_1)
    range1 = label_to_name(entity_2)
    relations_csv.append([entity_1, entity_2, label, inverse_label])

    # Property names are built as <domain>_<relation>_<range>.
    name = domain + '_' + label_to_name(label) + '_' + range1
    inverse_name = range1 + '_' + label_to_name(inverse_label) + '_' + domain

    # Look the relation up in the extra csv; if it carries CIDOC info, map it.
    # NOTE(review): rows are matched on the two entity names only, not on the
    # relation label - confirm this is intended when two entities share
    # several relations.
    relation_in_csv = next(
        (row for row in rel_csv_in
         if row['ENTITÀ 1'] == entity_1 and row['ENTITÀ 2'] == entity_2),
        None,
    )
    cidoc_class = None
    if relation_in_csv is not None:
        cidoc_class = relation_in_csv.get('CIDOC-LINK')
    has_cidoc = cidoc_class is not None and cidoc_class != ''

    # Direct property.
    relation_rdf = object_property_template
    for tag, value in (('#NAME#', name), ('#LABEL#', label),
                       ('#DOMAIN#', domain), ('#RANGE#', range1)):
        relation_rdf = relation_rdf.replace(tag, value)
    if has_cidoc:
        relation_rdf = relation_rdf.replace('#URI#', cidoc_class)
    else:
        relation_rdf = relation_rdf.replace(object_defined_string, '')

    # Inverse property: domain and range swapped, pointing back at the direct one.
    relation_inverse_rdf = object_property_inverse_template
    for tag, value in (('#NAME#', inverse_name), ('#LABEL#', inverse_label),
                       ('#DOMAIN#', range1), ('#RANGE#', domain), ('#INV#', name)):
        relation_inverse_rdf = relation_inverse_rdf.replace(tag, value)
    if has_cidoc:
        relation_inverse_rdf = relation_inverse_rdf.replace('#URI#', cidoc_class)
    else:
        # Only the element itself is removed here (not its surrounding whitespace).
        relation_inverse_rdf = relation_inverse_rdf.replace('<rdfs:isDefinedBy rdf:resource="#URI#"/>', '')

    relation_full_rdf = relation_rdf + '\n\n\n' + relation_inverse_rdf
    relations_rdf_list.append(relation_full_rdf)
|
|
|
|
-
|
|
|
|
-
|
|
|
|
# Write info to template and export it to output file.
# Each placeholder snippet found verbatim in the template text is replaced by
# the rendered snippets, separated by blank lines. str.replace is a silent
# no-op when a snippet does not occur verbatim in raw_rdf.
# NOTE(review): object_property_template itself is never substituted; the
# direct and inverse properties are both pasted over the inverse placeholder -
# confirm that matches template_2.rdf.
to_out = raw_rdf
for placeholder, rendered in (
    (entity_template, entities_rdf_list),
    (datatype_property_template, datatype_properties_rdf_list),
    (object_property_inverse_template, relations_rdf_list),
):
    to_out = to_out.replace(placeholder, '\n\n\n'.join(rendered))

# The content is built before the file is opened, so a failure above does not
# leave a truncated draft.rdf behind. The encoding is pinned because labels
# contain non-ASCII characters.
with open('draft.rdf', 'w', encoding='utf-8') as out_file:
    out_file.write(to_out)
|
|
|
|
-# %%
|
|
|
|
# Write info to two csv files (one for Entities, one for Relations) for extra human readability.
# newline='' is required by the csv module for files it writes (otherwise an
# extra '\r' is emitted on platforms that translate '\n'); the encoding is
# pinned because the headers and labels contain non-ASCII characters.
with open('entities.csv', 'w', encoding='utf-8', newline='') as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(['ENTITÀ', 'ATTRIBUTO (LITERAL)', 'SAME AS'])
    writer.writerows(entities_csv)

with open('relations.csv', 'w', encoding='utf-8', newline='') as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(['ENTITÀ 1', 'ENTITÀ 2', 'NOME RELAZIONE', 'NOME RELAZIONE INVERSA'])
    writer.writerows(relations_csv)
|
|
|
|
-# %%
|
|
|
|
-print(raw_rdf)
|
|
|
|
-# %%
|
|
|
|
-entity_template in raw_rdf
|
|
|
|
-# %%
|
|
|
|
-entity_template
|
|
|
|
-# %%
|
|
|