# %%
import csv
import json
import unicodedata
# Load the JSON draft into `data`.
# The encoding is pinned to UTF-8: the script indexes this document with
# non-ASCII keys (e.g. 'Entità'), and open()'s default encoding depends on the
# platform locale, so relying on it can mis-decode those keys.
with open('./etc/man_draft.json', 'r', encoding='utf-8') as in_file:
    data = json.load(in_file)
# %%
# Every entity name that appears as an endpoint ('Entità 1' / 'Entità 2') of
# a relation.
entity_rels = set()
for rel in data['Relazioni']:
    entity_rels.update((rel['Entità 1'], rel['Entità 2']))
# %%
# Names declared in the 'Entità' section, plus '#any', which is accepted for
# compatibility.
entities = {*data['Entità'].keys(), '#any'}
# %%
# Consistency check: print the relation endpoints that were never declared.
undeclared = entity_rels.difference(entities)
if undeclared:
    print(undeclared)
# %%
# Read the RDF skeleton that the generated fragments are later spliced into.
# The encoding is pinned to UTF-8 so the text is decoded the same way on every
# platform instead of following the locale default.
with open('./dat/template.rdf', 'r', encoding='utf-8') as in_file:
    raw_rdf = in_file.read()
# Text fragments used to generate the ontology. The '#...#' tokens ('#NAME#',
# '#LABEL#', '#PARENT#', '#RANGE#', '#DOMAIN#', '#INV#') are placeholders that
# the script fills in with str.replace.
# NOTE(review): some of these strings are also searched for verbatim inside
# the contents of ./dat/template.rdf when the output is assembled, so their
# whitespace has to match that file character for character -- confirm
# against template.rdf before reformatting them.

# One owl:Class declaration per entity.
# NOTE(review): the parent is emitted as element text, whereas the property
# templates below reference classes through rdf:resource -- confirm this is
# the intended serialisation.
entity_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:Class rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<rdfs:subClassOf>#PARENT#</rdfs:subClassOf>
</owl:Class>
'''

# The '#PARENT#' line of entity_template (newline included); it is removed
# from the fragment of entities that have no parent class. It is derived from
# the template instead of being typed a second time, so the two can never
# differ in whitespace -- a mismatch would make the removal a silent no-op and
# leave a literal '#PARENT#' in the output.
subclass_string = next(
    line
    for line in entity_template.splitlines(keepends=True)
    if '#PARENT#' in line
)

# Object property without an owl:inverseOf link.
object_property_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:ObjectProperty rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<rdfs:range rdf:resource="&h2iosc;#RANGE#"/>
<rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
</owl:ObjectProperty>
'''

# Object property that additionally points at its inverse through '#INV#'.
object_property_inverse_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:ObjectProperty rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<owl:inverseOf rdf:resource="&h2iosc;#INV#"/>
<rdfs:range rdf:resource="&h2iosc;#RANGE#"/>
<rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
</owl:ObjectProperty>
'''

# Datatype (literal-valued) property; only a domain is declared, no range.
datatype_property_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:DatatypeProperty rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
</owl:DatatypeProperty>
'''
def label_to_name(label):
    """Turn a human-readable label into an identifier fragment.

    Diacritics are stripped from every letter, upper or lower case
    (``'Entità'`` -> ``'Entita'``, ``'ENTITÀ'`` -> ``'ENTITA'``), and each
    space becomes an underscore. All other characters are kept as they are.

    Args:
        label: The label text.

    Returns:
        The label with accents removed and spaces replaced by ``'_'``.
    """
    # Canonical decomposition splits an accented letter into its base letter
    # followed by combining marks; dropping the marks leaves the bare letter.
    decomposed = unicodedata.normalize('NFD', label)
    base = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Recompose so characters that were only decomposed, not stripped
    # (anything without a combining mark), come back in their usual form.
    return unicodedata.normalize('NFC', base).replace(' ', '_')
# Lookup table keyed by '#'-prefixed type tags.
# NOTE(review): nothing in the visible script reads this table, and '#uri' /
# '#coordinates' map to themselves -- presumably still to be assigned a real
# datatype name; confirm.
datatype_xsd = {
    '#string': 'string',
    '#uri': '#uri',
    '#number': 'decimal',
    '#date': 'date',
    '#coordinates': '#coordinates',
}
# Entity pass: for every entry of data['Entità'] build
#   * its owl:Class fragment            -> entities_rdf_list
#   * its rows for entities.csv         -> entities_csv
#   * one fragment per literal attribute -> datatype_properties_rdf_list
entities_rdf_list = []
entities_csv = []
datatype_properties_rdf_list = []
# Keep the mapping itself rather than a list of its keys: the lookup below is
# then a dict access instead of a list scan repeated for every entity.
same_as = data['Same_as']
for label, ent in data['Entità'].items():
    entity_name = label_to_name(label)
    entity_rdf = entity_template.replace('#LABEL#', label).replace('#NAME#', entity_name)
    # Subclasses
    if 'Sottoclasse di' in ent:
        parent = ent['Sottoclasse di']
        # Record the hierarchy as an ordinary relation too; it is appended to
        # data['Relazioni'], which the relation pass iterates afterwards.
        data['Relazioni'].append({
            "Entità 1": label,
            "Entità 2": parent,
            "Etichetta": "is_subclass_of",
            "Inversa": "is_superclass_of",
        })
        entity_rdf = entity_rdf.replace('#PARENT#', label_to_name(parent))
    else:
        # No parent: drop the subClassOf line from the fragment.
        entity_rdf = entity_rdf.replace(subclass_string, '')
    entities_rdf_list.append(entity_rdf)
    # Entity row: name, empty attribute column, comma-separated same-as list
    # (empty when the entity has no 'Same_as' entry).
    entities_csv.append([label, "", ', '.join(same_as.get(label, ()))])
    for datatype_label, datatype_val in ent.items():
        # 'Sottoclasse di' is structural and was handled above. Without this
        # guard a parent whose name starts with '#' (e.g. '#any') would be
        # mistaken for a literal attribute.
        if datatype_label == 'Sottoclasse di':
            continue
        # Literal attributes are the string values that start with '#'.
        if not isinstance(datatype_val, str) or not datatype_val.startswith('#'):
            continue
        # Attribute row: only the attribute column is filled.
        entities_csv.append(["", datatype_label, ""])
        datatype_name = label_to_name(datatype_label)
        datatype_properties_rdf_list.append(
            datatype_property_template
            .replace('#LABEL#', datatype_label)
            .replace('#NAME#', datatype_name)
            .replace('#DOMAIN#', entity_name)
        )
# Relation pass: for every entry of data['Relazioni'] emit one CSV row and a
# pair of object-property fragments (the relation and its inverse).
relations_rdf_list = []
relations_csv = []


def _fill(template, substitutions):
    """Apply the (placeholder, value) replacements to template, in order."""
    for placeholder, value in substitutions:
        template = template.replace(placeholder, value)
    return template


for rel in data['Relazioni']:
    forward_label = rel['Etichetta']
    backward_label = rel['Inversa']
    subject_name = label_to_name(rel['Entità 1'])
    object_name = label_to_name(rel['Entità 2'])
    relations_csv.append(
        [rel['Entità 1'], rel['Entità 2'], rel['Etichetta'], rel['Inversa']]
    )
    # Property names are <domain>_<relation>_<range>; the inverse swaps the
    # two ends.
    forward_name = f"{subject_name}_{label_to_name(forward_label)}_{object_name}"
    backward_name = f"{object_name}_{label_to_name(backward_label)}_{subject_name}"

    forward_rdf = _fill(object_property_template, [
        ('#NAME#', forward_name),
        ('#LABEL#', forward_label),
        ('#DOMAIN#', subject_name),
        ('#RANGE#', object_name),
    ])
    # The inverse has domain and range exchanged and links back to the
    # forward property through '#INV#'.
    backward_rdf = _fill(object_property_inverse_template, [
        ('#NAME#', backward_name),
        ('#LABEL#', backward_label),
        ('#DOMAIN#', object_name),
        ('#RANGE#', subject_name),
        ('#INV#', forward_name),
    ])
    relations_rdf_list.append('\n\n\n'.join((forward_rdf, backward_rdf)))
# Splice the generated fragments into the skeleton. Each template text acts
# as the marker for its own section: the entity template is replaced by all
# class fragments, the datatype template by all datatype-property fragments,
# and the inverse object-property template by all relation fragments (the
# plain object-property template is not searched for).
# The text is built before the file is opened, so a failure here cannot leave
# a truncated draft.rdf behind.
to_out = (
    raw_rdf
    .replace(entity_template, '\n\n\n'.join(entities_rdf_list))
    .replace(datatype_property_template, '\n\n\n'.join(datatype_properties_rdf_list))
    .replace(object_property_inverse_template, '\n\n\n'.join(relations_rdf_list))
)
# Encoding pinned to UTF-8: labels may contain accented characters and the
# locale default is not the same on every platform.
with open('./etc/draft.rdf', 'w', encoding='utf-8') as out_file:
    out_file.write(to_out)
# %%
# Dump the two review tables. The files are opened with newline='' as the
# csv module requires (otherwise rows get an extra '\r' on platforms that
# translate line endings), and with an explicit UTF-8 encoding because the
# headers and labels contain accented characters.
with open('./etc/entities.csv', 'w', encoding='utf-8', newline='') as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(['ENTITÀ', 'ATTRIBUTO (LITERAL)', 'SAME AS'])
    writer.writerows(entities_csv)
with open('./etc/relations.csv', 'w', encoding='utf-8', newline='') as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(['ENTITÀ 1', 'ENTITÀ 2', 'NOME RELAZIONE', 'NOME RELAZIONE INVERSA'])
    writer.writerows(relations_csv)
# %%