# %%
import csv
import json


def _first_row(rows, criteria):
    """Return the first row matching every column/value pair in *criteria*.

    rows: list of dicts as produced by csv.DictReader.
    criteria: dict mapping column name -> required value.
    Returns the matching row, or None when no row matches.
    Raises KeyError if a row lacks one of the requested columns.
    """
    return next(
        (row for row in rows
         if all(row[column] == value for column, value in criteria.items())),
        None,
    )


def _apply_cidoc_link(snippet, csv_row, defined_string):
    """Fill the '#URI#' placeholder of *snippet*, or drop its "defined by" part.

    csv_row: row found in one of the extra csv files, or None.
    defined_string: text removed from the snippet when the row carries no
        (non-empty) 'CIDOC-LINK' value.
    Returns the updated snippet.
    """
    link = csv_row.get('CIDOC-LINK') if csv_row is not None else None
    if link:
        return snippet.replace('#URI#', link)
    return snippet.replace(defined_string, '')


# IMPORT MASTER FILE
# Encodings are spelled out because keys and headers contain non-ASCII
# characters ('Entità', 'ENTITÀ'); the platform default is not always UTF-8.
with open('.dat/man_draft.json', 'r', encoding='utf-8') as in_file:
    data = json.load(in_file)

# 'utf-8-sig' also accepts files saved with a BOM (which would otherwise end
# up glued to the first header name); newline='' is what the csv module expects.
with open('entities_in.csv', 'r', encoding='utf-8-sig', newline='') as csv_in_1:
    ent_csv_in = list(csv.DictReader(csv_in_1))

with open('relations_in.csv', 'r', encoding='utf-8-sig', newline='') as csv_in_2:
    rel_csv_in = list(csv.DictReader(csv_in_2))

# %%
# Consistency check: print the relation endpoints that are not declared
# under data['Entità'].
entity_rels = {
    ent
    for rel in data['Relazioni']
    for ent in (rel['Entità 1'], rel['Entità 2'])
}
entities = set(data['Entità'])
entities.add('#any')  # For compatibility
if not entity_rels.issubset(entities):
    print(entity_rels.difference(entities))

# %%
# USE A DIRTY SHORTCUT: paste entity/relation info on a precompiled rdf template file.

# Load template
with open('template_2.rdf', 'r', encoding='utf-8') as in_file:
    raw_rdf = in_file.read()

# Defined rdf snippets; info will replace placeholder tags (in uppercase between '#')
# NOTE(review): as they stand, these snippets hold no RDF/XML markup and lack
# the '#NAME#', '#URI#', '#DOMAIN#', '#RANGE#' and '#INV#' placeholders that
# are substituted further down. The markup looks like it was lost from this
# copy of the script - restore it (it must match template_2.rdf) before
# trusting the generated draft.rdf.
entity_template = ''' #LABEL# #PARENT# '''
subclass_string = " #PARENT#\n"
class_defined_string = ' \n'
object_property_template = ''' #LABEL# '''
object_property_inverse_template = ''' #LABEL# '''
object_defined_string = ' \n'
datatype_property_template = ''' #LABEL# '''
datatype_defined_string = ' \n'

# Single-pass character mapping used by label_to_name.
_LABEL_TRANSLATION = str.maketrans({
    ' ': '_',
    'à': 'a',
    'è': 'e',
    'é': 'e',
    'ì': 'i',
    'ò': 'o',
    'ù': 'u',
})


# Define a normalization function for rdf labels for easier portability
def label_to_name(label):
    """Return *label* with spaces turned into '_' and accents removed.

    Only the lowercase accented vowels à, è, é, ì, ò, ù are mapped to their
    plain counterpart; every other character is left untouched.
    """
    return label.translate(_LABEL_TRANSLATION)


# Generic ('proprietary') datatypes to std. xsd datatypes mapping
# NOTE(review): not referenced anywhere in the code visible here.
datatype_xsd = {
    "#string": 'string',
    '#uri': '#uri',
    '#number': 'decimal',
    '#date': 'date',
    '#coordinates': '#coordinates'
}

# %%
# Map entity info to dedicated lists
entities_rdf_list = []
entities_csv = []
datatype_properties_rdf_list = []
same_as = set(data['Same_as'])
for label, ent in data['Entità'].items():
    entity_name = label_to_name(label)
    entity_rdf = entity_template.replace('#LABEL#', label).replace('#NAME#', entity_name)

    # Try to find entity in extra csv, see if there is CIDOC info and if so, map it
    entity_in_csv = _first_row(ent_csv_in, {'ENTITÀ': label})
    entity_rdf = _apply_cidoc_link(entity_rdf, entity_in_csv, class_defined_string)

    # Subclasses: also recorded as an explicit relation, so they are emitted
    # by the relation loop below and land in relations.csv.
    if 'Sottoclasse di' in ent:
        parent = ent['Sottoclasse di']
        data['Relazioni'].append({
            "Entità 1": label,
            "Entità 2": parent,
            "Etichetta": "is_subclass_of",
            "Inversa": "is_superclass_of",
        })
        entity_rdf = entity_rdf.replace('#PARENT#', label_to_name(parent))
    else:
        entity_rdf = entity_rdf.replace(subclass_string, '')
    entities_rdf_list.append(entity_rdf)

    # One csv row for the entity itself ...
    if label in same_as:
        entities_csv.append([label, "", ', '.join(data['Same_as'][label])])
    else:
        entities_csv.append([label, "", ""])

    # ... followed by one row per literal attribute, i.e. every string value
    # starting with '#'.
    for datatype_label, datatype_val in ent.items():
        if not isinstance(datatype_val, str) or not datatype_val.startswith('#'):
            continue
        entities_csv.append(["", datatype_label, ""])
        datatype_name = label_to_name(datatype_label)
        datatype_rdf = (
            datatype_property_template
            .replace('#LABEL#', datatype_label)
            .replace('#NAME#', datatype_name)
            .replace('#DOMAIN#', entity_name)
        )

        # Try to find attribute in extra csv, see if there is CIDOC info and if so, map it
        datatype_in_csv = _first_row(ent_csv_in, {'ATTRIBUTO (LITERAL)': datatype_label})
        datatype_rdf = _apply_cidoc_link(datatype_rdf, datatype_in_csv, datatype_defined_string)
        datatype_properties_rdf_list.append(datatype_rdf)

# Map relation info to dedicated lists
relations_rdf_list = []
relations_csv = []
for rel in data['Relazioni']:
    label = rel['Etichetta']
    inverse_label = rel['Inversa']
    domain = label_to_name(rel['Entità 1'])
    range1 = label_to_name(rel['Entità 2'])
    relations_csv.append([rel['Entità 1'], rel['Entità 2'], rel['Etichetta'], rel['Inversa']])
    name = '_'.join((domain, label_to_name(label), range1))
    inverse_name = '_'.join((range1, label_to_name(inverse_label), domain))

    # Try to find relation in extra csv, see if there is CIDOC info and if so, map it.
    # The row is matched on the two entity columns only, not on the relation name.
    relation_in_csv = _first_row(
        rel_csv_in,
        {'ENTITÀ 1': rel['Entità 1'], 'ENTITÀ 2': rel['Entità 2']},
    )

    relation_rdf = (
        object_property_template
        .replace('#NAME#', name)
        .replace('#LABEL#', label)
        .replace('#DOMAIN#', domain)
        .replace('#RANGE#', range1)
    )
    relation_rdf = _apply_cidoc_link(relation_rdf, relation_in_csv, object_defined_string)

    relation_inverse_rdf = (
        object_property_inverse_template
        .replace('#NAME#', inverse_name)
        .replace('#LABEL#', inverse_label)
        .replace('#DOMAIN#', range1)
        .replace('#RANGE#', domain)
        .replace('#INV#', name)
    )
    # The fallback used to be replace('', ''), which changes nothing; it now
    # strips the same "defined by" text as the forward relation.
    # NOTE(review): assumes both object-property templates share that line -
    # confirm once the template markup is restored.
    relation_inverse_rdf = _apply_cidoc_link(
        relation_inverse_rdf, relation_in_csv, object_defined_string
    )

    relation_full_rdf = relation_rdf + '\n\n\n' + relation_inverse_rdf
    relations_rdf_list.append(relation_full_rdf)

# Write info to template and export it to output file.
# The text is built before the file is opened so that a failure here does not
# leave a truncated draft.rdf behind.
to_out = (
    raw_rdf
    .replace(entity_template, '\n\n\n'.join(entities_rdf_list))
    .replace(datatype_property_template, '\n\n\n'.join(datatype_properties_rdf_list))
    .replace(object_property_inverse_template, '\n\n\n'.join(relations_rdf_list))
)
with open('draft.rdf', 'w', encoding='utf-8') as out_file:
    out_file.write(to_out)

# %%
# Write info to two csv files (one for Entities, one for Relations) for extra human readability
# newline='' prevents the csv writer from producing blank rows on Windows.
with open('entities.csv', 'w', encoding='utf-8', newline='') as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(['ENTITÀ', 'ATTRIBUTO (LITERAL)', 'SAME AS'])
    writer.writerows(entities_csv)

with open('relations.csv', 'w', encoding='utf-8', newline='') as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(['ENTITÀ 1', 'ENTITÀ 2', 'NOME RELAZIONE', 'NOME RELAZIONE INVERSA'])
    writer.writerows(relations_csv)

# %%
print(raw_rdf)

# %%
entity_template in raw_rdf

# %%
entity_template

# %%