reparsing_TO_REVIEW.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. # %%
  2. import json
  3. import csv
  4. with open('./etc/man_draft.json', 'r') as in_file:
  5. data = json.load(in_file)
  6. # %%
  7. entity_rels = {ent for rel in data['Relazioni'] for ent in [rel['Entità 1'], rel['Entità 2']]}
  8. # %%
  9. entities = set(data['Entità'].keys())
  10. entities.add('#any') # For compatibility
  11. # %%
  12. # Consistency check
  13. if not entity_rels.issubset(entities):
  14. print(entity_rels.difference(entities))
  15. # %%
  16. with open('./dat/template.rdf', 'r') as in_file:
  17. raw_rdf = in_file.read()
# Template for one owl:Class declaration. The #NAME#, #LABEL# and #PARENT#
# placeholders are filled with str.replace in the entity loop below. The
# unfilled template text is also used as the search string that locates the
# class section inside raw_rdf when the output file is written, so it must
# match the corresponding text in template.rdf character for character.
# NOTE(review): whitespace inside these literals may have been collapsed in
# this copy of the file - confirm against the original before relying on it.
entity_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:Class rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<rdfs:subClassOf>#PARENT#</rdfs:subClassOf>
</owl:Class>
'''
# Exact text removed from a filled entity_template when the entity has no
# 'Sottoclasse di' entry. It only has an effect if it matches the subClassOf
# line of entity_template byte for byte (leading whitespace and newline
# included).
subclass_string = " <rdfs:subClassOf>#PARENT#</rdfs:subClassOf>\n"
# Template for the forward object property of a relation; placeholders are
# #NAME#, #LABEL#, #RANGE# and #DOMAIN#.
object_property_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:ObjectProperty rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<rdfs:range rdf:resource="&h2iosc;#RANGE#"/>
<rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
</owl:ObjectProperty>
'''
# Template for the inverse object property; same placeholders plus #INV#,
# which receives the name of the forward property. The unfilled text is also
# the search string for the relations section of raw_rdf.
object_property_inverse_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:ObjectProperty rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<owl:inverseOf rdf:resource="&h2iosc;#INV#"/>
<rdfs:range rdf:resource="&h2iosc;#RANGE#"/>
<rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
</owl:ObjectProperty>
'''
# Template for one datatype property of an entity; placeholders are #NAME#,
# #LABEL# and #DOMAIN#. The unfilled text is also the search string for the
# datatype-property section of raw_rdf.
datatype_property_template = '''
<!-- http://www.h2iosc.it/onto##NAME# -->
<owl:DatatypeProperty rdf:about="&h2iosc;#NAME#">
<rdfs:label>#LABEL#</rdfs:label>
<rdfs:domain rdf:resource="&h2iosc;#DOMAIN#"/>
</owl:DatatypeProperty>
'''
  50. def label_to_name(label):
  51. return label.replace(' ', '_').replace('à', 'a').replace('è', 'e').replace('é', 'e').replace('ì', 'i').replace('ò', 'o').replace('ù', 'u')
  52. datatype_xsd = {
  53. "#string": 'string',
  54. '#uri': '#uri',
  55. '#number': 'decimal',
  56. '#date': 'date',
  57. '#coordinates': '#coordinates'
  58. }
  59. entities_rdf_list = []
  60. entities_csv = []
  61. datatype_properties_rdf_list = []
  62. same_as = list(data['Same_as'].keys())
  63. for label, ent in data['Entità'].items():
  64. entity_name = label_to_name(label)
  65. entity_rdf = entity_template.replace('#LABEL#', label).replace('#NAME#', entity_name)
  66. # Subclasses
  67. if 'Sottoclasse di' in ent.keys():
  68. parent = ent['Sottoclasse di']
  69. data['Relazioni'].append({"Entità 1": label,
  70. "Entità 2": parent,
  71. "Etichetta": "is_subclass_of", "Inversa": "is_superclass_of"})
  72. entity_rdf = entity_rdf.replace('#PARENT#', label_to_name(parent))
  73. else:
  74. entity_rdf = entity_rdf.replace(subclass_string, '')
  75. entities_rdf_list.append(entity_rdf)
  76. #
  77. if label in same_as:
  78. entities_csv.append( [label, "", ', '.join(data['Same_as'][label])] )
  79. else:
  80. entities_csv.append([label, "", ""])
  81. for datatype_label, datatype_val in ent.items():
  82. if not isinstance(datatype_val, str) or not datatype_val.startswith('#'):
  83. continue
  84. entities_csv.append(["", datatype_label, ""])
  85. datatype_name = label_to_name(datatype_label)
  86. datatype_properties_rdf_list.append(
  87. datatype_property_template.replace('#LABEL#', datatype_label).replace(
  88. '#NAME#', datatype_name
  89. ).replace('#DOMAIN#', entity_name)
  90. )
  91. relations_rdf_list = []
  92. relations_csv = []
  93. for rel in data['Relazioni']:
  94. label = rel['Etichetta']
  95. inverse_label = rel['Inversa']
  96. domain = label_to_name(rel['Entità 1'])
  97. range1 = label_to_name(rel['Entità 2'])
  98. relations_csv.append([rel['Entità 1'], rel['Entità 2'], rel['Etichetta'], rel['Inversa']])
  99. name = domain + '_' + label_to_name(label) + '_' + range1
  100. inverse_name = range1 + '_' + label_to_name(inverse_label) + '_' + domain
  101. #
  102. relation_rdf = object_property_template.replace('#NAME#', name).replace('#LABEL#', label).replace('#DOMAIN#', domain).replace('#RANGE#', range1)
  103. #
  104. relation_inverse_rdf = object_property_inverse_template.replace('#NAME#', inverse_name).replace('#LABEL#', inverse_label).replace('#DOMAIN#', range1).replace('#RANGE#', domain).replace('#INV#', name)
  105. #
  106. relation_full_rdf = relation_rdf + '\n\n\n' + relation_inverse_rdf
  107. relations_rdf_list.append(relation_full_rdf)
  108. with open('./etc/draft.rdf', 'w') as out_file:
  109. to_out = raw_rdf.replace(entity_template, '\n\n\n'.join(entities_rdf_list)).replace(
  110. datatype_property_template, '\n\n\n'.join(datatype_properties_rdf_list)
  111. ).replace(object_property_inverse_template, '\n\n\n'.join(relations_rdf_list))
  112. out_file.write(to_out)
  113. # %%
  114. with open('./etc/entities.csv', 'w') as out_csv:
  115. writer = csv.writer(out_csv)
  116. writer.writerow(['ENTITÀ', 'ATTRIBUTO (LITERAL)', 'SAME AS'])
  117. writer.writerows(entities_csv)
  118. with open('./etc/relations.csv', 'w') as out_csv:
  119. writer = csv.writer(out_csv)
  120. writer.writerow(['ENTITÀ 1', 'ENTITÀ 2', 'NOME RELAZIONE', 'NOME RELAZIONE INVERSA'])
  121. writer.writerows(relations_csv)
  122. # %%