FRANCESCO CORADESCHI 2 달 전
부모
커밋
839a0ff1a7
1개의 변경된 파일, 7개의 추가 및 4개의 삭제
  1. 7 4
      Serious_attempt/the_one_that_does_it.py

+ 7 - 4
Serious_attempt/the_one_that_does_it.py

@@ -2,6 +2,8 @@
 import csv
 import json
 import openpyxl as op
+import re
+
 
 # BASIC CONFIGURATION
 DATA_FOLDER = './data/'
@@ -45,6 +47,7 @@ raw_relations = [{key: row[ind] for ind, key in enumerate(relations_keys)} for r
 #
 # TODO: completare secondo le specifiche sopra
 # TODO: effettuare il merge con i "miei" CSV, che hanno informazioni in più!
+# TODO: ottimizzare un po' la scrittura del codice
 
 
 # Process entities:
@@ -55,7 +58,7 @@ for ent in raw_entities:
     entity_names = ent['Concetto']
     if not isinstance(entity_names, str):
         continue
-    aliases = [al.strip().title() for al in entity_names.split('\n') if al.strip()]
+    aliases = [re.sub(r'\s+', ' ', al.strip().title()) for al in entity_names.split('\n') if al.strip()]
     if not aliases:
         continue
     entity_name = aliases[0]
@@ -79,14 +82,14 @@ for rel in raw_relations:
     obj = rel['Oggetto']
     if not isinstance(subj, str) or not isinstance(obj, str):
         continue
-    subj = subj.strip().title()
-    obj = obj.strip().title()
+    subj = re.sub(r'\s+', ' ', subj.strip().title())
+    obj = re.sub(r'\s+', ' ', obj.strip().title())
     if subj==obj:
         continue
 
     rel_name = rel['Relazione']
     if isinstance(rel_name, str):
-        rel_name = rel_name.strip().lower()
+        rel_name = re.sub(r'\s+', '_', rel_name.strip().lower()).replace('__', '_')
     better_rel = {'Soggetto': subj, 'Relazione': rel_name, 'Oggetto': obj, 'Pair': tuple(set([subj, obj]))}
     clean_relations.append(better_rel)