EstraiLemmi.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. import re
  2. import csv
  3. import os
  # [sigla, file name] pairs, one for every usable regular file in basepath_lemmi.
  lemmi = []
  basepath_lemmi = '/Users/alessiaspadi/Documents/RESTORE/temp_ovi/lemmi'
  # os.listdir() returns names in arbitrary order; sorting keeps the order of
  # the collected pairs (and of whatever is generated from them) reproducible.
  for entry in sorted(os.listdir(basepath_lemmi)):
      if not os.path.isfile(os.path.join(basepath_lemmi, entry)):
          continue
      name_parts = entry.split('.')
      if len(name_parts) < 2:
          # No '.' in the name, so there is no second segment to read the
          # sigla from; skip the file instead of raising IndexError.
          continue
      # The sigla is the first space-delimited token of the second
      # dot-separated segment of the file name.
      ll = name_parts[1].lstrip().split(' ')[0]
      if ll != '':
          lemmi.append([ll, entry])
  # Output CSV. newline='' is what the csv module expects for files it writes:
  # without it, platforms that translate '\n' on output produce '\r\r\n' row
  # endings (i.e. spurious blank lines between rows).
  iperlem_data = open('iperlem_Data.csv', 'w', newline='')
  csvwriter = csv.writer(iperlem_data)
  # Header row of the output file.
  params = ["sigla", "file", "num", "iperlemma", "commento", "livello"]
  csvwriter.writerow(params)
  def write_lines(lines, sig, file, writer=None):
      """Write one CSV row per input line.

      Each line is split on '|'. Every resulting field has the marker text
      "IPERLEMMA" replaced by a single space and is then stripped of leading
      and trailing whitespace (which also removes the trailing newline of the
      last field). The row written is ``[sig, file, *fields]``.

      Args:
          lines: Iterable of '|'-separated text lines.
          sig: Value written in the first column of every row.
          file: Value written in the second column of every row.
          writer: Object with a ``writerow`` method (e.g. a ``csv.writer``).
              Defaults to the module-level ``csvwriter``.
      """
      if writer is None:
          # Resolved at call time so existing three-argument callers keep
          # writing to the module-level CSV writer.
          writer = csvwriter
      for line in lines:
          row = [sig, file]
          # A plain str.split is equivalent to the former re.split('\|', ...)
          # and avoids the invalid '\|' escape in a non-raw string literal.
          for field in line.split('|'):
              if "IPERLEMMA" in field:
                  field = field.replace("IPERLEMMA", " ")
              row.append(field.strip())
          writer.writerow(row)
  # For every collected [sigla, file name] pair, keep only the lines that carry
  # the "IPERLEMMA" marker and hand them to write_lines().
  try:
      for sigla, file_name in lemmi:
          # Same location as the directory that was listed to build `lemmi`.
          source_path = os.path.join(basepath_lemmi, file_name)
          # The context manager closes the input file even if reading fails.
          with open(source_path, "r", encoding='latin-1') as f:
              clean_lines = [line for line in f if "IPERLEMMA" in line]
          write_lines(clean_lines, sigla, file_name)
  finally:
      # Always flush and close the output CSV, including when a file above
      # could not be read or written.
      iperlem_data.close()