# Lemmi-Iperlemmi.py

import codecs
import csv
import functools
import io
import os
import re
import tokenize

import pandas as pd
  8. results = []
  9. with open('/Users/alessiaspadi/Documents/RESTORE/temp_ovi/assoc_Data.csv') as File:
  10. reader = csv.reader(File)
  11. for row in reader:
  12. results.append(row)
  13. lemmi = []
  14. iperlemmi = []
  15. df = pd.read_csv('/Users/alessiaspadi/Documents/RESTORE/temp_ovi/lem_Data.csv', sep=';')
  16. for x in range(len(df)):
  17. lemmi.append([df.sigla[x], df.num[x], df.lemma[x]])
  18. cf = pd.read_csv('/Users/alessiaspadi/Documents/RESTORE/temp_ovi/iperlemmi_Datini.csv', sep=',')
  19. for x in range(len(cf)):
  20. iperlemmi.append([cf.sigla[x], cf.num[x], cf.iperlemma[x]])
  21. lip_data = open('lip_Data.csv', 'w')
  22. csvwriter = csv.writer(lip_data)
  23. params = ["sigla", "n_lemma", "n_iperlemma", "lemma", "iperlemma"]
  24. csvwriter.writerow(params)
  25. for r in results:
  26. sigla = r[0]
  27. num = int(r[1])
  28. ip = int(r[2])
  29. row = [sigla, num, ip]
  30. file_name = "lemmi." + sigla + ".txt"
  31. f = open('/Users/alessiaspadi/Documents/RESTORE/temp_ovi/lemmi/' + file_name, "r", encoding='latin-1')
  32. lines = f.readlines()
  33. for line in lines:
  34. if "IPERLEMMA" not in line:
  35. lem = re.split('\|', line)
  36. nn = int(lem[0])
  37. if nn == num:
  38. row.append(lem[1])
  39. else:
  40. iplem = re.split('\|', line)
  41. np = iplem[0].replace("IPERLEMMA", " ")
  42. mp = int(np)
  43. if ip == mp:
  44. row.append(iplem[1])
  45. csvwriter.writerow(row)
  46. f.close()
  47. lip_data.close()