OVI_allLemmaInfo.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import os
  2. import xml.etree.ElementTree as Xet
  3. import re
  4. from xml.etree import ElementTree
  5. import csv
  6. ovi_data = open('ovi_lemmi.csv', 'w')
  7. csvwriter = csv.writer(ovi_data)
  8. params = ["sigla", "n_lemma", "lemma", "pos", "iperlemma", "n_iperlemma", "commento"]
  9. csvwriter.writerow(params)
  10. '''lemmi = []
  11. basepath_lemmi = '/Users/alessiaspadi/Documents/RESTORE/temp_ovi/lemmi'
  12. for entry in os.listdir(basepath_lemmi):
  13. if os.path.isfile(os.path.join(basepath_lemmi, entry)):
  14. ll = entry.split('.')[1].lstrip().split(' ')[0]
  15. if ll != '':
  16. lemmi.append([ll, entry])'''
  17. def get_iperlemma(sig, n_lem, lem):
  18. link_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/ovi_lemmi.csv')
  19. reader = csv.DictReader(link_file)
  20. for row in reader:
  21. sigla = row['sigla'].upper()
  22. n_lemma = row['n_lemma']
  23. pos = row['pos']
  24. lemma = row['lemma']
  25. commento = row['commento']
  26. if (sig == sigla) and (n_lem == n_lemma) and (lem == lemma):
  27. return([pos, commento])
  28. merge_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/lemma-iperlemma_Data.csv')
  29. reader = csv.DictReader(merge_file)
  30. for row in reader:
  31. line=[]
  32. sigla = row['sigla'].upper()
  33. n_lemma = row['n_lemma']
  34. lemma = row['lemma']
  35. iperlemma = row['iperlemma']
  36. n_iperlemma = row['n_iperlemma']
  37. iper = get_iperlemma(sigla, n_lemma, lemma)
  38. line.append(sigla)
  39. line.append(n_lemma)
  40. line.append(lemma)
  41. line.append(n_iperlemma)
  42. line.append(iperlemma)
  43. if iper is not None:
  44. for ip in iper:
  45. line.append(ip)
  46. csvwriter.writerow(line)
  47. '''for x in range(len(lemmi)):
  48. sigla = lemmi[x][0]
  49. sig = sigla.upper()
  50. file_name = lemmi[x][1]
  51. f = open('/Users/alessiaspadi/Documents/RESTORE/temp_ovi/lemmi/' + file_name, "r", encoding='ISO-8859-1')
  52. lines = f.readlines()
  53. for line in lines:
  54. row = [sig]
  55. if "IPERLEMMA" not in line:
  56. lem = re.split(' \| ', line)
  57. lung = len(lem)
  58. n_lemma = lem[0]
  59. if (lung >= 2):
  60. lemma = lem[1]
  61. else:
  62. lemma = ""
  63. iper = get_iperlemma(sig, n_lemma, lemma)
  64. for el in lem:
  65. rr = el.replace('\n', '')
  66. row.append(rr)
  67. if iper is not None:
  68. for ip in iper:
  69. row.append(ip)
  70. csvwriter.writerow(row)
  71. f.close()'''
  72. ovi_data.close()