# associate_OVI_URL_TLIO.py

import codecs
import csv
import functools
import io
import os
import re
import tokenize

#import pandas as pd
  8. merged_data = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/mergeOVI-TLIO.csv', 'w')
  9. csvwriter = csv.writer(merged_data)
  10. params = ['sigla', 'n_lemma', 'lemma', 'n_iperlemma', 'iperlemma', 'pos', 'commento', 'id', 'thing', 'link_tlio']
  11. csvwriter.writerow(params)
  12. def get_link_ovi (lemma):
  13. link_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/OVI_lemmi_clean.csv')
  14. reader = csv.DictReader(link_file)
  15. for row in reader:
  16. sLemma = row['sLemma']
  17. if (sLemma == lemma):
  18. return (row['FileHTM'])
  19. merge_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/data_lemmi_iperlemmi_thing_id.csv')
  20. reader = csv.DictReader(merge_file)
  21. for row in reader:
  22. line = []
  23. sigla = row['sigla']
  24. n_lemma = row['n_lemma']
  25. lemma = row['lemma']
  26. n_iperlemma = row['n_iperlemma']
  27. iperlemma = row['iperlemma']
  28. pos = row['pos']
  29. commento = row['commento']
  30. id = row['id']
  31. thing = row['thing']
  32. link_ovi = get_link_ovi(lemma)
  33. line.append (sigla)
  34. line.append (n_lemma)
  35. line.append (lemma)
  36. line.append (n_iperlemma)
  37. line.append (iperlemma)
  38. line.append (pos)
  39. line.append (commento)
  40. line.append (id)
  41. line.append (thing)
  42. if link_ovi is not None:
  43. line.append(link_ovi)
  44. else:
  45. line.append("")
  46. csvwriter.writerow(line)
  47. #print (line)
  48. merged_data.close()