# OVI_cleaner.py
import codecs
import csv
import io
import os
import re
import sys
import tokenize
from curses.ascii import DEL

import pandas as pd
  9. clean_data = open('OVI_lemmi_clean.csv', 'w')
  10. csvwriter = csv.writer(clean_data)
  11. params = ['ID', 'Lemma', 'sLemma', 'FileHTM']
  12. csvwriter.writerow(params)
  13. clean_file = open('/Users/leonardocanova/Library/CloudStorage/OneDrive-UniversityofPisa(1)/Documenti/Progetti università/OVI/Programmazione/slemmi_OVI.csv')
  14. reader = csv.DictReader(clean_file)
  15. #se la cosina sotto la metto dentro una funzione mi dà errore perché 'a' la vede come lista e non come array
  16. '''def cleaner(a):
  17. if len(a)>1:
  18. if re.search('/(*/)', a[1]):
  19. return a[0] + " " + a[1]
  20. else:
  21. return a[0]'''
  22. for row in reader:
  23. line = []
  24. ID = row['ID']
  25. Lemma = row['Lemma']
  26. sLemma = row['sLemma']
  27. FileHTM = row['FileHTM']
  28. line.append(ID)
  29. line.append(Lemma)
  30. sLemma_clean = sLemma.split(' ')
  31. del sLemma_clean[-1]
  32. if len(sLemma_clean)>1:
  33. if re.search("\(.\)", sLemma_clean[1]):
  34. line.append(sLemma_clean[0] + " " + sLemma_clean[1])
  35. else:
  36. line.append(sLemma_clean[0])
  37. line.append(FileHTM)
  38. csvwriter.writerow(line)
  39. #print (line)
  40. clean_data.close()