"""Clean the ``sLemma`` column of the OVI lemma CSV and write a new CSV.

Reads ``INPUT_PATH`` (columns ``ID``, ``Lemma``, ``sLemma``, ``FileHTM``)
and writes ``OUTPUT_PATH`` with the same four columns, where ``sLemma`` has
been reduced by :func:`clean_slemma`.
"""

import codecs
import csv
import io
import os
import re
import tokenize
from curses.ascii import DEL

import pandas as pd

INPUT_PATH = '/Users/leonardocanova/Library/CloudStorage/OneDrive-UniversityofPisa(1)/Documenti/Progetti università/OVI/Programmazione/slemmi_OVI.csv'
OUTPUT_PATH = 'OVI_lemmi_clean.csv'
HEADER = ['ID', 'Lemma', 'sLemma', 'FileHTM']

# Exactly one character enclosed in parentheses, e.g. "(1)".
# Compiled once (raw string avoids the invalid-escape warning of "\(").
_PAREN_MARKER_RE = re.compile(r"\(.\)")


def clean_slemma(s_lemma):
    """Return the cleaned form of one ``sLemma`` cell.

    The value is split on single spaces and the last token is always
    discarded.  From what remains:

    * if there is a second token containing a one-character parenthesised
      marker (e.g. ``"(1)"``), the first two tokens are returned joined by
      a space;
    * otherwise only the first token is returned;
    * if nothing remains (the value contained no space), ``""`` is returned.

    A string is always returned, so every output row keeps four columns.
    """
    tokens = s_lemma.split(' ')[:-1]
    if not tokens:
        # NOTE(review): a value with no space loses its only token here,
        # mirroring the unconditional "drop last token" rule - confirm that
        # an empty cell is the desired result for such rows.
        return ''
    if len(tokens) > 1 and _PAREN_MARKER_RE.search(tokens[1]):
        return tokens[0] + " " + tokens[1]
    return tokens[0]


def clean_rows(rows):
    """Yield ``[ID, Lemma, cleaned sLemma, FileHTM]`` for each mapping in *rows*.

    Raises:
        KeyError: if a row lacks one of the four expected keys.
    """
    for row in rows:
        yield [
            row['ID'],
            row['Lemma'],
            clean_slemma(row['sLemma']),
            row['FileHTM'],
        ]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read *input_path*, clean every row and write the result to *output_path*.

    Both files are opened with ``newline=''`` as the ``csv`` module requires,
    and are closed even if processing fails part-way through.
    """
    # The input is opened first so that a missing input file does not
    # truncate an existing output file.
    # NOTE(review): UTF-8 is assumed for both files - confirm against the
    # actual encoding of the input CSV.
    with open(input_path, newline='', encoding='utf-8') as source:
        with open(output_path, 'w', newline='', encoding='utf-8') as target:
            writer = csv.writer(target)
            writer.writerow(HEADER)
            writer.writerows(clean_rows(csv.DictReader(source)))


if __name__ == "__main__":
    main()