"""Merge the OVI lemma/iperlemma table with the HTML page link of each lemma.

Reads ``data_lemmi_iperlemmi_thing_id.csv``, looks every ``lemma`` up in
``OVI_lemmi_clean.csv`` (column ``sLemma`` -> column ``FileHTM``) and writes
``mergeOVI-TLIO.csv`` containing the original columns plus the link (empty
string when the lemma has no entry in the link table).
"""
import codecs
import csv
import functools
#import pandas as pd
import io
import os
import re
import tokenize

# Default locations of the three CSV files handled by this script.
CSV_DIR = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/'
MERGED_CSV = CSV_DIR + 'mergeOVI-TLIO.csv'
LINKS_CSV = CSV_DIR + 'OVI_lemmi_clean.csv'
SOURCE_CSV = CSV_DIR + 'data_lemmi_iperlemmi_thing_id.csv'

# Header of the merged file. Every name except the last is also a column that
# is copied verbatim from the source file; the last column receives the value
# returned by get_link_ovi().
params = ['sigla', 'n_lemma', 'lemma', 'n_iperlemma', 'iperlemma', 'pos',
          'commento', 'id', 'thing', 'link_tlio']
_SOURCE_FIELDS = params[:-1]


@functools.lru_cache(maxsize=None)
def _load_link_index(links_path):
    """Return a ``{sLemma: FileHTM}`` dict built from the CSV at *links_path*.

    The file is read once per path and the result is cached, so repeated
    lookups do not reopen and rescan it. When the same ``sLemma`` occurs on
    several rows the first row wins, which matches a top-to-bottom scan that
    stops at the first hit.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a row lacks the ``sLemma`` or ``FileHTM`` column.
    """
    index = {}
    # newline='' is what the csv module requires for correct handling of
    # quoted fields that contain line breaks.
    with open(links_path, encoding='utf-8', newline='') as link_file:
        for row in csv.DictReader(link_file):
            index.setdefault(row['sLemma'], row['FileHTM'])
    return index


def get_link_ovi(lemma, links_path=LINKS_CSV):
    """Return the ``FileHTM`` value of the first row whose ``sLemma`` is *lemma*.

    Args:
        lemma: lemma string to look up (exact, case-sensitive comparison).
        links_path: CSV file with ``sLemma`` and ``FileHTM`` columns; defaults
            to the OVI link table.

    Returns:
        The matching ``FileHTM`` value, or ``None`` when no row matches.
    """
    return _load_link_index(links_path).get(lemma)


def merge_ovi_tlio(source_path=SOURCE_CSV, links_path=LINKS_CSV,
                   merged_path=MERGED_CSV):
    """Write *merged_path*: every row of *source_path* plus its lemma link.

    Args:
        source_path: input CSV providing the columns in ``_SOURCE_FIELDS``.
        links_path: link table handed to :func:`get_link_ovi`.
        merged_path: output CSV; overwritten if it already exists.

    Raises:
        OSError: if a file cannot be opened.
        KeyError: if an input row lacks one of the expected columns.
    """
    # The source is opened first so that a missing input does not truncate a
    # previously generated output file. Both handles are closed by the
    # ``with`` block even when a row raises.
    with open(source_path, encoding='utf-8', newline='') as source_file, \
            open(merged_path, 'w', encoding='utf-8', newline='') as merged_file:
        writer = csv.writer(merged_file)
        writer.writerow(params)
        for row in csv.DictReader(source_file):
            link = get_link_ovi(row['lemma'], links_path)
            values = [row[field] for field in _SOURCE_FIELDS]
            # A lemma without a link still gets a (blank) last column.
            values.append("" if link is None else link)
            writer.writerow(values)


if __name__ == '__main__':
    merge_ovi_tlio()