123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- import csv
- import codecs
- #import pandas as pd
- import re
- import os
- import io
- import tokenize
# Output CSV that receives one merged row per input record.
# newline='' is required for files handed to csv.writer: the writer emits its
# own line terminators, and letting the text layer translate them again would
# produce doubled carriage returns on platforms whose line separator is not "\n".
merged_data = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/mergeOVI-TLIO.csv', 'w', newline='')
csvwriter = csv.writer(merged_data)

# Header row. The first nine names mirror the columns copied from the input
# file; the last column holds the link looked up via get_link_ovi().
# NOTE(review): the header says 'link_tlio' while the value comes from
# get_link_ovi -- confirm the intended column name before renaming anything.
params = ['sigla', 'n_lemma', 'lemma', 'n_iperlemma', 'iperlemma', 'pos', 'commento', 'id', 'thing', 'link_tlio']
csvwriter.writerow(params)
# Per-path cache of {sLemma: FileHTM}, filled lazily by _load_link_index().
_LINK_INDEX_CACHE = {}


def _load_link_index(link_path):
    """Return the {sLemma: FileHTM} mapping for *link_path*, reading it once."""
    index = _LINK_INDEX_CACHE.get(link_path)
    if index is None:
        index = {}
        # The context manager guarantees the handle is released; newline=''
        # is what the csv module expects for files it parses.
        with open(link_path, newline='') as link_file:
            for row in csv.DictReader(link_file):
                # setdefault keeps the FIRST row for a duplicated lemma, which
                # is what a top-to-bottom scan with an early return yields.
                index.setdefault(row['sLemma'], row['FileHTM'])
        _LINK_INDEX_CACHE[link_path] = index
    return index


def get_link_ovi(lemma, link_path='/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/OVI_lemmi_clean.csv'):
    """Return the 'FileHTM' value of the first row whose 'sLemma' equals *lemma*.

    The lookup file is parsed a single time per path and kept in memory, so
    calling this once per merged row no longer rescans the whole CSV (and no
    longer leaves an open file handle behind on every call).

    Args:
        lemma: Value compared for exact equality against the 'sLemma' column.
        link_path: CSV file with 'sLemma' and 'FileHTM' columns. Defaults to
            the path the script has always used.

    Returns:
        The matching 'FileHTM' value, or None when no row matches.
    """
    return _load_link_index(link_path).get(lemma)
# Merge driver: copy every record of the lemma/iperlemma file to the output
# CSV and append the link returned by get_link_ovi() for the record's lemma.

# Input columns copied verbatim, in the order they are written to the output.
INPUT_COLUMNS = (
    'sigla',
    'n_lemma',
    'lemma',
    'n_iperlemma',
    'iperlemma',
    'pos',
    'commento',
    'id',
    'thing',
)

try:
    # The context manager closes the input file even if a row fails.
    with open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/data_lemmi_iperlemmi_thing_id.csv', newline='') as merge_file:
        for row in csv.DictReader(merge_file):
            line = [row[column] for column in INPUT_COLUMNS]
            link_ovi = get_link_ovi(row['lemma'])
            # A lemma without a match still gets a (blank) final cell so
            # every output row has the same number of columns.
            line.append(link_ovi if link_ovi is not None else "")
            csvwriter.writerow(line)
finally:
    # Flush and release the output file on both the success and error paths.
    merged_data.close()
|