12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- import os
- import xml.etree.ElementTree as Xet
- import re
- from xml.etree import ElementTree
- import csv
# Output CSV, created in the current working directory. It is left open on
# purpose: rows are appended further down the script, which also closes it.
# newline='' lets the csv module control line endings itself (otherwise extra
# blank lines appear between rows on platforms that translate '\n').
ovi_data = open('ovi_lemmi.csv', 'w', newline='')
csvwriter = csv.writer(ovi_data)

# Header row: the column names of the output file, in output order.
params = ["sigla", "n_lemma", "lemma", "pos", "iperlemma", "n_iperlemma", "commento"]
csvwriter.writerow(params)

# Legacy code, disabled (previously kept inside a bare string literal).
# It built a list of [sigla, file name] pairs from a directory of lemma files.
# NOTE(review): indentation below is reconstructed -- confirm before re-enabling.
#
# lemmi = []
# basepath_lemmi = '/Users/alessiaspadi/Documents/RESTORE/temp_ovi/lemmi'
# for entry in os.listdir(basepath_lemmi):
#     if os.path.isfile(os.path.join(basepath_lemmi, entry)):
#         ll = entry.split('.')[1].lstrip().split(' ')[0]
#         if ll != '':
#             lemmi.append([ll, entry])
# Per-path cache of lookup tables so each lemma CSV is read only once per run.
_LEMMA_INDEX_CACHE = {}


def _load_lemma_index(link_path):
    """Read the lemma CSV at *link_path* and index it for exact-match lookup.

    Returns a dict mapping ``(sigla upper-cased, n_lemma, lemma)`` to
    ``(pos, commento)``. When several rows share a key, the first one in the
    file is kept, which is what a top-to-bottom scan would return.
    """
    index = {}
    with open(link_path, newline='') as link_file:
        for row in csv.DictReader(link_file):
            key = (row['sigla'].upper(), row['n_lemma'], row['lemma'])
            index.setdefault(key, (row['pos'], row['commento']))
    return index


def get_iperlemma(sig, n_lem, lem,
                  link_path='/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/ovi_lemmi.csv'):
    """Look up the part of speech and comment recorded for a lemma.

    Args:
        sig: Sigla to match. It is compared as-is against the upper-cased
            ``sigla`` column, so callers must pass it already upper-cased.
        n_lem: Value to match against the ``n_lemma`` column (a string, as
            read from CSV).
        lem: Value to match against the ``lemma`` column.
        link_path: CSV file to search; it must have the columns ``sigla``,
            ``n_lemma``, ``lemma``, ``pos`` and ``commento``. Defaults to the
            path this script has always used.

    Returns:
        A new list ``[pos, commento]`` for the first matching row, or ``None``
        when no row matches.

    Raises:
        OSError: if *link_path* cannot be opened.
        KeyError: if one of the required columns is missing.
    """
    # NOTE: the file is read once per path and then served from memory; this
    # assumes it is not modified while the script runs.
    index = _LEMMA_INDEX_CACHE.get(link_path)
    if index is None:
        index = _LEMMA_INDEX_CACHE[link_path] = _load_lemma_index(link_path)

    match = index.get((sig, n_lem, lem))
    if match is None:
        return None
    return list(match)
# Main pass: for every lemma/iperlemma pair in the merge file, look up the
# part of speech and comment with get_iperlemma() and write one output row.
try:
    with open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/lemma-iperlemma_Data.csv',
              newline='') as merge_file:
        reader = csv.DictReader(merge_file)
        for row in reader:
            # get_iperlemma() compares against upper-cased siglas.
            sigla = row['sigla'].upper()
            n_lemma = row['n_lemma']
            lemma = row['lemma']
            iperlemma = row['iperlemma']
            n_iperlemma = row['n_iperlemma']

            iper = get_iperlemma(sigla, n_lemma, lemma)
            # No match: leave pos/commento empty so every row still has the
            # same number of cells as the header.
            pos, commento = iper if iper is not None else ('', '')

            # Cell order follows the header written at the top of the script:
            # sigla, n_lemma, lemma, pos, iperlemma, n_iperlemma, commento.
            # (Rows used to be written as ..., n_iperlemma, iperlemma, pos,
            # commento, which put values under the wrong column names.)
            line = [sigla, n_lemma, lemma, pos, iperlemma, n_iperlemma, commento]
            csvwriter.writerow(line)
finally:
    # Close (and flush) the output file even if the merge pass fails midway.
    ovi_data.close()

# Legacy code, disabled (previously kept inside a bare string literal, where
# the ' \| ' pattern was an invalid escape sequence). It read each lemma file
# listed in `lemmi` and wrote one row per non-IPERLEMMA line.
# NOTE(review): indentation below is reconstructed -- confirm before re-enabling.
#
# for x in range(len(lemmi)):
#     sigla = lemmi[x][0]
#     sig = sigla.upper()
#     file_name = lemmi[x][1]
#     f = open('/Users/alessiaspadi/Documents/RESTORE/temp_ovi/lemmi/' + file_name, "r", encoding='ISO-8859-1')
#     lines = f.readlines()
#     for line in lines:
#         row = [sig]
#         if "IPERLEMMA" not in line:
#             lem = re.split(' \| ', line)
#             lung = len(lem)
#             n_lemma = lem[0]
#             if (lung >= 2):
#                 lemma = lem[1]
#             else:
#                 lemma = ""
#             iper = get_iperlemma(sig, n_lemma, lemma)
#             for el in lem:
#                 rr = el.replace('\n', '')
#                 row.append(rr)
#             if iper is not None:
#                 for ip in iper:
#                     row.append(ip)
#             csvwriter.writerow(row)
#     f.close()
|