1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
import codecs
import csv
import io
import os
import re
import tokenize

import pandas as pd
# Directory holding the input files read by this script.
BASE_DIR = '/Users/alessiaspadi/Documents/RESTORE/temp_ovi/'


def read_assoc_rows(path):
    """Return every row of the CSV file at *path* as a list of lists."""
    # newline='' is what the csv module asks for so that it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open(path, newline='') as handle:
        return list(csv.reader(handle))


def load_triples(path, sep, value_column):
    """Read a delimited file with pandas and return its rows as triples.

    Each returned item is ``[sigla, num, <value_column>]`` taken from the
    columns of the same names; a missing column raises ``KeyError``.
    """
    frame = pd.read_csv(path, sep=sep)
    return [
        list(triple)
        for triple in zip(frame["sigla"], frame["num"], frame[value_column])
    ]


def matching_fields(lines, num, ip):
    """Collect the second ``|``-separated field of every matching line.

    A line that does not contain the text ``IPERLEMMA`` matches when its
    first field, read as an integer, equals *num*.  A line that contains
    ``IPERLEMMA`` matches when its first field, with that marker removed and
    read as an integer, equals *ip*.

    Matches are returned in the order the lines are seen.  Blank lines are
    skipped and line terminators are removed before splitting, so a value
    that is the last field on its line does not carry a trailing newline.

    Raises:
        ValueError: if the first field of a non-blank line is not an integer.
        IndexError: if a matching line has no second field.
    """
    found = []
    for raw in lines:
        line = raw.rstrip("\r\n")
        if not line.strip():
            # Nothing to parse; int('') would otherwise abort the whole run.
            continue
        fields = line.split("|")
        if "IPERLEMMA" not in line:
            if int(fields[0]) == num:
                found.append(fields[1])
        else:
            # int() tolerates the padding left behind by the replacement.
            key = fields[0].replace("IPERLEMMA", " ")
            if int(key) == ip:
                found.append(fields[1])
    return found


def main():
    """Write ``lip_Data.csv`` in the current working directory.

    The file starts with a header row and then gets one row per non-empty
    row of ``assoc_Data.csv``: the sigla, the lemma number, the iperlemma
    number, followed by whatever ``matching_fields`` finds in
    ``lemmi/lemmi.<sigla>.txt`` (read as latin-1).
    """
    results = read_assoc_rows(BASE_DIR + 'assoc_Data.csv')

    # NOTE(review): these two tables are loaded but nothing below reads them.
    # They are kept so that a missing or malformed input still stops the run
    # as it did before; delete both calls once they are confirmed dead.
    lemmi = load_triples(BASE_DIR + 'lem_Data.csv', ';', 'lemma')
    iperlemmi = load_triples(BASE_DIR + 'iperlemmi_Datini.csv', ',', 'iperlemma')

    # Context managers close both files even if a row fails to parse.
    with open('lip_Data.csv', 'w', newline='') as lip_data:
        csvwriter = csv.writer(lip_data)
        csvwriter.writerow(["sigla", "n_lemma", "n_iperlemma", "lemma", "iperlemma"])
        for r in results:
            if not r:
                # csv.reader yields [] for an empty line; there is no key to look up.
                continue
            sigla = r[0]
            num = int(r[1])
            ip = int(r[2])
            file_name = "lemmi." + sigla + ".txt"
            with open(BASE_DIR + 'lemmi/' + file_name, "r", encoding='latin-1') as f:
                extra = matching_fields(f, num, ip)
            # NOTE(review): values are appended in file order, so the "lemma"
            # and "iperlemma" columns only line up with the header when the
            # lemma line precedes the IPERLEMMA line and each matches once.
            csvwriter.writerow([sigla, num, ip] + extra)


if __name__ == "__main__":
    main()
|