"""Derive lower and upper year bounds from "Periodo" labels.

Reads a CSV with the columns ``s`` (a link) and ``label`` (text such as
``Periodo: 12 gen. 1395 - 1400``) and writes ``AspoDate.csv`` with one row per
input row: the link, the repaired label, and one year for every
``-``-separated part of the period.
"""

import csv
import os
import re
import sys
import xml.etree.ElementTree as Xet
from typing import Any, Dict, List
from xml.dom import minidom

import pandas as pd

# NOTE: several of the imports above are not used by the code in this file;
# they are retained so that nothing relying on them breaks.

INPUT_PATH = '/Users/alessiaspadi/Documents/RESTORE/temp_ASPO/Cronologia/Date2.csv'
OUTPUT_PATH = 'AspoDate.csv'

HEADER = ['URL', 'Label', 'Estremo temporale inferiore', 'Estremo temporale superiore']

# Italian month abbreviations paired with the "/<month number>/" text that
# replaces them, in calendar order.
MONTH_REPLACEMENTS = (
    ('gen.', "/1/"),
    ('feb.', "/2/"),
    ('mar.', "/3/"),
    ('apr.', "/4/"),
    ('mag.', "/5/"),
    ('giu.', "/6/"),
    ('lug.', "/7/"),
    ('ago.', "/8/"),
    ('set.', "/9/"),
    ('ott.', "/10/"),
    ('nov.', "/11/"),
    ('dic.', "/12/"),
)


def repair_label(label: str) -> str:
    """Return *label* re-decoded as UTF-8 with replacement characters removed.

    The text is encoded as ISO-8859-1 and decoded as UTF-8, which undoes
    UTF-8 bytes that were previously misread as ISO-8859-1.

    Raises:
        UnicodeEncodeError: if *label* has characters outside ISO-8859-1.
        UnicodeDecodeError: if the resulting bytes are not valid UTF-8.
    """
    return label.encode("iso-8859-1").decode("utf8").replace("\uFFFD", "")


def month_to_numeric(part: str) -> str:
    """Replace an Italian month abbreviation in *part* with ``/<number>/``.

    A part without any known abbreviation is returned unchanged.
    """
    converted = part
    for abbreviation, replacement in MONTH_REPLACEMENTS:
        if abbreviation in part:
            # Always start again from the untouched input, so when several
            # abbreviations occur only the last one in calendar order is
            # replaced (this mirrors the original if-chain exactly).
            converted = part.replace(abbreviation, replacement)
    return converted


def extract_year(part: str) -> str:
    """Return the year contained in one whitespace-free period part.

    Examples: ``12gen.1395`` -> ``1395``, ``1395gen.`` -> ``1395``,
    ``1400`` -> ``1400``. An empty string is returned when the part splits
    into an unexpected number of ``/``-separated fields.
    """
    numeric = month_to_numeric(part)
    if numeric.endswith("/"):
        # A month at the very end leaves a dangling separator.
        numeric = numeric[:-1]

    if '/' not in numeric:
        return numeric

    fields = numeric.split('/')
    if len(fields) == 2:
        return fields[0]
    if len(fields) == 3:
        # A first field shorter than four characters is taken to be a day
        # (or empty), in which case the year is the last field.
        return fields[2] if len(fields[0]) < 4 else fields[0]
    return ''


def build_row(row: Dict[str, str]) -> List[str]:
    """Build the output row for one input record.

    Args:
        row: a record with the keys ``label`` and ``s``.

    Returns:
        ``[link, repaired label, year, ...]`` with one year for each
        ``-``-separated part of the period.
    """
    label = repair_label(row['label'])
    period = label.replace(" ", "").replace("Periodo:", "")
    output = [row['s'], label]
    output.extend(extract_year(part) for part in period.split("-"))
    return output


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Convert *input_path* into *output_path*.

    The output file is created and its header written before the input file
    is opened, as in the original script. Both files are closed even when an
    error interrupts the conversion.
    """
    # newline='' is what the csv module requires of files handed to it.
    with open(output_path, 'w', newline='') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(HEADER)
        with open(input_path, newline='') as input_file:
            for row in csv.DictReader(input_file):
                writer.writerow(build_row(row))


if __name__ == "__main__":
    main()