"""Flatten the <scheda> records of an XML export into a single CSV table.

Every element that carries text becomes one CSV column named after its tag.
When a tag occurs more than once inside the same record, the later
occurrences are stored under ``tag1``, ``tag2``, ... so no value is lost.

Usage: python <this script> [input.xml [output.csv]]
"""
import xml.etree.ElementTree as Xet
import pandas as pd
import os
import csv
from xml.dom import minidom
import sys

# Default input handed to the XML parser when no path is given on the
# command line.
xml_file_name = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/SR20OA_Datini.xml'
# Default output file, created in the current working directory.
csv_file_name = 'OA_Data.csv'


def cell(nd):
    """Return ``[tag, text]`` for an element that carries text, else ``None``.

    Trailing whitespace is stripped and line breaks inside the text are
    removed (the lines are concatenated without a separator). ``None`` is
    returned when ``nd`` is ``None``, when the element has no text at all
    (e.g. ``<tag/>``) or when its text is whitespace only.
    """
    if nd is None:
        return None
    text = nd.text
    # Element.text is None for empty elements; treat that like blank text
    # instead of failing on None.rstrip().
    if text is None:
        return None
    stripped = text.rstrip()
    if stripped == "":
        return None
    return [nd.tag, "".join(stripped.splitlines())]


def iterate(node, aut, path=""):
    """Append the ``[tag, text]`` cells of ``node``'s subtree to ``aut``.

    The walk is post-order: all descendants of an element are visited before
    the element itself. ``path`` is the slash-separated tag path of the
    parent; it is extended and handed down to the children but is not used
    for anything else at the moment.

    Returns ``aut`` (the same list object that was passed in).
    """
    current_path = path + "/" + node.tag if path else node.tag
    for child in node:
        iterate(child, aut, path=current_path)
    obj = cell(node)
    if obj is not None:
        aut.append(obj)
    return aut


def build_row(scheda):
    """Return a dict mapping column name -> text for one record element.

    The first occurrence of a tag is stored under the tag itself; the n-th
    repetition (n >= 1) is stored under ``tag + str(n)``.
    """
    occurrences = {}
    row = {}
    for tag, text in iterate(scheda, []):
        index = occurrences.get(tag, -1) + 1
        occurrences[tag] = index
        name = tag if index == 0 else tag + str(index)
        row[name] = text
    return row


def _collect_columns(rows):
    """Return every key used by ``rows``, in order of first appearance."""
    columns = []
    known = set()  # O(1) membership test instead of scanning the list
    for row in rows:
        for name in row:
            if name not in known:
                known.add(name)
                columns.append(name)
    return columns


def _write_rows(rows, columns, writer):
    """Write one CSV line per row; a missing column is written as " "."""
    for row in rows:
        writer.writerow([row.get(name, " ") for name in columns])


def build_table(schede, columns=None, writer=None):
    """Write one CSV line (no header) for every child element of ``schede``.

    ``columns`` is the ordered list of column names; when omitted it is
    derived from the records themselves. ``writer`` is a ``csv.writer``-like
    object; when omitted the lines are written to standard output.
    """
    rows = [build_row(scheda) for scheda in schede]
    if columns is None:
        columns = _collect_columns(rows)
    if writer is None:
        writer = csv.writer(sys.stdout)
    _write_rows(rows, columns, writer)


def main(argv=None):
    """Convert the XML file to CSV.

    ``argv`` holds the optional ``[input.xml [output.csv]]`` arguments and
    defaults to ``sys.argv[1:]``; omitted paths fall back to
    ``xml_file_name`` and ``csv_file_name``.

    Raises ``ValueError`` when the document root has no ``schede`` child.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    xml_path = args[0] if len(args) > 0 else xml_file_name
    csv_path = args[1] if len(args) > 1 else csv_file_name

    root = Xet.parse(xml_path).getroot()
    schede = root.find("schede")
    if schede is None:
        raise ValueError(
            "no <schede> element found under the root of {!r}".format(xml_path)
        )

    # Build every row once; the rows are needed both for the header and for
    # the data lines.
    rows = [build_row(scheda) for scheda in schede]
    columns = _collect_columns(rows)

    # newline="" is what the csv module expects from its output file, and the
    # context manager closes the file even if writing fails.
    with open(csv_path, "w", newline="", encoding="utf-8") as out:
        writer = csv.writer(out)
        writer.writerow(columns)
        _write_rows(rows, columns, writer)


if __name__ == "__main__":
    main()