1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- import xml.etree.ElementTree as Xet
- import pandas as pd
- import os
- import csv
- from xml.dom import minidom
- import sys
# Hand the XML file to the parser and locate the <schede> container whose
# children are the individual records.
xml_file_name = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/SR20OA_Datini.xml'
tree = Xet.parse(xml_file_name)
root = tree.getroot()
schede = root.find("schede")
# NOTE(review): schedeM and scheda are not read by the code visible in this
# part of the file; they are kept in case something else relies on them.
schedeM = root.findall("schede/scheda")
scheda = schede.find("scheda")
# The csv module performs its own line-ending handling, so the output file
# must be opened with newline="" (otherwise extra blank rows can appear on
# platforms that translate "\n"). The encoding is pinned so that non-ASCII
# text from the XML is written the same way regardless of the locale.
OA_data = open('OA_Data.csv', 'w', newline='', encoding='utf-8')
csvwriter = csv.writer(OA_data)
def iterate(node, aut, path=""):
    """Collect the text cells of ``node`` and all its descendants into ``aut``.

    The tree is walked depth-first. Each element is passed to ``cell`` after
    all of its children have been visited, and every non-``None`` result is
    appended to ``aut``.

    Args:
        node: Element to walk; iterating it must yield its child elements.
        aut: List that receives the collected cells. It is mutated in place.
        path: Slash-separated tag path of the parent element. It is only
            threaded through the recursion and does not affect the result.

    Returns:
        The same ``aut`` list that was passed in.
    """
    current_path = path + "/" + node.tag if path else node.tag
    for child in node:
        iterate(child, aut, path=current_path)
    entry = cell(node)
    if entry is not None:
        aut.append(entry)
    return aut
def cell(nd):
    """Return ``[tag, text]`` for an element that carries text, else ``None``.

    Trailing whitespace is stripped from the element's text and the remaining
    lines are joined (without a separator) into a single line.

    Args:
        nd: An element exposing ``.tag`` and ``.text``, or ``None``.

    Returns:
        A two-item list ``[tag, text]``, or ``None`` when ``nd`` is ``None``
        or its text is missing, empty or whitespace-only.
    """
    if nd is None:
        return None
    text = nd.text
    # Element.text is None (not "") for elements without any text, such as
    # <a/>; calling rstrip() on it would raise AttributeError.
    if text is None:
        return None
    stripped = text.rstrip()
    if not stripped:
        return None
    return [nd.tag, "".join(stripped.splitlines())]
def build_row(scheda):
    """Flatten one record element into a ``{column_name: text}`` dict.

    The cells of ``scheda`` are gathered with ``iterate``. The first cell seen
    for a tag is stored under the bare tag name; each later cell with the same
    tag is stored under the tag followed by a running number (``tag1``,
    ``tag2``, ...).

    Args:
        scheda: The record element to flatten.

    Returns:
        A dict mapping column names to cell text, in the order the cells
        were collected.
    """
    cells = []
    iterate(scheda, cells)
    repeats = {}
    row = {}
    for tag, text in cells:
        if tag not in repeats:
            # First occurrence keeps the plain tag as its column name.
            repeats[tag] = 0
            row[tag] = text
            continue
        repeats[tag] += 1
        row[tag + str(repeats[tag])] = text
    return row
# Build the CSV header: every column name produced by any record, listed once
# and in the order it is first encountered.
seen_columns = {}
for scheda in schede:
    # Updating with already-known keys leaves their original position intact.
    seen_columns.update(dict.fromkeys(build_row(scheda)))
params = list(seen_columns)
def build_table(schede):
    """Write one CSV row per record in ``schede``.

    Each record is flattened with ``build_row`` and its values are laid out in
    the order of the module-level ``params`` list. A column the record does
    not have is written as a single space. Rows are written through the
    module-level ``csvwriter``.

    Args:
        schede: Iterable of record elements.
    """
    for record in schede:
        values = build_row(record)
        line = [
            " " if values.get(column) is None else values[column]
            for column in params
        ]
        csvwriter.writerow(line)
# Write the header row first, then one row per record. The try/finally makes
# sure the output file is flushed and closed even if writing fails part-way.
try:
    csvwriter.writerow(params)
    build_table(schede)
finally:
    OA_data.close()
|