import csv
import os
import sys
import xml.etree.ElementTree as Xet
from xml.dom import minidom

import pandas as pd

# Configuration: input XML export and output CSV destination.
xml_file_name = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/XML/SR20OA_Ospedale.xml'
csv_file_name = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/new/OA_Data_Ospedale.csv'


def iterate(node, aut, path=""):
    """Walk ``node`` depth-first and append one ``cell`` entry per element.

    Children are visited before their parent (post-order), so the entries of
    a sub-tree appear in ``aut`` before the entry of the element containing
    them.

    Args:
        node: Element to walk; it is iterated to reach its children.
        aut: List that receives the ``[tag, value]`` entries (mutated in place).
        path: Slash-separated tag path of the parent. It is only forwarded to
            the recursive calls and does not influence the collected entries.

    Returns:
        The same ``aut`` list, for convenience.
    """
    current_path = path + "/" + node.tag if path else node.tag
    for child in node:
        iterate(child, aut, path=current_path)
    obj = cell(node)
    if obj is not None:
        aut.append(obj)
    return aut


def cell(nd):
    """Return ``[tag, value]`` for an element, or ``None`` when ``nd`` is None.

    ``value`` is:
      * a list of lines (``str.splitlines`` of the right-stripped text) when
        the element has non-blank text;
      * the untouched ``nd.text`` otherwise, i.e. ``None`` or a string that
        contains only whitespace.
    """
    if nd is None:
        return None
    text = nd.text
    if text is not None:
        stripped = text.rstrip()
        if stripped != "":
            return [nd.tag, stripped.splitlines()]
    # No usable text: keep the raw value (None or whitespace-only string).
    return [nd.tag, text]


def build_row(scheda):
    """Flatten one element tree into a ``{column name: value}`` dict.

    The first occurrence of a tag uses the bare tag as column name; each later
    occurrence of the same tag gets a numeric suffix (``tag1``, ``tag2``, ...).

    NOTE(review): a suffixed name can coincide with a real tag of the same
    name (e.g. a second ``A`` and an actual ``A1`` element); the later entry
    then overwrites the earlier one. Left unchanged to keep column names stable.
    """
    entries = iterate(scheda, [])
    seen = {}
    row = {}
    for tag, value in entries:
        if tag in seen:
            seen[tag] += 1
            row[tag + str(seen[tag])] = value
        else:
            seen[tag] = 0
            row[tag] = value
    return row


def collect_params(schede):
    """Return every column name produced by ``build_row`` over ``schede``.

    Names are unique and kept in first-seen order. A dict is used as an
    ordered set so the membership test stays O(1) per name.
    """
    columns = {}
    for scheda in schede:
        for name in build_row(scheda):
            columns.setdefault(name, None)
    return list(columns)


def build_table(schede, columns=None, writer=None):
    """Write one CSV row per element of ``schede``.

    Args:
        schede: Iterable of elements; each one is flattened with ``build_row``.
        columns: Ordered column names. Defaults to the module-level ``params``.
        writer: Object with a ``writerow`` method. Defaults to the
            module-level ``csvwriter``.

    Columns that are missing from a row, or whose value is ``None``, are
    written as a single space. List values are handed to the writer as-is;
    ``csv.writer`` stringifies non-string data with ``str()``.
    """
    if columns is None:
        columns = params
    if writer is None:
        writer = csvwriter
    for scheda in schede:
        row = build_row(scheda)
        values = []
        for name in columns:
            value = row.get(name)
            values.append(" " if value is None else value)
        writer.writerow(values)


# In a notebook kernel ``__name__`` is "__main__", so running the cell still
# performs the conversion; importing this code elsewhere does no I/O.
if __name__ == "__main__":
    # Parse the XML file.
    tree = Xet.parse(xml_file_name)
    root = tree.getroot()
    schede = root.find("schede")
    if schede is None:
        raise ValueError("No <schede> element found in " + xml_file_name)

    params = collect_params(schede)

    # newline='' is what the csv module requires for files handed to
    # csv.writer; the context manager closes the file even if a row fails.
    with open(csv_file_name, 'w', newline='', encoding='utf-8') as OA_data:
        csvwriter = csv.writer(OA_data)
        csvwriter.writerow(params)
        build_table(schede)