{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EAC-CPF authority records to CSV\n",
    "\n",
    "Reads every `xw_doc` record from an XML export and writes one row per record to `eac_Data.csv`.\n",
    "\n",
    "- Elements that are absent from a record are written as a single space.\n",
    "- Multiple `placeEntry` values are joined with ` | ` in one cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import re\n",
    "import sys\n",
    "import xml.etree.ElementTree as Xet\n",
    "from typing import Any, Dict, List, Optional\n",
    "from xml.dom import minidom"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Machine-specific absolute path: point this at your local copy of the export.\n",
    "xml_file_name = '/Users/federicaspinelli/Google Drive/OVI-CNR/export_aspoAuth002--Gettatelli-Ospedale-Marcovaldi.xml'\n",
    "csv_file_name = 'eac_Data.csv'\n",
    "\n",
    "# Written in place of any element that is absent from a record.\n",
    "MISSING_VALUE = \" \"\n",
    "\n",
    "# CSV header; scheda_to_row() builds its values in this same order.\n",
    "params = [\"recordId\", \"entityType\", \"nameEntry@normal\", \"nameEntry@prime\", \"biogHist p\",\n",
    "          \"nameEntry@ulterior\", \"nameEntry@ord\", \"place\", \"occupation\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Row extraction helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def element_text(element: Optional[Xet.Element]) -> Optional[str]:\n",
    "    \"\"\"Return the text of ``element``, or ``MISSING_VALUE`` when it is absent.\n",
    "\n",
    "    An element that exists but is empty has ``text`` equal to ``None``; that is\n",
    "    returned unchanged, and ``csv.writer`` writes ``None`` as an empty string.\n",
    "    \"\"\"\n",
    "    return MISSING_VALUE if element is None else element.text\n",
    "\n",
    "\n",
    "def joined_places(places: List[Xet.Element]) -> str:\n",
    "    \"\"\"Join the text of every ``placeEntry`` element with ``' | '``.\n",
    "\n",
    "    Entries without text are skipped (``str.join`` would raise on ``None``).\n",
    "    ``MISSING_VALUE`` is returned when nothing is left: ``findall`` yields an\n",
    "    empty list, never ``None``, when a record has no places.\n",
    "    \"\"\"\n",
    "    texts = [place.text for place in places if place.text]\n",
    "    return \" | \".join(texts) if texts else MISSING_VALUE\n",
    "\n",
    "\n",
    "def scheda_to_row(scheda: Xet.Element) -> List[Optional[str]]:\n",
    "    \"\"\"Build one CSV row from an ``xw_doc`` element, in the column order of ``params``.\"\"\"\n",
    "    identity = \"eac-cpf/cpfDescription/identity\"\n",
    "    description = \"eac-cpf/cpfDescription/description\"\n",
    "\n",
    "    def name_part(local_type: str) -> Optional[str]:\n",
    "        # Text of the nameEntry part carrying the given localType attribute.\n",
    "        return element_text(scheda.find(f\"{identity}/nameEntry/part[@localType='{local_type}']\"))\n",
    "\n",
    "    return [\n",
    "        element_text(scheda.find(\"eac-cpf/control/recordId\")),\n",
    "        element_text(scheda.find(f\"{identity}/entityType\")),\n",
    "        name_part(\"normal\"),\n",
    "        name_part(\"prime\"),\n",
    "        element_text(scheda.find(f\"{description}/biogHist/p\")),\n",
    "        name_part(\"ulterior\"),\n",
    "        name_part(\"ord\"),\n",
    "        joined_places(scheda.findall(f\"{description}/places/place/placeEntry\")),\n",
    "        element_text(scheda.find(f\"{description}/occupation/term\")),\n",
    "    ]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the XML export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tree = Xet.parse(xml_file_name)\n",
    "root = tree.getroot()\n",
    "schede = root.findall(\"xw_doc\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write the CSV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The csv module requires newline='' on the file object; utf-8 keeps accented\n",
    "# characters portable across platforms. The with-block closes the file even\n",
    "# if a record raises part-way through.\n",
    "with open(csv_file_name, 'w', newline='', encoding='utf-8') as eac_data:\n",
    "    csvwriter = csv.writer(eac_data)\n",
    "    csvwriter.writerow(params)\n",
    "    csvwriter.writerows(scheda_to_row(scheda) for scheda in schede)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
  },
  "kernelspec": {
   "display_name": "Python 3.9.0 64-bit",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  },
  "metadata": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}