{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EAC-CPF records: XML export to CSV\n",
    "\n",
    "Reads every `xw_doc` record from an XML export and writes one row per record to `eac_Data.csv`.\n",
    "\n",
    "- **Input**: the file named by `xml_file_name` (override it with the `EAC_XML_EXPORT` environment variable).\n",
    "- **Output**: `eac_Data.csv` in the working directory, with the columns listed in `params`.\n",
    "- A field whose element is missing from a record is written as a single space."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import re\n",
    "import sys\n",
    "import xml.etree.ElementTree as Xet\n",
    "from typing import Any, Dict, List, Optional\n",
    "from xml.dom import minidom\n",
    "\n",
    "# pandas is not used by the export below. The import is optional so the\n",
    "# notebook still runs on a kernel where pandas is not installed.\n",
    "try:\n",
    "    import pandas as pd\n",
    "except ModuleNotFoundError:\n",
    "    pd = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# XML export to read. Set the EAC_XML_EXPORT environment variable to use a\n",
    "# different file without editing the notebook; the default is the original\n",
    "# local path.\n",
    "xml_file_name = os.environ.get(\n",
    "    'EAC_XML_EXPORT',\n",
    "    '/Users/federicaspinelli/Google Drive/OVI:CNR/LAVORO 2020/SELEZIONE CONTENUTI/01_ASPO/XDAMS/export_aspoAuth002--22_09_20_17_34-3se381075198510401.xml',\n",
    ")\n",
    "csv_file_name = 'eac_Data.csv'\n",
    "\n",
    "# Placeholder written when a record has no value for a column.\n",
    "BLANK = \" \"\n",
    "# Separator used to pack several placeEntry values into the single \"place\" column.\n",
    "PLACE_SEPARATOR = \" | \"\n",
    "\n",
    "# CSV header; also fixes the order of the values in every row.\n",
    "params = [\"recordId\", \"entityType\", \"nameEntry@normal\", \"nameEntry@prime\", \"biogHist p\",\n",
    "          \"nameEntry@ulterior\", \"nameEntry@ord\", \"place\", \"occupation\"]\n",
    "\n",
    "# XPath (relative to each xw_doc record) of every single-valued column.\n",
    "# \"place\" can occur several times per record and is looked up via PLACES_PATH.\n",
    "FIELD_PATHS: Dict[str, str] = {\n",
    "    \"recordId\": \"eac-cpf/control/recordId\",\n",
    "    \"entityType\": \"eac-cpf/cpfDescription/identity/entityType\",\n",
    "    \"nameEntry@normal\": \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']\",\n",
    "    \"nameEntry@prime\": \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']\",\n",
    "    \"biogHist p\": \"eac-cpf/cpfDescription/description/biogHist/p\",\n",
    "    \"nameEntry@ulterior\": \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ulterior']\",\n",
    "    \"nameEntry@ord\": \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ord']\",\n",
    "    \"occupation\": \"eac-cpf/cpfDescription/description/occupation/term\",\n",
    "}\n",
    "PLACES_PATH = \"eac-cpf/cpfDescription/description/places/place/placeEntry\"\n",
    "\n",
    "# Every header column must have exactly one source.\n",
    "assert set(FIELD_PATHS) | {\"place\"} == set(params), \"params and FIELD_PATHS disagree\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def text_or_blank(element: Optional[Xet.Element]) -> Optional[str]:\n",
    "    \"\"\"Return the text of `element`, or BLANK when the element was not found.\n",
    "\n",
    "    An element that exists but is empty has text None; csv.writer writes None\n",
    "    as an empty field.\n",
    "    \"\"\"\n",
    "    if element is None:\n",
    "        return BLANK\n",
    "    return element.text\n",
    "\n",
    "\n",
    "def join_places(place_entries: List[Xet.Element]) -> str:\n",
    "    \"\"\"Join the texts of all placeEntry elements with PLACE_SEPARATOR.\n",
    "\n",
    "    Entries without text are skipped, because joining None raises TypeError.\n",
    "    Returns BLANK when no text is left, like the other columns do for a\n",
    "    missing element (findall returns an empty list, never None).\n",
    "    \"\"\"\n",
    "    place_names = [entry.text for entry in place_entries if entry.text]\n",
    "    if not place_names:\n",
    "        return BLANK\n",
    "    return PLACE_SEPARATOR.join(place_names)\n",
    "\n",
    "\n",
    "def scheda_to_row(scheda: Xet.Element) -> List[Any]:\n",
    "    \"\"\"Build the CSV row for one xw_doc record, in the column order of `params`.\"\"\"\n",
    "    row = []\n",
    "    for column in params:\n",
    "        if column == \"place\":\n",
    "            row.append(join_places(scheda.findall(PLACES_PATH)))\n",
    "        else:\n",
    "            row.append(text_or_blank(scheda.find(FIELD_PATHS[column])))\n",
    "    return row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tree = Xet.parse(xml_file_name)\n",
    "root = tree.getroot()\n",
    "schede = root.findall(\"xw_doc\")\n",
    "\n",
    "# The with-block closes the file even if a record fails half-way through.\n",
    "# newline='' is what the csv module requires (otherwise rows end in \\r\\r\\n on\n",
    "# Windows); the explicit encoding keeps accented text independent of the locale.\n",
    "with open(csv_file_name, 'w', newline='', encoding='utf-8') as eac_data:\n",
    "    csvwriter = csv.writer(eac_data)\n",
    "    csvwriter.writerow(params)\n",
    "    csvwriter.writerows(scheda_to_row(scheda) for scheda in schede)\n",
    "\n",
    "print(f\"Wrote {len(schede)} records to {csv_file_name}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
  },
  "kernelspec": {
   "display_name": "Python 3.7.3 64-bit",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "metadata": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}