|
@@ -0,0 +1,124 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import xml.etree.ElementTree as Xet\n",
|
|
|
+ "from typing import Dict, Any\n",
|
|
|
+ "#import pandas as pd\n",
|
|
|
+ "import os\n",
|
|
|
+ "import csv\n",
|
|
|
+ "from xml.dom import minidom\n",
|
|
|
+ "import sys\n",
|
|
|
+ "import re\n",
|
|
|
+ "\n",
|
|
|
+ "# Export selected EAC-CPF fields of every xw_doc record to eac_Data.csv.\n",
+ "# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.\n",
+ "xml_file_name = '/Users/federicaspinelli/Google Drive/OVI-CNR/export_aspoAuth002--Gettatelli-Ospedale-Marcovaldi.xml'\n",
+ "tree = Xet.parse(xml_file_name)\n",
+ "root = tree.getroot()\n",
+ "schede = root.findall(\"xw_doc\")\n",
+ "\n",
+ "# Placeholder written to a column whose element is absent from the record.\n",
+ "MISSING = \" \"\n",
+ "\n",
+ "# CSV header; the column order must match the order in which each row is built below.\n",
+ "params = [\"recordId\", \"entityType\", \"nameEntry@normal\", \"nameEntry@prime\", \"biogHist p\",\n",
+ "          \"nameEntry@ulterior\", \"nameEntry@ord\", \"place\", \"occupation\"]\n",
+ "\n",
+ "# Paths (relative to one xw_doc) of the single-valued columns that precede \"place\",\n",
+ "# listed in the same order as the first seven entries of params.\n",
+ "single_value_paths = [\n",
+ "    \"eac-cpf/control/recordId\",\n",
+ "    \"eac-cpf/cpfDescription/identity/entityType\",\n",
+ "    \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']\",\n",
+ "    \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']\",\n",
+ "    \"eac-cpf/cpfDescription/description/biogHist/p\",\n",
+ "    \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ulterior']\",\n",
+ "    \"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ord']\",\n",
+ "]\n",
+ "places_path = \"eac-cpf/cpfDescription/description/places/place/placeEntry\"\n",
+ "occupation_path = \"eac-cpf/cpfDescription/description/occupation/term\"\n",
+ "\n",
+ "\n",
+ "def element_text(element):\n",
+ "    \"\"\"Return element.text, or the MISSING placeholder when element is None.\"\"\"\n",
+ "    return MISSING if element is None else element.text\n",
+ "\n",
+ "\n",
+ "# newline='' is what the csv module asks for on the file object it writes to;\n",
+ "# the with-block closes the file even if a record raises part-way through.\n",
+ "with open('eac_Data.csv', 'w', newline='', encoding='utf-8') as eac_data:\n",
+ "    csvwriter = csv.writer(eac_data)\n",
+ "    csvwriter.writerow(params)\n",
+ "\n",
+ "    for scheda in schede:\n",
+ "        ul = [element_text(scheda.find(path)) for path in single_value_paths]\n",
+ "\n",
+ "        # findall() returns a list (never None), so test for emptiness instead;\n",
+ "        # placeEntry elements without text are skipped so join() only sees strings.\n",
+ "        place_names = [place.text for place in scheda.findall(places_path)\n",
+ "                       if place.text is not None]\n",
+ "        ul.append(\" | \".join(place_names) if place_names else MISSING)\n",
+ "\n",
+ "        ul.append(element_text(scheda.find(occupation_path)))\n",
+ "\n",
+ "        csvwriter.writerow(ul)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "interpreter": {
|
|
|
+ "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
|
|
|
+ },
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3.9.0 64-bit",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.9.0"
|
|
|
+ },
|
|
|
+ "metadata": {
|
|
|
+ "interpreter": {
|
|
|
+ "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 4
|
|
|
+}
|