Browse Source

Add new parser EAC_to_CSV ASPO

Federica 2 years ago
parent
commit
4373fd43e2
2 changed files with 201 additions and 0 deletions
  1. 77 0
      EAC/EAC_to_CSV_Geo.ipynb
  2. 124 0
      EAC/EAC_to_CSV_Ospedale.ipynb

+ 77 - 0
EAC/EAC_to_CSV_Geo.ipynb

@@ -0,0 +1,77 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xml.etree.ElementTree as Xet\n",
+    "from typing import Dict, Any\n",
+    "import os\n",
+    "import csv\n",
+    "from xml.dom import minidom\n",
+    "import sys\n",
+    "import re\n",
+    "\n",
+    "# Export the ASPO id and place name of every record to data_eac_geo.csv.\n",
+    "# NOTE(review): machine-specific absolute path; point it at your local copy of the export.\n",
+    "xml_file_name = '/Users/federicaspinelli/Google Drive/OVI-CNR/export_aspoHGeo--22_09_20_17_35-3se38108719d30be01.xml'\n",
+    "tree = Xet.parse(xml_file_name)\n",
+    "root = tree.getroot()\n",
+    "# Each <xw_doc> child of the root is processed as one record (\"scheda\").\n",
+    "schede = root.findall(\"xw_doc\")\n",
+    "\n",
+    "params = [\"ID ASPO\", \"PLACE NAME\"]\n",
+    "\n",
+    "# The with-block closes the CSV even if a record raises part-way through.\n",
+    "# newline='' is what the csv module expects (no doubled line endings on Windows);\n",
+    "# an explicit encoding keeps non-ASCII text independent of the OS locale.\n",
+    "with open('data_eac_geo.csv', 'w', newline='', encoding='utf-8') as eac_data:\n",
+    "    csvwriter = csv.writer(eac_data)\n",
+    "    csvwriter.writerow(params)\n",
+    "\n",
+    "    for scheda in schede:\n",
+    "        identifier = scheda.find(\"eac/eacheader/eacid\")\n",
+    "        placeName = scheda.find(\"eac/condesc/identity/conhead/part\")\n",
+    "        # Not guarded, as before: a record lacking either element raises AttributeError.\n",
+    "        csvwriter.writerow([identifier.text, placeName.text])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.0 64-bit",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 124 - 0
EAC/EAC_to_CSV_Ospedale.ipynb

@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xml.etree.ElementTree as Xet\n",
+    "from typing import Dict, Any\n",
+    "#import pandas as pd\n",
+    "import os\n",
+    "import csv\n",
+    "from xml.dom import minidom\n",
+    "import sys\n",
+    "import re\n",
+    "\n",
+    "# Export selected EAC-CPF fields of every record to eac_Data.csv,\n",
+    "# one row per record.\n",
+    "# NOTE(review): machine-specific absolute path; point it at your local copy of the export.\n",
+    "xml_file_name = '/Users/federicaspinelli/Google Drive/OVI-CNR/export_aspoAuth002--Gettatelli-Ospedale-Marcovaldi.xml'\n",
+    "tree = Xet.parse(xml_file_name)\n",
+    "root = tree.getroot()\n",
+    "# Each <xw_doc> child of the root is processed as one record (\"scheda\").\n",
+    "schede = root.findall(\"xw_doc\")\n",
+    "\n",
+    "# Header row; the row assembled for each record follows the same column order.\n",
+    "params = [\"recordId\", \"entityType\", \"nameEntry@normal\", \"nameEntry@prime\",  \"biogHist p\",\n",
+    "          \"nameEntry@ulterior\", \"nameEntry@ord\", \"place\", \"occupation\"]\n",
+    "\n",
+    "\n",
+    "def text_or_blank(element):\n",
+    "    \"\"\"Return the cell value for an optional XML element.\n",
+    "\n",
+    "    Args:\n",
+    "        element: an Element returned by find(), or None when nothing matched.\n",
+    "\n",
+    "    Returns:\n",
+    "        \" \" (a single space) when element is None, otherwise element.text.\n",
+    "        An element that exists but has no text yields None, which csv.writer\n",
+    "        emits as an empty cell.\n",
+    "    \"\"\"\n",
+    "    return \" \" if element is None else element.text\n",
+    "\n",
+    "\n",
+    "# The with-block closes the CSV even if a record raises part-way through.\n",
+    "# newline='' is what the csv module expects (no doubled line endings on Windows);\n",
+    "# an explicit encoding keeps non-ASCII text independent of the OS locale.\n",
+    "with open('eac_Data.csv', 'w', newline='', encoding='utf-8') as eac_data:\n",
+    "    csvwriter = csv.writer(eac_data)\n",
+    "    csvwriter.writerow(params)\n",
+    "\n",
+    "    for scheda in schede:\n",
+    "        # Single-valued fields: find() gives the first match, or None when absent.\n",
+    "        identifier = scheda.find(\"eac-cpf/control/recordId\")\n",
+    "        entityType = scheda.find(\"eac-cpf/cpfDescription/identity/entityType\")\n",
+    "        nameEntry_N = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']\")\n",
+    "        nameEntry_P = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']\")\n",
+    "        description = scheda.find(\"eac-cpf/cpfDescription/description/biogHist/p\")\n",
+    "        nameEntry_U = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ulterior']\")\n",
+    "        nameEntry_O = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ord']\")\n",
+    "        occupation = scheda.find(\"eac-cpf/cpfDescription/description/occupation/term\")\n",
+    "\n",
+    "        # Multi-valued field: all place entries are joined into one cell.\n",
+    "        places = scheda.findall(\"eac-cpf/cpfDescription/description/places/place/placeEntry\")\n",
+    "        # findall() returns a list, never None, so an empty list is the real \"no places\" case.\n",
+    "        if places:\n",
+    "            # A <placeEntry> without text has .text None, which str.join() would reject.\n",
+    "            place_cell = \" | \".join(place.text or \"\" for place in places)\n",
+    "        else:\n",
+    "            place_cell = \" \"\n",
+    "\n",
+    "        # Column order must match params.\n",
+    "        csvwriter.writerow([\n",
+    "            # Not guarded, as before: a record lacking either raises AttributeError.\n",
+    "            identifier.text,\n",
+    "            entityType.text,\n",
+    "            text_or_blank(nameEntry_N),\n",
+    "            text_or_blank(nameEntry_P),\n",
+    "            text_or_blank(description),\n",
+    "            text_or_blank(nameEntry_U),\n",
+    "            text_or_blank(nameEntry_O),\n",
+    "            place_cell,\n",
+    "            text_or_blank(occupation),\n",
+    "        ])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.0 64-bit",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}