{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xml.etree.ElementTree as Xet\n",
    "from typing import Dict, Any\n",
    "#import pandas as pd\n",
    "import os\n",
    "import csv\n",
    "from xml.dom import minidom\n",
    "import sys\n",
    "import re\n",
    "\n",
    "xml_file_name = '/Users/federicaspinelli/Google Drive/OVI-CNR/export_aspoAuth002--Gettatelli-Ospedale-Marcovaldi.xml'\n",
    "tree = Xet.parse(xml_file_name)\n",
    "root = tree.getroot()\n",
    "schede = root.findall(\"xw_doc\")\n",
    "\n",
    "eac_data = open('eac_Data.csv', 'w')\n",
    "csvwriter = csv.writer(eac_data)\n",
    "\n",
    "params = [\"recordId\", \"entityType\", \"nameEntry@normal\", \"nameEntry@prime\",  \"biogHist p\",\n",
    "          \"nameEntry@ulterior\", \"nameEntry@ord\", \"place\", \"occupation\"]\n",
    "\n",
    "csvwriter.writerow(params)\n",
    "\n",
    "for scheda in schede:\n",
    "    ul = []\n",
    "    identifier = scheda.find(\"eac-cpf/control/recordId\")\n",
    "    entityType = scheda.find(\"eac-cpf/cpfDescription/identity/entityType\")\n",
    "    nameEntry_N = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']\")\n",
    "    nameEntry_P = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']\")\n",
    "    description = scheda.find(\"eac-cpf/cpfDescription/description/biogHist/p\")\n",
    "    nameEntry_U = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ulterior']\")\n",
    "    nameEntry_O = scheda.find(\"eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ord']\")\n",
    "    places = scheda.findall(\"eac-cpf/cpfDescription/description/places/place/placeEntry\")\n",
    "    occupation = scheda.find(\"eac-cpf/cpfDescription/description/occupation/term\")\n",
    "    ul.append(identifier.text)\n",
    "    ul.append(entityType.text)\n",
    "    if nameEntry_N is None:\n",
    "        ul.append(\" \")\n",
    "    else:\n",
    "        ul.append(nameEntry_N.text)\n",
    "\n",
    "    if nameEntry_P is None:\n",
    "        ul.append(\" \")\n",
    "    else:\n",
    "        ul.append(nameEntry_P.text)\n",
    "\n",
    "    if description is None:\n",
    "        ul.append(\" \")\n",
    "    else:\n",
    "        ul.append(description.text)\n",
    "\n",
    "    if nameEntry_U is None:\n",
    "        ul.append(\" \")\n",
    "    else:\n",
    "        ul.append(nameEntry_U.text)\n",
    "\n",
    "    if nameEntry_O is None:\n",
    "        ul.append(\" \")\n",
    "    else:\n",
    "        ul.append(nameEntry_O.text)\n",
    "\n",
    "    if places is None:\n",
    "        ul.append(\" \")\n",
    "    else:\n",
    "        cell = []\n",
    "        for place in places:\n",
    "            cell.append(place.text)\n",
    "        ul.append(\" | \".join(cell))\n",
    "    \n",
    "    if occupation is None:\n",
    "        ul.append(\" \")\n",
    "    else:\n",
    "        ul.append(occupation.text)\n",
    "\n",
    "    csvwriter.writerow(ul)\n",
    "\n",
    "\n",
    "eac_data.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
  },
  "kernelspec": {
   "display_name": "Python 3.9.0 64-bit",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  },
  "metadata": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}