import xml.etree.ElementTree as Xet
from typing import Dict, Any
#import pandas as pd
import os
import csv
from xml.dom import minidom
import sys
import re
# Location of the EAC-CPF XML export read when this file is run as a script.
xml_file_name = '/Users/federicaspinelli/Google Drive/OVI-CNR/export_aspoAuth002--Gettatelli-Ospedale-Marcovaldi.xml'
# Destination CSV, created (or overwritten) in the current working directory.
csv_file_name = 'eac_Data.csv'

# Header row of the CSV; build_row() emits its cells in exactly this order.
params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime", "biogHist p",
          "nameEntry@ulterior", "nameEntry@ord", "place", "occupation"]

# Cell value written when the corresponding XML element is absent.
MISSING = " "
# Separator used to pack every placeEntry of a record into one cell.
PLACE_SEPARATOR = " | "

# ElementTree paths, relative to an <xw_doc> element, of the single-valued
# columns that precede the "place" column (same order as ``params``).
LEADING_PATHS = (
    "eac-cpf/control/recordId",
    "eac-cpf/cpfDescription/identity/entityType",
    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']",
    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']",
    "eac-cpf/cpfDescription/description/biogHist/p",
    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ulterior']",
    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ord']",
)
PLACES_PATH = "eac-cpf/cpfDescription/description/places/place/placeEntry"
OCCUPATION_PATH = "eac-cpf/cpfDescription/description/occupation/term"


def element_text(element):
    """Return the text of *element* as a CSV cell value.

    A missing element (``None``) yields the ``MISSING`` placeholder. An
    element that exists but has no text yields ``""``, which is what
    ``csv.writer`` already emitted for a ``None`` cell.
    """
    if element is None:
        return MISSING
    return element.text if element.text is not None else ""


def build_row(scheda):
    """Build one CSV row (a list of strings) from an ``xw_doc`` element.

    The cells follow the column order of ``params``. Every absent element,
    including ``recordId`` and ``entityType``, becomes ``MISSING`` instead of
    raising ``AttributeError``.
    """
    row = [element_text(scheda.find(path)) for path in LEADING_PATHS]

    # findall() returns an empty list (never None) when nothing matches, so
    # the emptiness test is what selects the placeholder.
    places = scheda.findall(PLACES_PATH)
    if places:
        # Empty <placeEntry/> elements have text None; map them to "" so that
        # str.join does not raise TypeError.
        row.append(PLACE_SEPARATOR.join(element_text(place) for place in places))
    else:
        row.append(MISSING)

    row.append(element_text(scheda.find(OCCUPATION_PATH)))
    return row


def export_eac_csv(xml_path, csv_path):
    """Write one CSV row per ``xw_doc`` child of the XML root at *xml_path*.

    The header row ``params`` is written first. Returns the number of
    records written.

    Raises whatever ``Xet.parse`` or ``open`` raise for unreadable input or
    an unwritable destination.
    """
    schede = Xet.parse(xml_path).getroot().findall("xw_doc")
    # newline='' is required by the csv module so it controls line endings
    # itself; the context manager closes the file even if a row fails.
    with open(csv_path, 'w', newline='', encoding='utf-8') as eac_data:
        csvwriter = csv.writer(eac_data)
        csvwriter.writerow(params)
        csvwriter.writerows(build_row(scheda) for scheda in schede)
    return len(schede)


if __name__ == "__main__":
    export_eac_csv(xml_file_name, csv_file_name)