"""Export selected EAC-CPF fields from an XML export to ``eac_Data.csv``.

The script parses the XML file named by ``xml_file_name``, walks every
``xw_doc`` element under the root and writes one CSV row per element with
the columns listed in ``params``.
"""
import csv
import os
import re
import sys
import xml.etree.ElementTree as Xet
from typing import Any, Dict, List, Optional
from xml.dom import minidom

import pandas as pd

xml_file_name = '/Users/federicaspinelli/Google Drive/OVI:CNR/LAVORO 2020/SELEZIONE CONTENUTI/01_ASPO/XDAMS/export_aspoAuth002--22_09_20_17_34-3se381075198510401.xml'

# Placeholder written to a cell when the corresponding element is absent.
BLANK = " "

# Separator used to pack several placeEntry values into a single cell.
PLACE_SEPARATOR = " | "

# CSV header; the order must match the row built by _build_row().
params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime",
          "biogHist p", "nameEntry@ulterior", "nameEntry@ord", "place",
          "occupation"]


def _text_or_blank(element: Optional[Xet.Element]) -> Optional[str]:
    """Return the text of *element*, or ``BLANK`` when the element is missing.

    An element that exists but has no text yields ``None``, which
    ``csv.writer`` serialises as an empty string.
    """
    if element is None:
        return BLANK
    return element.text


def _places_cell(scheda: Xet.Element) -> str:
    """Return every placeEntry text of *scheda* joined by ``PLACE_SEPARATOR``.

    Returns ``BLANK`` when the record has no placeEntry at all, so the column
    uses the same placeholder as the other columns.
    """
    entries = scheda.findall(
        "eac-cpf/cpfDescription/description/places/place/placeEntry")
    # findall() returns an empty list (never None) when nothing matches.
    if not entries:
        return BLANK
    # An empty <placeEntry/> has text None, which str.join() would reject.
    return PLACE_SEPARATOR.join(entry.text or "" for entry in entries)


def _build_row(scheda: Xet.Element) -> List[Optional[str]]:
    """Build the CSV row for one ``xw_doc`` element, in ``params`` order."""

    def field(path: str) -> Optional[str]:
        return _text_or_blank(scheda.find(path))

    identity = "eac-cpf/cpfDescription/identity/"
    description = "eac-cpf/cpfDescription/description/"
    return [
        field("eac-cpf/control/recordId"),
        field(identity + "entityType"),
        field(identity + "nameEntry/part[@localType='normal']"),
        field(identity + "nameEntry/part[@localType='prime']"),
        field(description + "biogHist/p"),
        field(identity + "nameEntry/part[@localType='ulterior']"),
        field(identity + "nameEntry/part[@localType='ord']"),
        _places_cell(scheda),
        field(description + "occupation/term"),
    ]


tree = Xet.parse(xml_file_name)
root = tree.getroot()
schede = root.findall("xw_doc")

# newline='' is required by the csv module so it controls line endings itself;
# the explicit encoding keeps non-ASCII text independent of the platform
# default. The with-block closes the file even if a record raises.
with open('eac_Data.csv', 'w', newline='', encoding='utf-8') as eac_data:
    csvwriter = csv.writer(eac_data)
    csvwriter.writerow(params)
    for scheda in schede:
        csvwriter.writerow(_build_row(scheda))