1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
import csv
import os
import re
import sys
import xml.etree.ElementTree as Xet
from typing import Dict, Any
from xml.dom import minidom

import pandas as pd
# Default input file. The hard-coded local path is kept so that running the
# script with no further configuration behaves as it always has.
xml_file_name = '/Users/federicaspinelli/Google Drive/OVI:CNR/LAVORO 2020/SELEZIONE CONTENUTI/01_ASPO/XDAMS/export_aspoAuth002--22_09_20_17_34-3se381075198510401.xml'

# Default output file, created in the current working directory.
csv_file_name = 'eac_Data.csv'

# CSV header; the order here is the order of the cells in every data row.
params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime", "biogHist p",
          "nameEntry@ulterior", "nameEntry@ord", "place", "occupation"]

# Cell value written when a record does not contain the element at all.
MISSING = " "

# Separator used to pack several place names into the single "place" cell.
PLACE_SEPARATOR = " | "

_IDENTITY = "eac-cpf/cpfDescription/identity"
_DESCRIPTION = "eac-cpf/cpfDescription/description"
_NAME_PART = _IDENTITY + "/nameEntry/part[@localType='{}']"
_PLACES_PATH = _DESCRIPTION + "/places/place/placeEntry"


def _text_or_placeholder(scheda: Xet.Element, path: str) -> str:
    """Return the text of the first element matching *path* under *scheda*.

    Returns ``MISSING`` when no element matches, and an empty string when the
    element exists but has no text (``csv`` would write ``None`` as an empty
    cell anyway, so the output is the same).
    """
    element = scheda.find(path)
    if element is None:
        return MISSING
    return element.text if element.text is not None else ""


def _joined_places(scheda: Xet.Element) -> str:
    """Return every ``placeEntry`` text of *scheda* joined by ``PLACE_SEPARATOR``.

    ``findall`` returns an empty list (never ``None``) when nothing matches,
    so emptiness is what selects the ``MISSING`` placeholder. Entries without
    text contribute an empty string instead of breaking ``str.join``.
    """
    entries = scheda.findall(_PLACES_PATH)
    if not entries:
        return MISSING
    return PLACE_SEPARATOR.join(entry.text or "" for entry in entries)


def build_row(scheda: Xet.Element) -> list:
    """Build one CSV row, in ``params`` order, from a single ``xw_doc`` element.

    Every column, including ``recordId`` and ``entityType``, falls back to
    ``MISSING`` when its element is absent, so one incomplete record no longer
    aborts the whole export.
    """
    return [
        _text_or_placeholder(scheda, "eac-cpf/control/recordId"),
        _text_or_placeholder(scheda, _IDENTITY + "/entityType"),
        _text_or_placeholder(scheda, _NAME_PART.format("normal")),
        _text_or_placeholder(scheda, _NAME_PART.format("prime")),
        _text_or_placeholder(scheda, _DESCRIPTION + "/biogHist/p"),
        _text_or_placeholder(scheda, _NAME_PART.format("ulterior")),
        _text_or_placeholder(scheda, _NAME_PART.format("ord")),
        _joined_places(scheda),
        _text_or_placeholder(scheda, _DESCRIPTION + "/occupation/term"),
    ]


def export_eac_csv(xml_path: str = xml_file_name, csv_path: str = csv_file_name) -> int:
    """Write one CSV row per ``xw_doc`` child of the XML root.

    Args:
        xml_path: XML file to read; its root must directly contain the
            ``xw_doc`` elements.
        csv_path: CSV file to create or overwrite.

    Returns:
        The number of data rows written (the header is not counted).

    Raises:
        OSError: if either file cannot be opened.
        xml.etree.ElementTree.ParseError: if the XML is not well formed.
    """
    # Parse first: a bad input must not leave a truncated output file behind.
    root = Xet.parse(xml_path).getroot()

    # newline='' is required by the csv module so that it controls line
    # endings itself; the explicit encoding keeps non-ASCII text portable
    # across platforms. The context manager closes the file even on error.
    with open(csv_path, 'w', newline='', encoding='utf-8') as eac_data:
        csvwriter = csv.writer(eac_data)
        csvwriter.writerow(params)
        written = 0
        for scheda in root.findall("xw_doc"):
            csvwriter.writerow(build_row(scheda))
            written += 1
    return written


if __name__ == "__main__":
    export_eac_csv()
|