# EAC_to_CSV (1.3 KB)

import csv
import os
import re
import sys
import xml.etree.ElementTree as Xet
from typing import Any, Dict, List, Optional
from xml.dom import minidom

import pandas as pd
  9. xml_file_name = 'path al documento xml / nomedocumento.xml'
  10. tree = Xet.parse(xml_file_name)
  11. root = tree.getroot()
  12. schede = root.findall("xw_doc")
  13. eac_data = open('eac_Data.csv', 'w')
  14. csvwriter = csv.writer(eac_data)
  15. params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime", "biogHist p"]
  16. csvwriter.writerow(params)
  17. for scheda in schede:
  18. ul = []
  19. identifier = scheda.find("eac-cpf/control/recordId")
  20. entityType = scheda.find("eac-cpf/cpfDescription/identity/entityType")
  21. nameEntry_N = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']")
  22. nameEntry_P = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']")
  23. description = scheda.find("eac-cpf/cpfDescription/description/biogHist/p")
  24. ul.append(identifier.text)
  25. ul.append(entityType.text)
  26. if nameEntry_N is None:
  27. ul.append(" ")
  28. else:
  29. ul.append(nameEntry_N.text)
  30. if nameEntry_P is None:
  31. ul.append(" ")
  32. else:
  33. ul.append(nameEntry_P.text)
  34. if description is None:
  35. ul.append(" ")
  36. else:
  37. ul.append(description.text)
  38. csvwriter.writerow(ul)
  39. eac_data.close()