# EAC_to_CSV (2.3 KB)

import csv
import os
import re
import sys
import xml.etree.ElementTree as Xet
from typing import Any, Dict, Iterable, List, Optional
from xml.dom import minidom

import pandas as pd
  9. xml_file_name = '/Users/federicaspinelli/Google Drive/OVI:CNR/LAVORO 2020/SELEZIONE CONTENUTI/01_ASPO/XDAMS/export_aspoAuth002--22_09_20_17_34-3se381075198510401.xml'
  10. tree = Xet.parse(xml_file_name)
  11. root = tree.getroot()
  12. schede = root.findall("xw_doc")
  13. eac_data = open('eac_Data.csv', 'w')
  14. csvwriter = csv.writer(eac_data)
  15. params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime", "biogHist p",
  16. "nameEntry@ulterior", "nameEntry@ord", "place", "occupation"]
  17. csvwriter.writerow(params)
  18. for scheda in schede:
  19. ul = []
  20. identifier = scheda.find("eac-cpf/control/recordId")
  21. entityType = scheda.find("eac-cpf/cpfDescription/identity/entityType")
  22. nameEntry_N = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']")
  23. nameEntry_P = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']")
  24. description = scheda.find("eac-cpf/cpfDescription/description/biogHist/p")
  25. nameEntry_U = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ulterior']")
  26. nameEntry_O = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ord']")
  27. places = scheda.findall("eac-cpf/cpfDescription/description/places/place/placeEntry")
  28. occupation = scheda.find("eac-cpf/cpfDescription/description/occupation/term")
  29. ul.append(identifier.text)
  30. ul.append(entityType.text)
  31. if nameEntry_N is None:
  32. ul.append(" ")
  33. else:
  34. ul.append(nameEntry_N.text)
  35. if nameEntry_P is None:
  36. ul.append(" ")
  37. else:
  38. ul.append(nameEntry_P.text)
  39. if description is None:
  40. ul.append(" ")
  41. else:
  42. ul.append(description.text)
  43. if nameEntry_U is None:
  44. ul.append(" ")
  45. else:
  46. ul.append(nameEntry_U.text)
  47. if nameEntry_O is None:
  48. ul.append(" ")
  49. else:
  50. ul.append(nameEntry_O.text)
  51. if places is None:
  52. ul.append(" ")
  53. else:
  54. cell = []
  55. for place in places:
  56. cell.append(place.text)
  57. ul.append(" | ".join(cell))
  58. if occupation is None:
  59. ul.append(" ")
  60. else:
  61. ul.append(occupation.text)
  62. csvwriter.writerow(ul)
  63. eac_data.close()