|
@@ -0,0 +1,46 @@
|
|
|
"""Export selected EAC-CPF fields from an XML file to a CSV file.

The script reads every ``xw_doc`` element that is a direct child of the XML
root, looks up a fixed set of descendant elements in each one, and writes one
CSV row per ``xw_doc`` (preceded by a header row) to ``eac_Data.csv``.
"""
import csv
import os
import re
import sys
import xml.etree.ElementTree as Xet
from typing import Any, Dict, List, Optional
from xml.dom import minidom

import pandas as pd

# Placeholder: replace with the real location of the XML document to convert.
xml_file_name = 'path al documento xml / nomedocumento.xml'

# Output file, created in the current working directory.
csv_file_name = 'eac_Data.csv'

# Header row of the CSV; the order matches FIELD_PATHS below.
params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime", "biogHist p"]

# ElementTree paths (relative to one ``xw_doc`` element), one per CSV column.
FIELD_PATHS = (
    "eac-cpf/control/recordId",
    "eac-cpf/cpfDescription/identity/entityType",
    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']",
    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']",
    "eac-cpf/cpfDescription/description/biogHist/p",
)

# Value written to a cell when the corresponding element is absent.
MISSING_VALUE = " "


def extract_row(scheda: Xet.Element) -> List[Optional[str]]:
    """Return the CSV cells for one ``xw_doc`` element.

    Args:
        scheda: The ``xw_doc`` element to read.

    Returns:
        One entry per path in ``FIELD_PATHS``: the matched element's ``text``
        (which may be ``None`` for an empty element; ``csv`` writes that as an
        empty cell), or ``MISSING_VALUE`` when no element matches the path.
    """
    row: List[Optional[str]] = []
    for path in FIELD_PATHS:
        node = scheda.find(path)
        # Every field is treated as optional, so a record lacking recordId or
        # entityType yields a placeholder instead of raising AttributeError.
        row.append(MISSING_VALUE if node is None else node.text)
    return row


def export_eac_csv(xml_path: str, csv_path: str) -> int:
    """Convert the ``xw_doc`` records of *xml_path* into the CSV *csv_path*.

    Args:
        xml_path: Path of the XML document to read.
        csv_path: Path of the CSV file to create (overwritten if it exists).

    Returns:
        The number of data rows written (the header row is not counted).

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
    """
    # Parse first so that a bad input does not leave a truncated CSV behind.
    root = Xet.parse(xml_path).getroot()
    schede = root.findall("xw_doc")

    # newline='' is what the csv module expects (otherwise blank lines appear
    # between rows on Windows); an explicit encoding keeps the output
    # independent of the platform locale.
    with open(csv_path, 'w', newline='', encoding='utf-8') as eac_data:
        csvwriter = csv.writer(eac_data)
        csvwriter.writerow(params)
        csvwriter.writerows(extract_row(scheda) for scheda in schede)
    return len(schede)


if __name__ == "__main__":
    export_eac_csv(xml_file_name, csv_file_name)
|