Browse Source

Add parser EAD to CSV

Federica 2 years ago
parent
commit
48ff61f6a3
1 changed files with 46 additions and 0 deletions
  1. 46 0
      EAC_to_CSV

+ 46 - 0
EAC_to_CSV

@@ -0,0 +1,46 @@
+import xml.etree.ElementTree as Xet
+from typing import Dict, Any
+import pandas as pd
+import os
+import csv
+from xml.dom import minidom
+import sys
+import re
+
+# Export selected EAC-CPF fields from an XML dump to a CSV file.
+#
+# The input document is expected to hold one <xw_doc> element per record,
+# directly under the root, each wrapping an <eac-cpf> description. One CSV
+# row is written per <xw_doc>, preceded by a header row.
+
+# Placeholder value (Italian for "path to the xml document / documentname.xml"):
+# replace it with the real location of the XML file before running.
+xml_file_name = 'path al documento xml / nomedocumento.xml'
+tree = Xet.parse(xml_file_name)
+root = tree.getroot()
+schede = root.findall("xw_doc")
+
+# Header row of the CSV; one entry per exported column.
+params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime", "biogHist p"]
+
+# Element paths (relative to each <xw_doc>) feeding the columns above,
+# listed in the same order as `params`.
+field_paths = [
+    "eac-cpf/control/recordId",
+    "eac-cpf/cpfDescription/identity/entityType",
+    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']",
+    "eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']",
+    "eac-cpf/cpfDescription/description/biogHist/p",
+]
+
+
+def _text_or_blank(element):
+    """Return the text of *element*, or a single space when it is missing.
+
+    An element that exists but has no text yields ``None``, which
+    ``csv.writer`` emits as an empty field.
+    """
+    return " " if element is None else element.text
+
+
+# `with` guarantees the file is closed even if a record raises midway.
+# newline='' is required by the csv module so it controls line endings itself
+# (otherwise blank rows appear on Windows); UTF-8 keeps non-ASCII names and
+# biographies from failing on locales with a narrower default encoding.
+with open('eac_Data.csv', 'w', newline='', encoding='utf-8') as eac_data:
+    csvwriter = csv.writer(eac_data)
+    csvwriter.writerow(params)
+
+    for scheda in schede:
+        # Every field, including recordId and entityType, falls back to " "
+        # when absent, so one incomplete record no longer aborts the export.
+        csvwriter.writerow(
+            [_text_or_blank(scheda.find(path)) for path in field_paths]
+        )