Browse Source

Add new parser EAC to CSV

Federica 2 years ago
parent
commit
eddbcfa0f2
1 changed file with 34 additions and 3 deletions
  1. 34 3
      EAC_to_CSV

+ 34 - 3
EAC_to_CSV

@@ -7,7 +7,7 @@ from xml.dom import minidom
 import sys
 import re
 
-xml_file_name = 'path al documento xml / nomedocumento.xml'
+xml_file_name = '/Users/federicaspinelli/Google Drive/OVI:CNR/LAVORO 2020/SELEZIONE CONTENUTI/01_ASPO/XDAMS/export_aspoAuth002--22_09_20_17_34-3se381075198510401.xml'
 tree = Xet.parse(xml_file_name)
 root = tree.getroot()
 schede = root.findall("xw_doc")
@@ -15,7 +15,8 @@ schede = root.findall("xw_doc")
 eac_data = open('eac_Data.csv', 'w')
 csvwriter = csv.writer(eac_data)
 
-params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime", "biogHist p"]
+params = ["recordId", "entityType", "nameEntry@normal", "nameEntry@prime",  "biogHist p",
+          "nameEntry@ulterior", "nameEntry@ord", "place", "occupation"]
 
 csvwriter.writerow(params)
 
@@ -26,20 +27,50 @@ for scheda in schede:
     nameEntry_N = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='normal']")
     nameEntry_P = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='prime']")
     description = scheda.find("eac-cpf/cpfDescription/description/biogHist/p")
+    nameEntry_U = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ulterior']")
+    nameEntry_O = scheda.find("eac-cpf/cpfDescription/identity/nameEntry/part[@localType='ord']")
+    places = scheda.findall("eac-cpf/cpfDescription/description/places/place/placeEntry")
+    occupation = scheda.find("eac-cpf/cpfDescription/description/occupation/term")
     ul.append(identifier.text)
     ul.append(entityType.text)
     if nameEntry_N is None:
         ul.append(" ")
     else:
         ul.append(nameEntry_N.text)
+
     if nameEntry_P is None:
         ul.append(" ")
     else:
         ul.append(nameEntry_P.text)
+
     if description is None:
-        ul.append(" ")
+       ul.append(" ")
     else:
         ul.append(description.text)
+
+    if nameEntry_U is None:
+        ul.append(" ")
+    else:
+        ul.append(nameEntry_U.text)
+
+    if nameEntry_O is None:
+        ul.append(" ")
+    else:
+        ul.append(nameEntry_O.text)
+
+    if places is None:
+        ul.append(" ")
+    else:
+        cell = []
+        for place in places:
+            cell.append(place.text)
+        ul.append(" | ".join(cell))
+    
+    if occupation is None:
+        ul.append(" ")
+    else:
+        ul.append(occupation.text)
+
     csvwriter.writerow(ul)