Browse Source

add Parser ICCD to CSV

Alessia 2 years ago
parent
commit
cb8af93638
1 changed files with 89 additions and 0 deletions
  1. 89 0
      Museo/ICCD_to_CSV/ICCDtoCSV.py

+ 89 - 0
Museo/ICCD_to_CSV/ICCDtoCSV.py

@@ -0,0 +1,89 @@
+import xml.etree.ElementTree as Xet
+import pandas as pd
+import os
+import csv
+from xml.dom import minidom
+import sys
+
+# Passo al parser i file xml
+xml_file_name = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/SR20OA_Datini.xml'
+tree = Xet.parse(xml_file_name)
+root = tree.getroot()
+schede = root.find("schede")
+schedeM = root.findall("schede/scheda")
+scheda = schede.find("scheda")
+
+OA_data = open('OA_Data.csv', 'w')
+csvwriter = csv.writer(OA_data)
+
+
+def iterate(node, aut, path=""):
+    if path:
+        current_path = path + "/" + node.tag
+    else:
+        current_path = node.tag
+    path = current_path
+    for child in node:
+        iterate(child, aut, path=current_path)
+    obj = cell(node)
+    if obj is not None:
+        aut.append(obj)
+    return aut
+
+
+def cell(nd):
+    if nd is not None:
+        res = nd.text
+        tag = nd.tag
+        st = res.rstrip()
+        if st != "":
+            res = "".join(st.splitlines())
+            return [tag, res]
+
+
+def build_row(scheda):
+    cc = []
+    iterate(scheda, cc)
+    seen = dict()
+    row = dict()
+    plus_tag = []
+    for elem in cc:
+        tag = elem[0]
+        if tag in seen:
+            num = seen[tag]
+            n = num + 1
+            seen[tag] = n
+            name = tag + str(n)
+            row[name] = elem[1]
+        else:
+            tag = elem[0]
+            seen[tag] = 0
+            row[tag] = elem[1]
+    return (row)
+
+
+params = []
+for scheda in schede:
+    row = build_row(scheda)
+    for p in row:
+        if p not in params:
+            params.append(p)
+
+
+def build_table(schede):
+    for scheda in schede:
+        row = build_row(scheda)
+        ul = []
+        for x in params:
+            if row.get(x) is None:
+                ul.append(" ")
+            else:
+                ul.append(row[x])
+
+        csvwriter.writerow(ul)
+
+
+csvwriter.writerow(params)
+build_table(schede)
+
+OA_data.close()