# ICCDtoCSV.py
  1. import xml.etree.ElementTree as Xet
  2. import pandas as pd
  3. import os
  4. import csv
  5. from xml.dom import minidom
  6. import sys
  7. # Passo al parser i file xml
  8. xml_file_name = '/Users/alessiaspadi/Documents/RESTORE/temp_MPP/SR20OA_Datini.xml'
  9. tree = Xet.parse(xml_file_name)
  10. root = tree.getroot()
  11. schede = root.find("schede")
  12. schedeM = root.findall("schede/scheda")
  13. scheda = schede.find("scheda")
  14. OA_data = open('OA_Data.csv', 'w')
  15. csvwriter = csv.writer(OA_data)
  16. def iterate(node, aut, path=""):
  17. if path:
  18. current_path = path + "/" + node.tag
  19. else:
  20. current_path = node.tag
  21. path = current_path
  22. for child in node:
  23. iterate(child, aut, path=current_path)
  24. obj = cell(node)
  25. if obj is not None:
  26. aut.append(obj)
  27. return aut
  28. def cell(nd):
  29. if nd is not None:
  30. res = nd.text
  31. tag = nd.tag
  32. st = res.rstrip()
  33. if st != "":
  34. res = "".join(st.splitlines())
  35. return [tag, res]
  36. def build_row(scheda):
  37. cc = []
  38. iterate(scheda, cc)
  39. seen = dict()
  40. row = dict()
  41. plus_tag = []
  42. for elem in cc:
  43. tag = elem[0]
  44. if tag in seen:
  45. num = seen[tag]
  46. n = num + 1
  47. seen[tag] = n
  48. name = tag + str(n)
  49. row[name] = elem[1]
  50. else:
  51. tag = elem[0]
  52. seen[tag] = 0
  53. row[tag] = elem[1]
  54. return (row)
  55. params = []
  56. for scheda in schede:
  57. row = build_row(scheda)
  58. for p in row:
  59. if p not in params:
  60. params.append(p)
  61. def build_table(schede):
  62. for scheda in schede:
  63. row = build_row(scheda)
  64. ul = []
  65. for x in params:
  66. if row.get(x) is None:
  67. ul.append(" ")
  68. else:
  69. ul.append(row[x])
  70. csvwriter.writerow(ul)
  71. csvwriter.writerow(params)
  72. build_table(schede)
  73. OA_data.close()