|
@@ -5,17 +5,40 @@ import re
|
|
|
import json
|
|
|
import os
|
|
|
# %%
|
|
|
-# Import lems list + info file + authority files
|
|
|
-basedir = '/home/kora/Desktop/OVI_Data/Development/Parser/Data/'
|
|
|
+# Import lems list + xml info file
|
|
|
+basedir = '../../Data/'
|
|
|
|
|
|
# lems
|
|
|
-lems = json.load(open(basedir + 'DallOVI/datiniXML/power_lemmarioB.json', 'r'))
|
|
|
+lemfile = basedir + 'DallOVI/datiniXML/power_lemmarioB.json'
|
|
|
+lems = json.load(open(lemfile, 'r'))
|
|
|
+
|
|
|
+# BiblioDatini.xml
|
|
|
+
|
|
|
+infofile = basedir + 'DallOVI/datiniXML/BiblioDatini.xml'
|
|
|
+infotree = ET.parse(infofile)
|
|
|
+inforoot = infotree.getroot()
|
|
|
+infoBiblioNodeList = list(inforoot.iter('Biblio'))
|
|
|
# %%
|
|
|
+print(type(lems))
|
|
|
+print(lems[:10])
|
|
|
+print('Main nodes in BiblioDatini.xml:', len(infoBiblioNodeList))
|
|
|
+# %%
|
|
|
+# Utils to extract data from the info files
|
|
|
+
|
|
|
def lemIndex(lem):
    """Return the ``'id'`` of the first ``lems`` entry matching ``lem``.

    The code to look up is read from ``lem.attrib['n']``. Entries of the
    module-level ``lems`` collection are scanned in order, and an entry
    matches when that code is ``in`` its ``'coordinate'`` value.

    Raises:
        ValueError: if no entry of ``lems`` matches the code.
    """
    for entry in lems:
        if lem.attrib['n'] in entry['coordinate']:
            found_id = entry['id']
            break
    else:
        # The loop ran to completion without a match.
        raise ValueError("code " + lem.attrib['n'] + " not found")
    return found_id
|
|
|
+
|
|
|
def getBiblioNodeBySigla(sigla):
    """Return the first node of ``infoBiblioNodeList`` tagged with ``sigla``.

    A node matches when one of the items yielded by iterating it has
    ``tag == 'sigla'`` and ``text == sigla``. Nodes are examined in list
    order and the first match is returned.

    Returns:
        The matching node, or ``None`` when no node matches.
    """
    for biblio in infoBiblioNodeList:
        if any(el.tag == 'sigla' and el.text == sigla for el in biblio):
            return biblio
    # No node carries the requested sigla.
    return None
|
|
|
+# %%
|
|
|
+aa = getBiblioNodeBySigla('A03')
|
|
|
+ET.dump(aa)
|
|
|
# %%
|
|
|
# Import individual letter files
|
|
|
indir = basedir + 'DallOVI/datiniXML/xmlgat/'
|
|
@@ -109,9 +132,19 @@ for file in os.listdir(indir):
|
|
|
print("IndexError - " + file)
|
|
|
print('DONE!')
|
|
|
# %%
|
|
|
-filecodeexample = 'j92'
|
|
|
+filecodeexample = 'j91'
|
|
|
|
|
|
tree2 = ET.parse(indir + 'xmlgat.' + filecodeexample + '.xml')
|
|
|
-
|
|
|
-
|
|
|
+# %%
|
|
|
+ET.dump(tree2)
|
|
|
+# %%
|
|
|
+tree3 = processFile(indir, filecodeexample)
|
|
|
+# %%
|
|
|
+ET.dump(tree3)
|
|
|
+# %%
|
|
|
+indir + 'xmlgat.' + filecodeexample + '.xml'
|
|
|
+# %%
|
|
|
+tempdir = "/home/kora/Desktop/FREELANCE_LOCAL/"
|
|
|
+# %%
|
|
|
+tree3.write(tempdir + 'xmlevt-' + filecodeexample + '.xml')
|
|
|
# %%
|