# NOTE(review): this copy of the script is not runnable as it stands.
#
# 1. The whole script is stored on ONE physical line that begins with `#`,
#    so Python treats everything below as a single comment and executes
#    nothing.  The `# %%` markers suggest it was a cell-based script whose
#    line breaks were lost -- TODO confirm against the original file.
#
# 2. Text is missing from the line itself:
#    - `placeTemplate` / `personTemplate` contain only PLACE_NAME /
#      PERSON_NAME and `fileTemplate` is empty, yet the code calls
#      .replace() on 'PLACE_ID', 'PERSON_ID', 'LETTER_FILENAME' and
#      'LETTER_TEXTPOINT'.  With the templates as shown those replacements
#      cannot match anything.
#    - The statement starting `if ii` breaks off and the text resumes at
#      `', persListString).replace('', fileListString)`.  The assignment of
#      `mainFileString` (which is written to disk afterwards) is not visible.
#    Presumably markup between `<` and `>` was stripped when this copy was
#    produced -- TODO confirm and restore from the source of record rather
#    than reconstructing by hand.
#
# What the visible text does show (in order):
#    - sets basedir / baseindir / baseoutdir (assigned twice with the same
#      values) and registers the TEI namespace "http://www.tei-c.org/ns/1.0"
#      under the empty prefix via ET.register_namespace;
#    - loads `power_lemmarioB.json` and splits its entries into persNames
#      (lemma.categoria == 'antr.') and placeNames (== 'n.g.');
#    - lists the files in baseoutdir + "lettere/";
#    - builds persListString / placeListString / fileListString by repeated
#      template substitution and string concatenation;
#    - writes mainFileString to 'test_main.xml' and to baseoutdir + 'main.xml'.
#
# Things to review once the file is restored:
#    - `json.load(open(lemfile, 'r'))` never closes the file handle and the
#      open() calls pass no explicit encoding;
#    - inserted names are not XML-escaped before being substituted into the
#      templates (visible .replace() calls insert the raw strings).
#
# %% # Imports import xml.etree.ElementTree as ET import json import os basedir = '../../DATA/' baseindir = basedir + 'OVI/datiniXML/xmlgat/' baseoutdir = basedir + 'OVI/datiniXML/xmlevt/' # %% # This is to handle the xmnls attribute in the TEI element in the templates uri1 = "{http://www.tei-c.org/ns/1.0}" namespaces = { '': "http://www.tei-c.org/ns/1.0", } for prefix, uri in namespaces.items(): ET.register_namespace(prefix, uri) # Reference directories basedir = '../../DATA/' baseindir = basedir + 'OVI/datiniXML/xmlgat/' baseoutdir = basedir + 'OVI/datiniXML/xmlevt/' # %% # Import lems list + xml info file # get lem list as a json object lemfile = basedir + 'OVI/datiniXML/power_lemmarioB.json' lems = json.load(open(lemfile, 'r')) # %% persNames = [lem for lem in lems if lem['lemma']['categoria']=='antr.'] placeNames = [lem for lem in lems if lem['lemma']['categoria']=='n.g.'] fileNames = os.listdir(baseoutdir + "lettere/") # %% print(len(persNames), len(lems), len(placeNames)) print(len(os.listdir(baseindir)), len(os.listdir(baseoutdir + 'lettere/'))) # %% placeTemplate =''' PLACE_NAME ''' personTemplate =''' PERSON_NAME ''' fileTemplate = '' fileIndent = ' ' # %% persNames[10] # %% persListString = "" for person in persNames: persListString = persListString + personTemplate.replace('PERSON_ID', ''+str(person['id'])).replace('PERSON_NAME', person['lemma']['forma_standard']) placeListString = "" for place in placeNames: placeListString = placeListString + placeTemplate.replace('PLACE_ID', ''+str(place['id'])).replace('PLACE_NAME', '"'+place['lemma']['forma_standard']+'"') fileListString = "" for ii, f1 in enumerate(fileNames): fileListString = fileListString + fileTemplate.replace('LETTER_FILENAME', f1).replace('LETTER_TEXTPOINT', f1.replace('.xml', '_text')) if ii', persListString).replace('', fileListString) # %% with open('test_main.xml', 'w') as f2: f2.write(mainFileString) # %% with open(baseoutdir + 'main.xml', 'w') as f3: f3.write(mainFileString) # %% # %%