12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879 |
- # %%
- # Imports
import json
import os
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape
# Root of the data tree.  The trailing slash is significant: every sub-path
# below (and elsewhere in this script) is built by plain string concatenation.
basedir = "../../Data/"
# Input directory and output directory used by the rest of the script.
baseindir = f"{basedir}DallOVI/datiniXML/xmlgat/"
baseoutdir = f"{basedir}DallOVI/datiniXML/xmlevt/"
# %%
# Handle the xmlns attribute on the TEI element in the templates: register
# the TEI namespace under the empty prefix so ElementTree treats it as the
# default namespace when serialising.
uri1 = "{http://www.tei-c.org/ns/1.0}"  # TEI namespace in "{uri}" tag-prefix form
namespaces = dict([('', "http://www.tei-c.org/ns/1.0")])
for prefix in namespaces:
    uri = namespaces[prefix]
    ET.register_namespace(prefix, uri)

# Reference directories (same values as at the top of the script, repeated
# here so this cell can be run on its own).
basedir = '../../Data/'
baseindir = ''.join((basedir, 'DallOVI/datiniXML/xmlgat/'))
baseoutdir = ''.join((basedir, 'DallOVI/datiniXML/xmlevt/'))
# %%
# Load the lemma list from its JSON file.
# The file is opened in a `with` block so the handle is closed promptly, and
# it is decoded explicitly as UTF-8 (the JSON default) rather than with the
# platform's locale encoding, which would corrupt accented characters on
# systems whose default is not UTF-8.
lemfile = basedir + 'DallOVI/datiniXML/power_lemmarioB.json'
with open(lemfile, 'r', encoding='utf-8') as lem_fh:
    lems = json.load(lem_fh)
# %%
# Split the lemmas by category: 'antr.' entries are used for the person list
# and 'n.g.' entries for the place list.
persNames = [lem for lem in lems if lem['lemma']['categoria'] == 'antr.']
placeNames = [lem for lem in lems if lem['lemma']['categoria'] == 'n.g.']
# os.listdir() returns names in arbitrary order; sorting makes the generated
# include list reproducible across runs and platforms.
fileNames = sorted(os.listdir(baseoutdir + "lettere/"))
# %%
# Sanity check: sizes of the derived lists and of the input/output folders.
print(len(persNames), len(lems), len(placeNames))
print(len(os.listdir(baseindir)), len(fileNames))
# %%
# Text templates whose upper-case placeholders are filled in with plain
# str.replace() calls.  The place and person templates begin with a newline
# so that consecutive entries can simply be concatenated.
# NOTE(review): the whitespace inside these templates (and the width of
# fileIndent) is reproduced exactly as visible here; confirm against the
# original file in case indentation was collapsed in transit.
placeTemplate = '\n'.join([
    '',
    '<place xml:id="PLACE_ID">',
    '<settlement type="">PLACE_NAME</settlement>',
    '<note></note>',
    '</place>',
])
personTemplate = '\n'.join([
    '',
    '<person xml:id="PERSON_ID">',
    '<persName>PERSON_NAME</persName>',
    '<sex></sex>',
    '</person>',
])
# One XInclude element per letter file; LETTER_TEXTPOINT is the xpointer
# target inside that file.
fileTemplate = (
    '<xi:include href="lettere/LETTER_FILENAME"'
    ' xmlns:xi="http://www.w3.org/2001/XInclude"'
    ' xpointer="LETTER_TEXTPOINT"/>'
)
# Indentation inserted after the newline that separates include elements.
fileIndent = ' '
# %%
# Quick look at one person entry (shown when this is run as a notebook cell).
# A slice is used instead of persNames[10] so that a list with fewer than
# eleven entries yields [] rather than raising IndexError.
persNames[10:11]
# %%
# Build the three text fragments that get spliced into the main template.
#
# Names are free text from the lemma list, so '&', '<' and '>' are escaped
# before being placed in element content; otherwise a single such character
# would make the generated file ill-formed XML.  Fragments are assembled with
# str.join rather than repeated '+' to avoid quadratic copying.
#
# NOTE(review): the xml:id values are written with a leading '#', which is
# normally the syntax of a *reference* to an id rather than of the id itself;
# left unchanged here because other files may depend on it -- please confirm.

# One <person> entry per anthroponym lemma.
persListString = "".join(
    personTemplate
    .replace('PERSON_ID', '#' + str(person['id']))
    .replace('PERSON_NAME', escape(person['lemma']['forma_standard']))
    for person in persNames
)

# One <place> entry per place-name lemma; the name is wrapped in literal
# double quotes, as in the original output.
placeListString = "".join(
    placeTemplate
    .replace('PLACE_ID', '#' + str(place['id']))
    .replace('PLACE_NAME', '"' + escape(place['lemma']['forma_standard']) + '"')
    for place in placeNames
)

# One XInclude element per letter file, separated (not terminated) by a
# newline plus indentation.  The xpointer target is the file name with
# '.xml' replaced by '_text'.
fileListString = ('\n' + fileIndent).join(
    fileTemplate
    .replace('LETTER_FILENAME', f1)
    .replace('LETTER_TEXTPOINT', f1.replace('.xml', '_text'))
    for f1 in fileNames
)
# %%
# Read the skeleton of the main file.  All file I/O below is explicitly
# UTF-8 so the result does not depend on the platform's locale encoding.
with open('pre_main_template.xml', 'r', encoding='utf-8') as f1:
    mainTemplateString = f1.read()
# %%
# Substitute the three placeholders.  The place placeholder previously lacked
# its closing '>' ('<PLACE_LIST'), unlike its two siblings, which would leave
# a stray '>' behind in the output after substitution.
mainFileString = (
    mainTemplateString
    .replace('<PERSON_LIST>', persListString)
    .replace('<PLACE_LIST>', placeListString)
    .replace('<FILE_LIST>', fileListString)
)
# %%
# Local copy in the working directory for inspection.
with open('test_main.xml', 'w', encoding='utf-8') as f2:
    f2.write(mainFileString)
# %%
# Final copy in the output directory, next to the 'lettere/' folder that the
# XInclude hrefs are relative to.
with open(baseoutdir + 'main.xml', 'w', encoding='utf-8') as f3:
    f3.write(mainFileString)
- # %%
- # %%
|