12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091 |
- # %%
- # Imports
- import xml.etree.ElementTree as ET
- import json
- import os
# Reference directories: root of the data tree, the folder the input XML is
# read from, and the folder the EVT-ready XML is written to. All three keep
# their trailing slash because later cells append file names directly.
basedir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/'
baseindir = f'{basedir}OVI/datiniXML/xmlgat/'
baseoutdir = f'{basedir}OVI/datiniXML/xmlevt/'
# %%
# Handle the xmlns attribute of the TEI element in the templates: the TEI
# namespace is registered under the empty prefix, so ElementTree serialises
# TEI elements with a default xmlns declaration instead of an "ns0:" prefix.
# `uri1` is the same namespace in the "{uri}" form ElementTree uses in tags.
uri1 = "{http://www.tei-c.org/ns/1.0}"
namespaces = {
    '': "http://www.tei-c.org/ns/1.0",
}
for prefix, uri in namespaces.items():
    ET.register_namespace(prefix, uri)
# The reference directories (basedir / baseindir / baseoutdir) are defined
# once, in the first cell; the verbatim copy that used to sit here was removed.
# %%
# Import lems list + xml info file
# Load the lemma list from its JSON file. The later cells iterate over `lems`
# and read entry['id'] and entry['lemma'][...] from every element.
lemfile = '/Users/federicaspinelli/TEAMOVI/Parser/OVI/Lemmi/associazione lemmi - link TLIO/power_lemmarioD_link.json'
# A context manager closes the handle deterministically, and the explicit
# encoding keeps accented characters intact whatever the platform default is.
with open(lemfile, 'r', encoding='utf-8') as lem_handle:
    lems = json.load(lem_handle)
# %%
# Partition the lemma list by category code: 'antr.' entries feed the person
# list, 'n.g.' entries feed the place list, everything else is a plain lemma.
persNames = [lem for lem in lems if lem['lemma']['categoria'] == 'antr.']
placeNames = [lem for lem in lems if lem['lemma']['categoria'] == 'n.g.']
# The previous test (`!= 'antr.' or != 'n.g.'`) was true for every entry, so
# persons and places were also emitted as <item> entries with the same xml:id.
lemNames = [lem for lem in lems if lem['lemma']['categoria'] not in ('antr.', 'n.g.')]
# os.listdir returns entries in arbitrary order and includes non-letter files
# (e.g. .DS_Store); keep only the XML letters and sort them so the generated
# include list is reproducible.
fileNames = sorted(
    name for name in os.listdir(baseoutdir + "lettere/") if name.endswith('.xml')
)
# %%
# Sanity check: persons + places + lemmas should add up to the total, and the
# input/output folders can be compared by raw entry count.
print(len(persNames), len(lems), len(placeNames), len(lemNames))
print(len(os.listdir(baseindir)), len(os.listdir(baseoutdir + 'lettere/')))
# %%
# XML fragment templates. The upper-case tokens (PLACE_ID, PERSON_NAME,
# LEM_REF, LETTER_FILENAME, ...) are placeholders that a later cell fills in
# with str.replace. Each template starts with a newline so that concatenated
# fragments land on separate lines.
placeTemplate = (
    '\n'
    '<place xml:id="PLACE_ID">\n'
    '<settlement>PLACE_NAME</settlement>\n'
    '<note>PLACE_NOTE</note>\n'
    '</place>'
)
personTemplate = (
    '\n'
    '<person xml:id="PERSON_ID">\n'
    '<persName>PERSON_NAME</persName>\n'
    '<note>PERSON_NOTE</note>\n'
    '</person>'
)
# The quoted "LEM_REF" line is swapped for the lemma's link anchors when the
# lemma list is rendered.
lemTemplate = (
    '\n'
    '<item type="lem" xml:id="LEM_ID">\n'
    '<lem iperlemma="LEM_IPER" norm="LEM_NAME" pos="LEM_CAT">LEM_NAME</lem>\n'
    '"LEM_REF"\n'
    '<note>LEM_NOTE</note>\n'
    '</item>'
)
# Not referenced by any of the cells visible in this file.
uriTemplate = ' '
# One XInclude element per letter file; entries are separated by a newline
# followed by fileIndent.
fileTemplate = (
    '<xi:include href="lettere/LETTER_FILENAME" '
    'xmlns:xi="http://www.w3.org/2001/XInclude" '
    'xpointer="LETTER_TEXTPOINT"/>'
)
fileIndent = ' '
# %%
# Notebook inspection aid: peek at one person record. A slice is used so the
# cell shows an empty list instead of raising IndexError on short inputs.
persNames[10:11]
# %%
# Render the four text blocks that get spliced into the main template: one
# fragment per person, place and lemma, plus one XInclude line per letter.
#
# NOTE(review): field values are inserted verbatim, with no XML escaping; a
# '&' or '<' in a name or note would make the output ill-formed. Confirm
# whether the JSON data is already XML-safe before adding escaping here.
# NOTE(review): str.replace raises TypeError on a None value; presumably every
# field used below is a string -- verify against the JSON.

TLIO_VOCI_URL = 'http://tlio.ovi.cnr.it/voci/'


def _lemma_ref_anchors(uris):
    """Return one empty ``<a lemmaRef="...">`` element per TLIO page name.

    ``uris`` is expected to be a list of page names such as ``'012345.htm'``
    (assumption based on how the value was stringified before -- TODO confirm).
    A single string is treated as a one-element list; ``None`` or an empty
    list yields an empty string rather than a stray ``"[]"`` in the output.
    Anchors are separated by a single space.
    """
    if uris is None:
        return ''
    if isinstance(uris, str):
        uris = [uris]
    return ' '.join(
        '<a lemmaRef="' + TLIO_VOCI_URL + str(page) + '"></a>' for page in uris
    )


persListString = "".join(
    personTemplate
    .replace('PERSON_ID', str(person['id']))
    .replace('PERSON_NAME', person['lemma']['forma_standard'])
    .replace('PERSON_NOTE', person['lemma']['note'])
    for person in persNames
)

placeListString = "".join(
    placeTemplate
    .replace('PLACE_ID', str(place['id']))
    .replace('PLACE_NAME', place['lemma']['forma_standard'])
    .replace('PLACE_NOTE', place['lemma']['note'])
    for place in placeNames
)

# The link anchors are built from the uri list directly and substituted for
# the quoted "LEM_REF" placeholder, instead of stringifying the list and
# patching its repr, which could also rewrite look-alike text inside a note.
lemListString = "".join(
    lemTemplate
    .replace('LEM_ID', str(lemma['id']))
    .replace('LEM_NAME', lemma['lemma']['forma_standard'])
    .replace('LEM_CAT', lemma['lemma']['categoria'])
    .replace('LEM_NOTE', lemma['lemma']['note'])
    .replace('LEM_IPER', lemma['lemma']['iperlemma'])
    .replace('"LEM_REF"', _lemma_ref_anchors(lemma['lemma']['uri']))
    for lemma in lemNames
)

# One include per letter; the xpointer is the file name with '.xml' replaced
# by '_text'. Entries are joined by newline + indent, none after the last.
fileListString = ('\n' + fileIndent).join(
    fileTemplate
    .replace('LETTER_FILENAME', letter_name)
    .replace('LETTER_TEXTPOINT', letter_name.replace('.xml', '_text'))
    for letter_name in fileNames
)
# %%
# Read the main TEI template. The encoding is explicit so the result does not
# depend on the platform default (assumes the template is UTF-8 -- TODO confirm).
with open('/Users/federicaspinelli/TEAMOVI/Parser/OVI/EVT/pre_main_template.xml', 'r', encoding='utf-8') as f1:
    mainTemplateString = f1.read()
# %%
# Splice the rendered lists into their placeholders. Replacements run in this
# order, each on the result of the previous one.
mainFileString = (
    mainTemplateString
    .replace('<PERSON_LIST>', persListString)
    .replace('<PLACE_LIST>', placeListString)
    .replace('<LEM_LIST>', lemListString)
    .replace('<FILE_LIST>', fileListString)
)
# %%
# Scratch copy in the current working directory, for inspection.
with open('test_main.xml', 'w', encoding='utf-8') as f2:
    f2.write(mainFileString)
# %%
# Final output next to the 'lettere/' folder that the includes point into.
with open(baseoutdir + 'main.xml', 'w', encoding='utf-8') as f3:
    f3.write(mainFileString)
- # %%
- # %%
|