# prepare_main_evt_file.py
  1. # %%
  2. # Imports
  3. import xml.etree.ElementTree as ET
  4. import json
  5. import os
  6. basedir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/'
  7. baseindir = basedir + 'OVI/datiniXML/xmlgat/'
  8. baseoutdir = basedir + 'OVI/datiniXML/xmlevt/'
  9. # %%
  10. # This is to handle the xmnls attribute in the TEI element in the templates
  11. uri1 = "{http://www.tei-c.org/ns/1.0}"
  12. namespaces = {
  13. '': "http://www.tei-c.org/ns/1.0",
  14. }
  15. for prefix, uri in namespaces.items():
  16. ET.register_namespace(prefix, uri)
  17. # Reference directories
  18. basedir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/'
  19. baseindir = basedir + 'OVI/datiniXML/xmlgat/'
  20. baseoutdir = basedir + 'OVI/datiniXML/xmlevt/'
  21. # %%
  22. # Import lems list + xml info file
  23. # get lem list as a json object
  24. lemfile = '/Users/federicaspinelli/TEAMOVI/Parser/OVI/Lemmi/associazione lemmi - link TLIO/power_lemmarioD_link.json'
  25. lems = json.load(open(lemfile, 'r'))
  26. # %%
  27. persNames = [lem for lem in lems if lem['lemma']['categoria']=='antr.']
  28. placeNames = [lem for lem in lems if lem['lemma']['categoria']=='n.g.']
  29. lemNames = [lem for lem in lems if lem['lemma']['categoria']!='antr.' or lem['lemma']['categoria']!='n.g.']
  30. fileNames = os.listdir(baseoutdir + "lettere/")
  31. # %%
  32. print(len(persNames), len(lems), len(placeNames), len(lemNames))
  33. print(len(os.listdir(baseindir)), len(os.listdir(baseoutdir + 'lettere/')))
  34. # %%
# XML fragment templates. The upper-case markers (PLACE_ID, PERSON_NAME,
# LEM_REF, ...) are filled in with str.replace() when the list strings are
# built further down.

# One <place> entry; instantiated once per item of placeNames.
placeTemplate ='''
<place xml:id="PLACE_ID">
<settlement>PLACE_NAME</settlement>
<note>PLACE_NOTE</note>
</place>'''
# One <person> entry; instantiated once per item of persNames.
personTemplate ='''
<person xml:id="PERSON_ID">
<persName>PERSON_NAME</persName>
<note>PERSON_NOTE</note>
</person>'''
# One lemma <item>; instantiated once per item of lemNames. The quoted
# "LEM_REF" marker receives str() of the lemma's 'uri' value, and follow-up
# replace calls then rewrite the resulting "['...htm', '...htm']" text
# (including the surrounding double quotes) into <a lemmaRef="..."> elements.
lemTemplate ='''
<item type="lem" xml:id="LEM_ID">
<lem iperlemma="LEM_IPER" norm="LEM_NAME" pos="LEM_CAT">LEM_NAME</lem>
"LEM_REF"
<note>LEM_NOTE</note>
</item>'''
# Not referenced anywhere else in the visible part of this file.
uriTemplate = ''' '''
# One XInclude line per letter file: LETTER_FILENAME is the file name and
# LETTER_TEXTPOINT the same name with '.xml' replaced by '_text'.
fileTemplate = '<xi:include href="lettere/LETTER_FILENAME" xmlns:xi="http://www.w3.org/2001/XInclude" xpointer="LETTER_TEXTPOINT"/>'
# Inserted after the newline that separates consecutive XInclude lines.
# NOTE(review): currently a single space -- confirm the intended indent width.
fileIndent = ' '
# %%
# Inspection of the 11th person entry. As a bare expression its value is
# discarded when the file runs as a plain script, and it raises IndexError if
# persNames holds fewer than 11 items.
persNames[10]
  56. # %%
  57. persListString = ""
  58. for person in persNames:
  59. persListString = persListString + personTemplate.replace('PERSON_ID', ''+str(person['id'])).replace('PERSON_NAME', person['lemma']['forma_standard']).replace('PERSON_NOTE', person['lemma']['note'])
  60. placeListString = ""
  61. for place in placeNames:
  62. placeListString = placeListString + placeTemplate.replace('PLACE_ID', ''+str(place['id'])).replace('PLACE_NAME', place['lemma']['forma_standard']).replace('PLACE_NOTE', place['lemma']['note'])
  63. lemListString = ""
  64. for lemma in lemNames:
  65. lemListString = lemListString + lemTemplate.replace('LEM_ID', ''+str(lemma['id'])).replace('LEM_NAME', lemma['lemma']['forma_standard']).replace('LEM_CAT', lemma['lemma']['categoria']).replace('LEM_NOTE', lemma['lemma']['note']).replace('LEM_IPER', lemma['lemma']['iperlemma']).replace('LEM_REF', str(lemma['lemma']['uri'])).replace('\"[\'', '<a lemmaRef="http://tlio.ovi.cnr.it/voci/').replace("htm', '", 'htm\"></a> <a lemmaRef="http://tlio.ovi.cnr.it/voci/').replace("htm']\"","htm\"></a>")
  66. fileListString = ""
  67. for ii, f1 in enumerate(fileNames):
  68. fileListString = fileListString + fileTemplate.replace('LETTER_FILENAME', f1).replace('LETTER_TEXTPOINT', f1.replace('.xml', '_text'))
  69. if ii<len(fileNames)-1:
  70. fileListString = fileListString + '\n' + fileIndent
  71. # %%
  72. with open('/Users/federicaspinelli/TEAMOVI/Parser/OVI/EVT/pre_main_template.xml', 'r') as f1:
  73. mainTemplateString = f1.read()
  74. # %%
  75. mainFileString = mainTemplateString.replace('<PERSON_LIST>', persListString).replace('<PLACE_LIST>', placeListString).replace('<LEM_LIST>', lemListString).replace('<FILE_LIST>', fileListString)
  76. # %%
  77. with open('test_main.xml', 'w') as f2:
  78. f2.write(mainFileString)
  79. # %%
  80. with open(baseoutdir + 'main.xml', 'w') as f3:
  81. f3.write(mainFileString)
  82. # %%
  83. # %%