# prepare_main_evt_file.py

  1. # %%
  2. # Imports
  3. import xml.etree.ElementTree as ET
  4. import json
  5. import os
  6. basedir = '../../DATA/'
  7. baseindir = basedir + 'OVI/datiniXML/xmlgat/'
  8. baseoutdir = basedir + 'OVI/datiniXML/xmlevt/'
  9. # %%
  10. # This is to handle the xmnls attribute in the TEI element in the templates
  11. uri1 = "{http://www.tei-c.org/ns/1.0}"
  12. namespaces = {
  13. '': "http://www.tei-c.org/ns/1.0",
  14. }
  15. for prefix, uri in namespaces.items():
  16. ET.register_namespace(prefix, uri)
  17. # Reference directories
  18. basedir = '../../DATA/'
  19. baseindir = basedir + 'OVI/datiniXML/xmlgat/'
  20. baseoutdir = basedir + 'OVI/datiniXML/xmlevt/'
  21. # %%
  22. # Import lems list + xml info file
  23. # get lem list as a json object
  24. lemfile = basedir + 'OVI/datiniXML/power_lemmarioB.json'
  25. lems = json.load(open(lemfile, 'r'))
  26. # %%
  27. persNames = [lem for lem in lems if lem['lemma']['categoria']=='antr.']
  28. placeNames = [lem for lem in lems if lem['lemma']['categoria']=='n.g.']
  29. fileNames = os.listdir(baseoutdir + "lettere/")
  30. # %%
  31. print(len(persNames), len(lems), len(placeNames))
  32. print(len(os.listdir(baseindir)), len(os.listdir(baseoutdir + 'lettere/')))
  33. # %%
  34. placeTemplate ='''
  35. <place xml:id="PLACE_ID">
  36. <settlement type="">PLACE_NAME</settlement>
  37. <note></note>
  38. </place>'''
  39. personTemplate ='''
  40. <person xml:id="PERSON_ID">
  41. <persName>PERSON_NAME</persName>
  42. <sex></sex>
  43. </person>'''
  44. fileTemplate = '<xi:include href="lettere/LETTER_FILENAME" xmlns:xi="http://www.w3.org/2001/XInclude" xpointer="LETTER_TEXTPOINT"/>'
  45. fileIndent = ' '
  46. # %%
  47. persNames[10]
  48. # %%
  49. persListString = ""
  50. for person in persNames:
  51. persListString = persListString + personTemplate.replace('PERSON_ID', ''+str(person['id'])).replace('PERSON_NAME', person['lemma']['forma_standard'])
  52. placeListString = ""
  53. for place in placeNames:
  54. placeListString = placeListString + placeTemplate.replace('PLACE_ID', ''+str(place['id'])).replace('PLACE_NAME', '"'+place['lemma']['forma_standard']+'"')
  55. fileListString = ""
  56. for ii, f1 in enumerate(fileNames):
  57. fileListString = fileListString + fileTemplate.replace('LETTER_FILENAME', f1).replace('LETTER_TEXTPOINT', f1.replace('.xml', '_text'))
  58. if ii<len(fileNames)-1:
  59. fileListString = fileListString + '\n' + fileIndent
  60. # %%
  61. with open('pre_main_template.xml', 'r') as f1:
  62. mainTemplateString = f1.read()
  63. # %%
  64. mainFileString = mainTemplateString.replace('<PERSON_LIST>', persListString).replace('<PLACE_LIST', placeListString).replace('<FILE_LIST>', fileListString)
  65. # %%
  66. with open('test_main.xml', 'w') as f2:
  67. f2.write(mainFileString)
  68. # %%
  69. with open(baseoutdir + 'main.xml', 'w') as f3:
  70. f3.write(mainFileString)
  71. # %%
  72. # %%