# names.py (3.2 KB)
  1. # %%
  2. import json
  3. import csv
  4. # %%
  5. with open("power_lemmarioB.json") as file1:
  6. lems = json.load(file1)
  7. # %%
  8. antroponyms = list(filter( lambda lem: lem['lemma']['categoria']=='antr.' , lems))
  9. # %%
  10. print(len(antroponyms))
  11. print(len(lems))
  12. # %%
  13. antr2 = list(map( lambda el: el['lemma'], antroponyms ))
  14. print(len(antr2))
  15. antr3 = list(filter( lambda el: el['note']=='', antr2 ))
  16. print(len(antr3))
  17. # %%
  18. with open("antroponimi_ovi.csv", 'w') as outfile:
  19. writer = csv.DictWriter(outfile, fieldnames=antr2[0].keys())
  20. writer.writeheader()
  21. writer.writerows(antr2)
  22. with open("antroponimi_short_ovi.csv", 'w') as outfile:
  23. writer = csv.DictWriter(outfile, fieldnames=antr3[0].keys())
  24. writer.writeheader()
  25. writer.writerows(antr3)
  26. # %%
  27. letters = []
  28. letterIDs = {}
  29. with open("../BiblioDatini_IDAspo.csv") as file2:
  30. reader = csv.DictReader(file2)
  31. for row in reader:
  32. letters.append(row)
  33. letterIDs[row['sigla']] = row['id']
  34. # %%
  35. print(len(letters))
  36. print(len(letterIDs))
  37. # %%
  38. letterIDsList = list(letterIDs.values())
  39. # %%
  40. antroponymsBySigla = {sigla: [] for sigla in letterIDs.keys()}
  41. for antr in antroponyms:
  42. sigle = list(map( lambda str1: str1.split("_")[0].upper(), antr['coordinate'] ))
  43. antr['lemma'].update({'id': antr['id']})
  44. for sigla in sigle:
  45. try:
  46. antroponymsBySigla[sigla].append(antr['lemma'])
  47. except:
  48. pass
  49. # %%
  50. antroponymsBySigla
  51. # %%
  52. datiniRelevantItems = []
  53. with open("../../data_item.csv") as file3:
  54. reader = csv.DictReader(file3)
  55. for row in reader:
  56. if row['id'] in letterIDsList:
  57. datiniRelevantItems.append(row)
  58. # %%
  59. print(len(letterIDsList))
  60. print(len(datiniRelevantItems))
  61. # %%
  62. def takeRelevantProperties(obj):
  63. toRet = {}
  64. toRet['id'] = obj['id']
  65. for tag in ['persona_destinatario', 'persona_mittente', 'persona_indirizzata', 'persona_mano']:
  66. if obj[tag]=='':
  67. toRet[tag] = ''
  68. else:
  69. toRet[tag] = json.loads(obj[tag])['authID']
  70. return toRet
  71. datiniRelevantStuff = {obj['id']: obj for obj in map( takeRelevantProperties, datiniRelevantItems ) }
  72. # %%
  73. datiniRelevantStuff
  74. # %%
  75. datiniEAC = {}
  76. with open("../../DallASPO/data_eac_datini.csv") as file4:
  77. reader = csv.DictReader(file4)
  78. for row in reader:
  79. datiniEAC[row['recordId']] = row
  80. # %%
  81. len(datiniEAC)
  82. # %%
  83. datiniEAC
  84. # %%
  85. prr = datiniRelevantStuff[list(datiniRelevantStuff.keys())[0]]['persona_mittente']
  86. datiniEAC[prr]
  87. # %%
  88. def processDatini(obj):
  89. toRet = []
  90. for tag in ['persona_destinatario', 'persona_mittente', 'persona_indirizzata', 'persona_mano']:
  91. if obj[tag] != '':
  92. toRet.append(datiniEAC[obj[tag]])
  93. return toRet
  94. out = {}
  95. for sigla in antroponymsBySigla:
  96. out[sigla] = {}
  97. out[sigla]['sigla'] = sigla
  98. out[sigla]['ovi'] = antroponymsBySigla[sigla]
  99. out[sigla]['aspo'] = processDatini(datiniRelevantStuff[letterIDs[sigla]])
  100. # %%
  101. with open("temp.json", 'w') as outfile1:
  102. json.dump(out, outfile1, indent = 2)
  103. # %%
  104. aspos = []
  105. tmp = [out[sigla]['aspo'] for sigla in out]
  106. for list in tmp:
  107. aspos = aspos + list
  108. # %%
  109. asposIDs = set(map( lambda el: el['recordId'], aspos ))
  110. # %%
  111. len(asposIDs)
  112. # %%
  113. len(datiniEAC)
  114. # %%
  115. len(antroponyms)
  116. # %%