# interop_0.py -- header from the original file-viewer listing (1.4 KB, 54 lines)

  # %%
import csv
import json
# %%
# Import lems list + info files + authority files
#
# Notebook-style script: each "# %%" marker starts a cell.  This cell loads
# one JSON file and three CSV files into memory; the later cells only inspect
# those in-memory structures.


def _read_csv_rows(path):
    """Return every data row of the CSV file at *path* as a list of dicts.

    The header line of the file supplies the dictionary keys
    (``csv.DictReader`` behaviour).
    """
    # newline='' is what the csv module documentation asks for, so that line
    # breaks embedded in quoted fields are interpreted by the csv parser.
    with open(path, newline='') as infile:
        return list(csv.DictReader(infile))


basedir = '/home/kora/Desktop/OVI_Data/Development/Parser/Data/'
ovidir = 'DallOVI/'
aspodir = 'DallASPO/'
# datini OVI-ASPO lems, from OVI
# The file is opened in a ``with`` block so the handle is closed once parsed.
with open(basedir + ovidir + 'datiniXML/power_lemmarioB.json', 'r') as infile:
    lems = json.load(infile)
# datini people EAC, from ASPO
data_eac = _read_csv_rows(basedir + aspodir + 'data_eac_datini.csv')
# datini OVI-ASPO data, OVI side
datini_oviaspo_1 = _read_csv_rows(basedir + 'FULL_MERGED.csv')
# datini OVI-ASPO data, ASPO side
datini_oviaspo_2 = _read_csv_rows(basedir + 'datini_ASPOOVI.csv')
# %%
# Keep the entries whose lemma has category 'antr.' and whose note is neither
# empty nor 'nome di famiglia'.
# NOTE(review): 'antr.' presumably marks anthroponyms and 'nome di famiglia'
# is Italian for "family name" -- confirm against the lemma list schema.
iccio = [
    entry for entry in lems
    if entry['lemma']['categoria'] == 'antr.'
    and entry['lemma']['note'] not in ('', 'nome di famiglia')
]
# %%
len(iccio)
# %%
# Column names available in each CSV source (taken from its first row).
datini_oviaspo_1[0].keys()
# %%
datini_oviaspo_2[0].keys()
# %%
data_eac[0].keys()
# %%
# Reduce every OVI-side row to its 'mittente' / 'destinatario' columns
# (Italian for sender / recipient).
puppa = [
    {key: row[key] for key in ('mittente', 'destinatario')}
    for row in datini_oviaspo_1
]
# %%
puppa
# %%
#######
# TRY TO EXTRACT ALL PEOPLE-RELATED INFO FROM ALL FILES
# FOR A GIVEN RANDOM LETTER
print(len(iccio))
for lem in iccio:
    print(lem['lemma']['note'])
# %%
# Entries whose note is exactly 'v. commento'.
# NOTE(review): presumably an abbreviation of "vedi commento" ("see comment")
# -- confirm.
aglio = [entry for entry in iccio if entry['lemma']['note'] == 'v. commento']
print(aglio)
# %%