Merger.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. # %%
  2. import csv
  3. import re
  4. from collections import OrderedDict
  5. # %%
  6. base_import_dir = '/home/kora/Desktop/OVI_Data/Development/Parser/Data/'
  7. ovidir = 'DallOVI/'
  8. ovifilename = 'BiblioDatini.csv'
  9. aspodir = 'DallASPO/'
  10. aspofilename = 'data_item.csv'
  11. # %%
  12. ovidata = []
  13. with open(base_import_dir + ovidir + ovifilename, newline="") as csv_file:
  14. reader = csv.DictReader(csv_file)
  15. for row in reader:
  16. ovidata.append(row)
  17. aspodata = []
  18. with open(base_import_dir + aspodir + aspofilename, newline="") as csv_file:
  19. reader = csv.DictReader(csv_file)
  20. for row in reader:
  21. aspodata.append(row)
  22. # %%
  23. aspo_codici = set(map( lambda el: el['segnatura_codice'], aspodata[1:] ))
  24. # %%
  25. ovi_prefix = "ASPrato, Archivio Datini, n. "
  26. ovi_prefix_2 = "ASPrato, Archivio Datini, n."
  27. ovi_segnature_raw = set(map( lambda el: el['segnatura'].replace(ovi_prefix, '').replace(ovi_prefix_2, ''), ovidata ))
  28. # %%
  29. def process_ovi_segn(entry: str):
  30. split = entry.split(', ')
  31. if(len(split)!=2):
  32. return (entry,)
  33. preComma = split[0]
  34. postComma = split[1]
  35. split2 = preComma.split('/')
  36. if(len(split2)>2):
  37. return (preComma, postComma)
  38. return tuple( split2 + [postComma] )
  39. # %%
  40. ovi_codici = set(map( lambda el: process_ovi_segn(el)[-1], ovi_segnature_raw ))
  41. # %%
  42. real_intersection = list(ovi_codici & aspo_codici)
  43. real_intersection.sort()
  44. # %%
  45. for item in ovidata:
  46. segnatura_raw = item['segnatura'].replace(ovi_prefix, '').replace(ovi_prefix_2, '')
  47. processed = process_ovi_segn(segnatura_raw)
  48. if(len(processed)>1):
  49. item['segnatura_codice'] = processed[-1]
  50. else:
  51. item['segnatura_codice'] = ''
  52. # %%
  53. datini_final = {}
  54. for codice in real_intersection:
  55. datini_final[codice] = {'codice': codice, 'aspo': [], 'ovi':[]}
  56. for item in aspodata[1:]:
  57. if item['segnatura_codice'] in real_intersection:
  58. datini_final[item['segnatura_codice']]['aspo'].append(item)
  59. for item in ovidata:
  60. if item['segnatura_codice'] in real_intersection:
  61. datini_final[item['segnatura_codice']]['ovi'].append(item)
  62. # %%
  63. def mapdict(some: dict):
  64. toRet = ''
  65. for key in some.keys():
  66. if(some[key] and some[key]!='' and not str(some[key]).isspace()):
  67. toRet = toRet + str(key) + ': ' + str(some[key]) + '\n'
  68. return toRet
  69. # %%
  70. ## EXPORT SECTION ##
  71. with open(base_import_dir + 'final_out_2.txt', 'w') as outfile1:
  72. for this_codice in real_intersection:
  73. this_str = 'CODICE: ' + datini_final[this_codice]['codice'] + '\n\n' + 'ASPO:\n' + '---\n' + '\ne/o\n\n'.join( list(map(mapdict, datini_final[this_codice]['aspo'])) ) + '\n\n' + 'OVI:\n' + '---\n' + '\ne/o\n\n'.join( list(map(mapdict, datini_final[this_codice]['ovi'])) ) + '##################\n\n\n'
  74. outfile1.write(this_str)
  75. # %%
  76. kk = 0
  77. with open(base_import_dir + 'record_marci_2.txt', 'w') as outfile1:
  78. outfile1.write('CON SEGNATURA STRANA O NON RISCONTRATA\n\n\n')
  79. for item in ovidata:
  80. if item['segnatura_codice'] not in real_intersection and not item['segnatura'].isspace():
  81. kk = kk+1
  82. outfile1.write(str(kk)+':\n')
  83. outfile1.write(mapdict(item))
  84. outfile1.write('#################\n\n')
  85. outfile1.write('\n\nSENZA SEGNATURA\n\n\n')
  86. for item in ovidata:
  87. if item['segnatura_codice'] not in real_intersection and item['segnatura'].isspace():
  88. kk = kk+1
  89. outfile1.write(str(kk)+':\n')
  90. outfile1.write(mapdict(item))
  91. outfile1.write('#################\n\n')
  92. # %%
  93. newOviFinal = []
  94. for ahia in datini_final.values():
  95. paglia = ahia['ovi']
  96. aspo1 = ahia['aspo'][0]
  97. for row in paglia:
  98. myKeys = list(row.keys())
  99. mySegn = [myKeys.index('segnatura'), myKeys.index('segnatura_codice'), myKeys.index('sigla')]
  100. theRest = list(filter(lambda appa: appa not in mySegn, range(len(myKeys))))
  101. myOrder = mySegn + theRest
  102. myNewKeys = list(myKeys[kk] for kk in myOrder)
  103. out1 = OrderedDict()
  104. out1['segnatura_aspo'] = aspo1['segnatura_busta'] + '.' + aspo1['segnatura_inserto'] + ', ' + aspo1['segnatura_codice']
  105. out1.update(OrderedDict((k, row[k]) for k in myNewKeys))
  106. newOviFinal.append(out1)
  107. with open(base_import_dir + 'FULL_MERGED.csv', 'w') as outfile1:
  108. writer = csv.DictWriter(outfile1, fieldnames=newOviFinal[0].keys())
  109. writer.writeheader()
  110. writer.writerows(newOviFinal)
  111. # %%
  112. datini_aspoovi = list(filter( lambda el: el['segnatura_codice'] in real_intersection, aspodata ))
  113. # %%
  114. with open(base_import_dir + 'datini_ASPOOVI.csv', 'w') as outfile1:
  115. writer = csv.DictWriter(outfile1, fieldnames=datini_aspoovi[0].keys())
  116. writer.writeheader()
  117. for row in aspodata:
  118. if row['segnatura_codice'] in real_intersection:
  119. writer.writerow(row)
  120. # %%