123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- # %%
- import csv
- import re
- from collections import OrderedDict
- # %%
# Input/output locations. Paths are assembled elsewhere by plain string
# concatenation, so every directory component keeps its trailing slash.
base_import_dir = "/home/kora/Desktop/OVI_Data/Development/Parser/Data/"

# Sub-directory and file name of the OVI CSV.
ovidir = "DallOVI/"
ovifilename = "BiblioDatini.csv"

# Sub-directory and file name of the ASPO CSV.
aspodir = "DallASPO/"
aspofilename = "data_item.csv"
- # %%
# Load both CSV files completely into memory. Each element is one
# csv.DictReader row, i.e. a dict keyed by the column names of the header line.
ovidata = []
with open(base_import_dir + ovidir + ovifilename, newline="") as csv_file:
    reader = csv.DictReader(csv_file)
    ovidata.extend(reader)

aspodata = []
with open(base_import_dir + aspodir + aspofilename, newline="") as csv_file:
    reader = csv.DictReader(csv_file)
    aspodata.extend(reader)
- # %%
# Distinct 'segnatura_codice' values of the ASPO rows.
# NOTE(review): the first ASPO row is skipped here although csv.DictReader has
# already consumed the header line -- confirm that row is not real data.
aspo_codici = {record['segnatura_codice'] for record in aspodata[1:]}
- # %%
# Archive label found in OVI 'segnatura' values, with and without the space
# that follows "n.".
ovi_prefix = "ASPrato, Archivio Datini, n. "
ovi_prefix_2 = "ASPrato, Archivio Datini, n."

# Distinct OVI 'segnatura' values with the archive label removed. The longer
# variant is removed first so that no stray leading space is left behind.
# str.replace removes every occurrence, not only a leading one.
ovi_segnature_raw = {
    record['segnatura'].replace(ovi_prefix, '').replace(ovi_prefix_2, '')
    for record in ovidata
}
- # %%
def process_ovi_segn(entry: str):
    """Split a label-free OVI 'segnatura' string into its components.

    The string is expected to look like ``"<left>, <code>"`` where ``<left>``
    may itself be ``"<a>/<b>"``.

    Returns:
        * ``(entry,)`` when the string does not contain exactly one ``", "``;
        * ``(left, code)`` when ``left`` has no slash or more than one slash;
        * ``(a, b, code)`` when ``left`` has exactly one slash.

    In every multi-element result the code is the last element.
    """
    separator = ', '

    # Zero or several separators: hand the string back untouched.
    if entry.count(separator) != 1:
        return (entry,)

    left, _, code = entry.partition(separator)
    pieces = left.split('/')

    # More than one slash: keep the left part as a single component.
    if len(pieces) > 2:
        return (left, code)

    return tuple(pieces) + (code,)
- # %%
# Candidate codes on the OVI side: the last component of each parsed
# 'segnatura' (for a string that could not be split this is the whole string).
ovi_codici = {process_ovi_segn(raw)[-1] for raw in ovi_segnature_raw}
# %%
# Codes that occur in both sources, as a sorted list.
real_intersection = sorted(ovi_codici & aspo_codici)
- # %%
# Add a 'segnatura_codice' column to every OVI row: the last parsed component,
# or an empty string when the 'segnatura' could not be split.
for item in ovidata:
    stripped = item['segnatura'].replace(ovi_prefix, '').replace(ovi_prefix_2, '')
    components = process_ovi_segn(stripped)
    item['segnatura_codice'] = components[-1] if len(components) > 1 else ''
- # %%
# Group the rows of both sources under every shared code:
#   datini_final[codice] == {'codice': codice, 'aspo': [rows], 'ovi': [rows]}
datini_final = {
    codice: {'codice': codice, 'aspo': [], 'ovi': []}
    for codice in real_intersection
}

# The keys of datini_final are exactly the shared codes, so a dict lookup
# replaces the per-row linear scan of the real_intersection list.
# NOTE(review): the first ASPO row is skipped (aspodata[1:]) although
# csv.DictReader has already consumed the header line -- confirm.
for item in aspodata[1:]:
    group = datini_final.get(item['segnatura_codice'])
    if group is not None:
        group['aspo'].append(item)

for item in ovidata:
    group = datini_final.get(item['segnatura_codice'])
    if group is not None:
        group['ovi'].append(item)
- # %%
def mapdict(some: dict):
    """Render a dict as text, one ``"key: value"`` line per non-blank entry.

    Entries whose value is falsy, equal to the empty string, or whose string
    form consists only of whitespace are left out. Every emitted line ends
    with a newline; an input with nothing to show yields ``''``.
    """
    lines = [
        str(key) + ': ' + str(value) + '\n'
        for key, value in some.items()
        if value and value != '' and not str(value).isspace()
    ]
    return ''.join(lines)
- # %%
- ## EXPORT SECTION ##
# Human-readable dump: for every shared code, the matching ASPO rows followed
# by the matching OVI rows; several rows of one source are separated by "e/o".
record_separator = '\ne/o\n\n'
with open(base_import_dir + 'final_out_2.txt', 'w') as outfile1:
    for this_codice in real_intersection:
        entry = datini_final[this_codice]
        aspo_text = record_separator.join(map(mapdict, entry['aspo']))
        ovi_text = record_separator.join(map(mapdict, entry['ovi']))
        this_str = ''.join([
            'CODICE: ', entry['codice'], '\n\n',
            'ASPO:\n', '---\n', aspo_text, '\n\n',
            'OVI:\n', '---\n', ovi_text,
            '##################\n\n\n',
        ])
        outfile1.write(this_str)
- # %%
# Report of the OVI rows whose code is not shared with ASPO, in two sections:
# rows that do have a 'segnatura' (odd or unmatched) and rows that have none.
# The running number kk continues across both sections.
matched_codes = set(real_intersection)  # O(1) membership instead of list scans
unmatched = [
    item for item in ovidata
    if item['segnatura_codice'] not in matched_codes
]

# A 'segnatura' counts as missing when it is empty or whitespace-only.
# str.isspace() alone is not enough: ''.isspace() is False, which would file
# rows with an empty 'segnatura' under the first section.
sections = (
    ('CON SEGNATURA STRANA O NON RISCONTRATA\n\n\n',
     [item for item in unmatched if item['segnatura'].strip()]),
    ('\n\nSENZA SEGNATURA\n\n\n',
     [item for item in unmatched if not item['segnatura'].strip()]),
)

kk = 0
with open(base_import_dir + 'record_marci_2.txt', 'w') as outfile1:
    for heading, records in sections:
        outfile1.write(heading)
        for item in records:
            kk += 1
            outfile1.write(str(kk) + ':\n')
            outfile1.write(mapdict(item))
            outfile1.write('#################\n\n')
- # %%
# Build the merged table: one output row per matched OVI row, with a leading
# 'segnatura_aspo' column assembled from the first ASPO row of the same code,
# followed by 'segnatura', 'segnatura_codice', 'sigla' and then the remaining
# OVI columns in their original order.
leading_keys = ['segnatura', 'segnatura_codice', 'sigla']

newOviFinal = []
for group in datini_final.values():
    # Every shared code has at least one ASPO row, since the codes come from
    # the intersection with the ASPO code set.
    aspo1 = group['aspo'][0]
    for row in group['ovi']:
        out1 = OrderedDict()
        out1['segnatura_aspo'] = (
            aspo1['segnatura_busta'] + '.' + aspo1['segnatura_inserto']
            + ', ' + aspo1['segnatura_codice']
        )
        for key in leading_keys:
            out1[key] = row[key]
        for key in row:
            if key not in leading_keys:
                out1[key] = row[key]
        newOviFinal.append(out1)

# newline='' is required for files handed to csv.writer; without it the
# writer's '\r\n' terminators are translated again on some platforms.
with open(base_import_dir + 'FULL_MERGED.csv', 'w', newline='') as outfile1:
    # With no merged rows there is no header to derive; leave the file empty
    # instead of failing on newOviFinal[0].
    if newOviFinal:
        writer = csv.DictWriter(outfile1, fieldnames=list(newOviFinal[0].keys()))
        writer.writeheader()
        writer.writerows(newOviFinal)
- # %%
# ASPO rows whose code is shared with OVI.
# NOTE(review): this filter runs over all of aspodata, whereas the code set
# and the grouping above skip the first row (aspodata[1:]) -- confirm which
# of the two is intended.
matched_codes = set(real_intersection)  # O(1) membership instead of list scans
datini_aspoovi = [
    row for row in aspodata
    if row['segnatura_codice'] in matched_codes
]
# %%
# newline='' is required for files handed to csv.writer; without it the
# writer's '\r\n' terminators are translated again on some platforms.
with open(base_import_dir + 'datini_ASPOOVI.csv', 'w', newline='') as outfile1:
    # With no matching rows there is no header to derive; leave the file
    # empty instead of failing on datini_aspoovi[0].
    if datini_aspoovi:
        writer = csv.DictWriter(outfile1, fieldnames=list(datini_aspoovi[0].keys()))
        writer.writeheader()
        # The rows were already filtered above; write them as they are.
        writer.writerows(datini_aspoovi)
- # %%
|