# %%
import csv
import re
from collections import OrderedDict

# %%
# Locations of the two input CSV files (OVI and ASPO) and of the outputs,
# which are written directly under base_import_dir.
base_import_dir = '/home/kora/Desktop/OVI_Data/Development/Parser/Data/'
ovidir = 'DallOVI/'
ovifilename = 'BiblioDatini.csv'
aspodir = 'DallASPO/'
aspofilename = 'data_item.csv'

# %%
# Load both CSV files fully into memory as lists of per-row dicts.
with open(base_import_dir + ovidir + ovifilename, newline="") as csv_file:
    ovidata = list(csv.DictReader(csv_file))

with open(base_import_dir + aspodir + aspofilename, newline="") as csv_file:
    aspodata = list(csv.DictReader(csv_file))

# %%
# NOTE(review): csv.DictReader already consumes the header line, so the
# [1:] slice drops the first *data* row. Kept as-is in case the ASPO file
# carries a second header-like row -- confirm against the data.
aspo_codici = {el['segnatura_codice'] for el in aspodata[1:]}

# %%
ovi_prefix = "ASPrato, Archivio Datini, n. "
ovi_prefix_2 = "ASPrato, Archivio Datini, n."


def _strip_ovi_prefix(segnatura: str) -> str:
    """Return *segnatura* with every occurrence of the OVI prefixes removed.

    The longer prefix (with trailing space) is removed first so that the
    shorter one only catches entries written without the space.
    """
    return segnatura.replace(ovi_prefix, '').replace(ovi_prefix_2, '')


ovi_segnature_raw = {_strip_ovi_prefix(el['segnatura']) for el in ovidata}


# %%
def process_ovi_segn(entry: str) -> tuple:
    """Split a prefix-less OVI signature into its components.

    Args:
        entry: Signature text, expected in the form ``"<pre>, <post>"``.

    Returns:
        ``(entry,)`` when *entry* does not contain exactly one ``', '``
        separator; ``(pre, post)`` when ``pre`` has more than one ``'/'``;
        otherwise the ``'/'``-separated pieces of ``pre`` followed by
        ``post``. When the split succeeds, the last element is always the
        text after the comma.
    """
    parts = entry.split(', ')
    if len(parts) != 2:
        return (entry,)
    pre_comma, post_comma = parts
    pieces = pre_comma.split('/')
    if len(pieces) > 2:
        # Too many slashes to interpret: keep the left-hand side whole.
        return (pre_comma, post_comma)
    return (*pieces, post_comma)


# %%
ovi_codici = {process_ovi_segn(el)[-1] for el in ovi_segnature_raw}

# %%
# Codes present in both sources. The sorted list drives the output order;
# the frozenset gives O(1) membership tests in the loops below.
real_intersection = sorted(ovi_codici & aspo_codici)
_matched_codici = frozenset(real_intersection)

# %%
# Attach the extracted code to every OVI row; rows whose signature could
# not be split get an empty code.
for item in ovidata:
    processed = process_ovi_segn(_strip_ovi_prefix(item['segnatura']))
    item['segnatura_codice'] = processed[-1] if len(processed) > 1 else ''

# %%
# Group the rows of both sources by shared code.
datini_final = {
    codice: {'codice': codice, 'aspo': [], 'ovi': []}
    for codice in real_intersection
}

for item in aspodata[1:]:
    if item['segnatura_codice'] in _matched_codici:
        datini_final[item['segnatura_codice']]['aspo'].append(item)

for item in ovidata:
    if item['segnatura_codice'] in _matched_codici:
        datini_final[item['segnatura_codice']]['ovi'].append(item)


# %%
def mapdict(some: dict) -> str:
    """Render a dict as ``"key: value"`` lines, one per non-blank value.

    Entries whose value is falsy, or whose string form consists only of
    whitespace, are skipped. Every emitted line ends with a newline.
    """
    return ''.join(
        str(key) + ': ' + str(value) + '\n'
        for key, value in some.items()
        if value and not str(value).isspace()
    )


# %%
## EXPORT SECTION ##
_RECORD_SEPARATOR = '\ne/o\n\n'

with open(base_import_dir + 'final_out_2.txt', 'w') as outfile1:
    for this_codice in real_intersection:
        entry = datini_final[this_codice]
        this_str = (
            'CODICE: ' + entry['codice'] + '\n\n'
            + 'ASPO:\n' + '---\n'
            + _RECORD_SEPARATOR.join(map(mapdict, entry['aspo']))
            + '\n\n'
            + 'OVI:\n' + '---\n'
            + _RECORD_SEPARATOR.join(map(mapdict, entry['ovi']))
            + '##################\n\n\n'
        )
        outfile1.write(this_str)


# %%
def _write_numbered(outfile, items, start: int) -> int:
    """Write each item as a numbered mapdict() block; return the last number."""
    counter = start
    for item in items:
        counter += 1
        outfile.write(str(counter) + ':\n')
        outfile.write(mapdict(item))
        outfile.write('#################\n\n')
    return counter


# OVI rows whose code was not found in the ASPO data.
_unmatched = [
    item for item in ovidata
    if item['segnatura_codice'] not in _matched_codici
]

kk = 0
with open(base_import_dir + 'record_marci_2.txt', 'w') as outfile1:
    outfile1.write('CON SEGNATURA STRANA O NON RISCONTRATA\n\n\n')
    # A signature counts as present only if it has non-whitespace content.
    # (str.isspace() is False for '', which used to file empty signatures
    # in this section instead of the "SENZA SEGNATURA" one.)
    kk = _write_numbered(
        outfile1,
        (item for item in _unmatched if item['segnatura'].strip()),
        kk,
    )
    outfile1.write('\n\nSENZA SEGNATURA\n\n\n')
    kk = _write_numbered(
        outfile1,
        (item for item in _unmatched if not item['segnatura'].strip()),
        kk,
    )

# %%
# Build the merged table: one output row per matched OVI row, prefixed by
# the signature assembled from the first ASPO row sharing the same code.
_LEADING_COLUMNS = ('segnatura', 'segnatura_codice', 'sigla')

newOviFinal = []
for ahia in datini_final.values():
    aspo1 = ahia['aspo'][0]
    for row in ahia['ovi']:
        out1 = OrderedDict()
        out1['segnatura_aspo'] = (
            aspo1['segnatura_busta'] + '.' + aspo1['segnatura_inserto']
            + ', ' + aspo1['segnatura_codice']
        )
        # Signature columns first, then the remaining columns in their
        # original order.
        for k in _LEADING_COLUMNS:
            out1[k] = row[k]
        for k in row:
            if k not in _LEADING_COLUMNS:
                out1[k] = row[k]
        newOviFinal.append(out1)

# newline="" is required for files handed to csv.writer, otherwise row
# endings are translated a second time on some platforms.
# newOviFinal[0] raises IndexError when no OVI row matched.
with open(base_import_dir + 'FULL_MERGED.csv', 'w', newline="") as outfile1:
    writer = csv.DictWriter(outfile1, fieldnames=newOviFinal[0].keys())
    writer.writeheader()
    writer.writerows(newOviFinal)

# %%
# NOTE(review): unlike the grouping above, this filter runs over *all* of
# aspodata (no [1:] slice) -- confirm which of the two is intended.
datini_aspoovi = [
    el for el in aspodata if el['segnatura_codice'] in _matched_codici
]

# %%
with open(base_import_dir + 'datini_ASPOOVI.csv', 'w', newline="") as outfile1:
    writer = csv.DictWriter(outfile1, fieldnames=datini_aspoovi[0].keys())
    writer.writeheader()
    writer.writerows(datini_aspoovi)

# %%