# %%
import json
import csv

# %%
# Load the full list of lemma records. JSON text is UTF-8 by specification,
# so the encoding is pinned instead of depending on the platform default.
with open("power_lemmarioB.json", encoding="utf-8") as file1:
    lems = json.load(file1)

# %%
# Keep only the records whose lemma is tagged with category 'antr.'.
antroponyms = [lem for lem in lems if lem['lemma']['categoria'] == 'antr.']

# %%
print(len(antroponyms))
print(len(lems))

# %%
# antr2: the bare 'lemma' dicts of the selected records.
# antr3: the subset of antr2 whose 'note' field is empty.
antr2 = [el['lemma'] for el in antroponyms]
print(len(antr2))
antr3 = [el for el in antr2 if el['note'] == '']
print(len(antr3))


# %%
def _write_dict_csv(path, rows):
    """Write a list of dicts to ``path`` as CSV.

    The header is taken from the keys of the first row. If ``rows`` is
    empty there is no header to derive, so the file is left empty and a
    notice is printed instead of failing with an IndexError.

    Raises:
        ValueError: from ``csv.DictWriter`` if a later row has a key that
            the first row does not have.
    """
    # newline='' is what the csv module asks for; without it, platforms that
    # translate "\n" to "\r\n" end up with a blank line after every row.
    with open(path, 'w', newline='') as outfile:
        if not rows:
            print("no rows to write, file left empty:", path)
            return
        writer = csv.DictWriter(outfile, fieldnames=list(rows[0]))
        writer.writeheader()
        writer.writerows(rows)


# NOTE: this export runs before the cell below that adds an 'id' key to each
# lemma dict, so on a top-to-bottom run the CSV columns do not include 'id'.
_write_dict_csv("antroponimi_ovi.csv", antr2)
_write_dict_csv("antroponimi_short_ovi.csv", antr3)

# %%
# letters: every row of the sigla/id table; letterIDs: sigla -> id.
# If a sigla occurs more than once, the last row wins in letterIDs.
letters = []
letterIDs = {}
with open("../BiblioDatini_IDAspo.csv", newline='') as file2:
    reader = csv.DictReader(file2)
    for row in reader:
        letters.append(row)
        letterIDs[row['sigla']] = row['id']

# %%
print(len(letters))
print(len(letterIDs))

# %%
letterIDsList = list(letterIDs.values())

# %%
# Group the lemma dicts by letter sigla. Each entry of 'coordinate' is reduced
# to the text before its first "_", upper-cased, and matched against the
# sigla table; coordinates pointing to an unknown sigla are skipped on purpose.
antroponymsBySigla = {sigla: [] for sigla in letterIDs}
for antr in antroponyms:
    sigle = [coord.split("_")[0].upper() for coord in antr['coordinate']]
    # Carry the record id along with the lemma (mutates the lemma dict in place).
    antr['lemma']['id'] = antr['id']
    for sigla in sigle:
        # A lemma is appended once per coordinate, so several coordinates in
        # the same letter yield repeated entries for that sigla.
        if sigla in antroponymsBySigla:
            antroponymsBySigla[sigla].append(antr['lemma'])

# %%
antroponymsBySigla

# %%
# Keep only the item rows whose id belongs to one of the known letters.
# The ids are put in a set once so each row costs a constant-time lookup.
_relevantIDs = set(letterIDsList)
with open("../../data_item.csv", newline='') as file3:
    reader = csv.DictReader(file3)
    datiniRelevantItems = [row for row in reader if row['id'] in _relevantIDs]

# %%
print(len(letterIDsList))
print(len(datiniRelevantItems))

# %%
# Columns of an item row that reference a person record.
PERSON_TAGS = (
    'persona_destinatario',
    'persona_mittente',
    'persona_indirizzata',
    'persona_mano',
)


def takeRelevantProperties(obj):
    """Reduce an item row to its id and the authority id of each person column.

    Args:
        obj: mapping with an 'id' key and one key per entry of PERSON_TAGS;
            each person value is either '' or a JSON object string that has
            an 'authID' member.

    Returns:
        dict with 'id' plus one key per person tag, holding the 'authID'
        value or '' when the column is empty.

    Raises:
        KeyError: if a column or the 'authID' member is missing.
        json.JSONDecodeError: if a non-empty person column is not valid JSON.
    """
    toRet = {'id': obj['id']}
    for tag in PERSON_TAGS:
        raw = obj[tag]
        toRet[tag] = '' if raw == '' else json.loads(raw)['authID']
    return toRet


# item id -> reduced item
datiniRelevantStuff = {
    obj['id']: obj for obj in map(takeRelevantProperties, datiniRelevantItems)
}

# %%
datiniRelevantStuff

# %%
# recordId -> full row of the EAC table.
datiniEAC = {}
with open("../../DallASPO/data_eac_datini.csv", newline='') as file4:
    reader = csv.DictReader(file4)
    for row in reader:
        datiniEAC[row['recordId']] = row

# %%
len(datiniEAC)

# %%
datiniEAC

# %%
# Spot check: look up the sender of the first reduced item in the EAC table.
prr = datiniRelevantStuff[list(datiniRelevantStuff)[0]]['persona_mittente']
datiniEAC[prr]


# %%
def processDatini(obj):
    """Return the EAC rows for every non-empty person column of ``obj``.

    Args:
        obj: reduced item as produced by ``takeRelevantProperties``.

    Returns:
        list of rows from the module-level ``datiniEAC`` table, in
        PERSON_TAGS order, skipping empty columns.

    Raises:
        KeyError: if a referenced id is not present in ``datiniEAC``.
    """
    return [datiniEAC[obj[tag]] for tag in PERSON_TAGS if obj[tag] != '']


# One entry per sigla, pairing the grouped lemmas with the person rows of the
# corresponding letter.
out = {}
for sigla in antroponymsBySigla:
    out[sigla] = {
        'sigla': sigla,
        'ovi': antroponymsBySigla[sigla],
        'aspo': processDatini(datiniRelevantStuff[letterIDs[sigla]]),
    }

# %%
with open("temp.json", 'w') as outfile1:
    json.dump(out, outfile1, indent=2)

# %%
# Flatten all 'aspo' lists into one list. The loop variable is deliberately
# not called `list`: rebinding the builtin breaks every earlier cell on re-run.
tmp = [out[sigla]['aspo'] for sigla in out]
aspos = [record for records in tmp for record in records]

# %%
asposIDs = {el['recordId'] for el in aspos}

# %%
len(asposIDs)

# %%
len(datiniEAC)

# %%
len(antroponyms)

# %%