123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- # %%
- import json
- import csv
- # %%
# Load the full lemma list from disk.
# The encoding is given explicitly: JSON text is UTF-8, so reading must not
# depend on the platform's default locale encoding.
with open("power_lemmarioB.json", encoding="utf-8") as file1:
    lems = json.load(file1)
# %%
# Keep only the entries whose lemma 'categoria' is exactly 'antr.'.
antroponyms = [lem for lem in lems if lem['lemma']['categoria'] == 'antr.']
# %%
print(len(antroponyms))
print(len(lems))
# %%
# antr2: the bare 'lemma' records of the selected entries.
antr2 = [entry['lemma'] for entry in antroponyms]
print(len(antr2))
# antr3: the subset of antr2 whose 'note' field is the empty string.
antr3 = [lemma for lemma in antr2 if lemma['note'] == '']
print(len(antr3))
- # %%
def _write_dict_rows(path, rows):
    """Write a list of dicts to *path* as CSV.

    The header is taken from the keys of the first row. When *rows* is empty
    the file is still created (and truncated) but left without a header,
    instead of failing with IndexError on ``rows[0]``.

    Args:
        path: destination file name.
        rows: list of dicts sharing the keys of the first element.
    """
    # newline='' is required by the csv module so the writer controls line
    # endings itself (otherwise some platforms emit blank lines between rows).
    # UTF-8 is explicit so non-ASCII characters in the records never depend
    # on the platform's default encoding.
    with open(path, 'w', newline='', encoding='utf-8') as outfile:
        if not rows:
            return
        writer = csv.DictWriter(outfile, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)


# Full list of lemma records, then the subset with an empty 'note'.
_write_dict_rows("antroponimi_ovi.csv", antr2)
_write_dict_rows("antroponimi_short_ovi.csv", antr3)
- # %%
# Read the letter table: keep every row, and index 'id' by 'sigla'.
letters = []
letterIDs = {}
# newline='' is what the csv module asks for on files handed to a reader, so
# that line breaks embedded in quoted fields are parsed correctly.
with open("../BiblioDatini_IDAspo.csv", newline='') as file2:
    reader = csv.DictReader(file2)
    for row in reader:
        letters.append(row)
        # A later row with the same 'sigla' overwrites the earlier id.
        letterIDs[row['sigla']] = row['id']
# %%
print(len(letters))
print(len(letterIDs))
# %%
letterIDsList = list(letterIDs.values())
- # %%
# Group the anthroponym lemma records under the sigla of each letter that
# appears in their coordinates. Every known sigla starts with an empty list.
antroponymsBySigla = {sigla: [] for sigla in letterIDs}
for antr in antroponyms:
    # The sigla is the part of each coordinate before the first '_', upper-cased.
    sigle = [coord.split("_")[0].upper() for coord in antr['coordinate']]
    # Carry the entry id along inside the lemma record (mutates it in place).
    antr['lemma']['id'] = antr['id']
    for sigla in sigle:
        # Coordinates whose sigla is not in the letter table are skipped.
        # This is an explicit lookup rather than a bare `except: pass`, so
        # unrelated errors are no longer silently swallowed.
        bucket = antroponymsBySigla.get(sigla)
        if bucket is not None:
            # NOTE(review): a lemma with several coordinates in the same letter
            # is appended once per coordinate -- confirm duplicates are wanted.
            bucket.append(antr['lemma'])
# %%
antroponymsBySigla
- # %%
# Keep only the rows of data_item.csv whose 'id' belongs to a known letter.
# A set gives O(1) membership tests; checking against the list would rescan
# it for every CSV row.
wantedIDs = set(letterIDsList)
# newline='' lets the csv module handle line breaks inside quoted fields.
with open("../../data_item.csv", newline='') as file3:
    reader = csv.DictReader(file3)
    datiniRelevantItems = [row for row in reader if row['id'] in wantedIDs]
# %%
print(len(letterIDsList))
print(len(datiniRelevantItems))
- # %%
def takeRelevantProperties(obj):
    """Reduce an item row to its id and the authority ids of its person fields.

    Args:
        obj: mapping with an 'id' key and the four 'persona_*' keys; each
            person value is either '' or a JSON object string that has an
            'authID' member.

    Returns:
        A new dict with 'id' followed by one entry per person key, holding
        the decoded 'authID' or '' when the source field was empty.
    """
    person_tags = (
        'persona_destinatario',
        'persona_mittente',
        'persona_indirizzata',
        'persona_mano',
    )
    slim = {'id': obj['id']}
    for tag in person_tags:
        raw = obj[tag]
        # Empty fields stay empty; anything else is JSON carrying the authID.
        slim[tag] = json.loads(raw)['authID'] if raw != '' else ''
    return slim
# Slim every relevant item down to id + person authority ids, then index the
# resulting records by their 'id'.
slimmedItems = [takeRelevantProperties(item) for item in datiniRelevantItems]
datiniRelevantStuff = {record['id']: record for record in slimmedItems}
# %%
datiniRelevantStuff
- # %%
# Load the EAC table and index its rows by 'recordId'.
datiniEAC = {}
# newline='' lets the csv module handle line breaks inside quoted fields.
with open("../../DallASPO/data_eac_datini.csv", newline='') as file4:
    reader = csv.DictReader(file4)
    for row in reader:
        datiniEAC[row['recordId']] = row
# %%
len(datiniEAC)
# %%
datiniEAC
# %%
# Spot check: take the 'persona_mittente' of the first relevant item and look
# it up in the EAC table (raises KeyError if that field is empty or unknown).
prr = next(iter(datiniRelevantStuff.values()))['persona_mittente']
datiniEAC[prr]
- # %%
def processDatini(obj):
    """Collect the EAC rows referenced by the person fields of *obj*.

    Args:
        obj: mapping with the four 'persona_*' keys, each holding either ''
            or a key of the module-level ``datiniEAC`` dict.

    Returns:
        A list with one ``datiniEAC`` entry per non-empty person field, in
        the fixed tag order below.

    Raises:
        KeyError: if a non-empty person value is not present in ``datiniEAC``.
    """
    person_tags = (
        'persona_destinatario',
        'persona_mittente',
        'persona_indirizzata',
        'persona_mano',
    )
    return [datiniEAC[obj[tag]] for tag in person_tags if obj[tag] != '']
# For every letter sigla, bundle the sigla itself, its grouped lemma records
# (under 'ovi') and the EAC rows of the letter's persons (under 'aspo').
out = {}
for sigla, lemmi in antroponymsBySigla.items():
    out[sigla] = {
        'sigla': sigla,
        'ovi': lemmi,
        'aspo': processDatini(datiniRelevantStuff[letterIDs[sigla]]),
    }
# %%
# Persist the combined structure for inspection.
with open("temp.json", 'w') as outfile1:
    json.dump(out, outfile1, indent=2)
- # %%
# Flatten the per-letter 'aspo' lists into one list of EAC rows.
# This is done with comprehensions so that no module-level loop variable
# shadows the builtin `list` (which would break every later `list(...)` call
# in the session), and without re-copying the accumulator on every step.
tmp = [out[sigla]['aspo'] for sigla in out]
aspos = [record for group in tmp for record in group]
# %%
# Distinct EAC record ids actually referenced by the letters.
asposIDs = {el['recordId'] for el in aspos}
# %%
len(asposIDs)
# %%
len(datiniEAC)
# %%
len(antroponyms)
- # %%
|