contexts.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. import json
  2. import pandas as pd
  3. from .basic_queries import basicQueries
  4. from .utilities.format import formatAllContexts, formatContext
  5. # Executes query sequences to recover single and multiple contexts
  6. # Returns Pandas dataframes
  7. class contexts(basicQueries):
  8. def __init__(self, dataConfig):
  9. super().__init__(dataConfig)
  10. #%% funzione contesti multipli cumulativa
  11. # Potrebbe essere unita alle cooccorrenze?
  12. def contestimultipli (self, tipo_ricerca, ricerca, index = None):
  13. ricercadf = pd.DataFrame(ricerca)
  14. textlist = self.findtexts(tipo_ricerca, ricercadf, index)
  15. contexts = self.findcontexts (textlist)
  16. bibliocontexts = self.findbib (contexts)
  17. highlights = formatAllContexts(bibliocontexts)
  18. return highlights.to_dict(orient='records')
  19. #%% funzione contesti singoli cumulativa
  20. def contestosingolo (self, contestimultipli, indice, parole, periodi, brani):
  21. ### droppa le colonne "highlight" che gli rompono le scatole###
  22. contestimultipli = {k: v for k, v in contestimultipli.items() if not k.startswith('highlight')}
  23. ###############################################################
  24. contestimultiplidf = pd.DataFrame(contestimultipli, index=[0])
  25. contestosingolo = self.singlecontexts(contestimultiplidf, indice, parole, periodi, brani)
  26. braniassociati = self.findlinks(contestosingolo)
  27. contestosingoloclean = self.findbib (braniassociati)
  28. contestosingoloclean = formatAllContexts(contestosingoloclean)
  29. return contestosingoloclean.to_dict(orient='records')
  30. #%% funzione reperimento e raffinamento contesti singoli
  31. def singlecontexts(self, textlist, index, parole, periodi, brani):
  32. context = textlist.iloc[index]
  33. contexts = []
  34. formats = []
  35. listOcc = self.listOcc
  36. sigla = textlist.loc[index, "sigla"]
  37. periodlocal = textlist.loc[index, "numperiod"]
  38. ntxlocal = textlist.loc[index, "ntx"]
  39. mappalocal = textlist.loc[index, "mappa"]
  40. linkslocal = textlist.loc[index, "links"]
  41. numbranolocal = textlist.loc[index, "numbrano"]
  42. if parole != 0:
  43. pointerlist = pd.DataFrame()
  44. for table in listOcc:
  45. queryData = {'queryType': 'singlecontext', 'querySubtype': 'parole', 'parole': parole, 'periodi': periodi, 'brani': brani, 'table': table, 'ntxlocal': ntxlocal, 'mappalocal': mappalocal, 'periodlocal': periodlocal, 'numbranolocal': numbranolocal}
  46. queryresponse = self.queryHandler.query(queryData, pandas=True)
  47. pointerlist = pd.concat([pointerlist, queryresponse])
  48. fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(), 'maxChar': pointerlist["pitxt"].max()}
  49. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  50. contexts.append(cont)
  51. formats.append(json.dumps(form))
  52. context ['piniz'] = pointerlist["pitxt"].min()
  53. context ['pfin'] = pointerlist["pitxt"].max()
  54. elif periodi != 0:
  55. queryData = {'queryType': 'singlecontext', 'querySubtype': 'parole', 'parole': parole, 'periodi': periodi, 'brani': brani, 'table': table, 'ntxlocal': ntxlocal, 'mappalocal': mappalocal, 'periodlocal': periodlocal, 'numbranolocal': numbranolocal}
  56. queryresponse = self.queryHandler.query(queryData, pandas=True)
  57. fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(), 'maxChar': pointerlist["pitxt"].max()}
  58. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  59. contexts.append(cont)
  60. formats.append(json.dumps(form))
  61. context ['piniz'] = queryresponse["piniz"].min()
  62. context ['pfin'] = queryresponse["pfin"].max()
  63. elif brani != 0:
  64. if linkslocal == 0 or linkslocal == 1:
  65. return "Nessun brano associato a questo contesto"
  66. else:
  67. queryData = {'queryType': 'singlecontext', 'querySubtype': 'brani', 'parole': parole, 'periodi': periodi, 'brani': brani, 'table': table, 'ntxlocal': ntxlocal, 'mappalocal': mappalocal, 'periodlocal': periodlocal, 'numbranolocal': numbranolocal}
  68. queryresponse = self.queryHandler.query(queryData, pandas=True)
  69. fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(), 'maxChar': pointerlist["pitxt"].max()}
  70. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  71. contexts.append(cont)
  72. formats.append(json.dumps(form))
  73. context ['piniz'] = queryresponse["piniz"].min()
  74. context ['pfin'] = queryresponse["pfin"].max()
  75. context['contesto'] = contexts[0]
  76. context['formattazione contesto'] = formats[0]
  77. return pd.DataFrame(context).T.reset_index(drop=True)
  78. #%% funzione reperimento note e brani associati
  79. def findlinks (self, context):
  80. linkslocal = context.loc[0, "links"]
  81. siglalocal = context.loc[0, "sigla"]
  82. ntxlocal = context.loc[0, "ntx"]
  83. pitxtlocal = context.loc[0, "pitxt"]
  84. pinizlocal = context.loc[0, "piniz"]
  85. pfinlocal = context.loc[0, "pfin"]
  86. if linkslocal == 0:
  87. return context
  88. if linkslocal == 1:
  89. queryData = {'queryType': 'links', 'querySubtype': 'nota', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  90. queryresponse = self.queryHandler.query(queryData, pandas=True)
  91. fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
  92. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  93. context['nota'] = cont
  94. context['formattazione nota'] = json.dumps(form)
  95. context['nota formattata'] = formatContext(json.dumps(form))
  96. return context
  97. if linkslocal == 2:
  98. queryData = {'queryType': 'links', 'querySubtype': 'testo_associato', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  99. queryresponse = self.queryHandler.query(queryData, pandas=True)
  100. fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
  101. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  102. context['testo associato'] = cont
  103. context['formattazione testo associato'] = json.dumps(form)
  104. context['testo associato formattato'] = formatContext(cont, json.dumps(form))
  105. if linkslocal == 3:
  106. queryData = {'queryType': 'links', 'querySubtype': 'nota', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  107. queryresponse = self.queryHandler.query(queryData, pandas=True)
  108. fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
  109. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  110. context['nota'] = cont
  111. context['formattazione nota'] = json.dumps(form)
  112. context['nota formattata'] = formatContext(cont, json.dumps(form))
  113. queryData2 = {'queryType': 'links', 'querySubtype': 'testo_associato', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  114. queryresponse2 = self.queryHandler.query(queryData2, pandas=True)
  115. fileQueryData2 = {'sigla': siglalocal, 'minChar': queryresponse2["piniz"].min(), 'maxChar': queryresponse2["pfin"].max()}
  116. cont2, form2 = self.queryHandler.textQuery(fileQueryData2, True)
  117. context['testo associato'] = cont2
  118. context['formattazione testo associato'] = json.dumps(form2)
  119. context['testo associato formattato'] = formatContext(cont2, json.dumps(form2))
  120. return context