# contexts.py
import json

import polars as pl

from .basic_queries import basicQueries
from .utilities.format import formatAllContexts, formatContext
# Executes query sequences to recover single and multiple contexts.
# Returns data in a dictionary format.
  7. class contexts(basicQueries):
  8. def __init__(self, dataConfig):
  9. super().__init__(dataConfig)
    # %% cumulative multiple-contexts function
  11. def contestimultipli(self, tipo_ricerca, ricerca, index=None):
  12. ricercadf = pl.DataFrame(ricerca)
  13. textlist = self.findtexts(tipo_ricerca, ricercadf, index)
  14. contexts = self.findcontexts(textlist)
  15. bibliocontexts = self.findbib(contexts)
  16. highlights = formatAllContexts(bibliocontexts)
  17. return highlights.to_dict(as_series=False)
    # %% cumulative single-context function
  19. def contestosingolo(self, contestimultipli, indice, parole, periodi, brani):
  20. contestimultipli = {k: v for k, v in contestimultipli.items() if
  21. not k.startswith('highlight')}
  22. contestimultiplidf = pl.DataFrame(contestimultipli)
  23. contestosingolo = self.singlecontexts(contestimultiplidf, indice, parole, periodi, brani)
  24. braniassociati = self.findlinks(contestosingolo)
  25. contestosingoloclean = self.findbib(braniassociati)
  26. contestosingoloclean = formatAllContexts(contestosingoloclean)
  27. return contestosingoloclean.to_dict(as_series=False)
    # %% single-context retrieval and refinement function
  29. def singlecontexts(self, textlist, index, parole, periodi, brani):
  30. context = textlist.row(index).as_dict()
  31. contexts = []
  32. formats = []
  33. listOcc = self.listOcc
  34. sigla = textlist[index, "sigla"]
  35. periodlocal = textlist[index, "numperiod"]
  36. ntxlocal = textlist[index, "ntx"]
  37. mappalocal = textlist[index, "mappa"]
  38. linkslocal = textlist[index, "links"]
  39. numbranolocal = textlist[index, "numbrano"]
  40. pointerlist = pl.DataFrame()
  41. if parole != 0:
  42. for table in listOcc:
  43. queryData = {'queryType': 'singlecontext', 'querySubtype': 'parole',
  44. 'parole': parole, 'periodi': periodi, 'brani': brani, 'table': table,
  45. 'ntxlocal': ntxlocal, 'mappalocal': mappalocal,
  46. 'periodlocal': periodlocal, 'numbranolocal': numbranolocal}
  47. queryresponse = self.queryHandler.query(queryData, polars=True)
  48. pointerlist = pl.concat([pointerlist, queryresponse])
  49. fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(),
  50. 'maxChar': pointerlist["pitxt"].max()}
  51. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  52. contexts.append(cont)
  53. formats.append(json.dumps(form))
  54. context['piniz'] = pointerlist["pitxt"].min()
  55. context['pfin'] = pointerlist["pitxt"].max()
  56. elif periodi != 0:
  57. for table in listOcc:
  58. queryData = {'queryType': 'singlecontext', 'querySubtype': 'parole',
  59. 'parole': parole, 'periodi': periodi, 'brani': brani, 'table': table,
  60. 'ntxlocal': ntxlocal, 'mappalocal': mappalocal,
  61. 'periodlocal': periodlocal, 'numbranolocal': numbranolocal}
  62. queryresponse = self.queryHandler.query(queryData, polars=True)
  63. pointerlist = pl.concat([pointerlist, queryresponse])
  64. fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(),
  65. 'maxChar': pointerlist["pitxt"].max()}
  66. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  67. contexts.append(cont)
  68. formats.append(json.dumps(form))
  69. context['piniz'] = queryresponse["piniz"].min()
  70. context['pfin'] = queryresponse["pfin"].max()
  71. elif brani != 0:
  72. if linkslocal == 0 or linkslocal == 1:
  73. return "Nessun brano associato a questo contesto"
  74. else:
  75. for table in listOcc:
  76. queryData = {'queryType': 'singlecontext', 'querySubtype': 'brani',
  77. 'parole': parole, 'periodi': periodi, 'brani': brani,
  78. 'table': table, 'ntxlocal': ntxlocal, 'mappalocal': mappalocal,
  79. 'periodlocal': periodlocal, 'numbranolocal': numbranolocal}
  80. queryresponse = self.queryHandler.query(queryData, polars=True)
  81. pointerlist = pl.concat([pointerlist, queryresponse])
  82. fileQueryData = {'sigla': sigla, 'minChar': pointerlist["pitxt"].min(),
  83. 'maxChar': pointerlist["pitxt"].max()}
  84. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  85. contexts.append(cont)
  86. formats.append(json.dumps(form))
  87. context['piniz'] = queryresponse["piniz"].min()
  88. context['pfin'] = queryresponse["pfin"].max()
  89. context['contesto'] = contexts[0]
  90. context['formattazione contesto'] = formats[0]
  91. # Trasponi il dizionario in un DataFrame di Polars
  92. context_df = pl.DataFrame(context).melt()
  93. return context_df
    # %% retrieval of notes and associated passages
  95. def findlinks (self, context):
  96. linkslocal = context.loc[0, "links"]
  97. siglalocal = context.loc[0, "sigla"]
  98. ntxlocal = context.loc[0, "ntx"]
  99. pitxtlocal = context.loc[0, "pitxt"]
  100. pinizlocal = context.loc[0, "piniz"]
  101. pfinlocal = context.loc[0, "pfin"]
  102. if linkslocal == 0:
  103. return context
  104. if linkslocal == 1:
  105. queryData = {'queryType': 'links', 'querySubtype': 'nota', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  106. queryresponse = self.queryHandler.query(queryData, pandas=True)
  107. fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
  108. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  109. context['nota'] = cont
  110. context['formattazione nota'] = json.dumps(form)
  111. context['nota formattata'] = formatContext(json.dumps(form))
  112. return context
  113. if linkslocal == 2:
  114. queryData = {'queryType': 'links', 'querySubtype': 'testo_associato', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  115. queryresponse = self.queryHandler.query(queryData, pandas=True)
  116. fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
  117. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  118. context['testo associato'] = cont
  119. context['formattazione testo associato'] = json.dumps(form)
  120. context['testo associato formattato'] = formatContext(cont, json.dumps(form))
  121. if linkslocal == 3:
  122. queryData = {'queryType': 'links', 'querySubtype': 'nota', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  123. queryresponse = self.queryHandler.query(queryData, pandas=True)
  124. fileQueryData = {'sigla': siglalocal, 'minChar': queryresponse["piniz"].min(), 'maxChar': queryresponse["pfin"].max()}
  125. cont, form = self.queryHandler.textQuery(fileQueryData, True)
  126. context['nota'] = cont
  127. context['formattazione nota'] = json.dumps(form)
  128. context['nota formattata'] = formatContext(cont, json.dumps(form))
  129. queryData2 = {'queryType': 'links', 'querySubtype': 'testo_associato', 'ntxlocal': ntxlocal, 'pinizlocal': pinizlocal, 'pitxtlocal': pitxtlocal, 'pfinlocal': pfinlocal}
  130. queryresponse2 = self.queryHandler.query(queryData2, pandas=True)
  131. fileQueryData2 = {'sigla': siglalocal, 'minChar': queryresponse2["piniz"].min(), 'maxChar': queryresponse2["pfin"].max()}
  132. cont2, form2 = self.queryHandler.textQuery(fileQueryData2, True)
  133. context['testo associato'] = cont2
  134. context['formattazione testo associato'] = json.dumps(form2)
  135. context['testo associato formattato'] = formatContext(cont2, json.dumps(form2))
  136. return context