# de_code.py
#%%
import csv
import os
from os import listdir
  4. class keyRing:
  5. def __init__(self, keyPath, dbEncoded, textsEncoded):
  6. self.keyPath = keyPath
  7. self.vettDictDec, self.vettDictEnc = self.getVettSpec(dbEncoded)
  8. self.textKeys = self.getKeys(textsEncoded)
  9. def getVettSpec(self, dbEncoded):
  10. if not dbEncoded:
  11. return None
  12. with open(self.keyPath + "vettSpec.csv", 'r') as file1:
  13. reader = csv.DictReader(file1)
  14. vettSpec = [row for row in reader]
  15. vettDictDec = {}
  16. vettDictEnc = {}
  17. for index, entry in enumerate(vettSpec):
  18. if index==0:
  19. continue
  20. vettDictDec[chr(int(entry['intcode']))] = chr(int(entry['unicode'], 16))
  21. vettDictEnc[chr(int(entry['unicode'], 16))] = chr(int(entry['intcode']))
  22. # Special chars
  23. vettDictEnc['\\%'] = "%"
  24. vettDictEnc['\\_'] = "_"
  25. return vettDictDec, vettDictEnc
  26. def getKeys(self, textsEncoded):
  27. if not textsEncoded:
  28. return None
  29. files = listdir(self.keyPath)
  30. keyFiles = [file for file in files if (file.startswith('key_') and file.endswith('.csv'))]
  31. keys = {}
  32. for keyFile in keyFiles:
  33. code = keyFile.replace('key_', '').replace('.csv', '')
  34. try:
  35. keys[code] = self.getKeyByCode(keyFile)
  36. except:
  37. pass
  38. return keys
  39. def getKeyByCode(self, keyFile):
  40. with open(self.keyPath + keyFile, 'r') as file1:
  41. reader = csv.reader(file1)
  42. key = [int(row[0]) for row in reader][1:]
  43. #key = [int(row[0]) for index, row in enumerate(reader) if index>0]
  44. #halfKeyLen = len(key)//2
  45. #key=key[:halfKeyLen]
  46. return key
# Encoders/Decoders
# DB field encoder/decoder
# DB columns that need this:
# FORM -> norm, spec, invnorm, invspec
# LEM -> norm, spec, invnorm, invspec, cat, omo
  52. def db_decode(vettDictDec, string0):
  53. return ''.join([vettDictDec[char] for char in string0])
#
  55. def db_encode(vettDictEnc, string0):
  56. res = ""
  57. prevChar = ""
  58. for char0 in string0:
  59. if char0=="\\":
  60. prevChar = "\\"
  61. continue
  62. if prevChar!="\\":
  63. res += vettDictEnc[char0]
  64. else:
  65. comp = "\\" + char0
  66. add = vettDictEnc.get(comp) if vettDictEnc.get(comp) is not None else vettDictEnc["\\"] + vettDictEnc[char0]
  67. res += add
  68. return res
# Text encoder/decoder
  70. def decodeTextByKey(text, key, startInFile):
  71. initialOffset = startInFile % len(key)
  72. res = ""
  73. len2 = len(key)//2 # Modify to use only 1/2 key as in Gatto
  74. for k, char0 in enumerate(text):
  75. offset = k + initialOffset
  76. if offset >= len2: # MOD
  77. offset = offset % len2 # MOD
  78. # if offset >= len(key):
  79. # offset = offset % len(key)
  80. res += shiftchar(char0, -key[offset])
  81. return res
#
  83. def codeTextByKey(text, key, startInFile):
  84. initialOffset = startInFile % len(key)
  85. res = ""
  86. for k, char0 in enumerate(text):
  87. offset = k + initialOffset
  88. if offset >= len(key):
  89. offset = offset % len(key)
  90. res += shiftchar(char0, +key[offset])
  91. return res
#
  93. def shiftchar(char0, shift):
  94. return chr(ord(char0) + shift)