# de_code.py
#%%
import csv
import os
from os import listdir
  4. class keyRing:
  5. def __init__(self, keyPath, dbEncoded, textsEncoded):
  6. self.keyPath = keyPath
  7. self.vettDictDec, self.vettDictEnc = self.getVettSpec(dbEncoded)
  8. self.textKeys = self.getKeys(textsEncoded)
  9. def getVettSpec(self, dbEncoded):
  10. if not dbEncoded:
  11. return None
  12. with open(self.keyPath + "vettSpec.csv", 'r') as file1:
  13. reader = csv.DictReader(file1)
  14. vettSpec = [row for row in reader]
  15. vettDictDec = {}
  16. vettDictEnc = {}
  17. for index, entry in enumerate(vettSpec):
  18. if index==0:
  19. continue
  20. vettDictDec[chr(int(entry['intcode']))] = chr(int(entry['unicode'], 16))
  21. vettDictEnc[chr(int(entry['unicode'], 16))] = chr(int(entry['intcode']))
  22. # Special chars
  23. vettDictEnc['\\%'] = "%"
  24. vettDictEnc['\\_'] = "_"
  25. return vettDictDec, vettDictEnc
  26. def getKeys(self, textsEncoded):
  27. if not textsEncoded:
  28. return None
  29. files = listdir(self.keyPath)
  30. keyFiles = [file for file in files if (file.startswith('key_') and file.endswith('.csv'))]
  31. keys = {}
  32. for keyFile in keyFiles:
  33. code = keyFile.replace('key_', '').replace('.csv', '')
  34. try:
  35. keys[code] = self.getKeyByCode(keyFile)
  36. except:
  37. pass
  38. return keys
  39. def getKeyByCode(self, keyFile):
  40. with open(self.keyPath + keyFile, 'r') as file1:
  41. reader = csv.reader(file1)
  42. key = [int(row[0]) for index, row in enumerate(reader) if index>1]
  43. halfKeyLen = len(key)//2
  44. key=key[:halfKeyLen]
  45. return key
# Encoders/decoders
# DB field encoder/decoder
# DB columns that need this:
#   FORM -> norm, spec, invnorm, invspec
#   LEM  -> norm, spec, invnorm, invspec, cat, omo
  51. def db_decode(vettDictDec, string0):
  52. return ''.join([vettDictDec[char] for char in string0])
#
  54. def db_encode(vettDictEnc, string0):
  55. res = ""
  56. prevChar = ""
  57. for char0 in string0:
  58. if char0=="\\":
  59. prevChar = "\\"
  60. continue
  61. if prevChar!="\\":
  62. res += vettDictEnc[char0]
  63. else:
  64. comp = "\\" + char0
  65. add = vettDictEnc.get(comp) if vettDictEnc.get(comp) is not None else vettDictEnc["\\"] + vettDictEnc[char0]
  66. res += add
  67. return res
# Text encoder/decoder
  69. def decodeTextByKey(text, key, startInFile):
  70. initialOffset = startInFile % len(key)
  71. res = ""
  72. for k, char0 in enumerate(text):
  73. offset = k + initialOffset
  74. if offset >= len(key):
  75. offset = offset % len(key)
  76. res += shiftchar(char0, -key[offset])
  77. return res
  78. #
  79. def codeTextByKey(text, key, startInFile):
  80. initialOffset = startInFile % len(key)
  81. res = ""
  82. for k, char0 in enumerate(text):
  83. offset = k + initialOffset
  84. if offset >= len(key):
  85. offset = offset % len(key)
  86. res += shiftchar(char0, +key[offset])
  87. return res
  88. #
  89. def shiftchar(char0, shift):
  90. return chr(ord(char0) + shift)