#%%
import csv
import os
from os import listdir


class keyRing:
    """Load the DB character tables and the per-text keys from a key folder.

    Attributes:
        keyPath: folder that holds ``vettSpec.csv`` and the ``key_*.csv`` files.
        vettDictDec: map encoded char -> plain char, or None if DB decoding is off.
        vettDictEnc: map plain char -> encoded char, or None if DB encoding is off.
        textKeys: map text code -> list of integer shifts, or None if texts
            are not encoded.
    """

    def __init__(self, keyPath, dbEncoded, textsEncoded):
        self.keyPath = keyPath
        # getVettSpec returns None when the DB is not encoded; unpacking that
        # directly would raise TypeError, so handle it explicitly.
        vettSpec = self.getVettSpec(dbEncoded)
        if vettSpec is None:
            self.vettDictDec, self.vettDictEnc = None, None
        else:
            self.vettDictDec, self.vettDictEnc = vettSpec
        self.textKeys = self.getKeys(textsEncoded)

    def getVettSpec(self, dbEncoded):
        """Build the decode/encode character tables from ``vettSpec.csv``.

        The file must have an ``intcode`` column (decimal code point of the
        encoded character) and a ``unicode`` column (hexadecimal code point of
        the plain character).

        Returns:
            None when ``dbEncoded`` is falsy, otherwise the tuple
            ``(vettDictDec, vettDictEnc)``.
        """
        if not dbEncoded:
            return None
        specPath = os.path.join(self.keyPath, "vettSpec.csv")
        with open(specPath, 'r', newline='') as file1:
            vettSpec = list(csv.DictReader(file1))
        vettDictDec = {}
        vettDictEnc = {}
        # The first data row (the one right after the header) is skipped,
        # exactly as the original loader did.
        for entry in vettSpec[1:]:
            encodedChar = chr(int(entry['intcode']))
            plainChar = chr(int(entry['unicode'], 16))
            vettDictDec[encodedChar] = plainChar
            vettDictEnc[plainChar] = encodedChar
        # Special chars: backslash-escaped '%' and '_' are emitted literally.
        vettDictEnc['\\%'] = "%"
        vettDictEnc['\\_'] = "_"
        return vettDictDec, vettDictEnc

    def getKeys(self, textsEncoded):
        """Load every ``key_<code>.csv`` file found in the key folder.

        Key files that cannot be read or parsed are skipped.

        Returns:
            None when ``textsEncoded`` is falsy, otherwise a dict mapping each
            code to its key (a list of ints).
        """
        if not textsEncoded:
            return None
        keyFiles = [
            name for name in listdir(self.keyPath)
            if name.startswith('key_') and name.endswith('.csv')
        ]
        keys = {}
        for keyFile in keyFiles:
            code = keyFile.replace('key_', '').replace('.csv', '')
            try:
                keys[code] = self.getKeyByCode(keyFile)
            except (OSError, ValueError, IndexError, csv.Error):
                # Best effort: an unreadable or malformed key file is ignored
                # instead of aborting the whole load.
                continue
        return keys

    def getKeyByCode(self, keyFile):
        """Read one key file and return its key as a list of ints.

        The first two rows are skipped, the first column of every remaining
        row is parsed as an integer, and only the first half of those values
        is kept.
        """
        with open(os.path.join(self.keyPath, keyFile), 'r', newline='') as file1:
            reader = csv.reader(file1)
            key = [int(row[0]) for index, row in enumerate(reader) if index > 1]
        return key[:len(key) // 2]


# Encoder/Decoders

# DB field encoder/decoder
# DB Columns that need this:
# FORM -> norm, spec, invnorm, invspec
# LEM  -> norm, spec, invnorm, invspec, cat, omo
def db_decode(vettDictDec, string0):
    """Decode ``string0`` char by char through ``vettDictDec``.

    Raises:
        KeyError: if a character has no entry in ``vettDictDec``.
    """
    return ''.join(vettDictDec[char] for char in string0)


def db_encode(vettDictEnc, string0):
    """Encode ``string0`` char by char through ``vettDictEnc``.

    A backslash escapes the next non-backslash character: if the two-character
    sequence (backslash + char) has its own entry in ``vettDictEnc`` that entry
    is emitted, otherwise the encoded backslash followed by the encoded char.
    A trailing backslash with nothing after it produces no output.

    Raises:
        KeyError: if a character has no entry in ``vettDictEnc``.
    """
    pieces = []
    escaped = False
    for char0 in string0:
        if char0 == "\\":
            escaped = True
            continue
        if escaped:
            special = vettDictEnc.get("\\" + char0)
            if special is None:
                special = vettDictEnc["\\"] + vettDictEnc[char0]
            pieces.append(special)
            # The escape applies to one character only; without this reset
            # every following character would be treated as escaped.
            escaped = False
        else:
            pieces.append(vettDictEnc[char0])
    return "".join(pieces)


# Text encoder/decoder
def _shiftTextByKey(text, key, startInFile, sign):
    """Shift each char of ``text`` by ``sign`` times the matching key entry."""
    # Computed up front so an empty key fails even when text is empty.
    initialOffset = startInFile % len(key)
    keyLen = len(key)
    return "".join(
        shiftchar(char0, sign * key[(k + initialOffset) % keyLen])
        for k, char0 in enumerate(text)
    )


def decodeTextByKey(text, key, startInFile):
    """Decode ``text`` by subtracting the key shifts from its code points.

    ``startInFile`` is the position of ``text`` within the encoded file; the
    key is applied cyclically starting from ``startInFile % len(key)``.

    Raises:
        ZeroDivisionError: if ``key`` is empty.
    """
    return _shiftTextByKey(text, key, startInFile, -1)


def codeTextByKey(text, key, startInFile):
    """Encode ``text`` by adding the key shifts to its code points.

    ``startInFile`` is the position of ``text`` within the file; the key is
    applied cyclically starting from ``startInFile % len(key)``.

    Raises:
        ZeroDivisionError: if ``key`` is empty.
    """
    return _shiftTextByKey(text, key, startInFile, +1)


def shiftchar(char0, shift):
    """Return the character whose code point is ``ord(char0) + shift``."""
    return chr(ord(char0) + shift)