123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- #%%
- import csv
- from os import listdir
class keyRing:
    """Load the character tables and text keys stored under a key directory.

    Attributes set by the constructor:
        keyPath:     prefix that file names are appended to by plain string
                     concatenation, so it has to end with a path separator.
        vettDictDec: dict built from ``vettSpec.csv`` mapping chr(intcode) to
                     the character given by the hex ``unicode`` column, or
                     None when ``dbEncoded`` is falsy.
        vettDictEnc: the reverse mapping plus two escape entries, or None
                     when ``dbEncoded`` is falsy.
        textKeys:    dict mapping a key code (taken from the file name) to a
                     list of ints, or None when ``textsEncoded`` is falsy.
    """

    def __init__(self, keyPath, dbEncoded, textsEncoded):
        """Store ``keyPath`` and load whichever tables the flags ask for."""
        self.keyPath = keyPath
        # getVettSpec returns a bare None (not a pair) when the DB is not
        # encoded; unpacking that directly would raise TypeError.
        vettSpec = self.getVettSpec(dbEncoded)
        if vettSpec is None:
            self.vettDictDec, self.vettDictEnc = None, None
        else:
            self.vettDictDec, self.vettDictEnc = vettSpec
        self.textKeys = self.getKeys(textsEncoded)

    def getVettSpec(self, dbEncoded):
        """Build the decode/encode character maps from ``vettSpec.csv``.

        The CSV must provide an ``intcode`` column (decimal code point) and a
        ``unicode`` column (hexadecimal code point).

        Returns:
            None when ``dbEncoded`` is falsy, otherwise the tuple
            ``(vettDictDec, vettDictEnc)``.
        """
        if not dbEncoded:
            return None
        # newline='' is what the csv module asks for when reading files.
        with open(self.keyPath + "vettSpec.csv", 'r', newline='') as file1:
            rows = list(csv.DictReader(file1))
        vettDictDec = {}
        vettDictEnc = {}
        # NOTE(review): DictReader has already consumed the header line, so
        # this skips the first *data* row. Kept as in the original; confirm
        # that the first row is meant to be ignored.
        for entry in rows[1:]:
            encoded = chr(int(entry['intcode']))
            plain = chr(int(entry['unicode'], 16))
            vettDictDec[encoded] = plain
            vettDictEnc[plain] = encoded
        # Special chars: backslash-escaped % and _ map to the raw characters.
        vettDictEnc['\\%'] = "%"
        vettDictEnc['\\_'] = "_"
        return vettDictDec, vettDictEnc

    def getKeys(self, textsEncoded):
        """Load every ``key_*.csv`` file found in ``keyPath``.

        Returns:
            None when ``textsEncoded`` is falsy, otherwise a dict mapping the
            file name with ``key_`` and ``.csv`` removed to the key list.
            Files that cannot be read or parsed are skipped.
        """
        if not textsEncoded:
            return None
        keys = {}
        for keyFile in listdir(self.keyPath):
            if not (keyFile.startswith('key_') and keyFile.endswith('.csv')):
                continue
            code = keyFile.replace('key_', '').replace('.csv', '')
            try:
                keys[code] = self.getKeyByCode(keyFile)
            except (OSError, ValueError, IndexError, csv.Error):
                # Best effort: an unreadable or malformed key file is ignored
                # rather than aborting the whole load. Only I/O and parse
                # errors are swallowed, never KeyboardInterrupt/SystemExit.
                continue
        return keys

    def getKeyByCode(self, keyFile):
        """Return the ints in the first column of ``keyFile``, minus row one.

        Every row, including the first, is converted with ``int`` before the
        first value is dropped, so the first row has to be numeric as well.

        Raises:
            OSError: the file cannot be opened.
            ValueError: a first-column value is not an integer.
            IndexError: a row is empty.
        """
        with open(self.keyPath + keyFile, 'r', newline='') as file1:
            reader = csv.reader(file1)
            values = [int(row[0]) for row in reader]
        return values[1:]
- # Encoder/Decoders
- # DB field encoder/decoder
- # DB Columns that need this:
- # FORM -> norm, spec, invnorm, invspec
- # LEM -> norm, spec, invnorm, invspec, cat, omo
def db_decode(vettDictDec, string0):
    """Translate ``string0`` one character at a time through ``vettDictDec``.

    Raises KeyError for any character that has no entry in the table.
    """
    decoded = []
    for symbol in string0:
        decoded.append(vettDictDec[symbol])
    return ''.join(decoded)
- #
def db_encode(vettDictEnc, string0):
    """Encode ``string0`` through ``vettDictEnc``, honouring backslash escapes.

    A backslash marks the next character as escaped. For an escaped
    character the two-character key (backslash + character) is looked up
    first; if the table has no such entry, the encoded backslash followed by
    the encoded character is emitted instead. Unescaped characters are
    looked up directly.

    The escape applies to exactly one character. Consecutive backslashes
    collapse into a single pending escape and a trailing backslash produces
    no output, as before.

    Raises:
        KeyError: a character (or the backslash needed by the fallback) has
            no entry in ``vettDictEnc``.
    """
    pieces = []
    escaped = False
    for char0 in string0:
        if char0 == "\\":
            escaped = True
            continue
        if not escaped:
            pieces.append(vettDictEnc[char0])
            continue
        # Clear the flag so that only this one character is treated as
        # escaped; leaving it set made every later character take the
        # escape branch and gain a spurious encoded backslash.
        escaped = False
        add = vettDictEnc.get("\\" + char0)
        if add is None:
            add = vettDictEnc["\\"] + vettDictEnc[char0]
        pieces.append(add)
    return "".join(pieces)
- # Text encoder/decoder
def decodeTextByKey(text, key, startInFile):
    """Undo the per-character shift applied to ``text``.

    The starting position in the key is ``startInFile`` modulo the full key
    length, but the key index then wraps at half the key length: only the
    first half of the key is used (original note: "as in Gatto"). Each
    character is shifted by the negated key value via ``shiftchar``.
    """
    start = startInFile % len(key)
    half = len(key) // 2  # only the first half of the key is cycled through
    return "".join(
        shiftchar(symbol, -key[(start + position) % half])
        for position, symbol in enumerate(text)
    )
- #
def codeTextByKey(text, key, startInFile):
    """Shift every character of ``text`` by the matching key value.

    The key is cycled over its full length, starting at ``startInFile``
    modulo the key length; each character is shifted via ``shiftchar``.
    """
    size = len(key)
    start = startInFile % size
    shifted = []
    for position, symbol in enumerate(text):
        shifted.append(shiftchar(symbol, +key[(start + position) % size]))
    return "".join(shifted)
- #
def shiftchar(char0, shift):
    """Return the character whose code point is ``char0``'s plus ``shift``."""
    code_point = ord(char0)
    return chr(code_point + shift)
|