added Reed-Solomon error correction

This commit is contained in:
AlfredoSequeida 2021-02-21 17:07:55 -08:00
parent 7f1cad4029
commit c0f3ecd2db
1 changed files with 106 additions and 11 deletions

View File

@ -12,6 +12,8 @@ import gzip
import json import json
import base64 import base64
from reedsolo import RSCodec, ReedSolomonError
from cryptography.hazmat.backends import default_backend from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
@ -34,6 +36,16 @@ NOTDEBUG = True
TEMPVIDEO = "_temp.mp4" TEMPVIDEO = "_temp.mp4"
FRAMERATE = "1" FRAMERATE = "1"
# using Reed Solomon error correcting
# error correction codec, larger value is safer, but causes larger file
# sizes / longer videos
# using eec of 5:
# max corrections = 8 (all 8 bytes)
# max erasures = 16
ECC = 12
class WrongPassword(Exception): class WrongPassword(Exception):
pass pass
@ -68,20 +80,55 @@ def get_password(password_provided: str) -> bytes:
return key return key
def get_bits_from_file(filepath: str, key: bytes) -> BitArray: def apply_reed_solomon(bit_array: BitArray) -> BitArray:
""" """
Get/read bits from file Apply Reed-Solomon error correction every byte to maximize retrieval
possibility as opposed to applying it to the entire file
bit_array -- BitArray containing raw file data
ecc -- The error correction value to be used (default DEFAULT_ECC)
"""
# one byte is 8 bits
byte = 8
bits = bit_array.bin
rsc = RSCodec(ECC)
# split bits into bytes (sets of 8)
byte_list = split_string_by_n(bits, byte)
ecc_bytes = bytearray()
print("Applying Reed-Solomon Error Correction...")
# apply reed solomon
for b in tqdm(byte_list):
ecc_bytes += rsc.encode(b.encode("utf-8"))
return BitArray(bytes=ecc_bytes)
def get_bits_from_file(
filepath: str, key: bytes, reed_solomon: bool
) -> BitArray:
"""
Get/read bits fom file, encrypt data, and zip
filepath -- the file to read filepath -- the file to read
key -- key used to encrypt file key -- key used to encrypt file
reed_solomon -- if reed solomon should be used to encode bits
""" """
print("Reading file...") print("Reading file...")
bitarray = BitArray(filename=filepath) bitarray = BitArray(filename=filepath)
# adding a delimiter to know when the file ends to avoid corrupted files if reed_solomon:
# when retrieving bitarray = apply_reed_solomon(BitArray(filename=filepath))
# encrypt data
cipher = AES.new(key, AES.MODE_EAX, nonce=SALT) cipher = AES.new(key, AES.MODE_EAX, nonce=SALT)
ciphertext, tag = cipher.encrypt_and_digest(bitarray.tobytes()) ciphertext, tag = cipher.encrypt_and_digest(bitarray.tobytes())
@ -89,6 +136,7 @@ def get_bits_from_file(filepath: str, key: bytes) -> BitArray:
# because json can only serialize strings, the byte objects are encoded # because json can only serialize strings, the byte objects are encoded
# using base64 # using base64
data_bytes = json.dumps( data_bytes = json.dumps(
{ {
"tag": base64.b64encode(tag).decode("utf-8"), "tag": base64.b64encode(tag).decode("utf-8"),
@ -97,7 +145,7 @@ def get_bits_from_file(filepath: str, key: bytes) -> BitArray:
} }
).encode("utf-8") ).encode("utf-8")
print("Zipping...") # print("Zipping...")
# zip # zip
out = io.BytesIO() out = io.BytesIO()
@ -109,6 +157,7 @@ def get_bits_from_file(filepath: str, key: bytes) -> BitArray:
del bitarray del bitarray
bitarray = BitArray(zip) bitarray = BitArray(zip)
# bitarray = BitArray(data_bytes)
return bitarray.bin return bitarray.bin
@ -185,19 +234,43 @@ def get_bits_from_video(video_filepath: str) -> str:
if use_cython: if use_cython:
print("Using Cython...") print("Using Cython...")
for index in tqdm(range(sequence_length)): for index in tqdm(range(sequence_length)):
bits += get_bits_from_image(image_sequence[index]) bits += get_bits_from_image(image_sequence[index])
return bits return bits
def save_bits_to_file(file_path: str, bits: str, key: bytes): def decode_reed_solomon(data_bytes: bytes) -> bytes:
rsc = RSCodec(ECC)
byte = 8
# n is the block size (message length + ecc) as defined by the dist
# (n k + 1)
n = byte + ECC
byte_list = split_string_by_n(data_bytes, n)
decoded_bytes = bytes()
print("Decoding Reed-Solomon Error Correction...")
for b in tqdm(byte_list):
decoded_bytes += rsc.decode(b)[0]
return decoded_bytes
def save_bits_to_file(
file_path: str, bits: str, key: bytes, reed_solomon: bool
):
""" """
save/write bits to a file save/write bits to a file
file_path -- the path to write to file_path -- the path to write to
bits -- the bits to write bits -- the bits to write
key -- key userd for file decryption key -- key userd for file decryption
reed_solomon -- needed if reed solomon was used to encode bits
""" """
bitstring = Bits(bin=bits) bitstring = Bits(bin=bits)
@ -220,8 +293,9 @@ def save_bits_to_file(file_path: str, bits: str, key: bytes):
filename = data["filename"] filename = data["filename"]
# decrypting data
cipher = AES.new(key, AES.MODE_EAX, nonce=SALT) cipher = AES.new(key, AES.MODE_EAX, nonce=SALT)
bitstring = cipher.decrypt(ciphertext) data_bytes = cipher.decrypt(ciphertext)
print("Checking integrity...") print("Checking integrity...")
@ -230,7 +304,12 @@ def save_bits_to_file(file_path: str, bits: str, key: bytes):
except ValueError: except ValueError:
raise WrongPassword("Key incorrect or message corrupted") raise WrongPassword("Key incorrect or message corrupted")
bitstring = BitArray(bitstring) bitstring = Bits(data_bytes)
if reed_solomon:
bitstring = Bits(
"0b" + decode_reed_solomon(data_bytes).decode("utf-8")
)
# If filepath not passed in use default otherwise used passed in filepath # If filepath not passed in use default otherwise used passed in filepath
if file_path == None: if file_path == None:
@ -286,11 +365,12 @@ def make_image_sequence(bitstring: BitArray, resolution: tuple = (1920, 1080)):
index = 1 index = 1
bitlist = bitlist[::-1] bitlist = bitlist[::-1]
print("Saving frames...") print("Saving frames...")
for _ in tqdm(range(len(bitlist))): for _ in tqdm(range(len(bitlist))):
bitl = bitlist.pop() bitl = bitlist.pop()
image_bits = list(map(int, bitl)) image_bits = list(map(int, bitl))
# print(image_bits)
image = Image.new("1", (width, height)) image = Image.new("1", (width, height))
image.putdata(image_bits) image.putdata(image_bits)
@ -364,6 +444,21 @@ def main():
type=str, type=str,
default="default", default="default",
) )
parser.add_argument(
"-r",
"--reed-solomon",
help=(
"Apply Reed-Solomon error correcting. This is helpful if you're"
" finding that your data is not being decoded correctly. It adds"
" extra data every byte making it possible to recover all 8 bits"
" in the case the data changes during the decoding process at"
" the cost of making your video files larger. Note, if you use"
" this option, you must also use the -r flag to decode a video"
" back to a file, otherwise, your data will not be recovered"
" correctly."
),
action="store_true",
)
args = parser.parse_args() args = parser.parse_args()
@ -398,7 +493,7 @@ def main():
if args.output: if args.output:
file_path = args.output file_path = args.output
save_bits_to_file(file_path, bits, key) save_bits_to_file(file_path, bits, key, args.reed_solomon)
elif args.encode: elif args.encode:
@ -415,7 +510,7 @@ def main():
) )
# get bits from file # get bits from file
bits = get_bits_from_file(args.input, key) bits = get_bits_from_file(args.input, key, args.reed_solomon)
# create image sequence # create image sequence
make_image_sequence(bits) make_image_sequence(bits)