Skip to content

Add lempel ziv compression #2107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 17, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added lempel-ziv decompression algorithm implementation
  • Loading branch information
iko1133 committed Jun 12, 2020
commit 7a049b6d285f62328d1cdbb4e81fa17504a3b305
105 changes: 105 additions & 0 deletions compression/lempel_ziv_decompress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""
One of several implementations of the Lempel–Ziv–Welch decompression algorithm
https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
"""

import sys
import math


def read_file_binary(file):
    """
    Read a file in binary mode and return its content as a string of bits.

    Every byte is rendered as eight "0"/"1" characters (zero-padded binary),
    in file order, so the result is 8 times as long as the file.

    file: path of the file to read (passed straight to open()).
    Returns: the bit string; "" for an empty file.
    On IOError while opening or reading, prints "File not accessible" and
    terminates the program via sys.exit().
    """
    # Keep the try body limited to the I/O that can actually raise IOError.
    try:
        with open(file, "rb") as binary_file:
            data = binary_file.read()
    except IOError:
        print("File not accessible")
        sys.exit()

    # One join instead of repeated "+=": linear time on every interpreter.
    return "".join("{0:08b}".format(byte) for byte in data)


def decompress_data(data_bits):
    """
    Decompress a string of "0"/"1" characters and return the decoded bits.

    The input is read as a sequence of variable-width codes. A lexicon maps
    each known code to the bit phrase it stands for; it starts as
    {"0": "0", "1": "1"}. Whenever the bits read so far form a known code:

    * its phrase is appended to the output,
    * that code is rebound to phrase + "0",
    * a new code, the binary form of the running index, is bound to
      phrase + "1".

    Each time the running index is a power of two, every existing code gets
    a leading "0" so that all codes are one bit wider from then on.

    data_bits: iterable of "0"/"1" characters (normally a str).
    Returns: the decompressed bit string. Trailing bits that never complete
    a known code are ignored; empty input yields "".
    """
    lexicon = {"0": "0", "1": "1"}
    pieces = []  # decoded phrases, joined once at the end
    curr_string = ""
    index = len(lexicon)

    for bit in data_bits:
        curr_string += bit
        if curr_string not in lexicon:
            # Not a complete code yet; keep reading bits.
            continue

        last_match_id = lexicon[curr_string]
        pieces.append(last_match_id)
        lexicon[curr_string] = last_match_id + "0"

        # Exact integer power-of-two test (index is always >= 2 here);
        # avoids the float rounding of math.log2(index).is_integer().
        if index & (index - 1) == 0:
            # Code width grows by one bit: left-pad every existing code.
            lexicon = {"0" + code: phrase for code, phrase in lexicon.items()}

        lexicon[bin(index)[2:]] = last_match_id + "1"
        index += 1
        curr_string = ""

    return "".join(pieces)


def write_file_binary(file, to_write):
    """
    Write a string of "0"/"1" characters to a file as raw bytes.

    The string is cut into 8-character groups and each complete group is
    written as one byte. Bits left over at the end that do not fill a whole
    byte are not written. An empty string produces an empty file.

    file: path of the file to create or overwrite (passed to open()).
    to_write: string that should consist only of "0" and "1".
    On IOError while opening or writing, prints "File not accessible" and
    terminates the program via sys.exit().
    """
    byte_length = 8
    # Length of the prefix made of complete bytes; the incomplete tail is
    # dropped. Computing it up front also makes empty input safe (no
    # indexing into an empty chunk list).
    whole_bits = len(to_write) - len(to_write) % byte_length
    try:
        with open(file, "wb") as opened_file:
            opened_file.write(
                bytes(
                    int(to_write[start:start + byte_length], 2)
                    for start in range(0, whole_bits, byte_length)
                )
            )
    except IOError:
        print("File not accessible")
        sys.exit()


def remove_prefix(data_bits):
    """
    Strip the size prefix that a compressed file is expected to start with.

    Let k be the number of characters that precede the first "1" (or the
    whole length when there is no "1"). Those k characters are dropped,
    followed by a further k + 1 characters; whatever remains is returned.
    """
    marker_pos = next(
        (pos for pos, bit in enumerate(data_bits) if bit == "1"),
        len(data_bits),
    )
    # Dropping k characters and then k + 1 more is a single slice at 2k + 1.
    return data_bits[2 * marker_pos + 1:]


def compress(source, destination):
    """
    Decompress the file at source and store the result at destination.

    Note that, despite its name, this function runs the decompression
    pipeline: read the source as bits, strip the size prefix, decode the
    remaining bits and write them out as bytes.
    """
    payload_bits = remove_prefix(read_file_binary(source))
    write_file_binary(destination, decompress_data(payload_bits))


if __name__ == "__main__":
    # Script usage: <script> <source> <destination>.
    # Fail with a readable message and a non-zero status instead of a bare
    # IndexError when either path is missing; extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit("Usage: {} <source> <destination>".format(sys.argv[0]))
    compress(sys.argv[1], sys.argv[2])