Added Lempel-Ziv decompression algorithm implementation

TheAlgorithms · cclauss · Jun 17, 2020 · Jun 12, 2020 · Jun 12, 2020 · Jun 12, 2020
commit 7a049b6d285f62328d1cdbb4e81fa17504a3b305
diff --git a/compression/lempel_ziv_decompress.py b/compression/lempel_ziv_decompress.py
@@ -0,0 +1,105 @@
+"""
+    One of several implementations of the Lempel–Ziv–Welch decompression algorithm
+    https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+"""
+
+import sys
+import math
+
+
+def read_file_binary(file):
+    """
+    Reads the given file as bytes and returns its content as one long string
+    of "0"/"1" characters: eight characters per byte, most significant bit
+    first.
+
+    Prints "File not accessible" and exits with status 1 if the file cannot
+    be opened or read.
+    """
+    # Keep only the I/O inside the try so unrelated errors are not masked.
+    try:
+        with open(file, "rb") as binary_file:
+            data = binary_file.read()
+    except IOError:
+        print("File not accessible")
+        # Non-zero status so callers and shells can detect the failure.
+        sys.exit(1)
+
+    # Join once instead of growing the string with += for every byte.
+    return "".join("{0:08b}".format(dat) for dat in data)
+
+
+def decompress_data(data_bits):
+    """
+    Decompresses given data_bits using Lempel–Ziv–Welch compression algorithm
+    and returns the result as a string
+
+    data_bits is a string of "0"/"1" characters. Bits are consumed one at a
+    time until they spell a code present in the lexicon; trailing bits that
+    never complete a code are ignored.
+    """
+    lexicon = {"0": "0", "1": "1"}
+    pieces = []  # decoded fragments, joined once at the end
+    curr_string = ""
+    index = len(lexicon)
+
+    for bit in data_bits:
+        curr_string += bit
+        if curr_string not in lexicon:
+            continue
+
+        last_match_id = lexicon[curr_string]
+        pieces.append(last_match_id)
+        # The matched code is re-bound to the match extended by "0" ...
+        lexicon[curr_string] = last_match_id + "0"
+
+        # When index is a power of two, every existing code gets a leading
+        # "0" so it has the same width as the new code added below.
+        # Exact integer test; avoids float rounding of math.log2(index).
+        if index & (index - 1) == 0:
+            lexicon = {"0" + code: text for code, text in lexicon.items()}
+
+        # ... and a fresh code is bound to the match extended by "1".
+        lexicon[bin(index)[2:]] = last_match_id + "1"
+        index += 1
+        curr_string = ""
+    return "".join(pieces)
+
+
+def write_file_binary(file, to_write):
+    """
+    Writes given to_write string (should only consist of 0's and 1's) as bytes in the file
+
+    Only complete groups of 8 bits are written; up to 7 trailing bits that do
+    not fill a byte are discarded. An empty to_write produces an empty file.
+
+    Prints "File not accessible" and exits with status 1 if the file cannot
+    be opened or written.
+    """
+    byte_length = 8
+    # Number of bits that form whole bytes; the remainder is dropped.
+    whole_bits = len(to_write) - len(to_write) % byte_length
+    # Convert before touching the file so invalid input cannot truncate it.
+    payload = bytes(
+        int(to_write[i:i + byte_length], 2)
+        for i in range(0, whole_bits, byte_length)
+    )
+    try:
+        with open(file, "wb") as opened_file:
+            opened_file.write(payload)
+    except IOError:
+        print("File not accessible")
+        # Non-zero status so callers and shells can detect the failure.
+        sys.exit(1)
+
+
+def remove_prefix(data_bits):
+    """
+    Strips the size prefix that a compressed file is expected to start with
+    and returns the remaining bits.
+
+    The prefix is taken to be the n symbols before the first "1", followed by
+    n + 1 further symbols starting at that "1".
+    """
+    # Position of the first "1"; the full length when there is none.
+    zero_run = next(
+        (pos for pos, symbol in enumerate(data_bits) if symbol == "1"),
+        len(data_bits),
+    )
+    # Dropping zero_run symbols and then zero_run + 1 more is a single slice.
+    return data_bits[2 * zero_run + 1:]
+
+
+def compress(source, destination):
+    """
+    Decompresses the file at source and stores the outcome in destination.
+
+    Despite its name, this function performs decompression: it reads the
+    source file as bits, drops the size prefix, decodes the remaining bits
+    and writes the decoded bits to destination.
+    """
+    payload_bits = remove_prefix(read_file_binary(source))
+    write_file_binary(destination, decompress_data(payload_bits))
+
+
+if __name__ == "__main__":
+    # Exit with a usage hint rather than an IndexError when arguments are missing.
+    if len(sys.argv) < 3:
+        sys.exit("Usage: {} <source_file> <destination_file>".format(sys.argv[0]))
+    compress(sys.argv[1], sys.argv[2])