From 848143b85c32aef8cf33ef32f3d82a4d5cd6534c Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Tue, 27 Dec 2022 19:32:11 +0100 Subject: [PATCH 01/17] - add "lz77_compressor" class with compress and decompress methods using LZ77 compression algorithm --- compression/lz77.py | 197 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 compression/lz77.py diff --git a/compression/lz77.py b/compression/lz77.py new file mode 100644 index 000000000000..6844dde143f7 --- /dev/null +++ b/compression/lz77.py @@ -0,0 +1,197 @@ +""" +LZ77 compression algorithm +- lossless data compression published in papers by Abraham Lempel and Jacob Ziv in 1977 +- also known as LZ1 or sliding-window compression +- form the basis for many variations including LZW, LZSS, LZMA and others + +It uses a “sliding window” method. Within the sliding window we have: + - search buffer + - look ahead buffer +len(sliding_window) = len(search_buffer) + len(look_ahead_buffer) + +LZ77 manages a dictionary that uses triples composed of: + - Offset into search buffer, it's the distance between the start of a phrase and + the beginning of a file. + - Length of the match, it's the number of characters that make up a phrase. + - The indicator is represented by a character that is going to be encoded next. + +As a file is parsed, the dictionary is dynamically updated to reflect the compressed +data contents and size. + +Examples: +"cabracadabrarrarrad" <-> [(0, 0, 'c'), (0, 0, 'a'), (0, 0, 'b'), (0, 0, 'r'), + (3, 1, 'c'), (2, 1, 'd'), (7, 4, 'r'), (3, 5, 'd')] +"ababcbababaa" <-> [(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), (4, 3, 'a'), (2, 2, 'a')] +"aacaacabcabaaac" <-> [(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), (3, 3, 'a'), (1, 2, 'c')] + +Sources: +en.wikipedia.org/wiki/LZ77_and_LZ78 +""" + +from typing import List, Tuple + +__version__ = '0.1' +__author__ = 'Lucia Harcekova' + + +class LZ77Compressor: + """ + Class containg compress and decompress methods using LZ77 compression algorithm. + """ + + def __init__(self, window_size=13, lookahead_buffer_size=6): + self.window_size = window_size + self.lookahead_buffer_size = lookahead_buffer_size + self.search_buffer_size = self.window_size - self.lookahead_buffer_size + + def compress(self, text: str) -> List[Tuple[int, int, str]]: + """This method compresses given string text using LZ77 compression algorithm. 
+ + Args: + text (str): string that's going to be compressed + + Returns: + output (List[Tuple[int, int, str]]): the compressed text + + Tests: + >>> lz77_compressor = LZ77Compressor(13, 6) + >>> lz77_compressor.compress("ababcbababaa") + [(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), (4, 3, 'a'), (2, 2, 'a')] + >>> lz77_compressor.compress("aacaacabcabaaac") + [(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), (3, 3, 'a'), (1, 2, 'c')] + """ + + output = [] + search_buffer = "" + + # while there are still characters in text to compress + while text: + + # find the next encoding phrase + # - triplet with offset, length, indicator (the next encoding character) + (offset, length, indicator) = self._find_encoding_token( + text, search_buffer) + + # update the search buffer: + # - add new characters from text into it + # - check if size exceed the max search buffer size, if so, drop the + # oldest elements + search_buffer += text[:length+1] + if len(search_buffer) > self.search_buffer_size: + search_buffer = search_buffer[-self.search_buffer_size:] + + # update the text + text = text[length+1:] + + # append the token to output + output.append((offset, length, indicator)) + + return output + + def decompress(self, tokens: List[Tuple[int, int, str]]) -> str: + """This method turns the list of tokens consisting of triplets of the form + (offset, length, char), into an output string. + + Args: + tokens (List[Tuple[int, int, str]]): Tokens (offset, length, char) + + Returns: + output (str): The decompressed text + + Tests: + >>> lz77_compressor = LZ77Compressor(13, 6) + >>> lz77_compressor.decompress([(0, 0, 'c'), (0, 0, 'a'), (0, 0, 'b'), \ + (0, 0, 'r'), (3, 1, 'c'), (2, 1, 'd'), (7, 4, 'r'), (3, 5, 'd')]) + 'cabracadabrarrarrad' + >>> lz77_compressor.decompress([(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), \ + (4, 3, 'a'), (2, 2, 'a')]) + 'ababcbababaa' + >>> lz77_compressor.decompress([(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), \ + (3, 3, 'a'), (1, 2, 'c')]) + 'aacaacabcabaaac' + """ + + output = "" + + for (offset, length, indicator) in tokens: + for _ in range(length): + output += output[-offset] + output += indicator + + return output + + def _find_encoding_token(self, text: str, search_buffer: str) \ + -> Tuple[int, int, str]: + """Finds the encoding token for the first character in the text. + + Args: + text (str) + search_buffer (str) + + Returns: + Tuple[int, int, str]: Token + + Tests: + >>> lz77_compressor = LZ77Compressor(13, 6) + >>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad") + (7, 4, 'r') + >>> lz77_compressor._find_encoding_token("adabrarrarrad", "cabrac") + (2, 1, 'd') + """ + + # Initialise result parameters to default values + length, offset = 0, 0 + + if search_buffer == "": + return offset, length, text[length] + + for i, character in enumerate(search_buffer): + found_offset = len(search_buffer) - i + if character == text[0]: + found_length = self._match_length_from_index( + text, search_buffer, 0, i) + # if the found length is bigger than the current or if it's equal, + # which means it's offset is smaller: update offset and length + if found_length >= length: + offset, length = found_offset, found_length + + return offset, length, text[length] + + def _match_length_from_index(self, text: str, + window: str, text_index: int, window_index: int) -> int: + """Calculate the longest possible match of text and window characters from + text_index in text and window_index in window. 
+ + Args: + text (str): _description_ + window (str): sliding window + text_index (int): index of character in text + window_index (int): index of character in sliding window + + Returns: + int: The maximum match between text and window, from given indexes. + + Tests: + >>> lz77_compressor = LZ77Compressor(13, 6) + >>> lz77_compressor._match_length_from_index("rarrad", "adabrar", 0, 4) + 5 + >>> lz77_compressor._match_length_from_index("adabrarrarrad", \ + "cabrac", 0, 1) + 1 + """ + if text == "" or text[text_index] != window[window_index]: + return 0 + return 1 + self._match_length_from_index(text, + window + text[text_index], text_index + 1, window_index + 1) + + +if __name__ == '__main__': + + # Initialize compressor class + lz77_compressor = LZ77Compressor(window_size=13, lookahead_buffer_size=6) + + # Example + TEXT = "cabracadabrarrarrad" + compressed_text = lz77_compressor.compress(TEXT) + decompressed_text = lz77_compressor.decompress(compressed_text) + assert decompressed_text == TEXT, "The LZ77 agirithm returned the invalid result." From ee44d716f72358feabea3f229917a849b7ecaf36 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Dec 2022 18:43:00 +0000 Subject: [PATCH 02/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- compression/lz77.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 6844dde143f7..8774c8f1edd7 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -30,8 +30,8 @@ from typing import List, Tuple -__version__ = '0.1' -__author__ = 'Lucia Harcekova' +__version__ = "0.1" +__author__ = "Lucia Harcekova" class LZ77Compressor: @@ -44,7 +44,7 @@ def __init__(self, window_size=13, lookahead_buffer_size=6): self.lookahead_buffer_size = lookahead_buffer_size self.search_buffer_size = self.window_size - self.lookahead_buffer_size - def compress(self, text: str) -> List[Tuple[int, int, str]]: + def compress(self, text: str) -> list[tuple[int, int, str]]: """This method compresses given string text using LZ77 compression algorithm. Args: @@ -69,26 +69,25 @@ def compress(self, text: str) -> List[Tuple[int, int, str]]: # find the next encoding phrase # - triplet with offset, length, indicator (the next encoding character) - (offset, length, indicator) = self._find_encoding_token( - text, search_buffer) + (offset, length, indicator) = self._find_encoding_token(text, search_buffer) # update the search buffer: # - add new characters from text into it # - check if size exceed the max search buffer size, if so, drop the # oldest elements - search_buffer += text[:length+1] + search_buffer += text[: length + 1] if len(search_buffer) > self.search_buffer_size: - search_buffer = search_buffer[-self.search_buffer_size:] + search_buffer = search_buffer[-self.search_buffer_size :] # update the text - text = text[length+1:] + text = text[length + 1 :] # append the token to output output.append((offset, length, indicator)) return output - def decompress(self, tokens: List[Tuple[int, int, str]]) -> str: + def decompress(self, tokens: list[tuple[int, int, str]]) -> str: """This method turns the list of tokens consisting of triplets of the form (offset, length, char), into an output string. 
@@ -120,8 +119,9 @@ def decompress(self, tokens: List[Tuple[int, int, str]]) -> str: return output - def _find_encoding_token(self, text: str, search_buffer: str) \ - -> Tuple[int, int, str]: + def _find_encoding_token( + self, text: str, search_buffer: str + ) -> tuple[int, int, str]: """Finds the encoding token for the first character in the text. Args: @@ -148,8 +148,7 @@ def _find_encoding_token(self, text: str, search_buffer: str) \ for i, character in enumerate(search_buffer): found_offset = len(search_buffer) - i if character == text[0]: - found_length = self._match_length_from_index( - text, search_buffer, 0, i) + found_length = self._match_length_from_index(text, search_buffer, 0, i) # if the found length is bigger than the current or if it's equal, # which means it's offset is smaller: update offset and length if found_length >= length: @@ -157,8 +156,9 @@ def _find_encoding_token(self, text: str, search_buffer: str) \ return offset, length, text[length] - def _match_length_from_index(self, text: str, - window: str, text_index: int, window_index: int) -> int: + def _match_length_from_index( + self, text: str, window: str, text_index: int, window_index: int + ) -> int: """Calculate the longest possible match of text and window characters from text_index in text and window_index in window. @@ -181,11 +181,12 @@ def _match_length_from_index(self, text: str, """ if text == "" or text[text_index] != window[window_index]: return 0 - return 1 + self._match_length_from_index(text, - window + text[text_index], text_index + 1, window_index + 1) + return 1 + self._match_length_from_index( + text, window + text[text_index], text_index + 1, window_index + 1 + ) -if __name__ == '__main__': +if __name__ == "__main__": # Initialize compressor class lz77_compressor = LZ77Compressor(window_size=13, lookahead_buffer_size=6) From 86c2bb32942d055144cf9915a449c65564386560 Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Tue, 27 Dec 2022 20:28:44 +0100 Subject: [PATCH 03/17] - use "list" instead "List", formatting --- compression/lz77.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 8774c8f1edd7..191bebbe9395 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -28,7 +28,6 @@ en.wikipedia.org/wiki/LZ77_and_LZ78 """ -from typing import List, Tuple __version__ = "0.1" __author__ = "Lucia Harcekova" @@ -39,19 +38,19 @@ class LZ77Compressor: Class containg compress and decompress methods using LZ77 compression algorithm. """ - def __init__(self, window_size=13, lookahead_buffer_size=6): + def __init__(self, window_size=13, lookahead_buffer_size=6) -> None: self.window_size = window_size self.lookahead_buffer_size = lookahead_buffer_size self.search_buffer_size = self.window_size - self.lookahead_buffer_size - def compress(self, text: str) -> list[tuple[int, int, str]]: + def compress(self, text: str) -> list: """This method compresses given string text using LZ77 compression algorithm. 
Args: text (str): string that's going to be compressed Returns: - output (List[Tuple[int, int, str]]): the compressed text + output (list): the compressed text Tests: >>> lz77_compressor = LZ77Compressor(13, 6) @@ -87,12 +86,12 @@ def compress(self, text: str) -> list[tuple[int, int, str]]: return output - def decompress(self, tokens: list[tuple[int, int, str]]) -> str: + def decompress(self, tokens: list) -> str: """This method turns the list of tokens consisting of triplets of the form (offset, length, char), into an output string. Args: - tokens (List[Tuple[int, int, str]]): Tokens (offset, length, char) + tokens (list): Tokens (offset, length, char) Returns: output (str): The decompressed text @@ -119,9 +118,7 @@ def decompress(self, tokens: list[tuple[int, int, str]]) -> str: return output - def _find_encoding_token( - self, text: str, search_buffer: str - ) -> tuple[int, int, str]: + def _find_encoding_token(self, text: str, search_buffer: str) -> tuple: """Finds the encoding token for the first character in the text. Args: @@ -129,7 +126,7 @@ def _find_encoding_token( search_buffer (str) Returns: - Tuple[int, int, str]: Token + tuple: Token Tests: >>> lz77_compressor = LZ77Compressor(13, 6) From f5936933a3e58cae34438e5fdd5c5a1216a4afb7 Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Tue, 27 Dec 2022 20:33:25 +0100 Subject: [PATCH 04/17] - fix spelling --- compression/lz77.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compression/lz77.py b/compression/lz77.py index 191bebbe9395..9804d9f7032f 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -35,7 +35,7 @@ class LZ77Compressor: """ - Class containg compress and decompress methods using LZ77 compression algorithm. + Class containing compress and decompress methods using LZ77 compression algorithm. """ def __init__(self, window_size=13, lookahead_buffer_size=6) -> None: From ee06ca09f12cb433752f183b481e81e3015ab820 Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Tue, 27 Dec 2022 20:44:08 +0100 Subject: [PATCH 05/17] - add Python type hints --- compression/lz77.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compression/lz77.py b/compression/lz77.py index 9804d9f7032f..aa0c13a688b1 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -38,7 +38,7 @@ class LZ77Compressor: Class containing compress and decompress methods using LZ77 compression algorithm. """ - def __init__(self, window_size=13, lookahead_buffer_size=6) -> None: + def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> None: self.window_size = window_size self.lookahead_buffer_size = lookahead_buffer_size self.search_buffer_size = self.window_size - self.lookahead_buffer_size From 3198b334b5162ef0395424af4f7120dcb43cf444 Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Tue, 27 Dec 2022 23:10:52 +0100 Subject: [PATCH 06/17] - add 'Token' class to represent triplet (offset, length, indicator) --- compression/lz77.py | 78 +++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index aa0c13a688b1..12dc4385f537 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -32,6 +32,20 @@ __version__ = "0.1" __author__ = "Lucia Harcekova" +from typing import List + + +class Token: + """ + Dataclass representing triplet called token consisting of length, offset + and indicator. This triplet is used during LZ77 compression. 
+ """ + + def __init__(self, offset: int, length: int, indicator: str) -> None: + self.offset = offset + self.length = length + self.indicator = indicator + class LZ77Compressor: """ @@ -43,21 +57,14 @@ def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> Non self.lookahead_buffer_size = lookahead_buffer_size self.search_buffer_size = self.window_size - self.lookahead_buffer_size - def compress(self, text: str) -> list: + def compress(self, text: str) -> List[Token]: """This method compresses given string text using LZ77 compression algorithm. Args: text (str): string that's going to be compressed Returns: - output (list): the compressed text - - Tests: - >>> lz77_compressor = LZ77Compressor(13, 6) - >>> lz77_compressor.compress("ababcbababaa") - [(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), (4, 3, 'a'), (2, 2, 'a')] - >>> lz77_compressor.compress("aacaacabcabaaac") - [(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), (3, 3, 'a'), (1, 2, 'c')] + output (List[Token]): the compressed text """ output = [] @@ -68,57 +75,58 @@ def compress(self, text: str) -> list: # find the next encoding phrase # - triplet with offset, length, indicator (the next encoding character) - (offset, length, indicator) = self._find_encoding_token(text, search_buffer) + token = self._find_encoding_token(text, search_buffer) # update the search buffer: # - add new characters from text into it # - check if size exceed the max search buffer size, if so, drop the # oldest elements - search_buffer += text[: length + 1] + search_buffer += text[: token.length + 1] if len(search_buffer) > self.search_buffer_size: search_buffer = search_buffer[-self.search_buffer_size :] # update the text - text = text[length + 1 :] + text = text[token.length + 1 :] # append the token to output - output.append((offset, length, indicator)) + output.append(token) return output - def decompress(self, tokens: list) -> str: - """This method turns the list of tokens consisting of triplets of the form + def decompress(self, tokens: List[Token]) -> str: + """This method turns the List of tokens consisting of triplets of the form (offset, length, char), into an output string. 
Args: - tokens (list): Tokens (offset, length, char) + tokens (List[Token]): Tokens (offset, length, char) Returns: output (str): The decompressed text Tests: >>> lz77_compressor = LZ77Compressor(13, 6) - >>> lz77_compressor.decompress([(0, 0, 'c'), (0, 0, 'a'), (0, 0, 'b'), \ - (0, 0, 'r'), (3, 1, 'c'), (2, 1, 'd'), (7, 4, 'r'), (3, 5, 'd')]) + >>> lz77_compressor.decompress([Token(0, 0, 'c'), Token(0, 0, 'a'), \ + Token(0, 0, 'b'), Token(0, 0, 'r'), Token(3, 1, 'c'), \ + Token(2, 1, 'd'), Token(7, 4, 'r'), Token(3, 5, 'd')]) 'cabracadabrarrarrad' - >>> lz77_compressor.decompress([(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), \ - (4, 3, 'a'), (2, 2, 'a')]) + >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(0, 0, 'b'), \ + Token(2, 2, 'c'), Token(4, 3, 'a'), Token(2, 2, 'a')]) 'ababcbababaa' - >>> lz77_compressor.decompress([(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), \ - (3, 3, 'a'), (1, 2, 'c')]) + >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(1, 1, 'c'), \ + Token(3, 4, 'b'), Token(3, 3, 'a'), Token(1, 2, 'c')]) 'aacaacabcabaaac' """ output = "" - for (offset, length, indicator) in tokens: - for _ in range(length): - output += output[-offset] - output += indicator + for token in tokens: + for _ in range(token.length): + output += output[-token.offset] + output += token.indicator return output - def _find_encoding_token(self, text: str, search_buffer: str) -> tuple: + def _find_encoding_token(self, text: str, search_buffer: str) -> Token: """Finds the encoding token for the first character in the text. Args: @@ -126,21 +134,21 @@ def _find_encoding_token(self, text: str, search_buffer: str) -> tuple: search_buffer (str) Returns: - tuple: Token + (offset, length, indicator) (Token) Tests: >>> lz77_compressor = LZ77Compressor(13, 6) - >>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad") - (7, 4, 'r') - >>> lz77_compressor._find_encoding_token("adabrarrarrad", "cabrac") - (2, 1, 'd') + >>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad").offset + 7 + >>> lz77_compressor._find_encoding_token("adabrarrarrad", "cabrac").length + 1 """ # Initialise result parameters to default values length, offset = 0, 0 if search_buffer == "": - return offset, length, text[length] + return Token(offset, length, text[length]) for i, character in enumerate(search_buffer): found_offset = len(search_buffer) - i @@ -151,7 +159,7 @@ def _find_encoding_token(self, text: str, search_buffer: str) -> tuple: if found_length >= length: offset, length = found_offset, found_length - return offset, length, text[length] + return Token(offset, length, text[length]) def _match_length_from_index( self, text: str, window: str, text_index: int, window_index: int @@ -192,4 +200,4 @@ def _match_length_from_index( TEXT = "cabracadabrarrarrad" compressed_text = lz77_compressor.compress(TEXT) decompressed_text = lz77_compressor.decompress(compressed_text) - assert decompressed_text == TEXT, "The LZ77 agirithm returned the invalid result." + assert decompressed_text == TEXT, "The LZ77 algorithm returned the invalid result." 
From 41c5a0fd5ab0ae1a2a68ec0c7ba62f21de68e071 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Dec 2022 22:11:48 +0000 Subject: [PATCH 07/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- compression/lz77.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 12dc4385f537..66687316f148 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -57,7 +57,7 @@ def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> Non self.lookahead_buffer_size = lookahead_buffer_size self.search_buffer_size = self.window_size - self.lookahead_buffer_size - def compress(self, text: str) -> List[Token]: + def compress(self, text: str) -> list[Token]: """This method compresses given string text using LZ77 compression algorithm. Args: @@ -93,7 +93,7 @@ def compress(self, text: str) -> List[Token]: return output - def decompress(self, tokens: List[Token]) -> str: + def decompress(self, tokens: list[Token]) -> str: """This method turns the List of tokens consisting of triplets of the form (offset, length, char), into an output string. From 63f28c6f27bb80f1494c8ffaa513366f7ccbc9e2 Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Wed, 28 Dec 2022 07:03:50 +0100 Subject: [PATCH 08/17] - add test, hange type rom List to list --- compression/lz77.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 66687316f148..feebc77e50f1 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -28,12 +28,11 @@ en.wikipedia.org/wiki/LZ77_and_LZ78 """ +from __future__ import annotations __version__ = "0.1" __author__ = "Lucia Harcekova" -from typing import List - class Token: """ @@ -64,7 +63,17 @@ def compress(self, text: str) -> list[Token]: text (str): string that's going to be compressed Returns: - output (List[Token]): the compressed text + output (list[Token]): the compressed text + + Returns: + (offset, length, indicator) (Token) + + Tests: + >>> lz77_compressor = LZ77Compressor(13, 6) + >>> len(lz77_compressor.compress("ababcbababaa")) + 5 + >>> len(lz77_compressor.compress("aacaacabcabaaac")) + 5 """ output = [] @@ -83,10 +92,10 @@ def compress(self, text: str) -> list[Token]: # oldest elements search_buffer += text[: token.length + 1] if len(search_buffer) > self.search_buffer_size: - search_buffer = search_buffer[-self.search_buffer_size :] + search_buffer = search_buffer[-self.search_buffer_size:] # update the text - text = text[token.length + 1 :] + text = text[token.length + 1:] # append the token to output output.append(token) @@ -94,11 +103,11 @@ def compress(self, text: str) -> list[Token]: return output def decompress(self, tokens: list[Token]) -> str: - """This method turns the List of tokens consisting of triplets of the form + """This method turns the list of tokens consisting of triplets of the form (offset, length, char), into an output string. 
Args: - tokens (List[Token]): Tokens (offset, length, char) + tokens (list[Token]): Tokens (offset, length, char) Returns: output (str): The decompressed text From 76b22a21680dc615a20f4586bef410233dd84d4a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 Dec 2022 06:05:41 +0000 Subject: [PATCH 09/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- compression/lz77.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index feebc77e50f1..98f699bffb87 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -92,10 +92,10 @@ def compress(self, text: str) -> list[Token]: # oldest elements search_buffer += text[: token.length + 1] if len(search_buffer) > self.search_buffer_size: - search_buffer = search_buffer[-self.search_buffer_size:] + search_buffer = search_buffer[-self.search_buffer_size :] # update the text - text = text[token.length + 1:] + text = text[token.length + 1 :] # append the token to output output.append(token) From dd40cf3cc93cb6045b6450a2987c1249907d559e Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Wed, 28 Dec 2022 07:07:52 +0100 Subject: [PATCH 10/17] - remove extra import --- compression/lz77.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 98f699bffb87..3b6d37cd93d7 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -28,8 +28,6 @@ en.wikipedia.org/wiki/LZ77_and_LZ78 """ -from __future__ import annotations - __version__ = "0.1" __author__ = "Lucia Harcekova" From 153ed96445db46140e23e9fefb0a386050b64c5d Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Wed, 28 Dec 2022 07:14:17 +0100 Subject: [PATCH 11/17] - remove extra types in comments --- compression/lz77.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 3b6d37cd93d7..06543960c4ed 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -58,13 +58,13 @@ def compress(self, text: str) -> list[Token]: """This method compresses given string text using LZ77 compression algorithm. Args: - text (str): string that's going to be compressed + text: string that's going to be compressed Returns: - output (list[Token]): the compressed text + output: the compressed text Returns: - (offset, length, indicator) (Token) + token (offset, length, indicator) Tests: >>> lz77_compressor = LZ77Compressor(13, 6) @@ -105,10 +105,10 @@ def decompress(self, tokens: list[Token]) -> str: (offset, length, char), into an output string. Args: - tokens (list[Token]): Tokens (offset, length, char) + tokens: list containing triplets (offset, length, char) Returns: - output (str): The decompressed text + output: decompressed text Tests: >>> lz77_compressor = LZ77Compressor(13, 6) @@ -137,11 +137,11 @@ def _find_encoding_token(self, text: str, search_buffer: str) -> Token: """Finds the encoding token for the first character in the text. Args: - text (str) - search_buffer (str) + text + search_buffer Returns: - (offset, length, indicator) (Token) + (offset, length, indicator) Tests: >>> lz77_compressor = LZ77Compressor(13, 6) @@ -175,13 +175,13 @@ def _match_length_from_index( text_index in text and window_index in window. 
Args: - text (str): _description_ - window (str): sliding window - text_index (int): index of character in text - window_index (int): index of character in sliding window + text: _description_ + window: sliding window + text_index: index of character in text + window_index: index of character in sliding window Returns: - int: The maximum match between text and window, from given indexes. + The maximum match between text and window, from given indexes. Tests: >>> lz77_compressor = LZ77Compressor(13, 6) From 7bf90967c44e360cb5d683dcf733c518973119c3 Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Wed, 28 Dec 2022 08:22:37 +0100 Subject: [PATCH 12/17] - better test --- compression/lz77.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 06543960c4ed..c9e2c031cb64 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -28,6 +28,7 @@ en.wikipedia.org/wiki/LZ77_and_LZ78 """ + __version__ = "0.1" __author__ = "Lucia Harcekova" @@ -43,6 +44,12 @@ def __init__(self, offset: int, length: int, indicator: str) -> None: self.length = length self.indicator = indicator + def __repr__(self): + return f"({self.offset}, {self.length}, {self.indicator})" + + def __str__(self): + return f"({self.offset}, {self.length}, {self.indicator})" + class LZ77Compressor: """ @@ -64,14 +71,14 @@ def compress(self, text: str) -> list[Token]: output: the compressed text Returns: - token (offset, length, indicator) + (offset, length, indicator) Tests: >>> lz77_compressor = LZ77Compressor(13, 6) - >>> len(lz77_compressor.compress("ababcbababaa")) - 5 - >>> len(lz77_compressor.compress("aacaacabcabaaac")) - 5 + >>> str(lz77_compressor.compress("ababcbababaa")) + '[(0, 0, a), (0, 0, b), (2, 2, c), (4, 3, a), (2, 2, a)]' + >>> str(lz77_compressor.compress("aacaacabcabaaac")) + '[(0, 0, a), (1, 1, c), (3, 4, b), (3, 3, a), (1, 2, c)]' """ output = [] @@ -206,5 +213,6 @@ def _match_length_from_index( # Example TEXT = "cabracadabrarrarrad" compressed_text = lz77_compressor.compress(TEXT) + print(lz77_compressor.compress("ababcbababaa")) decompressed_text = lz77_compressor.decompress(compressed_text) assert decompressed_text == TEXT, "The LZ77 algorithm returned the invalid result." From 52af3cfc75a93c305cadb5999842511359b7ac3f Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Wed, 28 Dec 2022 08:24:09 +0100 Subject: [PATCH 13/17] - edit comments --- compression/lz77.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index c9e2c031cb64..2f11e0f8524a 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -71,7 +71,7 @@ def compress(self, text: str) -> list[Token]: output: the compressed text Returns: - (offset, length, indicator) + Compressed text made of triplets (offset, length, indicator). Tests: >>> lz77_compressor = LZ77Compressor(13, 6) @@ -143,13 +143,6 @@ def decompress(self, tokens: list[Token]) -> str: def _find_encoding_token(self, text: str, search_buffer: str) -> Token: """Finds the encoding token for the first character in the text. 
- Args: - text - search_buffer - - Returns: - (offset, length, indicator) - Tests: >>> lz77_compressor = LZ77Compressor(13, 6) >>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad").offset From 32982841aaa360bb29617868e0d7056f2c98e81e Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Wed, 28 Dec 2022 17:30:47 +0100 Subject: [PATCH 14/17] - add return types --- compression/lz77.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 2f11e0f8524a..8be0314171d2 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -44,10 +44,10 @@ def __init__(self, offset: int, length: int, indicator: str) -> None: self.length = length self.indicator = indicator - def __repr__(self): + def __repr__(self) -> str: return f"({self.offset}, {self.length}, {self.indicator})" - def __str__(self): + def __str__(self) -> str: return f"({self.offset}, {self.length}, {self.indicator})" From b530862009442ea6009f9006b3531e6c1b99637b Mon Sep 17 00:00:00 2001 From: LuciaHarcekova Date: Wed, 28 Dec 2022 18:00:54 +0100 Subject: [PATCH 15/17] - add tests for __str__ and __repr__ --- compression/lz77.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compression/lz77.py b/compression/lz77.py index 8be0314171d2..68161b3dabf1 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -28,6 +28,7 @@ en.wikipedia.org/wiki/LZ77_and_LZ78 """ +from __future__ import annotations __version__ = "0.1" __author__ = "Lucia Harcekova" @@ -45,9 +46,21 @@ def __init__(self, offset: int, length: int, indicator: str) -> None: self.indicator = indicator def __repr__(self) -> str: + """ + Tests: + >>> token = Token(5, 6, "a") + >>> token.__repr__() + '(5, 6, a)' + """ return f"({self.offset}, {self.length}, {self.indicator})" def __str__(self) -> str: + """ + Tests: + >>> token = Token(5, 6, "a") + >>> token.__str__() + '(5, 6, a)' + """ return f"({self.offset}, {self.length}, {self.indicator})" From b891abf349c07882bc2b26579a65b68cd16bbc28 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Wed, 28 Dec 2022 18:30:52 +0100 Subject: [PATCH 16/17] Update lz77.py --- compression/lz77.py | 94 +++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/compression/lz77.py b/compression/lz77.py index 68161b3dabf1..8b981ed83afc 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -28,38 +28,30 @@ en.wikipedia.org/wiki/LZ77_and_LZ78 """ -from __future__ import annotations + +from dataclasses import dataclass __version__ = "0.1" __author__ = "Lucia Harcekova" +@dataclass class Token: """ Dataclass representing triplet called token consisting of length, offset and indicator. This triplet is used during LZ77 compression. 
""" - - def __init__(self, offset: int, length: int, indicator: str) -> None: - self.offset = offset - self.length = length - self.indicator = indicator + offset: int + length: int + indicator: str def __repr__(self) -> str: """ - Tests: - >>> token = Token(5, 6, "a") - >>> token.__repr__() - '(5, 6, a)' - """ - return f"({self.offset}, {self.length}, {self.indicator})" - - def __str__(self) -> str: - """ - Tests: - >>> token = Token(5, 6, "a") - >>> token.__str__() - '(5, 6, a)' + >>> token = Token(1, 2, "c") + >>> repr(token) + '(1, 2, c)' + >>> str(token) + '(1, 2, c)' """ return f"({self.offset}, {self.length}, {self.indicator})" @@ -75,23 +67,20 @@ def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> Non self.search_buffer_size = self.window_size - self.lookahead_buffer_size def compress(self, text: str) -> list[Token]: - """This method compresses given string text using LZ77 compression algorithm. + """ + Compress the given string text using LZ77 compression algorithm. Args: - text: string that's going to be compressed - - Returns: - output: the compressed text + text: string to be compressed Returns: - Compressed text made of triplets (offset, length, indicator). + output: the compressed text as a list of Tokens - Tests: - >>> lz77_compressor = LZ77Compressor(13, 6) - >>> str(lz77_compressor.compress("ababcbababaa")) - '[(0, 0, a), (0, 0, b), (2, 2, c), (4, 3, a), (2, 2, a)]' - >>> str(lz77_compressor.compress("aacaacabcabaaac")) - '[(0, 0, a), (1, 1, c), (3, 4, b), (3, 3, a), (1, 2, c)]' + >>> lz77_compressor = LZ77Compressor() + >>> str(lz77_compressor.compress("ababcbababaa")) + '[(0, 0, a), (0, 0, b), (2, 2, c), (4, 3, a), (2, 2, a)]' + >>> str(lz77_compressor.compress("aacaacabcabaaac")) + '[(0, 0, a), (1, 1, c), (3, 4, b), (3, 3, a), (1, 2, c)]' """ output = [] @@ -121,8 +110,8 @@ def compress(self, text: str) -> list[Token]: return output def decompress(self, tokens: list[Token]) -> str: - """This method turns the list of tokens consisting of triplets of the form - (offset, length, char), into an output string. + """ + Convert the list of tokens into an output string. Args: tokens: list containing triplets (offset, length, char) @@ -131,16 +120,16 @@ def decompress(self, tokens: list[Token]) -> str: output: decompressed text Tests: - >>> lz77_compressor = LZ77Compressor(13, 6) - >>> lz77_compressor.decompress([Token(0, 0, 'c'), Token(0, 0, 'a'), \ - Token(0, 0, 'b'), Token(0, 0, 'r'), Token(3, 1, 'c'), \ - Token(2, 1, 'd'), Token(7, 4, 'r'), Token(3, 5, 'd')]) + >>> lz77_compressor = LZ77Compressor() + >>> lz77_compressor.decompress([Token(0, 0, 'c'), Token(0, 0, 'a'), + ... Token(0, 0, 'b'), Token(0, 0, 'r'), Token(3, 1, 'c'), + ... Token(2, 1, 'd'), Token(7, 4, 'r'), Token(3, 5, 'd')]) 'cabracadabrarrarrad' - >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(0, 0, 'b'), \ - Token(2, 2, 'c'), Token(4, 3, 'a'), Token(2, 2, 'a')]) + >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(0, 0, 'b'), + ... Token(2, 2, 'c'), Token(4, 3, 'a'), Token(2, 2, 'a')]) 'ababcbababaa' - >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(1, 1, 'c'), \ - Token(3, 4, 'b'), Token(3, 3, 'a'), Token(1, 2, 'c')]) + >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(1, 1, 'c'), + ... Token(3, 4, 'b'), Token(3, 3, 'a'), Token(1, 2, 'c')]) 'aacaacabcabaaac' """ @@ -157,17 +146,28 @@ def _find_encoding_token(self, text: str, search_buffer: str) -> Token: """Finds the encoding token for the first character in the text. 
Tests: - >>> lz77_compressor = LZ77Compressor(13, 6) + >>> lz77_compressor = LZ77Compressor() >>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad").offset 7 >>> lz77_compressor._find_encoding_token("adabrarrarrad", "cabrac").length 1 + >>> lz77_compressor._find_encoding_token("abc", "xyz").offset + 0 + >>> lz77_compressor._find_encoding_token("", "xyz").offset + Traceback (most recent call last): + ... + ValueError: We need some text to work with. + >>> lz77_compressor._find_encoding_token("abc", "").offset + 0 """ + if not text: + raise ValueError("We need some text to work with.") + # Initialise result parameters to default values length, offset = 0, 0 - if search_buffer == "": + if not search_buffer: return Token(offset, length, text[length]) for i, character in enumerate(search_buffer): @@ -200,11 +200,11 @@ def _match_length_from_index( >>> lz77_compressor = LZ77Compressor(13, 6) >>> lz77_compressor._match_length_from_index("rarrad", "adabrar", 0, 4) 5 - >>> lz77_compressor._match_length_from_index("adabrarrarrad", \ - "cabrac", 0, 1) + >>> lz77_compressor._match_length_from_index("adabrarrarrad", + ... "cabrac", 0, 1) 1 """ - if text == "" or text[text_index] != window[window_index]: + if not text or text[text_index] != window[window_index]: return 0 return 1 + self._match_length_from_index( text, window + text[text_index], text_index + 1, window_index + 1 @@ -212,7 +212,9 @@ def _match_length_from_index( if __name__ == "__main__": + from doctest import testmod + testmod() # Initialize compressor class lz77_compressor = LZ77Compressor(window_size=13, lookahead_buffer_size=6) From bda07c5d29039caf23866846c8dcb40c093bab48 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 Dec 2022 17:31:58 +0000 Subject: [PATCH 17/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- compression/lz77.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compression/lz77.py b/compression/lz77.py index 8b981ed83afc..7c1a6f6a4c19 100644 --- a/compression/lz77.py +++ b/compression/lz77.py @@ -41,6 +41,7 @@ class Token: Dataclass representing triplet called token consisting of length, offset and indicator. This triplet is used during LZ77 compression. """ + offset: int length: int indicator: str @@ -163,7 +164,7 @@ def _find_encoding_token(self, text: str, search_buffer: str) -> Token: if not text: raise ValueError("We need some text to work with.") - + # Initialise result parameters to default values length, offset = 0, 0