From 5fe2275e2bb152fe94ad6a2ab824ea79737309ac Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 16:50:11 +0530 Subject: [PATCH 01/12] Type annotations for `strings/autocomplete_using_trie.py` --- strings/autocomplete_using_trie.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/strings/autocomplete_using_trie.py b/strings/autocomplete_using_trie.py index 8aa0dc223680..b91b0780c75d 100644 --- a/strings/autocomplete_using_trie.py +++ b/strings/autocomplete_using_trie.py @@ -1,11 +1,14 @@ +from __future__ import annotations + + END = "#" class Trie: - def __init__(self): - self._trie = {} + def __init__(self) -> None: + self._trie: dict = {} - def insert_word(self, text): + def insert_word(self, text: str) -> None: trie = self._trie for char in text: if char not in trie: @@ -13,7 +16,7 @@ def insert_word(self, text): trie = trie[char] trie[END] = True - def find_word(self, prefix): + def find_word(self, prefix: str) -> tuple | list: trie = self._trie for char in prefix: if char in trie: @@ -22,7 +25,7 @@ def find_word(self, prefix): return [] return self._elements(trie) - def _elements(self, d): + def _elements(self, d: dict) -> tuple: result = [] for c, v in d.items(): if c == END: @@ -39,26 +42,28 @@ def _elements(self, d): trie.insert_word(word) -def autocomplete_using_trie(s): +def autocomplete_using_trie(string: str) -> tuple: """ >>> trie = Trie() >>> for word in words: ... trie.insert_word(word) ... >>> matches = autocomplete_using_trie("de") - "detergent " in matches True "dog " in matches False """ - suffixes = trie.find_word(s) - return tuple(s + w for w in suffixes) + suffixes = trie.find_word(string) + return tuple(string + word for word in suffixes) -def main(): +def main() -> None: print(autocomplete_using_trie("de")) if __name__ == "__main__": + import doctest + + doctest.testmod() main() From b4c3a9df1c11adf6faf76b1308be3dfb7c85aa18 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 16:55:15 +0530 Subject: [PATCH 02/12] Update autocomplete_using_trie.py --- strings/autocomplete_using_trie.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/strings/autocomplete_using_trie.py b/strings/autocomplete_using_trie.py index b91b0780c75d..758260292a30 100644 --- a/strings/autocomplete_using_trie.py +++ b/strings/autocomplete_using_trie.py @@ -1,6 +1,5 @@ from __future__ import annotations - END = "#" @@ -49,9 +48,9 @@ def autocomplete_using_trie(string: str) -> tuple: ... trie.insert_word(word) ... >>> matches = autocomplete_using_trie("de") - "detergent " in matches + >>> "detergent " in matches True - "dog " in matches + >>> "dog " in matches False """ suffixes = trie.find_word(string) From ca7750a8d194f7bbd68772fb248cc6e1d8812999 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 17:11:18 +0530 Subject: [PATCH 03/12] Update detecting_english_programmatically.py --- strings/detecting_english_programmatically.py | 53 ++++++++++--------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/strings/detecting_english_programmatically.py b/strings/detecting_english_programmatically.py index 44fb7191866b..2d82c0eddee9 100644 --- a/strings/detecting_english_programmatically.py +++ b/strings/detecting_english_programmatically.py @@ -4,55 +4,56 @@ LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n" -def loadDictionary(): +def load_dictionary() -> dict[str, None]: path = os.path.split(os.path.realpath(__file__)) - englishWords = {} - with open(path[0] + "/dictionary.txt") as dictionaryFile: - for word in dictionaryFile.read().split("\n"): - englishWords[word] = None - return englishWords + english_words: dict[str, None] = {} + with open(path[0] + "/dictionary.txt") as dictionary_file: + for word in dictionary_file.read().split("\n"): + english_words[word] = None + return english_words -ENGLISH_WORDS = loadDictionary() +ENGLISH_WORDS = load_dictionary() -def getEnglishCount(message): +def get_english_count(message: str) -> float: message = message.upper() - message = removeNonLetters(message) - possibleWords = message.split() + message = remove_non_letters(message) + possible_words = message.split() - if possibleWords == []: + if possible_words == []: return 0.0 matches = 0 - for word in possibleWords: + for word in possible_words: if word in ENGLISH_WORDS: matches += 1 - return float(matches) / len(possibleWords) + return float(matches) / len(possible_words) -def removeNonLetters(message): - lettersOnly = [] +def remove_non_letters(message: str) -> str: + letters_only = [] for symbol in message: if symbol in LETTERS_AND_SPACE: - lettersOnly.append(symbol) - return "".join(lettersOnly) + letters_only.append(symbol) + return "".join(letters_only) -def isEnglish(message, wordPercentage=20, letterPercentage=85): +def is_english( + message: str, word_percentage: int = 20, letter_percentage: int = 85 +)-> bool: """ - >>> isEnglish('Hello World') + >>> is_english('Hello World') True - - >>> isEnglish('llold HorWd') + >>> is_english('llold HorWd') False """ - wordsMatch = getEnglishCount(message) * 100 >= wordPercentage - numLetters = len(removeNonLetters(message)) - messageLettersPercentage = (float(numLetters) / len(message)) * 100 - lettersMatch = messageLettersPercentage >= letterPercentage - return wordsMatch and lettersMatch + words_match = get_english_count(message) * 100 >= word_percentage + num_letters = len(remove_non_letters(message)) + message_letters_percentage = (float(num_letters) / len(message)) * 100 + letters_match = message_letters_percentage >= letter_percentage + return words_match and letters_match if __name__ == "__main__": From 774dae8a7efb7fda880d4a418812b908a65a929e Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 17:16:22 +0530 Subject: [PATCH 04/12] Update detecting_english_programmatically.py --- strings/detecting_english_programmatically.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/detecting_english_programmatically.py b/strings/detecting_english_programmatically.py index 2d82c0eddee9..aa18db21027a 100644 --- a/strings/detecting_english_programmatically.py +++ b/strings/detecting_english_programmatically.py @@ -42,7 +42,7 @@ def remove_non_letters(message: str) -> str: def is_english( message: str, word_percentage: int = 20, letter_percentage: int = 85 -)-> bool: +) -> bool: """ >>> is_english('Hello World') True From 0b591e6e90eb1ede77056e7f45b1c4cbbef37fad Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 20:32:38 +0530 Subject: [PATCH 05/12] Update frequency_finder.py --- strings/frequency_finder.py | 96 ++++++++++++++----------------------- 1 file changed, 36 insertions(+), 60 deletions(-) diff --git a/strings/frequency_finder.py b/strings/frequency_finder.py index 48760a9deb09..04938ca192c0 100644 --- a/strings/frequency_finder.py +++ b/strings/frequency_finder.py @@ -1,7 +1,9 @@ # Frequency Finder +import string + # frequency taken from http://en.wikipedia.org/wiki/Letter_frequency -englishLetterFreq = { +english_letter_freq = { "E": 12.70, "T": 9.06, "A": 8.17, @@ -33,85 +35,59 @@ LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" -def getLetterCount(message): - letterCount = { - "A": 0, - "B": 0, - "C": 0, - "D": 0, - "E": 0, - "F": 0, - "G": 0, - "H": 0, - "I": 0, - "J": 0, - "K": 0, - "L": 0, - "M": 0, - "N": 0, - "O": 0, - "P": 0, - "Q": 0, - "R": 0, - "S": 0, - "T": 0, - "U": 0, - "V": 0, - "W": 0, - "X": 0, - "Y": 0, - "Z": 0, - } +def get_letter_count(message: str) -> dict: + letter_count = {letter: 0 for letter in string.ascii_uppercase} for letter in message.upper(): if letter in LETTERS: - letterCount[letter] += 1 + letter_count[letter] += 1 - return letterCount + return letter_count -def getItemAtIndexZero(x): +def get_item_at_index_zero(x: tuple) -> str: return x[0] -def getFrequencyOrder(message): - letterToFreq = getLetterCount(message) - freqToLetter = {} +def get_frequency_order(message: str) -> str: + letter_to_freq = get_letter_count(message) + freq_to_letter: dict[int, list] = { + freq: [] for letter, freq in letter_to_freq.items() + } for letter in LETTERS: - if letterToFreq[letter] not in freqToLetter: - freqToLetter[letterToFreq[letter]] = [letter] - else: - freqToLetter[letterToFreq[letter]].append(letter) + freq_to_letter[letter_to_freq[letter]].append(letter) + + freq_to_letter_str: dict[int, str] = {} - for freq in freqToLetter: - freqToLetter[freq].sort(key=ETAOIN.find, reverse=True) - freqToLetter[freq] = "".join(freqToLetter[freq]) + for freq in freq_to_letter: + freq_to_letter[freq].sort(key=ETAOIN.find, reverse=True) + freq_to_letter_str[freq] = "".join(freq_to_letter[freq]) - freqPairs = list(freqToLetter.items()) - freqPairs.sort(key=getItemAtIndexZero, reverse=True) + freq_pairs = list(freq_to_letter_str.items()) + freq_pairs.sort(key=get_item_at_index_zero, reverse=True) - freqOrder = [] - for freqPair in freqPairs: - freqOrder.append(freqPair[1]) + freq_order = [] + for freq_pair in freq_pairs: + freq_order.append(freq_pair[1]) - return "".join(freqOrder) + return "".join(freq_order) -def englishFreqMatchScore(message): +def english_freq_match_score(message: str) -> int: """ - >>> englishFreqMatchScore('Hello World') + >>> english_freq_match_score('Hello World') 1 """ - freqOrder = getFrequencyOrder(message) - matchScore = 0 - for commonLetter in ETAOIN[:6]: - if commonLetter in freqOrder[:6]: - matchScore += 1 + freq_order = get_frequency_order(message) + match_score = 0 + for common_letter in ETAOIN[:6]: + if common_letter in freq_order[:6]: + match_score += 1 - for uncommonLetter in ETAOIN[-6:]: - if uncommonLetter in freqOrder[-6:]: - matchScore += 1 + for uncommon_letter in ETAOIN[-6:]: + if uncommon_letter in freq_order[-6:]: + match_score += 1 - return matchScore + return match_score if __name__ == "__main__": From c524df712a6d50471334a7f3d2dc2bd13dd658e6 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 20:39:28 +0530 Subject: [PATCH 06/12] Update frequency_finder.py --- strings/frequency_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/frequency_finder.py b/strings/frequency_finder.py index 04938ca192c0..edd0bc6cbdc9 100644 --- a/strings/frequency_finder.py +++ b/strings/frequency_finder.py @@ -55,7 +55,7 @@ def get_frequency_order(message: str) -> str: } for letter in LETTERS: freq_to_letter[letter_to_freq[letter]].append(letter) - + freq_to_letter_str: dict[int, str] = {} for freq in freq_to_letter: From 74bb0a60bbd2e032b8d4bbd7fd34a7dff3b14d25 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 22:46:40 +0530 Subject: [PATCH 07/12] Update frequency_finder.py --- strings/frequency_finder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/strings/frequency_finder.py b/strings/frequency_finder.py index edd0bc6cbdc9..f4ad04f0db5b 100644 --- a/strings/frequency_finder.py +++ b/strings/frequency_finder.py @@ -35,7 +35,7 @@ LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" -def get_letter_count(message: str) -> dict: +def get_letter_count(message: str) -> dict[str, int]: letter_count = {letter: 0 for letter in string.ascii_uppercase} for letter in message.upper(): if letter in LETTERS: @@ -44,13 +44,13 @@ def get_letter_count(message: str) -> dict: return letter_count -def get_item_at_index_zero(x: tuple) -> str: +def get_item_at_index_zero(x: tuple[str, int]) -> str: return x[0] def get_frequency_order(message: str) -> str: letter_to_freq = get_letter_count(message) - freq_to_letter: dict[int, list] = { + freq_to_letter: dict[int, list[str]] = { freq: [] for letter, freq in letter_to_freq.items() } for letter in LETTERS: @@ -65,7 +65,7 @@ def get_frequency_order(message: str) -> str: freq_pairs = list(freq_to_letter_str.items()) freq_pairs.sort(key=get_item_at_index_zero, reverse=True) - freq_order = [] + freq_order: list[str] = [] for freq_pair in freq_pairs: freq_order.append(freq_pair[1]) From 91f879a341e097b5678bb088f80b1b350942c482 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 22:47:56 +0530 Subject: [PATCH 08/12] Update word_occurrence.py --- strings/word_occurrence.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/strings/word_occurrence.py b/strings/word_occurrence.py index 4acfa41adf11..4e0b3ff34ccf 100644 --- a/strings/word_occurrence.py +++ b/strings/word_occurrence.py @@ -1,6 +1,7 @@ # Created by sarathkaul on 17/11/19 # Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020 from collections import defaultdict +from typing import DefaultDict def word_occurence(sentence: str) -> dict: @@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict: >>> dict(word_occurence("Two spaces")) {'Two': 1, 'spaces': 1} """ - occurrence: dict = defaultdict(int) + occurrence: DefaultDict[str, int] = defaultdict(int) # Creating a dictionary containing count of each word for word in sentence.split(): occurrence[word] += 1 From a4f17d47229f0d5158528d229bfaff41462c032a Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 22:50:17 +0530 Subject: [PATCH 09/12] Update frequency_finder.py --- strings/frequency_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/frequency_finder.py b/strings/frequency_finder.py index f4ad04f0db5b..7e9151760a5a 100644 --- a/strings/frequency_finder.py +++ b/strings/frequency_finder.py @@ -44,7 +44,7 @@ def get_letter_count(message: str) -> dict[str, int]: return letter_count -def get_item_at_index_zero(x: tuple[str, int]) -> str: +def get_item_at_index_zero(x: tuple) -> str: return x[0] From b74542a6f78124a77d7220c114a41d65a6f84950 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 22:55:59 +0530 Subject: [PATCH 10/12] Update z_function.py --- strings/z_function.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/strings/z_function.py b/strings/z_function.py index d8d823a37efb..7b9316add920 100644 --- a/strings/z_function.py +++ b/strings/z_function.py @@ -10,7 +10,7 @@ """ -def z_function(input_str: str) -> list: +def z_function(input_str: str) -> list[int]: """ For the given string this function computes value for each index, which represents the maximal length substring starting from the index @@ -27,7 +27,7 @@ def z_function(input_str: str) -> list: >>> z_function("zxxzxxz") [0, 0, 0, 4, 0, 0, 1] """ - z_result = [0] * len(input_str) + z_result = [0 for i in range(len(input_str)] # initialize interval's left pointer and right pointer left_pointer, right_pointer = 0, 0 @@ -49,7 +49,7 @@ def z_function(input_str: str) -> list: return z_result -def go_next(i, z_result, s): +def go_next(i: int, z_result: list[int], s: str) -> bool: """ Check if we have to move forward to the next characters or not """ From 649c7dc84891a55332232f80be9bdf928c833b49 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Thu, 11 Nov 2021 22:57:41 +0530 Subject: [PATCH 11/12] Update z_function.py --- strings/z_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/z_function.py b/strings/z_function.py index 7b9316add920..e77ba8dab5df 100644 --- a/strings/z_function.py +++ b/strings/z_function.py @@ -27,7 +27,7 @@ def z_function(input_str: str) -> list[int]: >>> z_function("zxxzxxz") [0, 0, 0, 4, 0, 0, 1] """ - z_result = [0 for i in range(len(input_str)] + z_result = [0 for i in range(len(input_str))] # initialize interval's left pointer and right pointer left_pointer, right_pointer = 0, 0 From c1254187c4151a85689bf1a75e60e9f7721068a2 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Fri, 12 Nov 2021 17:51:01 +0530 Subject: [PATCH 12/12] Update frequency_finder.py --- strings/frequency_finder.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/strings/frequency_finder.py b/strings/frequency_finder.py index 7e9151760a5a..7024be17b8ab 100644 --- a/strings/frequency_finder.py +++ b/strings/frequency_finder.py @@ -65,9 +65,7 @@ def get_frequency_order(message: str) -> str: freq_pairs = list(freq_to_letter_str.items()) freq_pairs.sort(key=get_item_at_index_zero, reverse=True) - freq_order: list[str] = [] - for freq_pair in freq_pairs: - freq_order.append(freq_pair[1]) + freq_order: list[str] = [freq_pair[1] for freq_pair in freq_pairs] return "".join(freq_order)