From 2f1704dae579295ea2f47584ef80b4b321a284d7 Mon Sep 17 00:00:00 2001 From: Mandeep Singh <135956602+MannCode@users.noreply.github.com> Date: Sun, 2 Jun 2024 18:27:35 -0700 Subject: [PATCH 1/3] issue #11150 Ensure explicit column selection and data type setting in data reading process. (#11302) * issue #11150 Ensure explicit column selection and data type setting in data reading process. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- machine_learning/sequential_minimum_optimization.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/machine_learning/sequential_minimum_optimization.py b/machine_learning/sequential_minimum_optimization.py index 3abdd6ccbed8..2ebdeb764a80 100644 --- a/machine_learning/sequential_minimum_optimization.py +++ b/machine_learning/sequential_minimum_optimization.py @@ -463,7 +463,11 @@ def test_cancel_data(): with open(r"cancel_data.csv", "w") as f: f.write(content) - data = pd.read_csv(r"cancel_data.csv", header=None) + data = pd.read_csv( + "cancel_data.csv", + header=None, + dtype={0: str}, # Assuming the first column contains string data + ) # 1: pre-processing data del data[data.columns.tolist()[0]] From ffaa976f6c5a5de30e284ae2fc8122f40cd3fa6a Mon Sep 17 00:00:00 2001 From: Harsh buddhdev Date: Sun, 2 Jun 2024 23:00:26 -0400 Subject: [PATCH 2/3] Fixes #9943 (#10252) * added doctest for all_permutations.py * added doctest for all_subsequences.py * added doctest for all_subsequences.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * doctest added * updated * Update backtracking/all_subsequences.py --------- Co-authored-by: Harsh Buddhdev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tianyi Zheng --- backtracking/all_permutations.py | 36 ++++++++++++++++++++++ backtracking/all_subsequences.py | 52 +++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/backtracking/all_permutations.py b/backtracking/all_permutations.py index c483cd62c99b..f376e6fa0945 100644 --- a/backtracking/all_permutations.py +++ b/backtracking/all_permutations.py @@ -23,6 +23,42 @@ def create_state_space_tree( Creates a state space tree to iterate through each branch using DFS. We know that each state has exactly len(sequence) - index children. It terminates when it reaches the end of the given sequence. + + :param sequence: The input sequence for which permutations are generated. + :param current_sequence: The current permutation being built. + :param index: The current index in the sequence. + :param index_used: list to track which elements are used in permutation. + + Example 1: + >>> sequence = [1, 2, 3] + >>> current_sequence = [] + >>> index_used = [False, False, False] + >>> create_state_space_tree(sequence, current_sequence, 0, index_used) + [1, 2, 3] + [1, 3, 2] + [2, 1, 3] + [2, 3, 1] + [3, 1, 2] + [3, 2, 1] + + Example 2: + >>> sequence = ["A", "B", "C"] + >>> current_sequence = [] + >>> index_used = [False, False, False] + >>> create_state_space_tree(sequence, current_sequence, 0, index_used) + ['A', 'B', 'C'] + ['A', 'C', 'B'] + ['B', 'A', 'C'] + ['B', 'C', 'A'] + ['C', 'A', 'B'] + ['C', 'B', 'A'] + + Example 3: + >>> sequence = [1] + >>> current_sequence = [] + >>> index_used = [False] + >>> create_state_space_tree(sequence, current_sequence, 0, index_used) + [1] """ if index == len(sequence): diff --git a/backtracking/all_subsequences.py b/backtracking/all_subsequences.py index 7844a829d046..18696054eb7e 100644 --- a/backtracking/all_subsequences.py +++ b/backtracking/all_subsequences.py @@ -22,6 +22,56 @@ def create_state_space_tree( Creates a state space tree to iterate through each branch using DFS. We know that each state has exactly two children. It terminates when it reaches the end of the given sequence. + + :param sequence: The input sequence for which subsequences are generated. + :param current_subsequence: The current subsequence being built. + :param index: The current index in the sequence. + + Example: + >>> sequence = [3, 2, 1] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + [1] + [2] + [2, 1] + [3] + [3, 1] + [3, 2] + [3, 2, 1] + + >>> sequence = ["A", "B"] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + ['B'] + ['A'] + ['A', 'B'] + + >>> sequence = [] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + + >>> sequence = [1, 2, 3, 4] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + [4] + [3] + [3, 4] + [2] + [2, 4] + [2, 3] + [2, 3, 4] + [1] + [1, 4] + [1, 3] + [1, 3, 4] + [1, 2] + [1, 2, 4] + [1, 2, 3] + [1, 2, 3, 4] """ if index == len(sequence): @@ -35,7 +85,7 @@ def create_state_space_tree( if __name__ == "__main__": - seq: list[Any] = [3, 1, 2, 4] + seq: list[Any] = [1, 2, 3] generate_all_subsequences(seq) seq.clear() From c919579869ae9f57d6878336af6de6bc9a001c61 Mon Sep 17 00:00:00 2001 From: AtomicVar Date: Mon, 3 Jun 2024 11:15:01 +0800 Subject: [PATCH 3/3] Add KL divergence loss algorithm (#11238) * Add KL divergence loss algorithm * Apply suggestions from code review --------- Co-authored-by: Tianyi Zheng --- machine_learning/loss_functions.py | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index 16e5a3278b73..150035661eb7 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -629,6 +629,40 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) -> return np.mean(loss) +def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float: + """ + Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels + and predicted probabilities. + + KL divergence loss quantifies dissimilarity between true labels and predicted + probabilities. It's often used in training generative models. + + KL = Σ(y_true * ln(y_true / y_pred)) + + Reference: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence + + Parameters: + - y_true: True class probabilities + - y_pred: Predicted class probabilities + + >>> true_labels = np.array([0.2, 0.3, 0.5]) + >>> predicted_probs = np.array([0.3, 0.3, 0.4]) + >>> kullback_leibler_divergence(true_labels, predicted_probs) + 0.030478754035472025 + >>> true_labels = np.array([0.2, 0.3, 0.5]) + >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5]) + >>> kullback_leibler_divergence(true_labels, predicted_probs) + Traceback (most recent call last): + ... + ValueError: Input arrays must have the same length. + """ + if len(y_true) != len(y_pred): + raise ValueError("Input arrays must have the same length.") + + kl_loss = y_true * np.log(y_true / y_pred) + return np.sum(kl_loss) + + if __name__ == "__main__": import doctest