From 05a5cdacc3cfd9814ad6f5cb2d4dec86109b640a Mon Sep 17 00:00:00 2001 From: Suyash Dongre <109069262+Suyashd999@users.noreply.github.com> Date: Thu, 18 Jan 2024 18:09:27 +0530 Subject: [PATCH 1/2] Added doctest to skew_heap.py (#11147) * Added doctest to skew_heap.py * Update skew_heap.py * Update data_structures/heap/skew_heap.py Co-authored-by: Saptadeep Banerjee <69459134+imSanko@users.noreply.github.com> * Update skew_heap.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update skew_heap.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Saptadeep Banerjee <69459134+imSanko@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- data_structures/heap/skew_heap.py | 45 +++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/data_structures/heap/skew_heap.py b/data_structures/heap/skew_heap.py index c4c13b08276a..0839db711cb1 100644 --- a/data_structures/heap/skew_heap.py +++ b/data_structures/heap/skew_heap.py @@ -21,14 +21,55 @@ def __init__(self, value: T) -> None: @property def value(self) -> T: - """Return the value of the node.""" + """ + Return the value of the node. + + >>> SkewNode(0).value + 0 + >>> SkewNode(3.14159).value + 3.14159 + >>> SkewNode("hello").value + 'hello' + >>> SkewNode(None).value + + >>> SkewNode(True).value + True + >>> SkewNode([]).value + [] + >>> SkewNode({}).value + {} + >>> SkewNode(set()).value + set() + >>> SkewNode(0.0).value + 0.0 + >>> SkewNode(-1e-10).value + -1e-10 + >>> SkewNode(10).value + 10 + >>> SkewNode(-10.5).value + -10.5 + >>> SkewNode().value + Traceback (most recent call last): + ... 
+ TypeError: SkewNode.__init__() missing 1 required positional argument: 'value' + """ return self._value @staticmethod def merge( root1: SkewNode[T] | None, root2: SkewNode[T] | None ) -> SkewNode[T] | None: - """Merge 2 nodes together.""" + """ + Merge 2 nodes together. + >>> SkewNode.merge(SkewNode(10),SkewNode(-10.5)).value + -10.5 + >>> SkewNode.merge(SkewNode(10),SkewNode(10.5)).value + 10 + >>> SkewNode.merge(SkewNode(10),SkewNode(10)).value + 10 + >>> SkewNode.merge(SkewNode(-100),SkewNode(-10.5)).value + -100 + """ if not root1: return root2 From 3952ba703a5b84a37891a001037c5c366d20941a Mon Sep 17 00:00:00 2001 From: AtomicVar Date: Thu, 18 Jan 2024 20:41:29 +0800 Subject: [PATCH 2/2] Add categorical focal cross-entropy loss algorithm (#11248) --- machine_learning/loss_functions.py | 102 +++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index 36a760326f3d..f05fa0cbe686 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -148,6 +148,108 @@ def categorical_cross_entropy( return -np.sum(y_true * np.log(y_pred)) +def categorical_focal_cross_entropy( + y_true: np.ndarray, + y_pred: np.ndarray, + alpha: np.ndarray = None, + gamma: float = 2.0, + epsilon: float = 1e-15, +) -> float: + """ + Calculate the mean categorical focal cross-entropy (CFCE) loss between true + labels and predicted probabilities for multi-class classification. + + CFCE loss is a generalization of binary focal cross-entropy for multi-class + classification. It addresses class imbalance by focusing on hard examples. + + CFCE = -Σ alpha * (1 - y_pred)**gamma * y_true * log(y_pred) + + Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf) + + Parameters: + - y_true: True labels in one-hot encoded form. + - y_pred: Predicted probabilities for each class. + - alpha: Array of weighting factors for each class. 
+ - gamma: Focusing parameter for modulating the loss (default: 2.0). + - epsilon: Small constant to avoid numerical instability. + + Returns: + - The mean categorical focal cross-entropy loss. + + >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]]) + >>> alpha = np.array([0.6, 0.2, 0.7]) + >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha) + 0.0025966118981496423 + + >>> true_labels = np.array([[0, 1, 0], [0, 0, 1]]) + >>> pred_probs = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + >>> alpha = np.array([0.25, 0.25, 0.25]) + >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha) + 0.23315276982014324 + + >>> true_labels = np.array([[1, 0], [0, 1]]) + >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]]) + >>> categorical_focal_cross_entropy(true_labels, pred_probs) + Traceback (most recent call last): + ... + ValueError: Shape of y_true and y_pred must be the same. + + >>> true_labels = np.array([[2, 0, 1], [1, 0, 0]]) + >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]]) + >>> categorical_focal_cross_entropy(true_labels, pred_probs) + Traceback (most recent call last): + ... + ValueError: y_true must be one-hot encoded. + + >>> true_labels = np.array([[1, 0, 1], [1, 0, 0]]) + >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]]) + >>> categorical_focal_cross_entropy(true_labels, pred_probs) + Traceback (most recent call last): + ... + ValueError: y_true must be one-hot encoded. + + >>> true_labels = np.array([[1, 0, 0], [0, 1, 0]]) + >>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]]) + >>> categorical_focal_cross_entropy(true_labels, pred_probs) + Traceback (most recent call last): + ... + ValueError: Predicted probabilities must sum to approximately 1. 
+ + >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]]) + >>> alpha = np.array([0.6, 0.2]) + >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha) + Traceback (most recent call last): + ... + ValueError: Length of alpha must match the number of classes. + """ + if y_true.shape != y_pred.shape: + raise ValueError("Shape of y_true and y_pred must be the same.") + + if alpha is None: + alpha = np.ones(y_true.shape[1]) + + if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1): + raise ValueError("y_true must be one-hot encoded.") + + if len(alpha) != y_true.shape[1]: + raise ValueError("Length of alpha must match the number of classes.") + + if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)): + raise ValueError("Predicted probabilities must sum to approximately 1.") + + # Clip predicted probabilities to avoid log(0) + y_pred = np.clip(y_pred, epsilon, 1 - epsilon) + + # Calculate loss for each class and sum across classes + cfce_loss = -np.sum( + alpha * np.power(1 - y_pred, gamma) * y_true * np.log(y_pred), axis=1 + ) + + return np.mean(cfce_loss) + + def hinge_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float: """ Calculate the mean hinge loss for between true labels and predicted probabilities