[pull] master from TheAlgorithms:master #8


Merged 2 commits on Jan 18, 2024
45 changes: 43 additions & 2 deletions data_structures/heap/skew_heap.py
@@ -21,14 +21,55 @@ def __init__(self, value: T) -> None:

@property
def value(self) -> T:
"""Return the value of the node."""
"""
Return the value of the node.

>>> SkewNode(0).value
0
>>> SkewNode(3.14159).value
3.14159
>>> SkewNode("hello").value
'hello'
>>> SkewNode(None).value

>>> SkewNode(True).value
True
>>> SkewNode([]).value
[]
>>> SkewNode({}).value
{}
>>> SkewNode(set()).value
set()
>>> SkewNode(0.0).value
0.0
>>> SkewNode(-1e-10).value
-1e-10
>>> SkewNode(10).value
10
>>> SkewNode(-10.5).value
-10.5
>>> SkewNode().value
Traceback (most recent call last):
...
TypeError: SkewNode.__init__() missing 1 required positional argument: 'value'
"""
return self._value

@staticmethod
def merge(
root1: SkewNode[T] | None, root2: SkewNode[T] | None
) -> SkewNode[T] | None:
"""Merge 2 nodes together."""
"""
Merge 2 nodes together.
>>> SkewNode.merge(SkewNode(10),SkewNode(-10.5)).value
-10.5
>>> SkewNode.merge(SkewNode(10),SkewNode(10.5)).value
10
>>> SkewNode.merge(SkewNode(10),SkewNode(10)).value
10
>>> SkewNode.merge(SkewNode(-100),SkewNode(-10.5)).value
-100
"""
if not root1:
return root2

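The hunk is truncated at this point. For orientation, below is a minimal sketch of the standard skew-heap merge that the elided lines implement (the textbook algorithm, not necessarily this file's exact code):

# Sketch of the textbook skew-heap merge (assumed, not copied from the diff).
# Keep the smaller root on top, then swap its children while recursively
# merging the other heap into what becomes the new left child.
def merge_sketch(root1, root2):
    if not root1:
        return root2
    if not root2:
        return root1
    if root1.value > root2.value:
        root1, root2 = root2, root1  # smaller root wins (min-heap order)
    # old left child moves to the right; the recursive merge becomes the left
    root1.left, root1.right = merge_sketch(root2, root1.right), root1.left
    return root1
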
102 changes: 102 additions & 0 deletions machine_learning/loss_functions.py
@@ -148,6 +148,108 @@ def categorical_cross_entropy(
return -np.sum(y_true * np.log(y_pred))


def categorical_focal_cross_entropy(
y_true: np.ndarray,
y_pred: np.ndarray,
alpha: np.ndarray | None = None,
gamma: float = 2.0,
epsilon: float = 1e-15,
) -> float:
"""
Calculate the mean categorical focal cross-entropy (CFCE) loss between true
labels and predicted probabilities for multi-class classification.

CFCE loss is a generalization of binary focal cross-entropy for multi-class
classification. It addresses class imbalance by focusing on hard examples.

CFCE = -Σ alpha * (1 - y_pred)**gamma * y_true * log(y_pred)

Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf)

Parameters:
- y_true: True labels in one-hot encoded form.
- y_pred: Predicted probabilities for each class.
- alpha: Array of weighting factors for each class.
- gamma: Focusing parameter for modulating the loss (default: 2.0).
- epsilon: Small constant to avoid numerical instability.

Returns:
- The mean categorical focal cross-entropy loss.

>>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
>>> alpha = np.array([0.6, 0.2, 0.7])
>>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
0.0025966118981496423

>>> true_labels = np.array([[0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
>>> alpha = np.array([0.25, 0.25, 0.25])
>>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
0.23315276982014324

>>> true_labels = np.array([[1, 0], [0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Shape of y_true and y_pred must be the same.

>>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.

>>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.

>>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Predicted probabilities must sum to approximately 1.

>>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
>>> alpha = np.array([0.6, 0.2])
>>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
Traceback (most recent call last):
...
ValueError: Length of alpha must match the number of classes.
"""
if y_true.shape != y_pred.shape:
raise ValueError("Shape of y_true and y_pred must be the same.")

if alpha is None:
alpha = np.ones(y_true.shape[1])

if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
raise ValueError("y_true must be one-hot encoded.")

if len(alpha) != y_true.shape[1]:
raise ValueError("Length of alpha must match the number of classes.")

if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
raise ValueError("Predicted probabilities must sum to approximately 1.")

# Clip predicted probabilities to avoid log(0)
y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

# Calculate loss for each class and sum across classes
cfce_loss = -np.sum(
alpha * np.power(1 - y_pred, gamma) * y_true * np.log(y_pred), axis=1
)

return np.mean(cfce_loss)

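A quick sanity check, not part of the diff: with gamma=0 and the default uniform alpha, the focal factor (1 - y_pred)**gamma collapses to 1, so the function returns the per-sample mean of the plain categorical cross-entropy.

import numpy as np

y_true = np.array([[1, 0, 0], [0, 1, 0]])
y_pred = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])

# With gamma=0 the modulating term is 1, so the result is just
# (-log(0.9) - log(0.7)) / 2 ≈ 0.2310
print(categorical_focal_cross_entropy(y_true, y_pred, gamma=0.0))
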

def hinge_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the mean hinge loss between true labels and predicted probabilities