From 6177c807b553275c021312ff0b6aa19fbf8bb341 Mon Sep 17 00:00:00 2001
From: John lee
Date: Tue, 2 Apr 2024 16:08:34 +0100
Subject: [PATCH 01/33] fix misspecified version reference (#3228)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: leej3 <johnleenimh@gmail.com>
---
 docs/source/contrib/handlers.rst | 2 +-
 docs/source/contrib/metrics.rst  | 4 ++--
 ignite/engine/__init__.py        | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/source/contrib/handlers.rst b/docs/source/contrib/handlers.rst
index 214f23a843ba..3f1af41856bd 100644
--- a/docs/source/contrib/handlers.rst
+++ b/docs/source/contrib/handlers.rst
@@ -28,5 +28,5 @@ Time profilers [deprecated]
 Loggers [deprecated]
 --------------------
 
-.. deprecated:: 0.5.1
+.. deprecated:: 0.5.0
     Loggers moved to :ref:`Loggers`.
diff --git a/docs/source/contrib/metrics.rst b/docs/source/contrib/metrics.rst
index fdaa0432f5cb..3c5603e6ec5f 100644
--- a/docs/source/contrib/metrics.rst
+++ b/docs/source/contrib/metrics.rst
@@ -4,12 +4,12 @@ ignite.contrib.metrics
 Contrib module metrics [deprecated]
 -----------------------------------
 
-.. deprecated:: 0.5.1
+.. deprecated:: 0.5.0
     All metrics moved to :ref:`Complete list of metrics`.
 
 Regression metrics [deprecated]
 -------------------------------
 
-.. deprecated:: 0.5.1
+.. deprecated:: 0.5.0
     All metrics moved to :ref:`Complete list of metrics`.
diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py
index 865be7e7800d..cbaac4e16cb7 100644
--- a/ignite/engine/__init__.py
+++ b/ignite/engine/__init__.py
@@ -96,7 +96,7 @@ def supervised_training_step(
         Added `model_transform` to transform model's output
     .. versionchanged:: 0.4.13
         Added `model_fn` to customize model's application on the sample
-    .. versionchanged:: 0.5.1
+    .. versionchanged:: 0.5.0
         Added support for ``mps`` device
     """
 
@@ -551,7 +551,7 @@ def output_transform_fn(x, y, y_pred, loss):
         Added ``model_transform`` to transform model's output
     .. versionchanged:: 0.4.13
         Added `model_fn` to customize model's application on the sample
-    .. versionchanged:: 0.5.1
+    .. versionchanged:: 0.5.0
         Added support for ``mps`` device
     """
 
@@ -799,7 +799,7 @@ def create_supervised_evaluator(
         Added ``model_transform`` to transform model's output
    .. versionchanged:: 0.4.13
         Added `model_fn` to customize model's application on the sample
-    .. versionchanged:: 0.5.1
+    .. 
versionchanged:: 0.5.0 Added support for ``mps`` device """ device_type = device.type if isinstance(device, torch.device) else device From 2c79b7ed9b7a0acf247498b772a34f636cebbae3 Mon Sep 17 00:00:00 2001 From: Kazuki Adachi Date: Tue, 9 Apr 2024 01:47:47 +0900 Subject: [PATCH 02/33] Add MutualInformation Metric (#3230) * add MutualInformationMetric * update test for MutualInformation metric * format code for MutualInformation Metric * update test for MutualInformation metric * update test * update docstring * fix device compatibility * fix test_accumulator_device for MutualInformation metric * update doc * modify docstring * modify formula of docstring * update formula of docstring * update formula of docstring * remove unused import * add reference * commonalize redundant code * modify decorator * add a comment * fix decorator --- docs/source/metrics.rst | 1 + ignite/metrics/__init__.py | 2 + ignite/metrics/entropy.py | 6 +- ignite/metrics/mutual_information.py | 94 ++++++++++++ .../ignite/metrics/test_mutual_information.py | 145 ++++++++++++++++++ 5 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 ignite/metrics/mutual_information.py create mode 100644 tests/ignite/metrics/test_mutual_information.py diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst index 0696cc3070ae..f6742f73be55 100644 --- a/docs/source/metrics.rst +++ b/docs/source/metrics.rst @@ -337,6 +337,7 @@ Complete list of metrics metric.Metric metrics_lambda.MetricsLambda MultiLabelConfusionMatrix + MutualInformation precision.Precision PSNR recall.Recall diff --git a/ignite/metrics/__init__.py b/ignite/metrics/__init__.py index 1b23257d4aa0..05ce97c40664 100644 --- a/ignite/metrics/__init__.py +++ b/ignite/metrics/__init__.py @@ -21,6 +21,7 @@ from ignite.metrics.metric import BatchFiltered, BatchWise, EpochWise, Metric, MetricUsage from ignite.metrics.metrics_lambda import MetricsLambda from ignite.metrics.multilabel_confusion_matrix import MultiLabelConfusionMatrix +from ignite.metrics.mutual_information import MutualInformation from ignite.metrics.nlp.bleu import Bleu from ignite.metrics.nlp.rouge import Rouge, RougeL, RougeN from ignite.metrics.precision import Precision @@ -57,6 +58,7 @@ "mIoU", "JaccardIndex", "MultiLabelConfusionMatrix", + "MutualInformation", "Precision", "PSNR", "Recall", diff --git a/ignite/metrics/entropy.py b/ignite/metrics/entropy.py index b3d0cff21b6c..4208bf205b3e 100644 --- a/ignite/metrics/entropy.py +++ b/ignite/metrics/entropy.py @@ -80,9 +80,13 @@ def update(self, output: Sequence[torch.Tensor]) -> None: prob = F.softmax(y_pred, dim=1) log_prob = F.log_softmax(y_pred, dim=1) + + self._update(prob, log_prob) + + def _update(self, prob: torch.Tensor, log_prob: torch.Tensor) -> None: entropy_sum = -torch.sum(prob * log_prob) self._sum_of_entropies += entropy_sum.to(self._device) - self._num_examples += y_pred.shape[0] + self._num_examples += prob.shape[0] @sync_all_reduce("_sum_of_entropies", "_num_examples") def compute(self) -> float: diff --git a/ignite/metrics/mutual_information.py b/ignite/metrics/mutual_information.py new file mode 100644 index 000000000000..2cca768ce43b --- /dev/null +++ b/ignite/metrics/mutual_information.py @@ -0,0 +1,94 @@ +import torch + +from ignite.exceptions import NotComputableError +from ignite.metrics import Entropy +from ignite.metrics.metric import reinit__is_reduced, sync_all_reduce + +__all__ = ["MutualInformation"] + + +class MutualInformation(Entropy): + r"""Calculates the `mutual information `_ + between input :math:`X` 
and prediction :math:`Y`.
+
+    .. math::
+        \begin{align*}
+            I(X;Y) &= H(Y) - H(Y|X) = H \left( \frac{1}{N}\sum_{i=1}^N \hat{\mathbf{p}}_i \right)
+            - \frac{1}{N}\sum_{i=1}^N H(\hat{\mathbf{p}}_i), \\
+            H(\mathbf{p}) &= -\sum_{c=1}^C p_c \log p_c.
+        \end{align*}
+
+    where :math:`\hat{\mathbf{p}}_i` is the prediction probability vector for the :math:`i`-th input,
+    and :math:`H(\mathbf{p})` is the entropy of :math:`\mathbf{p}`.
+
+    Intuitively, this metric measures how well input data are clustered by classes in the feature space [1].
+
+    [1] https://proceedings.mlr.press/v70/hu17b.html
+
+    - ``update`` must receive output of the form ``(y_pred, y)``, though ``y`` is not used in this metric.
+    - ``y_pred`` is expected to be the unnormalized logits for each class. :math:`(B, C)` (classification)
+      or :math:`(B, C, ...)` (e.g., image segmentation) shapes are allowed.
+
+    Args:
+        output_transform: a callable that is used to transform the
+            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
+            form expected by the metric. This can be useful if, for example, you have a multi-output model and
+            you want to compute the metric with respect to one of the outputs.
+            By default, metrics require the output as ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y}``.
+        device: specifies which device updates are accumulated on. Setting the
+            metric's device to be the same as your ``update`` arguments ensures the ``update`` method is
+            non-blocking. By default, CPU.
+
+    Examples:
+        To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine.
+        The output of the engine's ``process_function`` needs to be in the format of
+        ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y, ...}``. If not, ``output_transform`` can be added
+        to the metric to transform the output into the form expected by the metric.
+
+        For more information on how the metric works with :class:`~ignite.engine.engine.Engine`, visit :ref:`attach-engine`.
+
+        .. include:: defaults.rst
+            :start-after: :orphan:
+
+        .. testcode::
+
+            metric = MutualInformation()
+            metric.attach(default_evaluator, 'mutual_information')
+            y_true = torch.tensor([0, 1, 2])  # not considered in the MutualInformation metric.
+            y_pred = torch.tensor([
+                [ 0.0000,  0.6931,  1.0986],
+                [ 1.3863,  1.6094,  1.6094],
+                [ 0.0000, -2.3026, -2.3026]
+            ])
+            state = default_evaluator.run([[y_pred, y_true]])
+            print(state.metrics['mutual_information'])
+
+        .. 
testoutput:: + + 0.18599730730056763 + """ + + _state_dict_all_req_keys = ("_sum_of_probabilities",) + + @reinit__is_reduced + def reset(self) -> None: + super().reset() + self._sum_of_probabilities = torch.tensor(0.0, device=self._device) + + def _update(self, prob: torch.Tensor, log_prob: torch.Tensor) -> None: + super()._update(prob, log_prob) + # We can't use += below as _sum_of_probabilities can be a scalar and prob.sum(dim=0) is a vector + self._sum_of_probabilities = self._sum_of_probabilities + prob.sum(dim=0).to(self._device) + + @sync_all_reduce("_sum_of_probabilities", "_sum_of_entropies", "_num_examples") + def compute(self) -> float: + n = self._num_examples + if n == 0: + raise NotComputableError("MutualInformation must have at least one example before it can be computed.") + + marginal_prob = self._sum_of_probabilities / n + marginal_ent = -(marginal_prob * torch.log(marginal_prob)).sum() + conditional_ent = self._sum_of_entropies / n + mi = marginal_ent - conditional_ent + mi = torch.clamp(mi, min=0.0) # mutual information cannot be negative + return float(mi.item()) diff --git a/tests/ignite/metrics/test_mutual_information.py b/tests/ignite/metrics/test_mutual_information.py new file mode 100644 index 000000000000..18d58d300bfc --- /dev/null +++ b/tests/ignite/metrics/test_mutual_information.py @@ -0,0 +1,145 @@ +from typing import Tuple + +import numpy as np +import pytest +import torch +from scipy.special import softmax +from scipy.stats import entropy +from torch import Tensor + +import ignite.distributed as idist + +from ignite.engine import Engine +from ignite.exceptions import NotComputableError +from ignite.metrics import MutualInformation + + +def np_mutual_information(np_y_pred: np.ndarray) -> float: + prob = softmax(np_y_pred, axis=1) + marginal_ent = entropy(np.mean(prob, axis=0)) + conditional_ent = np.mean(entropy(prob, axis=1)) + return max(0.0, marginal_ent - conditional_ent) + + +def test_zero_sample(): + mi = MutualInformation() + with pytest.raises( + NotComputableError, match=r"MutualInformation must have at least one example before it can be computed" + ): + mi.compute() + + +def test_invalid_shape(): + mi = MutualInformation() + y_pred = torch.randn(10).float() + with pytest.raises(ValueError, match=r"y_pred must be in the shape of \(B, C\) or \(B, C, ...\), got"): + mi.update((y_pred, None)) + + +@pytest.fixture(params=list(range(4))) +def test_case(request): + return [ + (torch.randn((100, 10)).float(), torch.randint(0, 10, size=[100]), 1), + (torch.rand((100, 500)).float(), torch.randint(0, 500, size=[100]), 1), + # updated batches + (torch.normal(0.0, 5.0, size=(100, 10)).float(), torch.randint(0, 10, size=[100]), 16), + (torch.normal(5.0, 3.0, size=(100, 200)).float(), torch.randint(0, 200, size=[100]), 16), + # image segmentation + (torch.randn((100, 5, 32, 32)).float(), torch.randint(0, 5, size=(100, 32, 32)), 16), + (torch.randn((100, 5, 224, 224)).float(), torch.randint(0, 5, size=(100, 224, 224)), 16), + ][request.param] + + +@pytest.mark.parametrize("n_times", range(5)) +def test_compute(n_times, test_case: Tuple[Tensor, Tensor, int]): + mi = MutualInformation() + + y_pred, y, batch_size = test_case + + mi.reset() + if batch_size > 1: + n_iters = y.shape[0] // batch_size + 1 + for i in range(n_iters): + idx = i * batch_size + mi.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) + else: + mi.update((y_pred, y)) + + np_res = np_mutual_information(y_pred.numpy()) + res = mi.compute() + + assert isinstance(res, float) + assert 
pytest.approx(np_res, rel=1e-4) == res + + +def test_accumulator_detached(): + mi = MutualInformation() + + y_pred = torch.tensor([[2.0, 3.0], [-2.0, -1.0]], requires_grad=True) + y = torch.zeros(2) + mi.update((y_pred, y)) + + assert not mi._sum_of_probabilities.requires_grad + + +@pytest.mark.usefixtures("distributed") +class TestDistributed: + def test_integration(self): + tol = 1e-4 + n_iters = 100 + batch_size = 10 + n_cls = 50 + device = idist.device() + rank = idist.get_rank() + torch.manual_seed(12 + rank) + + metric_devices = [torch.device("cpu")] + if device.type != "xla": + metric_devices.append(device) + + for metric_device in metric_devices: + y_true = torch.randint(0, n_cls, size=[n_iters * batch_size], dtype=torch.long).to(device) + y_preds = torch.normal(0.0, 3.0, size=(n_iters * batch_size, n_cls), dtype=torch.float).to(device) + + engine = Engine( + lambda e, i: ( + y_preds[i * batch_size : (i + 1) * batch_size], + y_true[i * batch_size : (i + 1) * batch_size], + ) + ) + + m = MutualInformation(device=metric_device) + m.attach(engine, "mutual_information") + + data = list(range(n_iters)) + engine.run(data=data, max_epochs=1) + + y_preds = idist.all_gather(y_preds) + y_true = idist.all_gather(y_true) + + assert "mutual_information" in engine.state.metrics + res = engine.state.metrics["mutual_information"] + + true_res = np_mutual_information(y_preds.cpu().numpy()) + + assert pytest.approx(true_res, rel=tol) == res + + def test_accumulator_device(self): + device = idist.device() + metric_devices = [torch.device("cpu")] + if device.type != "xla": + metric_devices.append(device) + for metric_device in metric_devices: + mi = MutualInformation(device=metric_device) + + devices = (mi._device, mi._sum_of_probabilities.device) + for dev in devices: + assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}" + + y_pred = torch.tensor([[2.0, 3.0], [-2.0, -1.0]], requires_grad=True) + y = torch.zeros(2) + mi.update((y_pred, y)) + + devices = (mi._device, mi._sum_of_probabilities.device) + for dev in devices: + assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}" From f431e60b09743dc8d99b7e5f32e234f46a2a920d Mon Sep 17 00:00:00 2001 From: Kazuki Adachi Date: Thu, 18 Apr 2024 05:10:11 +0900 Subject: [PATCH 03/33] Add divergence metrics (#3232) * add KLDivergence metric * add JSDivergence * fix variable name * update docstring for JSDivergence * Update ignite/metrics/js_divergence.py Co-authored-by: vfdev * Update ignite/metrics/kl_divergence.py Co-authored-by: vfdev * swap ground truth and prediction * swap the definitions of p and q --------- Co-authored-by: vfdev --- docs/source/metrics.rst | 2 + ignite/metrics/__init__.py | 4 + ignite/metrics/js_divergence.py | 87 +++++++++++ ignite/metrics/kl_divergence.py | 102 +++++++++++++ tests/ignite/metrics/test_js_divergence.py | 159 +++++++++++++++++++++ tests/ignite/metrics/test_kl_divergence.py | 158 ++++++++++++++++++++ 6 files changed, 512 insertions(+) create mode 100644 ignite/metrics/js_divergence.py create mode 100644 ignite/metrics/kl_divergence.py create mode 100644 tests/ignite/metrics/test_js_divergence.py create mode 100644 tests/ignite/metrics/test_kl_divergence.py diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst index f6742f73be55..a7f90b754d96 100644 --- a/docs/source/metrics.rst +++ b/docs/source/metrics.rst @@ -353,6 +353,8 @@ Complete list of metrics FID CosineSimilarity Entropy + KLDivergence + JSDivergence AveragePrecision CohenKappa 
GpuInfo
diff --git a/ignite/metrics/__init__.py b/ignite/metrics/__init__.py
index 05ce97c40664..2cc55aace661 100644
--- a/ignite/metrics/__init__.py
+++ b/ignite/metrics/__init__.py
@@ -14,6 +14,8 @@
 from ignite.metrics.gan.fid import FID
 from ignite.metrics.gan.inception_score import InceptionScore
 from ignite.metrics.gpu_info import GpuInfo
+from ignite.metrics.js_divergence import JSDivergence
+from ignite.metrics.kl_divergence import KLDivergence
 from ignite.metrics.loss import Loss
 from ignite.metrics.mean_absolute_error import MeanAbsoluteError
 from ignite.metrics.mean_pairwise_distance import MeanPairwiseDistance
@@ -57,6 +59,8 @@
     "InceptionScore",
     "mIoU",
     "JaccardIndex",
+    "JSDivergence",
+    "KLDivergence",
     "MultiLabelConfusionMatrix",
     "MutualInformation",
     "Precision",
diff --git a/ignite/metrics/js_divergence.py b/ignite/metrics/js_divergence.py
new file mode 100644
index 000000000000..1bd37cfedc69
--- /dev/null
+++ b/ignite/metrics/js_divergence.py
@@ -0,0 +1,87 @@
+import torch
+import torch.nn.functional as F
+
+from ignite.exceptions import NotComputableError
+from ignite.metrics.kl_divergence import KLDivergence
+from ignite.metrics.metric import sync_all_reduce
+
+__all__ = ["JSDivergence"]
+
+
+class JSDivergence(KLDivergence):
+    r"""Calculates the mean of `Jensen-Shannon (JS) divergence
+    `_.
+
+    .. math::
+        \begin{align*}
+            D_\text{JS}(\mathbf{p}_i \| \mathbf{q}_i) &= \frac{1}{2} D_\text{KL}(\mathbf{p}_i \| \mathbf{m}_i)
+            + \frac{1}{2} D_\text{KL}(\mathbf{q}_i \| \mathbf{m}_i), \\
+            \mathbf{m}_i &= \frac{1}{2}(\mathbf{p}_i + \mathbf{q}_i), \\
+            D_\text{KL}(\mathbf{p}_i \| \mathbf{q}_i) &= \sum_{c=1}^C p_{i,c} \log \frac{p_{i,c}}{q_{i,c}}.
+        \end{align*}
+
+    where :math:`\mathbf{p}_i` and :math:`\mathbf{q}_i` are the ground truth and prediction probability tensors,
+    and :math:`D_\text{KL}` is the KL-divergence.
+
+    - ``update`` must receive output of the form ``(y_pred, y)``.
+    - ``y_pred`` and ``y`` are expected to be the unnormalized logits for each class. :math:`(B, C)` (classification)
+      or :math:`(B, C, ...)` (e.g., image segmentation) shapes are allowed.
+
+    Args:
+        output_transform: a callable that is used to transform the
+            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
+            form expected by the metric. This can be useful if, for example, you have a multi-output model and
+            you want to compute the metric with respect to one of the outputs.
+            By default, metrics require the output as ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y}``.
+        device: specifies which device updates are accumulated on. Setting the
+            metric's device to be the same as your ``update`` arguments ensures the ``update`` method is
+            non-blocking. By default, CPU.
+
+    Examples:
+        To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine.
+        The output of the engine's ``process_function`` needs to be in the format of
+        ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y, ...}``. If not, ``output_transform`` can be added
+        to the metric to transform the output into the form expected by the metric.
+
+        For more information on how the metric works with :class:`~ignite.engine.engine.Engine`, visit :ref:`attach-engine`.
+
+        .. include:: defaults.rst
+            :start-after: :orphan:
+
+        .. testcode::
+
+            metric = JSDivergence()
+            metric.attach(default_evaluator, 'js-div')
+            y_true = torch.tensor([
+                [ 0.0000, -2.3026, -2.3026],
+                [ 1.3863,  1.6094,  1.6094],
+                [ 0.0000,  0.6931,  1.0986]
+            ])
+            y_pred = torch.tensor([
+                [ 0.0000,  0.6931,  1.0986],
+                [ 1.3863,  1.6094,  1.6094],
+                [ 0.0000, -2.3026, -2.3026]
+            ])
+            state = default_evaluator.run([[y_pred, y_true]])
+            print(state.metrics['js-div'])
+
+        .. testoutput::
+
+            0.16266516844431558
+    """
+
+    def _update(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
+        m_prob = (F.softmax(y_pred, dim=1) + F.softmax(y, dim=1)) / 2
+        m_log = m_prob.log()
+        y_pred = F.log_softmax(y_pred, dim=1)
+        y = F.log_softmax(y, dim=1)
+        self._sum_of_kl += (
+            F.kl_div(m_log, y_pred, log_target=True, reduction="sum")
+            + F.kl_div(m_log, y, log_target=True, reduction="sum")
+        ).to(self._device)
+
+    @sync_all_reduce("_sum_of_kl", "_num_examples")
+    def compute(self) -> float:
+        if self._num_examples == 0:
+            raise NotComputableError("JSDivergence must have at least one example before it can be computed.")
+        return self._sum_of_kl.item() / (self._num_examples * 2)
diff --git a/ignite/metrics/kl_divergence.py b/ignite/metrics/kl_divergence.py
new file mode 100644
index 000000000000..99f6cbcfa849
--- /dev/null
+++ b/ignite/metrics/kl_divergence.py
@@ -0,0 +1,102 @@
+from typing import Sequence
+
+import torch
+import torch.nn.functional as F
+
+from ignite.exceptions import NotComputableError
+from ignite.metrics.metric import Metric, reinit__is_reduced, sync_all_reduce
+
+__all__ = ["KLDivergence"]
+
+
+class KLDivergence(Metric):
+    r"""Calculates the mean of `Kullback-Leibler (KL) divergence
+    `_.
+
+    .. math:: D_\text{KL}(\mathbf{p}_i \| \mathbf{q}_i) = \sum_{c=1}^C p_{i,c} \log \frac{p_{i,c}}{q_{i,c}}
+
+    where :math:`\mathbf{p}_i` and :math:`\mathbf{q}_i` are the ground truth and prediction probability tensors.
+
+    - ``update`` must receive output of the form ``(y_pred, y)``.
+    - ``y_pred`` and ``y`` are expected to be the unnormalized logits for each class. :math:`(B, C)` (classification)
+      or :math:`(B, C, ...)` (e.g., image segmentation) shapes are allowed.
+
+    Args:
+        output_transform: a callable that is used to transform the
+            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
+            form expected by the metric. This can be useful if, for example, you have a multi-output model and
+            you want to compute the metric with respect to one of the outputs.
+            By default, metrics require the output as ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y}``.
+        device: specifies which device updates are accumulated on. Setting the
+            metric's device to be the same as your ``update`` arguments ensures the ``update`` method is
+            non-blocking. By default, CPU.
+
+    Examples:
+        To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine.
+        The output of the engine's ``process_function`` needs to be in the format of
+        ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y, ...}``. If not, ``output_transform`` can be added
+        to the metric to transform the output into the form expected by the metric.
+
+        For more information on how the metric works with :class:`~ignite.engine.engine.Engine`, visit :ref:`attach-engine`.
+
+        .. include:: defaults.rst
+            :start-after: :orphan:
+
+        .. 
testcode:: + + metric = KLDivergence() + metric.attach(default_evaluator, 'kl-div') + y_true = torch.tensor([ + [ 0.0000, -2.3026, -2.3026], + [ 1.3863, 1.6094, 1.6094], + [ 0.0000, 0.6931, 1.0986] + ]) + y_pred = torch.tensor([ + [ 0.0000, 0.6931, 1.0986], + [ 1.3863, 1.6094, 1.6094], + [ 0.0000, -2.3026, -2.3026] + ]) + state = default_evaluator.run([[y_pred, y_true]]) + print(state.metrics['kl-div']) + + .. testoutput:: + + 0.7220296859741211 + """ + + _state_dict_all_req_keys = ("_sum_of_kl", "_num_examples") + + @reinit__is_reduced + def reset(self) -> None: + self._sum_of_kl = torch.tensor(0.0, device=self._device) + self._num_examples = 0 + + @reinit__is_reduced + def update(self, output: Sequence[torch.Tensor]) -> None: + y_pred, y = output[0].detach(), output[1].detach() + if y_pred.shape != y.shape: + raise ValueError(f"y_pred and y must be in the same shape, got {y_pred.shape} != {y.shape}.") + + if y_pred.ndim >= 3: + num_classes = y_pred.shape[1] + # (B, C, ...) -> (B, ..., C) -> (B*..., C) + # regarding as B*... predictions + y_pred = y_pred.movedim(1, -1).reshape(-1, num_classes) + y = y.movedim(1, -1).reshape(-1, num_classes) + elif y_pred.ndim == 1: + raise ValueError(f"y_pred must be in the shape of (B, C) or (B, C, ...), got {y_pred.shape}.") + + self._num_examples += y_pred.shape[0] + self._update(y_pred, y) + + def _update(self, y_pred: torch.Tensor, y: torch.Tensor) -> None: + y_pred = F.log_softmax(y_pred, dim=1) + y = F.log_softmax(y, dim=1) + kl_sum = F.kl_div(y_pred, y, log_target=True, reduction="sum") + self._sum_of_kl += kl_sum.to(self._device) + + @sync_all_reduce("_sum_of_kl", "_num_examples") + def compute(self) -> float: + if self._num_examples == 0: + raise NotComputableError("KLDivergence must have at least one example before it can be computed.") + return self._sum_of_kl.item() / self._num_examples diff --git a/tests/ignite/metrics/test_js_divergence.py b/tests/ignite/metrics/test_js_divergence.py new file mode 100644 index 000000000000..ac4721ef99d9 --- /dev/null +++ b/tests/ignite/metrics/test_js_divergence.py @@ -0,0 +1,159 @@ +from typing import Tuple + +import numpy as np +import pytest +import torch +from scipy.spatial.distance import jensenshannon +from scipy.special import softmax +from torch import Tensor + +import ignite.distributed as idist +from ignite.engine import Engine +from ignite.exceptions import NotComputableError +from ignite.metrics import JSDivergence + + +def scipy_js_div(np_y_pred: np.ndarray, np_y: np.ndarray) -> float: + y_pred_prob = softmax(np_y_pred, axis=1) + y_prob = softmax(np_y, axis=1) + # jensenshannon computes the sqrt of the JS divergence + js_mean = np.mean(np.square(jensenshannon(y_pred_prob, y_prob, axis=1))) + return js_mean + + +def test_zero_sample(): + js_div = JSDivergence() + with pytest.raises( + NotComputableError, match=r"JSDivergence must have at least one example before it can be computed" + ): + js_div.compute() + + +def test_shape_mismatch(): + js_div = JSDivergence() + y_pred = torch.tensor([[2.0, 3.0], [-2.0, 1.0]], dtype=torch.float) + y = torch.tensor([[-2.0, 1.0]], dtype=torch.float) + with pytest.raises(ValueError, match=r"y_pred and y must be in the same shape, got"): + js_div.update((y_pred, y)) + + +def test_invalid_shape(): + js_div = JSDivergence() + y_pred = torch.tensor([2.0, 3.0], dtype=torch.float) + y = torch.tensor([4.0, 5.0], dtype=torch.float) + with pytest.raises(ValueError, match=r"y_pred must be in the shape of \(B, C\) or \(B, C, ...\), got"): + js_div.update((y_pred, y)) + + 
+@pytest.fixture(params=list(range(4))) +def test_case(request): + return [ + (torch.randn((100, 10)), torch.rand((100, 10)), 1), + (torch.rand((100, 500)), torch.randn((100, 500)), 1), + # updated batches + (torch.normal(0.0, 5.0, size=(100, 10)), torch.rand((100, 10)), 16), + (torch.normal(5.0, 3.0, size=(100, 200)), torch.rand((100, 200)), 16), + # image segmentation + (torch.randn((100, 5, 32, 32)), torch.rand((100, 5, 32, 32)), 16), + (torch.rand((100, 5, 224, 224)), torch.randn((100, 5, 224, 224)), 16), + ][request.param] + + +@pytest.mark.parametrize("n_times", range(5)) +def test_compute(n_times, test_case: Tuple[Tensor, Tensor, int]): + y_pred, y, batch_size = test_case + + js_div = JSDivergence() + + js_div.reset() + if batch_size > 1: + n_iters = y.shape[0] // batch_size + 1 + for i in range(n_iters): + idx = i * batch_size + js_div.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) + else: + js_div.update((y_pred, y)) + + res = js_div.compute() + + np_y_pred = y_pred.numpy() + np_y = y.numpy() + + np_res = scipy_js_div(np_y_pred, np_y) + + assert isinstance(res, float) + assert pytest.approx(np_res, rel=1e-4) == res + + +def test_accumulator_detached(): + js_div = JSDivergence() + + y_pred = torch.tensor([[2.0, 3.0], [-2.0, 1.0]], dtype=torch.float) + y = torch.tensor([[-2.0, 1.0], [2.0, 3.0]], dtype=torch.float) + js_div.update((y_pred, y)) + + assert not js_div._sum_of_kl.requires_grad + + +@pytest.mark.usefixtures("distributed") +class TestDistributed: + def test_integration(self): + tol = 1e-4 + n_iters = 100 + batch_size = 10 + n_dims = 100 + + rank = idist.get_rank() + torch.manual_seed(12 + rank) + + device = idist.device() + metric_devices = [torch.device("cpu")] + if device.type != "xla": + metric_devices.append(device) + + for metric_device in metric_devices: + y_true = torch.randn((n_iters * batch_size, n_dims)).float().to(device) + y_preds = torch.normal(2.0, 3.0, size=(n_iters * batch_size, n_dims)).float().to(device) + + engine = Engine( + lambda e, i: ( + y_preds[i * batch_size : (i + 1) * batch_size], + y_true[i * batch_size : (i + 1) * batch_size], + ) + ) + + m = JSDivergence(device=metric_device) + m.attach(engine, "js_div") + + data = list(range(n_iters)) + engine.run(data=data, max_epochs=1) + + y_preds = idist.all_gather(y_preds) + y_true = idist.all_gather(y_true) + + assert "js_div" in engine.state.metrics + res = engine.state.metrics["js_div"] + + y_true_np = y_true.cpu().numpy() + y_preds_np = y_preds.cpu().numpy() + true_res = scipy_js_div(y_preds_np, y_true_np) + + assert pytest.approx(true_res, rel=tol) == res + + def test_accumulator_device(self): + device = idist.device() + metric_devices = [torch.device("cpu")] + if device.type != "xla": + metric_devices.append(device) + for metric_device in metric_devices: + js_div = JSDivergence(device=metric_device) + + for dev in (js_div._device, js_div._sum_of_kl.device): + assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}" + + y_pred = torch.tensor([[2.0, 3.0], [-2.0, 1.0]]).float() + y = torch.ones(2, 2).float() + js_div.update((y_pred, y)) + + for dev in (js_div._device, js_div._sum_of_kl.device): + assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}" diff --git a/tests/ignite/metrics/test_kl_divergence.py b/tests/ignite/metrics/test_kl_divergence.py new file mode 100644 index 000000000000..6c9512d42310 --- /dev/null +++ b/tests/ignite/metrics/test_kl_divergence.py @@ -0,0 +1,158 @@ +from typing import Tuple + 
+import numpy as np +import pytest +import torch +from scipy.special import softmax +from scipy.stats import entropy +from torch import Tensor + +import ignite.distributed as idist +from ignite.engine import Engine +from ignite.exceptions import NotComputableError +from ignite.metrics import KLDivergence + + +def scipy_kl_div(np_y_pred: np.ndarray, np_y: np.ndarray) -> float: + y_pred_prob = softmax(np_y_pred, axis=1) + y_prob = softmax(np_y, axis=1) + kl_mean = entropy(y_prob, y_pred_prob, axis=1).mean() + return kl_mean + + +def test_zero_sample(): + kl_div = KLDivergence() + with pytest.raises( + NotComputableError, match=r"KLDivergence must have at least one example before it can be computed" + ): + kl_div.compute() + + +def test_shape_mismatch(): + kl_div = KLDivergence() + y_pred = torch.tensor([[2.0, 3.0], [-2.0, 1.0]], dtype=torch.float) + y = torch.tensor([[-2.0, 1.0]], dtype=torch.float) + with pytest.raises(ValueError, match=r"y_pred and y must be in the same shape, got"): + kl_div.update((y_pred, y)) + + +def test_invalid_shape(): + kl_div = KLDivergence() + y_pred = torch.tensor([2.0, 3.0], dtype=torch.float) + y = torch.tensor([4.0, 5.0], dtype=torch.float) + with pytest.raises(ValueError, match=r"y_pred must be in the shape of \(B, C\) or \(B, C, ...\), got"): + kl_div.update((y_pred, y)) + + +@pytest.fixture(params=list(range(4))) +def test_case(request): + return [ + (torch.randn((100, 10)), torch.rand((100, 10)), 1), + (torch.rand((100, 500)), torch.randn((100, 500)), 1), + # updated batches + (torch.normal(0.0, 5.0, size=(100, 10)), torch.rand((100, 10)), 16), + (torch.normal(5.0, 3.0, size=(100, 200)), torch.rand((100, 200)), 16), + # image segmentation + (torch.randn((100, 5, 32, 32)), torch.rand((100, 5, 32, 32)), 16), + (torch.rand((100, 5, 224, 224)), torch.randn((100, 5, 224, 224)), 16), + ][request.param] + + +@pytest.mark.parametrize("n_times", range(5)) +def test_compute(n_times, test_case: Tuple[Tensor, Tensor, int]): + y_pred, y, batch_size = test_case + + kl_div = KLDivergence() + + kl_div.reset() + if batch_size > 1: + n_iters = y.shape[0] // batch_size + 1 + for i in range(n_iters): + idx = i * batch_size + kl_div.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) + else: + kl_div.update((y_pred, y)) + + res = kl_div.compute() + + np_y_pred = y_pred.numpy() + np_y = y.numpy() + + np_res = scipy_kl_div(np_y_pred, np_y) + + assert isinstance(res, float) + assert pytest.approx(np_res, rel=1e-4) == res + + +def test_accumulator_detached(): + kl_div = KLDivergence() + + y_pred = torch.tensor([[2.0, 3.0], [-2.0, 1.0]], dtype=torch.float) + y = torch.tensor([[-2.0, 1.0], [2.0, 3.0]], dtype=torch.float) + kl_div.update((y_pred, y)) + + assert not kl_div._sum_of_kl.requires_grad + + +@pytest.mark.usefixtures("distributed") +class TestDistributed: + def test_integration(self): + tol = 1e-4 + n_iters = 100 + batch_size = 10 + n_dims = 100 + + rank = idist.get_rank() + torch.manual_seed(12 + rank) + + device = idist.device() + metric_devices = [torch.device("cpu")] + if device.type != "xla": + metric_devices.append(device) + + for metric_device in metric_devices: + y_true = torch.randn((n_iters * batch_size, n_dims)).float().to(device) + y_preds = torch.normal(2.0, 3.0, size=(n_iters * batch_size, n_dims)).float().to(device) + + engine = Engine( + lambda e, i: ( + y_preds[i * batch_size : (i + 1) * batch_size], + y_true[i * batch_size : (i + 1) * batch_size], + ) + ) + + m = KLDivergence(device=metric_device) + m.attach(engine, "kl_div") + + data = 
list(range(n_iters)) + engine.run(data=data, max_epochs=1) + + y_preds = idist.all_gather(y_preds) + y_true = idist.all_gather(y_true) + + assert "kl_div" in engine.state.metrics + res = engine.state.metrics["kl_div"] + + y_true_np = y_true.cpu().numpy() + y_preds_np = y_preds.cpu().numpy() + true_res = scipy_kl_div(y_preds_np, y_true_np) + + assert pytest.approx(true_res, rel=tol) == res + + def test_accumulator_device(self): + device = idist.device() + metric_devices = [torch.device("cpu")] + if device.type != "xla": + metric_devices.append(device) + for metric_device in metric_devices: + kl_div = KLDivergence(device=metric_device) + + for dev in (kl_div._device, kl_div._sum_of_kl.device): + assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}" + + y_pred = torch.tensor([[2.0, 3.0], [-2.0, 1.0]]).float() + y = torch.ones(2, 2).float() + kl_div.update((y_pred, y)) + + for dev in (kl_div._device, kl_div._sum_of_kl.device): + assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}" From 95c015483e851a347923f9efbbb72a2e9ac2849b Mon Sep 17 00:00:00 2001 From: John lee Date: Wed, 24 Apr 2024 14:33:30 +0100 Subject: [PATCH 04/33] Add test timeouts (#3241) * add timeout to distributed tests * add pytest-timeout as dep * alter timeout in distributed config group --- requirements-dev.txt | 1 + tests/ignite/conftest.py | 4 ++-- tests/ignite/distributed/comp_models/test_native.py | 2 ++ tests/ignite/distributed/test_launcher.py | 2 ++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index b387dd03a652..93b791226036 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,6 +3,7 @@ numpy pytest pytest-cov pytest-xdist +pytest-timeout dill setuptools # Test contrib dependencies diff --git a/tests/ignite/conftest.py b/tests/ignite/conftest.py index caf92e6e7ad2..9855fd8eb9fd 100644 --- a/tests/ignite/conftest.py +++ b/tests/ignite/conftest.py @@ -195,7 +195,7 @@ def distributed_context_single_node_gloo(local_rank, world_size): "world_size": world_size, "rank": local_rank, "init_method": init_method, - "timeout": timedelta(seconds=60), + "timeout": timedelta(seconds=30), } yield _create_dist_context(dist_info, local_rank) _destroy_dist_context() @@ -423,7 +423,7 @@ def distributed(request, local_rank, world_size): dist_info["backend"] = "gloo" from datetime import timedelta - dist_info["timeout"] = timedelta(seconds=60) + dist_info["timeout"] = timedelta(seconds=30) yield _create_dist_context(dist_info, local_rank) _destroy_dist_context() if temp_file: diff --git a/tests/ignite/distributed/comp_models/test_native.py b/tests/ignite/distributed/comp_models/test_native.py index 09e4d3054601..c771da4148c4 100644 --- a/tests/ignite/distributed/comp_models/test_native.py +++ b/tests/ignite/distributed/comp_models/test_native.py @@ -11,6 +11,8 @@ else: from ignite.distributed.comp_models.native import _expand_hostlist, _NativeDistModel, _setup_ddp_vars_from_slurm_env +pytestmark = pytest.mark.timeout(60) + # tests from https://github.com/LLNL/py-hostlist/blob/master/hostlist/unittest_hostlist.py @pytest.mark.parametrize( diff --git a/tests/ignite/distributed/test_launcher.py b/tests/ignite/distributed/test_launcher.py index b12e2acf1c26..10083ed1bc22 100644 --- a/tests/ignite/distributed/test_launcher.py +++ b/tests/ignite/distributed/test_launcher.py @@ -10,6 +10,8 @@ import ignite.distributed as idist from ignite.distributed.utils import has_hvd_support, 
has_native_dist_support, has_xla_support +pytestmark = pytest.mark.timeout(60) + def test_parallel_wrong_inputs(): with pytest.raises(ValueError, match=r"Unknown backend 'abc'. Available backends:"): From 565e8be07b29f37dc02096a78857e0fa3930d314 Mon Sep 17 00:00:00 2001 From: Kazuki Adachi Date: Sat, 27 Apr 2024 05:04:53 +0900 Subject: [PATCH 05/33] Fix error in old PyTorch for KL and JS divergence (#3236) * add KLDivergence metric * add JSDivergence * fix variable name * update docstring for JSDivergence * Update ignite/metrics/js_divergence.py Co-authored-by: vfdev * Update ignite/metrics/kl_divergence.py Co-authored-by: vfdev * swap ground truth and prediction * swap the definitions of p and q * fix error in old pytorch * switch to use log_target option by version * check pytorch version in the global space in advance --------- Co-authored-by: vfdev --- ignite/metrics/js_divergence.py | 27 ++++++++++++++++++++------- ignite/metrics/kl_divergence.py | 15 +++++++++++++-- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/ignite/metrics/js_divergence.py b/ignite/metrics/js_divergence.py index 1bd37cfedc69..ee223014061d 100644 --- a/ignite/metrics/js_divergence.py +++ b/ignite/metrics/js_divergence.py @@ -1,5 +1,6 @@ import torch import torch.nn.functional as F +from packaging.version import Version from ignite.exceptions import NotComputableError from ignite.metrics.kl_divergence import KLDivergence @@ -7,6 +8,8 @@ __all__ = ["JSDivergence"] +TORCH_VERSION_GE_160 = Version(torch.__version__) >= Version("1.6.0") + class JSDivergence(KLDivergence): r"""Calculates the mean of `Jensen-Shannon (JS) divergence @@ -71,14 +74,24 @@ class JSDivergence(KLDivergence): """ def _update(self, y_pred: torch.Tensor, y: torch.Tensor) -> None: - m_prob = (F.softmax(y_pred, dim=1) + F.softmax(y, dim=1)) / 2 + y_pred_prob = F.softmax(y_pred, dim=1) + y_prob = F.softmax(y, dim=1) + m_prob = (y_pred_prob + y_prob) / 2 m_log = m_prob.log() - y_pred = F.log_softmax(y_pred, dim=1) - y = F.log_softmax(y, dim=1) - self._sum_of_kl += ( - F.kl_div(m_log, y_pred, log_target=True, reduction="sum") - + F.kl_div(m_log, y, log_target=True, reduction="sum") - ).to(self._device) + + if TORCH_VERSION_GE_160: + # log_target option can be used from 1.6.0 + y_pred_log = F.log_softmax(y_pred, dim=1) + y_log = F.log_softmax(y, dim=1) + self._sum_of_kl += ( + F.kl_div(m_log, y_pred_log, log_target=True, reduction="sum") + + F.kl_div(m_log, y_log, log_target=True, reduction="sum") + ).to(self._device) + else: + # y_pred and y are expected to be probabilities + self._sum_of_kl += ( + F.kl_div(m_log, y_pred_prob, reduction="sum") + F.kl_div(m_log, y_prob, reduction="sum") + ).to(self._device) @sync_all_reduce("_sum_of_kl", "_num_examples") def compute(self) -> float: diff --git a/ignite/metrics/kl_divergence.py b/ignite/metrics/kl_divergence.py index 99f6cbcfa849..93f6d5a85282 100644 --- a/ignite/metrics/kl_divergence.py +++ b/ignite/metrics/kl_divergence.py @@ -2,12 +2,15 @@ import torch import torch.nn.functional as F +from packaging.version import Version from ignite.exceptions import NotComputableError from ignite.metrics.metric import Metric, reinit__is_reduced, sync_all_reduce __all__ = ["KLDivergence"] +TORCH_VERSION_GE_160 = Version(torch.__version__) >= Version("1.6.0") + class KLDivergence(Metric): r"""Calculates the mean of `Kullback-Leibler (KL) divergence @@ -91,8 +94,16 @@ def update(self, output: Sequence[torch.Tensor]) -> None: def _update(self, y_pred: torch.Tensor, y: torch.Tensor) -> None: y_pred = 
F.log_softmax(y_pred, dim=1)
-        y = F.log_softmax(y, dim=1)
-        kl_sum = F.kl_div(y_pred, y, log_target=True, reduction="sum")
+
+        if TORCH_VERSION_GE_160:
+            # log_target option can be used from 1.6.0
+            y = F.log_softmax(y, dim=1)
+            kl_sum = F.kl_div(y_pred, y, log_target=True, reduction="sum")
+        else:
+            # y is expected to be a probability tensor
+            y = F.softmax(y, dim=1)
+            kl_sum = F.kl_div(y_pred, y, reduction="sum")
+
         self._sum_of_kl += kl_sum.to(self._device)
 
     @sync_all_reduce("_sum_of_kl", "_num_examples")

From 3f5febf5229d4bffec65eff2bce70bb547718a53 Mon Sep 17 00:00:00 2001
From: John lee
Date: Mon, 29 Apr 2024 13:18:50 +0100
Subject: [PATCH 06/33] Make testing improvements (#3242)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix mypy errors

* improve robustness of tqdm tests

fixes error when running tests using pytest -n >1
this fixes the width of the test time progress bars to avoid some failures in tests.

* use xdist for distributed tests

Tests from a loadgroup are all run sequentially on a single worker.

* remove warnings for unregistered marks

seems to miss marks in the conftest.py file itself though.

* improve execution of visdom tests

limit setup/teardown of visdom servers using a session scoped fixture.
Add visdom tests to an xdist group to run them serially to avoid issues with server connection.
add timeout to the tests explicitly requesting the server to further limit any future issues.

* Improve visdom tests

Do not clean up visdom_server fixture. Session scoped fixtures are not guaranteed
to be executed just once when using xdist and trying to cleanup twice can cause hangs.
Timeout all visdom tests to avoid future issues with a hanging/dead server

* remove superfluous marks

* add comments

* avoid multiple downloads of nltk punkt

* fixup

* add timeout for all distributed tests

* download mnist once

* do not use --dist=loadgroup on ci for now

---------

Co-authored-by: leej3 <johnleenimh@gmail.com>
Co-authored-by: vfdev
---
 ignite/metrics/frequency.py                 |   2 +-
 ignite/metrics/gan/fid.py                   |   2 +-
 ignite/metrics/gan/inception_score.py       |   2 +-
 tests/ignite/conftest.py                    |  17 +++
 .../distributed/comp_models/test_native.py  |   2 -
 tests/ignite/distributed/test_launcher.py   |   2 -
 tests/ignite/handlers/conftest.py           |  59 ++++------
 tests/ignite/handlers/test_lr_finder.py     |  21 +++-
 tests/ignite/handlers/test_tqdm_logger.py   | 102 +++++++++---------
 tests/ignite/handlers/test_visdom_logger.py |   6 +-
 tests/ignite/metrics/nlp/test_rouge.py      |  17 ++-
 11 files changed, 129 insertions(+), 103 deletions(-)

diff --git a/ignite/metrics/frequency.py b/ignite/metrics/frequency.py
index 8c63edd1ec97..52f02565ac42 100644
--- a/ignite/metrics/frequency.py
+++ b/ignite/metrics/frequency.py
@@ -49,7 +49,7 @@ def reset(self) -> None:
         self._acc = 0
         self._n = 0
         self._elapsed = 0.0
-        super(Frequency, self).reset()
+        super(Frequency, self).reset()  # type: ignore
 
     @reinit__is_reduced
     def update(self, output: int) -> None:
diff --git a/ignite/metrics/gan/fid.py b/ignite/metrics/gan/fid.py
index 188bad5035a2..b74efe3e0e9a 100644
--- a/ignite/metrics/gan/fid.py
+++ b/ignite/metrics/gan/fid.py
@@ -226,7 +226,7 @@ def reset(self) -> None:
         self._test_total = torch.zeros(self._num_features, dtype=torch.float64, device=self._device)
         self._num_examples: int = 0
 
-        super(FID, self).reset()
+        super(FID, self).reset()  # type: ignore
 
     @reinit__is_reduced
     def update(self, output: Sequence[torch.Tensor]) -> None:
diff --git a/ignite/metrics/gan/inception_score.py 
b/ignite/metrics/gan/inception_score.py index 60b1d4785f71..b2a179fa65d1 100644 --- a/ignite/metrics/gan/inception_score.py +++ b/ignite/metrics/gan/inception_score.py @@ -106,7 +106,7 @@ def reset(self) -> None: self._prob_total = torch.zeros(self._num_features, dtype=torch.float64, device=self._device) self._total_kl_d = torch.zeros(self._num_features, dtype=torch.float64, device=self._device) - super(InceptionScore, self).reset() + super(InceptionScore, self).reset() # type: ignore @reinit__is_reduced def update(self, output: torch.Tensor) -> None: diff --git a/tests/ignite/conftest.py b/tests/ignite/conftest.py index 9855fd8eb9fd..4e6712c43cf0 100644 --- a/tests/ignite/conftest.py +++ b/tests/ignite/conftest.py @@ -13,6 +13,12 @@ import ignite.distributed as idist +def pytest_configure(config): + config.addinivalue_line("markers", "distributed: run distributed") + config.addinivalue_line("markers", "multinode_distributed: distributed") + config.addinivalue_line("markers", "tpu: run on tpu") + + @pytest.fixture( params=[ "cpu", @@ -492,3 +498,14 @@ def xla_worker(index, fn): assert ex_.code == 0, "Didn't successfully exit in XLA test" pyfuncitem.obj = functools.partial(testfunc_wrapper, pyfuncitem.obj) + + +def pytest_collection_modifyitems(items): + for item in items: + if "distributed" in item.fixturenames: + # Run distributed tests on a single worker to avoid RACE conditions + # This requires that the --dist=loadgroup option be passed to pytest. + item.add_marker(pytest.mark.xdist_group("distributed")) + item.add_marker(pytest.mark.timeout(45)) + if "multinode_distributed" in item.fixturenames: + item.add_marker(pytest.mark.timeout(45)) diff --git a/tests/ignite/distributed/comp_models/test_native.py b/tests/ignite/distributed/comp_models/test_native.py index c771da4148c4..09e4d3054601 100644 --- a/tests/ignite/distributed/comp_models/test_native.py +++ b/tests/ignite/distributed/comp_models/test_native.py @@ -11,8 +11,6 @@ else: from ignite.distributed.comp_models.native import _expand_hostlist, _NativeDistModel, _setup_ddp_vars_from_slurm_env -pytestmark = pytest.mark.timeout(60) - # tests from https://github.com/LLNL/py-hostlist/blob/master/hostlist/unittest_hostlist.py @pytest.mark.parametrize( diff --git a/tests/ignite/distributed/test_launcher.py b/tests/ignite/distributed/test_launcher.py index 10083ed1bc22..b12e2acf1c26 100644 --- a/tests/ignite/distributed/test_launcher.py +++ b/tests/ignite/distributed/test_launcher.py @@ -10,8 +10,6 @@ import ignite.distributed as idist from ignite.distributed.utils import has_hvd_support, has_native_dist_support, has_xla_support -pytestmark = pytest.mark.timeout(60) - def test_parallel_wrong_inputs(): with pytest.raises(ValueError, match=r"Unknown backend 'abc'. 
Available backends:"): diff --git a/tests/ignite/handlers/conftest.py b/tests/ignite/handlers/conftest.py index 9d7bb999463b..79ac0809698e 100644 --- a/tests/ignite/handlers/conftest.py +++ b/tests/ignite/handlers/conftest.py @@ -1,58 +1,41 @@ -import random +import subprocess +import time from pathlib import Path from unittest.mock import Mock import pytest import torch +from visdom import Visdom +from visdom.server.build import download_scripts -vd_hostname = None -vd_port = None -vd_server_process = None - -@pytest.fixture() +@pytest.fixture(scope="session") def visdom_server(): # Start Visdom server once and stop it with visdom_server_stop - global vd_hostname, vd_port, vd_server_process - - if vd_server_process is None: - import subprocess - import time - - from visdom import Visdom - from visdom.server.build import download_scripts - + vd_hostname = "localhost" + if not (Path.home() / ".visdom").exists(): (Path.home() / ".visdom").mkdir(exist_ok=True) download_scripts() + vis = None - vd_hostname = "localhost" - vd_port = random.randint(8089, 8887) - + vd_port = 29777 + vd_server_process = subprocess.Popen( + ["python", "-m", "visdom.server", "--hostname", vd_hostname, "-port", str(vd_port)] + ) + time.sleep(2) + for ii in range(5): try: + time.sleep(1) vis = Visdom(server=vd_hostname, port=vd_port, raise_exceptions=True) + break except ConnectionError: - pass - - vd_server_process = subprocess.Popen( - ["python", "-m", "visdom.server", "--hostname", vd_hostname, "-port", str(vd_port)] - ) - time.sleep(5) - - vis = Visdom(server=vd_hostname, port=vd_port) - assert vis.check_connection() - vis.close() + continue + assert vis and vis.check_connection() yield (vd_hostname, vd_port) - - -@pytest.fixture() -def visdom_server_stop(): - yield None - - import time - - vd_server_process.kill() - time.sleep(2) + # Trying to clean up slows things down and sometimes causes hangs. 
+ # vis.close() + # vd_server_process.kill() @pytest.fixture diff --git a/tests/ignite/handlers/test_lr_finder.py b/tests/ignite/handlers/test_lr_finder.py index e12d951dfbf2..23b823d9ce47 100644 --- a/tests/ignite/handlers/test_lr_finder.py +++ b/tests/ignite/handlers/test_lr_finder.py @@ -3,6 +3,8 @@ from pathlib import Path from unittest.mock import MagicMock +import filelock + import matplotlib import pytest import torch @@ -144,16 +146,27 @@ def dataloader_plot(): @pytest.fixture -def mnist_dataloader(): +def mnist_dataloader(tmp_path_factory): from torch.utils.data import DataLoader from torchvision.datasets import MNIST from torchvision.transforms import Compose, Normalize, ToTensor data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))]) - train_loader = DataLoader( - MNIST(download=True, root="/tmp", transform=data_transform, train=True), batch_size=256, shuffle=True - ) + root_tmp_dir = tmp_path_factory.getbasetemp().parent + while True: + try: + with filelock.FileLock(root_tmp_dir / "mnist_download.lock", timeout=0.2) as fn: + fn.acquire() + train_loader = DataLoader( + MNIST(download=True, root="/tmp", transform=data_transform, train=True), + batch_size=256, + shuffle=True, + ) + fn.release() + break + except filelock._error.Timeout: + pass yield train_loader diff --git a/tests/ignite/handlers/test_tqdm_logger.py b/tests/ignite/handlers/test_tqdm_logger.py index 0f9a501ebf82..cae59ac15b4e 100644 --- a/tests/ignite/handlers/test_tqdm_logger.py +++ b/tests/ignite/handlers/test_tqdm_logger.py @@ -33,9 +33,9 @@ def update_fn(engine, batch): def test_pbar_errors(): with pytest.raises(ModuleNotFoundError, match=r"This contrib module requires tqdm to be installed"): with patch.dict("sys.modules", {"tqdm.autonotebook": None}): - ProgressBar() + ProgressBar(ncols=80) - pbar = ProgressBar() + pbar = ProgressBar(ncols=80) with pytest.raises(ValueError, match=r"Logging event abc is not in allowed"): pbar.attach(Engine(lambda e, b: None), event_name=Namespace(name="abc")) @@ -45,7 +45,7 @@ def test_pbar(capsys): loader = [1, 2] engine = Engine(update_fn) - pbar = ProgressBar() + pbar = ProgressBar(ncols=80) pbar.attach(engine, ["a"]) engine.run(loader, max_epochs=n_epochs) @@ -55,9 +55,9 @@ def test_pbar(capsys): err = list(map(lambda x: x.strip(), err)) err = list(filter(None, err)) if get_tqdm_version() < Version("4.49.0"): - expected = "Epoch [2/2]: [1/2] 50%|█████ , a=1 [00:00<00:00]" + expected = "Epoch 8 -*- , a=1 [00:00<00:00]" else: - expected = "Epoch [2/2]: [1/2] 50%|█████ , a=1 [00:00 Date: Wed, 8 May 2024 05:31:49 +0900 Subject: [PATCH 07/33] Add MaximumMeanDiscrepancy metric (#3243) * add MaximumMeanDiscrepancy metric * fix URL * update formula * modify test for MMD * set default var value for np_mmd * accumulate mmd2 * accumulate sum of xx, yy, and xy * add reference paper to docstring * fix accumulator variables * fix test_accumulator_device --- docs/source/metrics.rst | 1 + ignite/metrics/__init__.py | 2 + ignite/metrics/maximum_mean_discrepancy.py | 138 ++++++++++++++ .../metrics/test_maximum_mean_discrepancy.py | 176 ++++++++++++++++++ 4 files changed, 317 insertions(+) create mode 100644 ignite/metrics/maximum_mean_discrepancy.py create mode 100644 tests/ignite/metrics/test_maximum_mean_discrepancy.py diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst index a7f90b754d96..ef1250314811 100644 --- a/docs/source/metrics.rst +++ b/docs/source/metrics.rst @@ -355,6 +355,7 @@ Complete list of metrics Entropy KLDivergence JSDivergence + 
MaximumMeanDiscrepancy AveragePrecision CohenKappa GpuInfo diff --git a/ignite/metrics/__init__.py b/ignite/metrics/__init__.py index 2cc55aace661..e4f4e24337c5 100644 --- a/ignite/metrics/__init__.py +++ b/ignite/metrics/__init__.py @@ -17,6 +17,7 @@ from ignite.metrics.js_divergence import JSDivergence from ignite.metrics.kl_divergence import KLDivergence from ignite.metrics.loss import Loss +from ignite.metrics.maximum_mean_discrepancy import MaximumMeanDiscrepancy from ignite.metrics.mean_absolute_error import MeanAbsoluteError from ignite.metrics.mean_pairwise_distance import MeanPairwiseDistance from ignite.metrics.mean_squared_error import MeanSquaredError @@ -61,6 +62,7 @@ "JaccardIndex", "JSDivergence", "KLDivergence", + "MaximumMeanDiscrepancy", "MultiLabelConfusionMatrix", "MutualInformation", "Precision", diff --git a/ignite/metrics/maximum_mean_discrepancy.py b/ignite/metrics/maximum_mean_discrepancy.py new file mode 100644 index 000000000000..d92dd5448ce1 --- /dev/null +++ b/ignite/metrics/maximum_mean_discrepancy.py @@ -0,0 +1,138 @@ +from typing import Callable, Sequence + +import torch + +from ignite.exceptions import NotComputableError +from ignite.metrics.metric import Metric, reinit__is_reduced, sync_all_reduce + +__all__ = ["MaximumMeanDiscrepancy"] + + +class MaximumMeanDiscrepancy(Metric): + r"""Calculates the mean of `maximum mean discrepancy (MMD) + `_. + + .. math:: + \begin{align*} + \text{MMD}^2 (P,Q) &= \underset{\| f \| \leq 1}{\text{sup}} | \mathbb{E}_{X\sim P}[f(X)] + - \mathbb{E}_{Y\sim Q}[f(Y)] |^2 \\ + &\approx \frac{1}{B(B-1)} \sum_{i=1}^B \sum_{\substack{j=1 \\ j\neq i}}^B k(\mathbf{x}_i,\mathbf{x}_j) + -\frac{2}{B^2}\sum_{i=1}^B \sum_{j=1}^B k(\mathbf{x}_i,\mathbf{y}_j) + + \frac{1}{B(B-1)} \sum_{i=1}^B \sum_{\substack{j=1 \\ j\neq i}}^B k(\mathbf{y}_i,\mathbf{y}_j) + \end{align*} + + where :math:`B` is the batch size, and :math:`\mathbf{x}_i` and :math:`\mathbf{y}_j` are + feature vectors sampled from :math:`P` and :math:`Q`, respectively. + :math:`k(\mathbf{x},\mathbf{y})=\exp(-\| \mathbf{x}-\mathbf{y} \|^2/ 2\sigma^2)` is the Gaussian RBF kernel. + + This metric computes the MMD for each batch and takes the average. + + More details can be found in `Gretton et al. 2012`__. + + __ https://jmlr.csail.mit.edu/papers/v13/gretton12a.html + + - ``update`` must receive output of the form ``(x, y)``. + - ``x`` and ``y`` are expected to be in the same shape :math:`(B, \ldots)`. + + Args: + var: the bandwidth :math:`\sigma^2` of the kernel. Default: 1.0 + output_transform: a callable that is used to transform the + :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the + form expected by the metric. This can be useful if, for example, you have a multi-output model and + you want to compute the metric with respect to one of the outputs. + By default, this metric requires the output as ``(x, y)``. + device: specifies which device updates are accumulated on. Setting the + metric's device to be the same as your ``update`` arguments ensures the ``update`` method is + non-blocking. By default, CPU. + + Examples: + To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. + The output of the engine's ``process_function`` needs to be in the format of + ``(x, y)``. If not, ``output_tranform`` can be added + to the metric to transform the output into the form expected by the metric. + + For more information on how metric works with :class:`~ignite.engine.engine.Engine`, visit :ref:`attach-engine`. + + .. 
include:: defaults.rst + :start-after: :orphan: + + .. testcode:: + + metric = MaximumMeanDiscrepancy() + metric.attach(default_evaluator, "mmd") + x = torch.tensor([[-0.80324818, -0.95768364, -0.03807209], + [-0.11059691, -0.38230813, -0.4111988], + [-0.8864329, -0.02890403, -0.60119252], + [-0.68732452, -0.12854739, -0.72095073], + [-0.62604613, -0.52368328, -0.24112842]]) + y = torch.tensor([[0.0686768, 0.80502737, 0.53321717], + [0.83849465, 0.59099726, 0.76385441], + [0.68688272, 0.56833803, 0.98100778], + [0.55267761, 0.13084654, 0.45382906], + [0.0754253, 0.70317304, 0.4756805]]) + state = default_evaluator.run([[x, y]]) + print(state.metrics["mmd"]) + + .. testoutput:: + + 1.0726975202560425 + """ + + _state_dict_all_req_keys = ("_xx_sum", "_yy_sum", "_xy_sum", "_num_batches") + + def __init__( + self, var: float = 1.0, output_transform: Callable = lambda x: x, device: torch.device = torch.device("cpu") + ): + self.var = var + super().__init__(output_transform, device) + + @reinit__is_reduced + def reset(self) -> None: + self._xx_sum = torch.tensor(0.0, device=self._device) + self._yy_sum = torch.tensor(0.0, device=self._device) + self._xy_sum = torch.tensor(0.0, device=self._device) + self._num_batches = 0 + + @reinit__is_reduced + def update(self, output: Sequence[torch.Tensor]) -> None: + x, y = output[0].detach(), output[1].detach() + if x.shape != y.shape: + raise ValueError(f"x and y must be in the same shape, got {x.shape} != {y.shape}.") + + if x.ndim >= 3: + x = x.flatten(start_dim=1) + y = y.flatten(start_dim=1) + elif x.ndim == 1: + raise ValueError(f"x must be in the shape of (B, ...), got {x.shape}.") + + xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t()) + rx = xx.diag().unsqueeze(0).expand_as(xx) + ry = yy.diag().unsqueeze(0).expand_as(yy) + + dxx = rx.t() + rx - 2.0 * xx + dyy = ry.t() + ry - 2.0 * yy + dxy = rx.t() + ry - 2.0 * zz + + v = self.var + XX = torch.exp(-0.5 * dxx / v) + YY = torch.exp(-0.5 * dyy / v) + XY = torch.exp(-0.5 * dxy / v) + + # unbiased + n = x.shape[0] + XX = (XX.sum() - n) / (n * (n - 1)) + YY = (YY.sum() - n) / (n * (n - 1)) + XY = XY.sum() / (n * n) + + self._xx_sum += XX.to(self._device) + self._yy_sum += YY.to(self._device) + self._xy_sum += XY.to(self._device) + + self._num_batches += 1 + + @sync_all_reduce("_xx_sum", "_yy_sum", "_xy_sum", "_num_batches") + def compute(self) -> float: + if self._num_batches == 0: + raise NotComputableError("MaximumMeanDiscrepacy must have at least one batch before it can be computed.") + mmd2 = (self._xx_sum + self._yy_sum - 2.0 * self._xy_sum).clamp(min=0.0) / self._num_batches + return mmd2.sqrt().item() diff --git a/tests/ignite/metrics/test_maximum_mean_discrepancy.py b/tests/ignite/metrics/test_maximum_mean_discrepancy.py new file mode 100644 index 000000000000..8cfc5f55567d --- /dev/null +++ b/tests/ignite/metrics/test_maximum_mean_discrepancy.py @@ -0,0 +1,176 @@ +from typing import Tuple + +import numpy as np +import pytest +import torch +from torch import Tensor + +import ignite.distributed as idist +from ignite.engine import Engine +from ignite.exceptions import NotComputableError +from ignite.metrics import MaximumMeanDiscrepancy + + +def np_mmd2(x: np.ndarray, y: np.ndarray, var: float = 1.0): + n = x.shape[0] + x = x.reshape(n, -1) + y = y.reshape(n, -1) + + a = np.arange(n) + ii, jj = np.meshgrid(a, a, indexing="ij") + XX = np.exp(-np.square(x[ii] - x[jj]).sum(axis=2) / (var * 2)) + XX = (np.sum(XX) - n) / (n * (n - 1)) + + XY = np.exp(-np.square(x[ii] - 
+    XY = np.exp(-np.square(x[ii] - y[jj]).sum(axis=2) / (var * 2))
+    XY = np.sum(XY) / (n * n)
+
+    YY = np.exp(-np.square(y[ii] - y[jj]).sum(axis=2) / (var * 2))
+    YY = (np.sum(YY) - n) / (n * (n - 1))
+
+    mmd2 = np.clip(XX + YY - XY * 2, 0.0, None)
+    return mmd2
+
+
+def test_zero_sample():
+    mmd = MaximumMeanDiscrepancy()
+    with pytest.raises(
+        NotComputableError, match=r"MaximumMeanDiscrepancy must have at least one batch before it can be computed"
+    ):
+        mmd.compute()
+
+
+def test_shape_mismatch():
+    mmd = MaximumMeanDiscrepancy()
+    x = torch.tensor([[2.0, 3.0], [-2.0, 1.0]], dtype=torch.float)
+    y = torch.tensor([[-2.0, 1.0]], dtype=torch.float)
+    with pytest.raises(ValueError, match=r"x and y must be in the same shape, got"):
+        mmd.update((x, y))
+
+
+def test_invalid_shape():
+    mmd = MaximumMeanDiscrepancy()
+    x = torch.tensor([2.0, 3.0], dtype=torch.float)
+    y = torch.tensor([4.0, 5.0], dtype=torch.float)
+    with pytest.raises(ValueError, match=r"x must be in the shape of \(B, ...\), got"):
+        mmd.update((x, y))
+
+
+@pytest.fixture(params=list(range(4)))
+def test_case(request):
+    return [
+        (torch.randn((100, 10)), torch.rand((100, 10)), 10 ** np.random.uniform(-1.0, 0.0), 1),
+        (torch.rand((100, 500)), torch.randn((100, 500)), 10 ** np.random.uniform(-1.0, 0.0), 1),
+        # updated batches
+        (torch.normal(0.0, 5.0, size=(100, 10)), torch.rand((100, 10)), 10 ** np.random.uniform(-1.0, 0.0), 16),
+        (torch.normal(5.0, 3.0, size=(100, 200)), torch.rand((100, 200)), 10 ** np.random.uniform(-1.0, 0.0), 16),
+        # image segmentation
+        (torch.randn((100, 5, 32, 32)), torch.rand((100, 5, 32, 32)), 10 ** np.random.uniform(-1.0, 0.0), 32),
+        (torch.rand((100, 5, 224, 224)), torch.randn((100, 5, 224, 224)), 10 ** np.random.uniform(-1.0, 0.0), 32),
+    ][request.param]
+
+
+@pytest.mark.parametrize("n_times", range(5))
+def test_compute(n_times, test_case: Tuple[Tensor, Tensor, float, int]):
+    x, y, var, batch_size = test_case
+
+    mmd = MaximumMeanDiscrepancy(var=var)
+    mmd.reset()
+
+    if batch_size > 1:
+        np_mmd2_sum = 0.0
+        n_iters = y.shape[0] // batch_size + 1
+        for i in range(n_iters):
+            idx = i * batch_size
+            x_batch, y_batch = x[idx : idx + batch_size], y[idx : idx + batch_size]
+            mmd.update((x_batch, y_batch))
+
+            np_mmd2_sum += np_mmd2(x_batch.cpu().numpy(), y_batch.cpu().numpy(), var)
+
+        np_res = np.sqrt(np_mmd2_sum / n_iters)
+    else:
+        mmd.update((x, y))
+        np_res = np.sqrt(np_mmd2(x.cpu().numpy(), y.cpu().numpy(), var))
+
+    res = mmd.compute()
+
+    assert isinstance(res, float)
+    assert pytest.approx(np_res, abs=1e-4) == res
+
+
+def test_accumulator_detached():
+    mmd = MaximumMeanDiscrepancy()
+
+    x = torch.tensor([[2.0, 3.0], [-2.0, 1.0]], dtype=torch.float)
+    y = torch.tensor([[-2.0, 1.0], [2.0, 3.0]], dtype=torch.float)
+    mmd.update((x, y))
+
+    assert not any(acc.requires_grad for acc in (mmd._xx_sum, mmd._yy_sum, mmd._xy_sum))
+
+
+@pytest.mark.usefixtures("distributed")
+class TestDistributed:
+    def test_integration(self):
+        tol = 1e-4
+        n_iters = 100
+        batch_size = 10
+        n_dims = 100
+
+        rank = idist.get_rank()
+        torch.manual_seed(12 + rank)
+
+        device = idist.device()
+        metric_devices = [torch.device("cpu")]
+        if device.type != "xla":
+            metric_devices.append(device)
+
+        for metric_device in metric_devices:
+            y = torch.randn((n_iters * batch_size, n_dims)).float().to(device)
+            x = torch.normal(2.0, 3.0, size=(n_iters * batch_size, n_dims)).float().to(device)
+
+            def data_loader(i):
+                return x[i * batch_size : (i + 1) * batch_size], y[i * batch_size : (i + 1) * batch_size]
+
+            engine = Engine(lambda e, i: data_loader(i))
+
+            m = MaximumMeanDiscrepancy(device=metric_device)
+            m.attach(engine, "mmd")
+
+            data = list(range(n_iters))
+            engine.run(data=data, max_epochs=1)
+
+            x = idist.all_gather(x)
+            y = idist.all_gather(y)
+
+            assert "mmd" in engine.state.metrics
+            res = engine.state.metrics["mmd"]
+
+            # compute numpy mmd
+            true_res = 0.0
+            for i in range(n_iters):
+                x_batch, y_batch = data_loader(i)
+                x_np = x_batch.cpu().numpy()
+                y_np = y_batch.cpu().numpy()
+                true_res += np_mmd2(x_np, y_np)
+
+            true_res = np.sqrt(true_res / n_iters)
+            assert pytest.approx(true_res, abs=tol) == res
+
+    def test_accumulator_device(self):
+        device = idist.device()
+        metric_devices = [torch.device("cpu")]
+        if device.type != "xla":
+            metric_devices.append(device)
+        for metric_device in metric_devices:
+            mmd = MaximumMeanDiscrepancy(device=metric_device)
+
+            devices = (mmd._device, mmd._xx_sum.device, mmd._yy_sum.device, mmd._xy_sum.device)
+            for dev in devices:
+                assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"
+
+            x = torch.tensor([[2.0, 3.0], [-2.0, 1.0]]).float()
+            y = torch.ones(2, 2).float()
+            mmd.update((x, y))
+
+            devices = (mmd._device, mmd._xx_sum.device, mmd._yy_sum.device, mmd._xy_sum.device)
+            for dev in devices:
+                assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"
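The pairwise-distance algebra in ``update`` above is terse: it relies on the identity
:math:`\| \mathbf{x}_i-\mathbf{x}_j \|^2 = \langle \mathbf{x}_i,\mathbf{x}_i\rangle + \langle \mathbf{x}_j,\mathbf{x}_j\rangle - 2\langle \mathbf{x}_i,\mathbf{x}_j\rangle`,
so ``rx.t() + rx - 2.0 * xx`` produces the full matrix of squared distances from a single Gram matrix.
A minimal standalone sketch of that equivalence (the ``pairwise_sq_dists`` helper is illustrative, not part of the patch):

.. code-block:: python

    import torch

    def pairwise_sq_dists(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        # Gram-matrix trick: ||x_i - y_j||^2 = <x_i, x_i> + <y_j, y_j> - 2 <x_i, y_j>
        xy = x @ y.t()                        # (B, B) Gram matrix
        rx = (x * x).sum(dim=1).unsqueeze(1)  # (B, 1) squared norms of rows of x
        ry = (y * y).sum(dim=1).unsqueeze(0)  # (1, B) squared norms of rows of y
        return rx + ry - 2.0 * xy             # broadcasts to (B, B)

    x, y = torch.randn(8, 16), torch.randn(8, 16)

    # reference: explicit loop over all pairs
    ref = torch.stack([torch.stack([((x[i] - y[j]) ** 2).sum() for j in range(8)]) for i in range(8)])
    assert torch.allclose(pairwise_sq_dists(x, y), ref, atol=1e-5)

The metric then only has to apply the RBF kernel ``exp(-0.5 * d / var)`` elementwise to these distance matrices and accumulate the three unbiased kernel means per batch.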
From 0c680df2f1ec8df44a1c55d45c9fb57ceb4c7a0f Mon Sep 17 00:00:00 2001
From: sjiang95 <51251025+sjiang95@users.noreply.github.com>
Date: Wed, 8 May 2024 16:17:10 +0800
Subject: [PATCH 08/33] setup_logger: add optional arg `encoding` for
 FileHandler (#3240)

* setup_logger: add optional arg encoding for FileHandler

By default, the encoding is `utf-8` for compatibility with CJK characters.

Signed-off-by: Shengjiang Quan

* Added a test and updated docstring

* open() the test log file with corresponding encoding

* add Japanese and Korean test words

* bypass encoding = None check on Windows

* Updated tests

---------

Signed-off-by: Shengjiang Quan
Co-authored-by: vfdev
---
 ignite/utils.py            |  7 ++++++-
 tests/ignite/test_utils.py | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/ignite/utils.py b/ignite/utils.py
index 078e16663f68..6e5b2176d6a4 100644
--- a/ignite/utils.py
+++ b/ignite/utils.py
@@ -163,6 +163,7 @@ def setup_logger(
     filepath: Optional[str] = None,
     distributed_rank: Optional[int] = None,
     reset: bool = False,
+    encoding: Optional[str] = "utf-8",
 ) -> logging.Logger:
     """Sets up the logger: name, level, format etc.

@@ -175,6 +176,7 @@
         distributed_rank: Optional, rank in distributed configuration to avoid logger setup for workers.
             If None, distributed_rank is initialized to the rank of process.
         reset: if True, reset an existing logger rather than keep format, handlers, and level.
+        encoding: encoding used to open the file. By default, 'utf-8'.

     Returns:
         logging.Logger

@@ -228,6 +230,9 @@
     .. versionchanged:: 0.4.5
         Added ``reset`` parameter.
+
+    .. versionchanged:: 0.5.1
+        Argument ``encoding`` added to correctly handle special characters in the file, default "utf-8".
""" # check if the logger already exists existing = name is None or name in logging.root.manager.loggerDict @@ -265,7 +270,7 @@ def setup_logger( logger.addHandler(ch) if filepath is not None: - fh = logging.FileHandler(filepath) + fh = logging.FileHandler(filepath, encoding=encoding) fh.setLevel(level) fh.setFormatter(formatter) logger.addHandler(fh) diff --git a/tests/ignite/test_utils.py b/tests/ignite/test_utils.py index c4c65a29d696..828533ce2019 100644 --- a/tests/ignite/test_utils.py +++ b/tests/ignite/test_utils.py @@ -1,4 +1,5 @@ import logging +import platform import sys from collections import namedtuple @@ -174,6 +175,29 @@ def test_override_setup_logger(capsys): logging.shutdown() +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +def test_setup_logger_encoding(encoding, dirname): + fp = dirname / "log.txt" + logger = setup_logger(name="logger", filepath=fp, encoding=encoding, reset=True) + test_words = ["say hello", "say 你好", "say こんにちわ", "say 안녕하세요", "say привет"] + for w in test_words: + logger.info(w) + logging.shutdown() + + with open(fp, "r", encoding=encoding) as h: + data = h.readlines() + + if platform.system() == "Windows" and encoding is None: + flatten_data = "\n".join(data) + assert test_words[0] in flatten_data + for word in test_words[1:]: + assert word not in flatten_data + else: + assert len(data) == len(test_words) + for expected, output in zip(test_words, data): + assert expected in output + + def test_deprecated(): # Test on function without docs, @deprecated without reasons @deprecated("0.4.2", "0.6.0") From 13b9d34c45752e1ffb4a5c796ea35523a7de63da Mon Sep 17 00:00:00 2001 From: Kazuki Adachi Date: Fri, 10 May 2024 06:43:36 +0900 Subject: [PATCH 09/33] Fix error of doc test for MaximumMeanDiscrepancy metric (#3245) * add MaximumMeanDiscrepancy metric * fix URL * update formula * modify test for MMD * set default var value for np_mmd * accumulate mmd2 * accumulate sum of xx, yy, and xy * add reference paper to docstring * fix accumulator variables * fix test_accumulator_device * fix result of the sample code of the docstring --- ignite/metrics/maximum_mean_discrepancy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ignite/metrics/maximum_mean_discrepancy.py b/ignite/metrics/maximum_mean_discrepancy.py index d92dd5448ce1..24faf5758c63 100644 --- a/ignite/metrics/maximum_mean_discrepancy.py +++ b/ignite/metrics/maximum_mean_discrepancy.py @@ -75,7 +75,7 @@ class MaximumMeanDiscrepancy(Metric): .. 
testoutput:: - 1.0726975202560425 + 1.072697639465332 """ _state_dict_all_req_keys = ("_xx_sum", "_yy_sum", "_xy_sum", "_num_batches") From 03747ec0ba23e7c34640cb7d55f04b093c97b33d Mon Sep 17 00:00:00 2001 From: John lee Date: Fri, 10 May 2024 11:20:27 +0100 Subject: [PATCH 10/33] adjust tpu timeout (#3246) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit moves collection time test modification into the main hook for modifying tests Co-authored-by: leej3 <“johnleenimh@gmail.com> --- tests/ignite/conftest.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/ignite/conftest.py b/tests/ignite/conftest.py index 4e6712c43cf0..265ae97e3e72 100644 --- a/tests/ignite/conftest.py +++ b/tests/ignite/conftest.py @@ -449,6 +449,16 @@ def distributed(request, local_rank, world_size): @pytest.hookimpl def pytest_pyfunc_call(pyfuncitem: pytest.Function) -> None: + if any(fx in pyfuncitem.fixturenames for fx in ["distributed", "multinode_distributed"]): + # Run distributed tests on a single worker to avoid RACE conditions + # This requires that the --dist=loadgroup option be passed to pytest. + pyfuncitem.add_marker(pytest.mark.xdist_group("distributed")) + # Add timeouts to prevent hanging + if "tpu" in pyfuncitem.fixturenames: + pyfuncitem.add_marker(pytest.mark.timeout(60)) + else: + pyfuncitem.add_marker(pytest.mark.timeout(45)) + if pyfuncitem.stash.get(is_horovod_stash_key, False): def testfunc_wrapper(test_func, **kwargs): @@ -498,14 +508,3 @@ def xla_worker(index, fn): assert ex_.code == 0, "Didn't successfully exit in XLA test" pyfuncitem.obj = functools.partial(testfunc_wrapper, pyfuncitem.obj) - - -def pytest_collection_modifyitems(items): - for item in items: - if "distributed" in item.fixturenames: - # Run distributed tests on a single worker to avoid RACE conditions - # This requires that the --dist=loadgroup option be passed to pytest. 
- item.add_marker(pytest.mark.xdist_group("distributed")) - item.add_marker(pytest.mark.timeout(45)) - if "multinode_distributed" in item.fixturenames: - item.add_marker(pytest.mark.timeout(45)) From 37d9a673620b28193df82139e7fe7ac929d1a7cb Mon Sep 17 00:00:00 2001 From: John lee Date: Tue, 21 May 2024 14:13:34 +0100 Subject: [PATCH 11/33] mypy (#3248) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: leej3 <“johnleenimh@gmail.com> --- ignite/handlers/lr_finder.py | 2 +- ignite/handlers/param_scheduler.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ignite/handlers/lr_finder.py b/ignite/handlers/lr_finder.py index 2b3e58c05aeb..e3840d5da7d3 100644 --- a/ignite/handlers/lr_finder.py +++ b/ignite/handlers/lr_finder.py @@ -542,7 +542,7 @@ def __init__( # override base_lrs self.base_lrs = start_lrs - def get_lr(self) -> List[float]: # type: ignore[override] + def get_lr(self) -> List[float]: curr_iter = self.last_epoch + 1 r = curr_iter / self.num_iter return [base_lr * (end_lr / base_lr) ** r for end_lr, base_lr in zip(self.end_lrs, self.base_lrs)] diff --git a/ignite/handlers/param_scheduler.py b/ignite/handlers/param_scheduler.py index d0d0cba4fd8f..dee9a4116b81 100644 --- a/ignite/handlers/param_scheduler.py +++ b/ignite/handlers/param_scheduler.py @@ -7,7 +7,7 @@ from collections import OrderedDict from copy import copy from pathlib import Path -from typing import Any, cast, Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union +from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union import torch from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, ReduceLROnPlateau @@ -992,7 +992,7 @@ def get_param(self) -> Union[float, List[float]]: """Method to get current optimizer's parameter value""" # Emulate context manager for pytorch>=1.4 self.lr_scheduler._get_lr_called_within_step = True # type: ignore[union-attr] - lr_list = cast(List[float], self.lr_scheduler.get_lr()) + lr_list = self.lr_scheduler.get_lr() self.lr_scheduler._get_lr_called_within_step = False # type: ignore[union-attr] if len(lr_list) == 1: return lr_list[0] @@ -1670,7 +1670,7 @@ def __init__( _scheduler_kwargs["verbose"] = False self.scheduler = ReduceLROnPlateau(optimizer, **_scheduler_kwargs) - self.scheduler._reduce_lr = self._reduce_lr # type: ignore[attr-defined] + self.scheduler._reduce_lr = self._reduce_lr # type: ignore[method-assign] self._state_attrs += ["metric_name", "scheduler"] From 20d6b5bb0d4ba15881bcb8a52d9b02dfdb45b16f Mon Sep 17 00:00:00 2001 From: John lee Date: Wed, 22 May 2024 11:06:36 +0100 Subject: [PATCH 12/33] skip tests when mps not functional (#3249) --- tests/ignite/__init__.py | 11 +++++++++++ tests/ignite/distributed/test_auto.py | 4 ++++ tests/ignite/distributed/test_launcher.py | 4 ++++ tests/ignite/engine/test_create_supervised.py | 8 +++++--- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tests/ignite/__init__.py b/tests/ignite/__init__.py index d553c222e58b..8f84e2e74b99 100644 --- a/tests/ignite/__init__.py +++ b/tests/ignite/__init__.py @@ -3,3 +3,14 @@ def cpu_and_maybe_cuda(): return ("cpu",) + (("cuda",) if torch.cuda.is_available() else ()) + + +def is_mps_available_and_functional(): + if not torch.backends.mps.is_available(): + return False + try: + # Try to allocate a small tensor on the MPS device + torch.tensor([1.0], device="mps") + return True + except RuntimeError: + return False diff --git a/tests/ignite/distributed/test_auto.py 
b/tests/ignite/distributed/test_auto.py index 761e328944c4..a53999c1cb69 100644 --- a/tests/ignite/distributed/test_auto.py +++ b/tests/ignite/distributed/test_auto.py @@ -12,6 +12,7 @@ import ignite.distributed as idist from ignite.distributed.auto import auto_dataloader, auto_model, auto_optim, DistributedProxySampler +from tests.ignite import is_mps_available_and_functional class DummyDS(Dataset): @@ -179,6 +180,9 @@ def _test_auto_model_optimizer(ws, device): assert optimizer.backward_passes_per_step == backward_passes_per_step +@pytest.mark.skipif( + torch.backends.mps.is_available() and not is_mps_available_and_functional(), reason="Skip if MPS not functional" +) def test_auto_methods_no_dist(): _test_auto_dataloader(1, 1, batch_size=1) _test_auto_dataloader(1, 1, batch_size=10, num_workers=2) diff --git a/tests/ignite/distributed/test_launcher.py b/tests/ignite/distributed/test_launcher.py index b12e2acf1c26..8cc1001aa742 100644 --- a/tests/ignite/distributed/test_launcher.py +++ b/tests/ignite/distributed/test_launcher.py @@ -9,6 +9,7 @@ import ignite.distributed as idist from ignite.distributed.utils import has_hvd_support, has_native_dist_support, has_xla_support +from tests.ignite import is_mps_available_and_functional def test_parallel_wrong_inputs(): @@ -54,6 +55,9 @@ def execute(cmd, env=None): return str(process.stdout.read()) + str(process.stderr.read()) +@pytest.mark.skipif( + torch.backends.mps.is_available() and not is_mps_available_and_functional(), reason="Skip if MPS not functional" +) def test_check_idist_parallel_no_dist(exec_filepath): cmd = [sys.executable, "-u", exec_filepath] out = execute(cmd) diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py index 31ca43f4bbf7..d9b0c161f75d 100644 --- a/tests/ignite/engine/test_create_supervised.py +++ b/tests/ignite/engine/test_create_supervised.py @@ -25,6 +25,8 @@ ) from ignite.metrics import MeanSquaredError +from tests.ignite import is_mps_available_and_functional + class DummyModel(torch.nn.Module): def __init__(self, output_as_list=False): @@ -485,7 +487,7 @@ def test_create_supervised_trainer_on_cuda(): _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device) -@pytest.mark.skipif(not (_torch_version_le_112 and torch.backends.mps.is_available()), reason="Skip if no MPS") +@pytest.mark.skipif(not (_torch_version_le_112 and is_mps_available_and_functional()), reason="Skip if no MPS") def test_create_supervised_trainer_on_mps(): model_device = trainer_device = "mps" _test_create_supervised_trainer_wrong_accumulation(model_device=model_device, trainer_device=trainer_device) @@ -666,14 +668,14 @@ def test_create_supervised_evaluator_on_cuda_with_model_on_cpu(): _test_mocked_supervised_evaluator(evaluator_device="cuda") -@pytest.mark.skipif(not (_torch_version_le_112 and torch.backends.mps.is_available()), reason="Skip if no MPS") +@pytest.mark.skipif(not (_torch_version_le_112 and is_mps_available_and_functional()), reason="Skip if no MPS") def test_create_supervised_evaluator_on_mps(): model_device = evaluator_device = "mps" _test_create_supervised_evaluator(model_device=model_device, evaluator_device=evaluator_device) _test_mocked_supervised_evaluator(model_device=model_device, evaluator_device=evaluator_device) -@pytest.mark.skipif(not (_torch_version_le_112 and torch.backends.mps.is_available()), reason="Skip if no MPS") +@pytest.mark.skipif(not (_torch_version_le_112 and is_mps_available_and_functional()), reason="Skip if no 
MPS") def test_create_supervised_evaluator_on_mps_with_model_on_cpu(): _test_create_supervised_evaluator(evaluator_device="mps") _test_mocked_supervised_evaluator(evaluator_device="mps") From 9d31a9ca4ce418d806f6f190e565b9bc64adb656 Mon Sep 17 00:00:00 2001 From: vfdev Date: Thu, 23 May 2024 11:37:04 +0200 Subject: [PATCH 13/33] fix linkcheck (#3250) Ignore https://machinelearningmastery.com/gentle-introduction-backpropagation-time/ --- docs/source/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 2256d425becf..80c15e9b4d26 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -350,6 +350,7 @@ def run(self): "https://github.com/fossasia/visdom#visdom-arguments-python-only", "https://github.com/pytorch/ignite/tree/master/examples/cifar10#check-resume-training", "https://github.com/pytorch/ignite/tree/master/examples/mnist#training-save--resume", + "https://machinelearningmastery.com/gentle-introduction-backpropagation-time/", ] From 8db318b0c0c984ea114136dedb8b63f6c1d0cb2d Mon Sep 17 00:00:00 2001 From: John lee Date: Thu, 23 May 2024 11:01:25 +0100 Subject: [PATCH 14/33] Retry tests (#3229) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * retry tests * retry with pytest last failed logic greatly speeds up reruns of tests as only previously failed tests are rerun. define pytest cachedir for each pytest invocation to prevent interaction between different selections of tests. protect against exit code of 5 when a previous pytest invocation had no failed tests which results in all tests being deselected. use eval to avoid issues with the -k and -m expansions. * tidy test scripts * set correct root dir * add option to treat unrun tests as failures * interpret sigterm as sigint * adjust timeouts * respond to comments and add comments --------- Co-authored-by: leej3 <“johnleenimh@gmail.com> Co-authored-by: vfdev --- .github/workflows/gpu-hvd-tests.yml | 8 +- .github/workflows/gpu-tests.yml | 21 ++-- .github/workflows/hvd-tests.yml | 10 +- .github/workflows/pytorch-version-tests.yml | 16 +-- .github/workflows/tpu-tests.yml | 20 ++-- .github/workflows/unit-tests.yml | 15 ++- tests/common-test-functionality.sh | 102 ++++++++++++++++++++ tests/ignite/conftest.py | 96 ++++++++++++++++++ tests/run_cpu_tests.sh | 33 ++++--- tests/run_gpu_tests.sh | 50 ++++++---- tests/run_multinode_tests_in_docker.sh | 2 +- tests/run_tpu_tests.sh | 16 ++- 12 files changed, 316 insertions(+), 73 deletions(-) create mode 100644 tests/common-test-functionality.sh diff --git a/.github/workflows/gpu-hvd-tests.yml b/.github/workflows/gpu-hvd-tests.yml index 6661f46b501b..2017cf8acdad 100644 --- a/.github/workflows/gpu-hvd-tests.yml +++ b/.github/workflows/gpu-hvd-tests.yml @@ -22,7 +22,7 @@ jobs: gpu-hvd-tests: strategy: matrix: - pytorch-channel: [pytorch, ] + pytorch-channel: [pytorch] fail-fast: false env: DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1" @@ -128,8 +128,8 @@ jobs: # Can't build Horovod with recent pytorch due to pytorch required C++17 standard # and horovod is still using C++14 # HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch] - # Using a similar hack as described here: - # https://github.com/horovod/horovod/issues/3941#issuecomment-1732505345 + # Using a similar hack as described here: + # https://github.com/horovod/horovod/issues/3941#issuecomment-1732505345 git clone --recursive https://github.com/horovod/horovod.git /horovod cd /horovod sed -i "s/CMAKE_CXX_STANDARD 
14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt @@ -152,7 +152,7 @@ jobs: set -xe bash tests/run_gpu_tests.sh 2 hvd - CUDA_VISIBLE_DEVICES="" pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ -m distributed -k hvd + CUDA_VISIBLE_DEVICES="" pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ignite -m distributed -k hvd EOF ) diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml index 92345b3baed3..faa84deffd95 100644 --- a/.github/workflows/gpu-tests.yml +++ b/.github/workflows/gpu-tests.yml @@ -29,7 +29,7 @@ jobs: REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} runs-on: linux.8xlarge.nvidia.gpu - timeout-minutes: 45 + timeout-minutes: 85 steps: - name: Clean workspace @@ -121,18 +121,13 @@ jobs: - name: Run GPU Unit Tests continue-on-error: false - run: | - - script=$(cat << EOF - - set -xe - - bash tests/run_gpu_tests.sh 2 - - EOF - ) - - docker exec -t pthd /bin/bash -c "${script}" + uses: nick-fields/retry@v3 + with: + max_attempts: 5 + timeout_minutes: 25 + shell: bash + command: docker exec -t pthd /bin/bash -xec 'tests/run_gpu_tests.sh 2' + new_command_on_retry: docker exec -e USE_LAST_FAILED=1 -t pthd /bin/bash -xec 'tests/run_gpu_tests.sh 2' - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml index f483d21f38ee..35e107f888b7 100644 --- a/.github/workflows/hvd-tests.yml +++ b/.github/workflows/hvd-tests.yml @@ -75,9 +75,13 @@ jobs: target_dir: /tmp - name: Run Tests - shell: bash -l {0} - run: | - bash tests/run_cpu_tests.sh + uses: nick-fields/retry@v3 + with: + max_attempts: 5 + timeout_minutes: 15 + shell: bash + command: bash tests/run_cpu_tests.sh + new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_cpu_tests.sh - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml index e47f8faaa463..2e0ad5e0c986 100644 --- a/.github/workflows/pytorch-version-tests.yml +++ b/.github/workflows/pytorch-version-tests.yml @@ -10,7 +10,7 @@ on: jobs: build: runs-on: ubuntu-latest - timeout-minutes: 45 + timeout-minutes: 85 strategy: max-parallel: 5 fail-fast: false @@ -18,7 +18,7 @@ jobs: python-version: [3.8, 3.9, "3.10"] pytorch-version: [2.1.2, 2.0.1, 1.13.1, 1.12.1, 1.11.0, 1.10.0, 1.9.1, 1.8.1, 1.5.1] - exclude: + exclude: - pytorch-version: 1.5.1 python-version: 3.9 - pytorch-version: 1.5.1 @@ -78,7 +78,7 @@ jobs: pip install -r requirements-dev.txt python setup.py install - # pytorch>=1.9.0,<1.11.0 is using "from setuptools import distutils; distutils.version.LooseVersion" anti-pattern + # pytorch>=1.9.0,<1.11.0 is using "from setuptools import distutils; distutils.version.LooseVersion" anti-pattern # which raises the error: AttributeError: module 'distutils' has no attribute 'version' for setuptools>59 bad_pth_version=$(python -c "import torch; print('.'.join(torch.__version__.split('.')[:2]) in ['1.9', '1.10'])") if [ "${bad_pth_version}" == "True" ]; then @@ -92,9 +92,13 @@ jobs: target_dir: /tmp - name: Run Tests - shell: bash -l {0} - run: | - bash tests/run_cpu_tests.sh "not test_time_profilers" + uses: nick-fields/retry@v3 + with: + max_attempts: 5 + timeout_minutes: 15 + shell: bash + command: bash tests/run_cpu_tests.sh "not test_time_profilers" + new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_cpu_tests.sh "not test_time_profilers" # 
create-issue: # runs-on: ubuntu-latest diff --git a/.github/workflows/tpu-tests.yml b/.github/workflows/tpu-tests.yml index 08eaaf30d8f7..ab14ad3c1de0 100644 --- a/.github/workflows/tpu-tests.yml +++ b/.github/workflows/tpu-tests.yml @@ -89,13 +89,19 @@ jobs: target_dir: /tmp - name: Run Tests - run: | - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${Python_ROOT_DIR}/lib - export XRT_DEVICE_MAP="CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0" - export XRT_WORKERS="localservice:0;grpc://localhost:40934" - - python -c "import torch_xla; print('torch xla version:', torch_xla.__version__)" - bash tests/run_tpu_tests.sh + uses: nick-fields/retry@v3 + with: + max_attempts: 5 + timeout_minutes: 25 + shell: bash + command: | + python -c "import torch_xla; print('torch xla version:', torch_xla.__version__)" + bash tests/run_tpu_tests.sh + new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_tpu_tests.sh + env: + LD_LIBRARY_PATH: ${{ env.LD_LIBRARY_PATH }}:${{ env.Python_ROOT_DIR }}/lib + XRT_DEVICE_MAP: "CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0" + XRT_WORKERS: "localservice:0;grpc://localhost:40934" - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a4b697255699..0b94e0d0e9e1 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -31,7 +31,7 @@ concurrency: jobs: cpu-tests: runs-on: ${{ matrix.os }} - timeout-minutes: 45 + timeout-minutes: 85 defaults: run: shell: bash @@ -40,7 +40,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10", "3.11","3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] pytorch-channel: [pytorch, pytorch-nightly] include: # includes a single build on windows @@ -102,7 +102,7 @@ jobs: - name: Run Mypy # https://github.com/pytorch/ignite/pull/2780 - # + # if: ${{ matrix.os == 'ubuntu-latest' && matrix.pytorch-channel == 'pytorch-nightly'}} run: | bash ./tests/run_code_style.sh mypy @@ -120,8 +120,13 @@ jobs: cp -R /tmp/MNIST . 
- name: Run Tests - run: | - SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh + uses: nick-fields/retry@v3 + with: + max_attempts: 5 + timeout_minutes: 15 + shell: bash + command: SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh + new_command_on_retry: USE_LAST_FAILED=1 SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/tests/common-test-functionality.sh b/tests/common-test-functionality.sh new file mode 100644 index 000000000000..daf9d284f6b6 --- /dev/null +++ b/tests/common-test-functionality.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +# Will catch exit code 5 when tests are deselected from previous passing run +# (relevent for --last-failed-no-failures none) +last_failed_no_failures_code=5 + +# functions shared across test files +run_tests() { + # Set defaults + local core_args="-vvv tests/ignite" + local cache_dir=".unknown-cache" + local skip_distrib_tests=1 + local match_tests_expression="" + local trap_deselected_exit_code=1 + local use_last_failed=0 + local use_coverage=0 + local world_size=0 + # Always clean up pytest.ini + trap 'rm -f pytest.ini' RETURN + # Parse arguments + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + --core_args) + core_args="$2" + shift + shift + ;; + --cache_dir) + cache_dir="$2" + shift + shift + ;; + --skip_distrib_tests) + skip_distrib_tests="$2" + shift + shift + ;; + --match_tests_expression) + match_tests_expression="$2" + shift + shift + ;; + --trap_deselected_exit_code) + trap_deselected_exit_code="$2" + shift + shift + ;; + --use_last_failed) + use_last_failed="$2" + shift + shift + ;; + --use_coverage) + use_coverage="$2" + shift + shift + ;; + --world_size) + world_size="$2" + shift + shift + ;; + *) + echo "Error: Unknown argument $key" + exit 1 + shift + ;; + esac + done + + if [ "${skip_distrib_tests}" -eq "1" ]; then + # can be overwritten by core_args + skip_distrib_opt="-m 'not distributed and not tpu and not multinode_distributed'" + else + skip_distrib_opt="" + fi + + + echo [pytest] > pytest.ini ; echo "cache_dir=${cache_dir}" >> pytest.ini + + # Assemble options for the pytest command + pytest_args="${skip_distrib_opt} ${core_args} --treat-unrun-as-failed -k '${match_tests_expression}'" + if [ "${use_last_failed:-0}" -eq "1" ] && [ -d "${cache_dir}" ]; then + pytest_args="--last-failed --last-failed-no-failures none ${pytest_args}" + fi + if [ "${use_coverage}" -eq "1" ]; then + pytest_args="--cov ignite --cov-append --cov-report term-missing --cov-report xml ${pytest_args}" + fi + if [ ! 
"${world_size}" -eq "0" ]; then + export WORLD_SIZE="${world_size}" + pytest_args="--dist=each --tx ${WORLD_SIZE}*popen//python=python ${pytest_args}" + fi + + # Run the command + if [ "$trap_deselected_exit_code" -eq "1" ]; then + CUDA_VISIBLE_DEVICES="" eval "pytest ${pytest_args}" || { exit_code=$?; if [ "$exit_code" -eq ${last_failed_no_failures_code} ]; then echo "All tests deselected"; else exit $exit_code; fi; } + else + CUDA_VISIBLE_DEVICES="" eval "pytest ${pytest_args}" + fi +} diff --git a/tests/ignite/conftest.py b/tests/ignite/conftest.py index 265ae97e3e72..d5546a75bae5 100644 --- a/tests/ignite/conftest.py +++ b/tests/ignite/conftest.py @@ -1,8 +1,10 @@ import functools import os import shutil +import signal import sys import tempfile +import threading import time from pathlib import Path @@ -13,10 +15,57 @@ import ignite.distributed as idist +def pytest_addoption(parser): + """ + Add custom command line options for the ignite test suite here. + See: + This function is a pytest hook (due to its name) and is *"automatically" + executed at the start of a test run + https://docs.pytest.org/en/latest/reference/reference.html#initialization-hooks + + * "automatically" is true provided this conftest.py file is the + root directory. See: + https://docs.pytest.org/en/latest/reference/customize.html#initialization-determining-rootdir-and-configfile + """ + parser.addoption( + "--treat-unrun-as-failed", + action="store_true", + help=""" + If a session is interrupted, treat the unrun tests as failed so that a + rerun with --last-failed runs any tests that have not passed or been + skipped. Note that if all tests in a module have been skipped, the + module will be skipped for all subsequent runs. + """, + ) + + def pytest_configure(config): + """ + This function is a pytest hook (due to its name) and is run after command + line parsing is complete in order to configure the test session. + """ config.addinivalue_line("markers", "distributed: run distributed") config.addinivalue_line("markers", "multinode_distributed: distributed") config.addinivalue_line("markers", "tpu: run on tpu") + if config.option.treat_unrun_as_failed: + unrun_tracker = UnrunTracker() + config.pluginmanager.register(unrun_tracker, "unrun_tracker_plugin") + + +@pytest.fixture(scope="session", autouse=True) +def term_handler(): + """ + This allows the pytest session to be terminated upon retries on CI. It may + be worth using this fixture solely in that context. For a discussion on + whether sigterm should be ignored and why pytest usually ignores it see: + https://github.com/pytest-dev/pytest/issues/5243 + """ + if threading.current_thread() is threading.main_thread() and hasattr(signal, "SIGTERM"): + orig = signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT)) + yield + signal.signal(signal.SIGTERM, orig) + else: + yield # Just pass through if SIGTERM isn't supported or we are not in the main thread @pytest.fixture( @@ -447,6 +496,40 @@ def distributed(request, local_rank, world_size): raise RuntimeError(f"Invalid parameter value for `distributed` fixture, given {request.param}") +class UnrunTracker: + """ + Keeps track of unrun tests to improve the user experience when using the + "--last-failed" pytest option and a test session is interrupted. This is + particularly useful on CI when rerunning "failing" tests where the failure + was due to a deadlock and many tests weren't actually run so they didn't + actually fail. 
This is a pytest plugin that implements some standard hooks + to modify the test session. Its functionality can be added to a test session + by registering it with the pytest plugin manager. + """ + + def __init__(self): + self.unrun_tests = [] + + def pytest_collection_finish(self, session): + # At the end of the collection, add all items to the unrun_tests list + self.unrun_tests.extend(session.items) + + def pytest_runtest_teardown(self, item): + if item in self.unrun_tests: + self.unrun_tests.remove(item) + + def record_unrun_as_failed(self, session, exitstatus): + # Get current lastfailed entries (if any) + lastfailed = session.config.cache.get("cache/lastfailed", {}) + + # Add unrun tests to lastfailed + for test in self.unrun_tests: + lastfailed[test.nodeid] = True + + # Update the cache with the new lastfailed + session.config.cache.set("cache/lastfailed", lastfailed) + + @pytest.hookimpl def pytest_pyfunc_call(pyfuncitem: pytest.Function) -> None: if any(fx in pyfuncitem.fixturenames for fx in ["distributed", "multinode_distributed"]): @@ -508,3 +591,16 @@ def xla_worker(index, fn): assert ex_.code == 0, "Didn't successfully exit in XLA test" pyfuncitem.obj = functools.partial(testfunc_wrapper, pyfuncitem.obj) + + +def pytest_sessionfinish(session, exitstatus): + """ + Any functionality that should be run at the end of the session should be + added here. + This is a pytest hook (due to its name) and is called after the whole test + run finished, right before returning the exit status to the system. + """ + # If requested by the user, track all unrun tests and add them to the lastfailed cache + if session.config.option.treat_unrun_as_failed: + unrun_tracker = session.config.pluginmanager.get_plugin("unrun_tracker_plugin") + unrun_tracker.record_unrun_as_failed(session, exitstatus) diff --git a/tests/run_cpu_tests.sh b/tests/run_cpu_tests.sh index 2297be94219d..7d647de1e018 100644 --- a/tests/run_cpu_tests.sh +++ b/tests/run_cpu_tests.sh @@ -1,22 +1,31 @@ #!/bin/bash - +source "$(dirname "$0")/common-test-functionality.sh" set -xeu -if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then - skip_distrib_opt=(-m "not distributed and not tpu and not multinode_distributed") -else - skip_distrib_opt=(-m "") -fi +skip_distrib_tests=${SKIP_DISTRIB_TESTS:-0} +use_last_failed=${USE_LAST_FAILED:-0} +match_tests_expression=${1:-""} -MATCH_TESTS_EXPRESSION=${1:-""} -CUDA_VISIBLE_DEVICES="" pytest --tx 4*popen//python=python --cov ignite --cov-report term-missing --cov-report xml -vvv tests "${skip_distrib_opt[@]}" -k "$MATCH_TESTS_EXPRESSION" +run_tests \ + --core_args "--tx 4*popen//python=python -vvv tests/ignite" \ + --cache_dir ".cpu-not-distrib" \ + --skip_distrib_tests "${skip_distrib_tests}" \ + --use_coverage 1 \ + --match_tests_expression "${match_tests_expression}" \ + --use_last_failed ${use_last_failed} # https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02 -if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then +if [ "${skip_distrib_tests}" -eq "1" ]; then exit 0 fi -export WORLD_SIZE=2 -CUDA_VISIBLE_DEVICES="" pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx $WORLD_SIZE*popen//python=python tests -m distributed -vvv -k "$MATCH_TESTS_EXPRESSION" -unset WORLD_SIZE +# Run 2 processes with --dist=each +run_tests \ + --core_args "-m distributed -vvv tests/ignite" \ + --world_size 2 \ + --cache_dir ".cpu-distrib" \ + --skip_distrib_tests 0 \ + --use_coverage 1 \ + --match_tests_expression "${match_tests_expression}" \ + 
--use_last_failed ${use_last_failed} diff --git a/tests/run_gpu_tests.sh b/tests/run_gpu_tests.sh index 3146443a531d..371c70aee375 100644 --- a/tests/run_gpu_tests.sh +++ b/tests/run_gpu_tests.sh @@ -1,35 +1,47 @@ #!/bin/bash +source "$(dirname "$0")/common-test-functionality.sh" +set -xeu -if [ -z "$1" ]; then - ngpus=1 -else - ngpus=$1 -fi - -MATCH_TESTS_EXPRESSION=${2:-""} +skip_distrib_tests=${SKIP_DISTRIB_TESTS:-1} +use_last_failed=${USE_LAST_FAILED:-0} +ngpus=${1:-1} -if [ -z "$MATCH_TESTS_EXPRESSION" ]; then +match_tests_expression=${2:-""} +if [ -z "$match_tests_expression" ]; then cuda_pattern="cuda" else - cuda_pattern="cuda and $MATCH_TESTS_EXPRESSION" + cuda_pattern="cuda and $match_tests_expression" fi -set -xeu - -pytest --cov ignite --cov-report term-missing --cov-report xml -vvv tests/ -k "$cuda_pattern" +run_tests \ + --core_args "-vvv tests/ignite" \ + --cache_dir ".gpu-cuda" \ + --skip_distrib_tests "${skip_distrib_tests}" \ + --use_coverage 1 \ + --match_tests_expression "${cuda_pattern}" \ + --use_last_failed ${use_last_failed} # https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02 -if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then +if [ "${skip_distrib_tests}" -eq "1" ]; then exit 0 fi -pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ -m distributed -k "$MATCH_TESTS_EXPRESSION" +run_tests \ + --core_args "-vvv -m distributed tests/ignite" \ + --cache_dir ".gpu-distrib" \ + --skip_distrib_tests 0 \ + --use_coverage 1 \ + --match_tests_expression "${match_tests_expression}" \ + --use_last_failed ${use_last_failed} if [ ${ngpus} -gt 1 ]; then - - export WORLD_SIZE=${ngpus} - pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx ${WORLD_SIZE}*popen//python=python tests -m distributed -vvv -k "$MATCH_TESTS_EXPRESSION" - unset WORLD_SIZE - + run_tests \ + --core_args "-vvv -m distributed tests/ignite" \ + --world_size "${ngpus}" \ + --cache_dir ".gpu-distrib-multi" \ + --skip_distrib_tests 0 \ + --use_coverage 1 \ + --match_tests_expression "${match_tests_expression}" \ + --use_last_failed ${use_last_failed} fi diff --git a/tests/run_multinode_tests_in_docker.sh b/tests/run_multinode_tests_in_docker.sh index 0dca1b603278..041284bb97c2 100644 --- a/tests/run_multinode_tests_in_docker.sh +++ b/tests/run_multinode_tests_in_docker.sh @@ -36,7 +36,7 @@ RUN pip install --no-cache-dir mock pytest pytest-xdist scikit-learn scikit-imag EOF docker_python_version=`docker run --rm -i $docker_image python -c "import sys; print(str(sys.version_info[0]) + \".\" + str(sys.version_info[1]), end=\"\")"` -cmd="pytest --dist=each --tx $nproc_per_node*popen//python${docker_python_version} -m multinode_distributed -vvv tests" +cmd="pytest --dist=each --tx $nproc_per_node*popen//python${docker_python_version} -m multinode_distributed -vvv tests/ignite" export MASTER_ADDR=node0 export MASTER_PORT=9999 diff --git a/tests/run_tpu_tests.sh b/tests/run_tpu_tests.sh index 0877de858aed..5ea0f9931738 100644 --- a/tests/run_tpu_tests.sh +++ b/tests/run_tpu_tests.sh @@ -1,10 +1,20 @@ #!/bin/bash - +source "$(dirname "$0")/common-test-functionality.sh" set -xeu +use_last_failed=${USE_LAST_FAILED:-0} + +run_tests \ + --core_args "-vvv -m tpu tests/ignite" \ + --cache_dir ".tpu" \ + --use_coverage 1 \ + --use_last_failed ${use_last_failed} -pytest --cov ignite --cov-report term-missing --cov-report xml tests/ -vvv -m tpu if [ -z ${NUM_TPU_WORKERS+x} ]; then export NUM_TPU_WORKERS=1 - pytest --cov 
ignite --cov-append --cov-report term-missing --cov-report xml tests/ -vvv -m tpu + run_tests \ + --core_args "-vvv -m tpu tests/ignite" \ + --cache_dir ".tpu-multi" \ + --use_coverage 1 \ + --use_last_failed ${use_last_failed} fi From 0d8f3bc44b59ac350bb0048b252874a3f8aa2865 Mon Sep 17 00:00:00 2001 From: vfdev Date: Thu, 23 May 2024 17:54:46 +0200 Subject: [PATCH 15/33] downgrade retry action for gpu tests (#3251) (#3252) * downgrade retry action for gpu tests (#3251) * Update gpu-tests.yml * fix docker command * fixup --------- Co-authored-by: John lee --- .github/workflows/gpu-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml index faa84deffd95..81862e1f67bd 100644 --- a/.github/workflows/gpu-tests.yml +++ b/.github/workflows/gpu-tests.yml @@ -121,13 +121,13 @@ jobs: - name: Run GPU Unit Tests continue-on-error: false - uses: nick-fields/retry@v3 + uses: nick-fields/retry@v2.9.0 with: max_attempts: 5 timeout_minutes: 25 shell: bash - command: docker exec -t pthd /bin/bash -xec 'tests/run_gpu_tests.sh 2' - new_command_on_retry: docker exec -e USE_LAST_FAILED=1 -t pthd /bin/bash -xec 'tests/run_gpu_tests.sh 2' + command: docker exec -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2' + new_command_on_retry: docker exec -e USE_LAST_FAILED=1 -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2' - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 From 931dc8eb81bdf70682b19a183940601de6df6aee Mon Sep 17 00:00:00 2001 From: John lee Date: Wed, 29 May 2024 14:44:04 +0100 Subject: [PATCH 16/33] Fix scheduled tests (#3254) * fix scheduled tests add better error message when pytest is not on the PATH use a login shell to make use of conda env * fix missing filelock dep * fix mps errors for older pytorch versions use gt rather than le --- .github/workflows/pytorch-version-tests.yml | 4 ++-- ignite/distributed/comp_models/base.py | 4 ++-- requirements-dev.txt | 1 + tests/common-test-functionality.sh | 8 ++++++++ tests/ignite/distributed/comp_models/test_base.py | 4 ++-- tests/ignite/distributed/test_auto.py | 4 +++- tests/ignite/distributed/test_launcher.py | 4 +++- tests/ignite/distributed/utils/test_serial.py | 6 +++--- tests/ignite/engine/test_create_supervised.py | 8 ++++---- 9 files changed, 28 insertions(+), 15 deletions(-) diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml index 2e0ad5e0c986..7b143deaa500 100644 --- a/.github/workflows/pytorch-version-tests.yml +++ b/.github/workflows/pytorch-version-tests.yml @@ -97,8 +97,8 @@ jobs: max_attempts: 5 timeout_minutes: 15 shell: bash - command: bash tests/run_cpu_tests.sh "not test_time_profilers" - new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_cpu_tests.sh "not test_time_profilers" + command: bash -l tests/run_cpu_tests.sh "not test_time_profilers" + new_command_on_retry: USE_LAST_FAILED=1 bash -l tests/run_cpu_tests.sh "not test_time_profilers" # create-issue: # runs-on: ubuntu-latest diff --git a/ignite/distributed/comp_models/base.py b/ignite/distributed/comp_models/base.py index 6e86193381c7..6d2d7d819fa1 100644 --- a/ignite/distributed/comp_models/base.py +++ b/ignite/distributed/comp_models/base.py @@ -5,7 +5,7 @@ import torch from packaging.version import Version -_torch_version_le_112 = Version(torch.__version__) > Version("1.12.0") +_torch_version_gt_112 = Version(torch.__version__) > Version("1.12.0") class ComputationModel(metaclass=ABCMeta): @@ -329,7 
+329,7 @@ def get_node_rank(self) -> int: def device(self) -> torch.device: if torch.cuda.is_available(): return torch.device("cuda") - if _torch_version_le_112 and torch.backends.mps.is_available(): + if _torch_version_gt_112 and torch.backends.mps.is_available(): return torch.device("mps") return torch.device("cpu") diff --git a/requirements-dev.txt b/requirements-dev.txt index 93b791226036..bf60639e75c3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,6 +5,7 @@ pytest-cov pytest-xdist pytest-timeout dill +filelock setuptools # Test contrib dependencies scipy diff --git a/tests/common-test-functionality.sh b/tests/common-test-functionality.sh index daf9d284f6b6..6e60947f927b 100644 --- a/tests/common-test-functionality.sh +++ b/tests/common-test-functionality.sh @@ -70,6 +70,14 @@ run_tests() { esac done + if ! command -v pytest &> /dev/null + then + echo "pytest could not be found" + echo "The path is: ${PATH}" + exit 1 + fi + + if [ "${skip_distrib_tests}" -eq "1" ]; then # can be overwritten by core_args skip_distrib_opt="-m 'not distributed and not tpu and not multinode_distributed'" diff --git a/tests/ignite/distributed/comp_models/test_base.py b/tests/ignite/distributed/comp_models/test_base.py index c8041c6dc337..4c151d8d0b08 100644 --- a/tests/ignite/distributed/comp_models/test_base.py +++ b/tests/ignite/distributed/comp_models/test_base.py @@ -1,7 +1,7 @@ import pytest import torch -from ignite.distributed.comp_models.base import _SerialModel, _torch_version_le_112, ComputationModel +from ignite.distributed.comp_models.base import _SerialModel, _torch_version_gt_112, ComputationModel def test_serial_model(): @@ -16,7 +16,7 @@ def test_serial_model(): assert model.get_node_rank() == 0 if torch.cuda.is_available(): assert model.device().type == "cuda" - elif _torch_version_le_112 and torch.backends.mps.is_available(): + elif _torch_version_gt_112 and torch.backends.mps.is_available(): assert model.device().type == "mps" else: assert model.device().type == "cpu" diff --git a/tests/ignite/distributed/test_auto.py b/tests/ignite/distributed/test_auto.py index a53999c1cb69..2ecc3404c907 100644 --- a/tests/ignite/distributed/test_auto.py +++ b/tests/ignite/distributed/test_auto.py @@ -12,6 +12,7 @@ import ignite.distributed as idist from ignite.distributed.auto import auto_dataloader, auto_model, auto_optim, DistributedProxySampler +from ignite.distributed.comp_models.base import _torch_version_gt_112 from tests.ignite import is_mps_available_and_functional @@ -181,7 +182,8 @@ def _test_auto_model_optimizer(ws, device): @pytest.mark.skipif( - torch.backends.mps.is_available() and not is_mps_available_and_functional(), reason="Skip if MPS not functional" + (not _torch_version_gt_112) or (torch.backends.mps.is_available() and not is_mps_available_and_functional()), + reason="Skip if MPS not functional", ) def test_auto_methods_no_dist(): _test_auto_dataloader(1, 1, batch_size=1) diff --git a/tests/ignite/distributed/test_launcher.py b/tests/ignite/distributed/test_launcher.py index 8cc1001aa742..eac7ffe2e06c 100644 --- a/tests/ignite/distributed/test_launcher.py +++ b/tests/ignite/distributed/test_launcher.py @@ -8,6 +8,7 @@ from packaging.version import Version import ignite.distributed as idist +from ignite.distributed.comp_models.base import _torch_version_gt_112 from ignite.distributed.utils import has_hvd_support, has_native_dist_support, has_xla_support from tests.ignite import is_mps_available_and_functional @@ -56,7 +57,8 @@ def execute(cmd, env=None): 
@pytest.mark.skipif( - torch.backends.mps.is_available() and not is_mps_available_and_functional(), reason="Skip if MPS not functional" + (not _torch_version_gt_112) or (torch.backends.mps.is_available() and not is_mps_available_and_functional()), + reason="Skip if MPS not functional", ) def test_check_idist_parallel_no_dist(exec_filepath): cmd = [sys.executable, "-u", exec_filepath] diff --git a/tests/ignite/distributed/utils/test_serial.py b/tests/ignite/distributed/utils/test_serial.py index fdbf26e83608..df2d6742b54a 100644 --- a/tests/ignite/distributed/utils/test_serial.py +++ b/tests/ignite/distributed/utils/test_serial.py @@ -1,7 +1,7 @@ import torch import ignite.distributed as idist -from ignite.distributed.comp_models.base import _torch_version_le_112 +from ignite.distributed.comp_models.base import _torch_version_gt_112 from tests.ignite.distributed.utils import ( _sanity_check, _test_distrib__get_max_length, @@ -18,7 +18,7 @@ def test_no_distrib(capsys): assert idist.backend() is None if torch.cuda.is_available(): assert idist.device().type == "cuda" - elif _torch_version_le_112 and torch.backends.mps.is_available(): + elif _torch_version_gt_112 and torch.backends.mps.is_available(): assert idist.device().type == "mps" else: assert idist.device().type == "cpu" @@ -41,7 +41,7 @@ def test_no_distrib(capsys): assert "ignite.distributed.utils INFO: backend: None" in out[-1] if torch.cuda.is_available(): assert "ignite.distributed.utils INFO: device: cuda" in out[-1] - elif _torch_version_le_112 and torch.backends.mps.is_available(): + elif _torch_version_gt_112 and torch.backends.mps.is_available(): assert "ignite.distributed.utils INFO: device: mps" in out[-1] else: assert "ignite.distributed.utils INFO: device: cpu" in out[-1] diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py index d9b0c161f75d..6f7e0a2b2187 100644 --- a/tests/ignite/engine/test_create_supervised.py +++ b/tests/ignite/engine/test_create_supervised.py @@ -12,7 +12,7 @@ from torch.optim import SGD import ignite.distributed as idist -from ignite.distributed.comp_models.base import _torch_version_le_112 +from ignite.distributed.comp_models.base import _torch_version_gt_112 from ignite.engine import ( _check_arg, create_supervised_evaluator, @@ -487,7 +487,7 @@ def test_create_supervised_trainer_on_cuda(): _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device) -@pytest.mark.skipif(not (_torch_version_le_112 and is_mps_available_and_functional()), reason="Skip if no MPS") +@pytest.mark.skipif(not (_torch_version_gt_112 and is_mps_available_and_functional()), reason="Skip if no MPS") def test_create_supervised_trainer_on_mps(): model_device = trainer_device = "mps" _test_create_supervised_trainer_wrong_accumulation(model_device=model_device, trainer_device=trainer_device) @@ -668,14 +668,14 @@ def test_create_supervised_evaluator_on_cuda_with_model_on_cpu(): _test_mocked_supervised_evaluator(evaluator_device="cuda") -@pytest.mark.skipif(not (_torch_version_le_112 and is_mps_available_and_functional()), reason="Skip if no MPS") +@pytest.mark.skipif(not (_torch_version_gt_112 and is_mps_available_and_functional()), reason="Skip if no MPS") def test_create_supervised_evaluator_on_mps(): model_device = evaluator_device = "mps" _test_create_supervised_evaluator(model_device=model_device, evaluator_device=evaluator_device) _test_mocked_supervised_evaluator(model_device=model_device, evaluator_device=evaluator_device) 
-@pytest.mark.skipif(not (_torch_version_le_112 and is_mps_available_and_functional()), reason="Skip if no MPS") +@pytest.mark.skipif(not (_torch_version_gt_112 and is_mps_available_and_functional()), reason="Skip if no MPS") def test_create_supervised_evaluator_on_mps_with_model_on_cpu(): _test_create_supervised_evaluator(evaluator_device="mps") _test_mocked_supervised_evaluator(evaluator_device="mps") From d772953d3e1cbcd735e881f81dd962bd60d19b5e Mon Sep 17 00:00:00 2001 From: vfdev Date: Tue, 4 Jun 2024 09:08:18 +0200 Subject: [PATCH 17/33] Rename common-test-functionality.sh to common_test_functionality.sh (#3255) * Rename common-test-functionality.sh to common_test_functionality.sh * more changes --- ...ommon-test-functionality.sh => common_test_functionality.sh} | 0 tests/run_cpu_tests.sh | 2 +- tests/run_gpu_tests.sh | 2 +- tests/run_tpu_tests.sh | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename tests/{common-test-functionality.sh => common_test_functionality.sh} (100%) diff --git a/tests/common-test-functionality.sh b/tests/common_test_functionality.sh similarity index 100% rename from tests/common-test-functionality.sh rename to tests/common_test_functionality.sh diff --git a/tests/run_cpu_tests.sh b/tests/run_cpu_tests.sh index 7d647de1e018..8d387f5542e7 100644 --- a/tests/run_cpu_tests.sh +++ b/tests/run_cpu_tests.sh @@ -1,5 +1,5 @@ #!/bin/bash -source "$(dirname "$0")/common-test-functionality.sh" +source "$(dirname "$0")/common_test_functionality.sh" set -xeu skip_distrib_tests=${SKIP_DISTRIB_TESTS:-0} diff --git a/tests/run_gpu_tests.sh b/tests/run_gpu_tests.sh index 371c70aee375..26497f19c83e 100644 --- a/tests/run_gpu_tests.sh +++ b/tests/run_gpu_tests.sh @@ -1,5 +1,5 @@ #!/bin/bash -source "$(dirname "$0")/common-test-functionality.sh" +source "$(dirname "$0")/common_test_functionality.sh" set -xeu skip_distrib_tests=${SKIP_DISTRIB_TESTS:-1} diff --git a/tests/run_tpu_tests.sh b/tests/run_tpu_tests.sh index 5ea0f9931738..6fd695f2e277 100644 --- a/tests/run_tpu_tests.sh +++ b/tests/run_tpu_tests.sh @@ -1,5 +1,5 @@ #!/bin/bash -source "$(dirname "$0")/common-test-functionality.sh" +source "$(dirname "$0")/common_test_functionality.sh" set -xeu use_last_failed=${USE_LAST_FAILED:-0} From f32e122d4beab0ae2c7b76362193dea679bb0876 Mon Sep 17 00:00:00 2001 From: vfdev Date: Fri, 14 Jun 2024 14:41:21 +0200 Subject: [PATCH 18/33] Updated FBResearchLogger example doctring (#3237) --- ignite/handlers/fbresearch_logger.py | 60 ++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py index a291138e48d5..395561ae5751 100644 --- a/ignite/handlers/fbresearch_logger.py +++ b/ignite/handlers/fbresearch_logger.py @@ -30,10 +30,64 @@ class FBResearchLogger: .. 
code-block:: python import logging - from ignite.handlers.fbresearch_logger import * - logger = FBResearchLogger(logger=logging.Logger(__name__), show_output=True) - logger.attach(trainer, name="Train", every=10, optimizer=my_optimizer) + import torch + import torch.nn as nn + import torch.optim as optim + + from ignite.engine import create_supervised_trainer, Events + from ignite.handlers.fbresearch_logger import FBResearchLogger + from ignite.utils import setup_logger + + model = nn.Linear(10, 5) + opt = optim.SGD(model.parameters(), lr=0.001) + criterion = nn.CrossEntropyLoss() + + data = [(torch.rand(4, 10), torch.randint(0, 5, size=(4, ))) for _ in range(100)] + + trainer = create_supervised_trainer( + model, opt, criterion, output_transform=lambda x, y, y_pred, loss: {"total_loss": loss.item()} + ) + + logger = setup_logger("trainer", level=logging.INFO) + logger = FBResearchLogger(logger=logger, show_output=True) + logger.attach(trainer, name="Train", every=20, optimizer=opt) + + trainer.run(data, max_epochs=4) + + Output: + + .. code-block:: text + + 2024-04-22 12:05:47,843 trainer INFO: Train: start epoch [1/4] + ... Epoch [1/4] [20/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.5999 Iter time: 0.0008 s Data prep .. + ... Epoch [1/4] [40/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9297 Iter time: 0.0008 s Data prep .. + ... Epoch [1/4] [60/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9985 Iter time: 0.0008 s Data prep .. + ... Epoch [1/4] [80/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9785 Iter time: 0.0008 s Data prep .. + ... Epoch [1/4] [100/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.6211 Iter time: 0.0008 s Data prep . + ... Train: Epoch [1/4] Total time: 0:00:00 (0.0008 s / it) + ... Train: start epoch [2/4] + ... Epoch [2/4] [19/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.5981 Iter time: 0.0009 s Data prep .. + ... Epoch [2/4] [39/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9013 Iter time: 0.0008 s Data prep .. + ... Epoch [2/4] [59/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9811 Iter time: 0.0008 s Data prep .. + ... Epoch [2/4] [79/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9434 Iter time: 0.0008 s Data prep .. + ... Epoch [2/4] [99/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.6116 Iter time: 0.0008 s Data prep .. + ... Train: Epoch [2/4] Total time: 0:00:00 (0.0009 s / it) + ... Train: start epoch [3/4] + ... Epoch [3/4] [18/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.5972 Iter time: 0.0008 s Data prep .. + ... Epoch [3/4] [38/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.8753 Iter time: 0.0008 s Data prep .. + ... Epoch [3/4] [58/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9657 Iter time: 0.0009 s Data prep .. + ... Epoch [3/4] [78/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9112 Iter time: 0.0008 s Data prep .. + ... Epoch [3/4] [98/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.6035 Iter time: 0.0008 s Data prep .. + ... Train: Epoch [3/4] Total time: 0:00:00 (0.0009 s / it) + ... Train: start epoch [4/4] + ... Epoch [4/4] [17/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.5969 Iter time: 0.0008 s Data prep .. + ... Epoch [4/4] [37/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.8516 Iter time: 0.0008 s Data prep .. + ... Epoch [4/4] [57/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.9521 Iter time: 0.0008 s Data prep .. + ... Epoch [4/4] [77/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.8816 Iter time: 0.0008 s Data prep .. + ... Epoch [4/4] [97/100]: ETA: 0:00:00 lr: 0.00100 total_loss: 1.5966 Iter time: 0.0009 s Data prep .. + ... 
Train: Epoch [4/4] Total time: 0:00:00 (0.0009 s / it) + ... Train: run completed Total time: 0:00:00 """ def __init__(self, logger: Any, delimiter: str = " ", show_output: bool = False): From 0c4a2125d52afbb26d5d2bb14923455611eca800 Mon Sep 17 00:00:00 2001 From: vfdev Date: Fri, 14 Jun 2024 14:55:44 +0200 Subject: [PATCH 19/33] Added 2.2.2 version and removed 1.9.1 --- .github/workflows/pytorch-version-tests.yml | 31 ++++++++++----------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml index 7b143deaa500..a703e72333e2 100644 --- a/.github/workflows/pytorch-version-tests.yml +++ b/.github/workflows/pytorch-version-tests.yml @@ -17,7 +17,7 @@ jobs: matrix: python-version: [3.8, 3.9, "3.10"] pytorch-version: - [2.1.2, 2.0.1, 1.13.1, 1.12.1, 1.11.0, 1.10.0, 1.9.1, 1.8.1, 1.5.1] + [2.2.2, 2.1.2, 2.0.1, 1.13.1, 1.12.1, 1.11.0, 1.10.0, 1.8.1, 1.5.1] exclude: - pytorch-version: 1.5.1 python-version: 3.9 @@ -31,9 +31,6 @@ jobs: - pytorch-version: 1.8.1 python-version: "3.10" - - pytorch-version: 1.9.1 - python-version: "3.10" - - pytorch-version: 1.10.0 python-version: "3.10" @@ -100,16 +97,16 @@ jobs: command: bash -l tests/run_cpu_tests.sh "not test_time_profilers" new_command_on_retry: USE_LAST_FAILED=1 bash -l tests/run_cpu_tests.sh "not test_time_profilers" - # create-issue: - # runs-on: ubuntu-latest - # # https://docs.github.com/en/actions/reference/context-and-expression-syntax-for-github-actions#needs-context - # needs: build - # if: always() && needs.build.result == 'failure' - # steps: - # - uses: actions/checkout@v4 - # - uses: JasonEtco/create-an-issue@v2 - # name: Create issue if pytorch version tests failed - # with: - # filename: .github/failed_schedule_issue_template.md - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + create-issue: + runs-on: ubuntu-latest + # https://docs.github.com/en/actions/reference/context-and-expression-syntax-for-github-actions#needs-context + needs: build + if: always() && needs.build.result == 'failure' + steps: + - uses: actions/checkout@v4 + - uses: JasonEtco/create-an-issue@v2 + name: Create issue if pytorch version tests failed + with: + filename: .github/failed_schedule_issue_template.md + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 24e71aff9dc44f447901fa82349970ca70e2bccd Mon Sep 17 00:00:00 2001 From: vfdev Date: Fri, 14 Jun 2024 16:22:59 +0200 Subject: [PATCH 20/33] Updated docker pth version (#3256) --- docker/docker.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/docker.cfg b/docker/docker.cfg index 5c48d9f83e3f..dd43c35e7df8 100644 --- a/docker/docker.cfg +++ b/docker/docker.cfg @@ -1,4 +1,4 @@ [DEFAULT] -build_docker_image_pytorch_version = 2.2.2-cuda12.1-cudnn8 +build_docker_image_pytorch_version = 2.3.1-cuda12.1-cudnn8 build_docker_image_hvd_version = v0.28.1 build_docker_image_msdp_version = v0.14.0 From 5a66d9e09fad42edb1fdda88a426a8658876fedd Mon Sep 17 00:00:00 2001 From: John lee Date: Fri, 28 Jun 2024 06:55:46 +0100 Subject: [PATCH 21/33] improve type support for fbrlogger (#3238) * fbr logger: improve types and kwargs supported * remove autolist for utils * add clean directive to docs Makefile * tidy matrix display * make reporting of shape more compact * remove superfluous import * fix bug in autosummary --- docs/Makefile | 7 ++ docs/source/conf.py | 10 ++- ignite/handlers/fbresearch_logger.py | 34 +++++--- ignite/metrics/maximum_mean_discrepancy.py | 2 +- ignite/utils.py | 78 
+++++++++++++++++++ .../ignite/handlers/test_fbresearch_logger.py | 52 ++++++++++++- tests/ignite/test_utils.py | 25 +++++- 7 files changed, 194 insertions(+), 14 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 3d1f9ada6a8b..413cdff94ad5 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -22,6 +22,13 @@ docset: html rebuild: rm -rf source/generated && make clean && make html +clean: + @echo "Cleaning up..." + python -c "import shutil; shutil.rmtree('$(BUILDDIR)', ignore_errors=True)" + python -c "import shutil; shutil.rmtree('$(SOURCEDIR)/generated', ignore_errors=True)" + python -c "import os; [os.remove(f) for f in os.listdir('.') if f.endswith('.pyc')]" + python -c "import shutil; import os; [shutil.rmtree(f) for f in os.listdir('.') if f == '__pycache__' and os.path.isdir(f)]" + .PHONY: help Makefile docset # Catch-all target: route all unknown targets to Sphinx using the new diff --git a/docs/source/conf.py b/docs/source/conf.py index 80c15e9b4d26..e26a50785f2c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -301,7 +301,15 @@ def run(self): names = [name[0] for name in getmembers(module)] # Filter out members w/o doc strings - names = [name for name in names if getattr(module, name).__doc__ is not None] + filtered_names = [] + for name in names: + try: + if not name.startswith("_") and getattr(module, name).__doc__ is not None: + filtered_names.append(name) + except AttributeError: + continue + + names = filtered_names if auto == "autolist": # Get list of all classes and functions inside module diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py index 395561ae5751..4243a636b6fb 100644 --- a/ignite/handlers/fbresearch_logger.py +++ b/ignite/handlers/fbresearch_logger.py @@ -1,18 +1,18 @@ """FBResearch logger and its helper handlers.""" import datetime -from typing import Any, Optional - -# from typing import Any, Dict, Optional, Union +from typing import Any, Callable, List, Optional import torch +from ignite import utils from ignite.engine import Engine, Events from ignite.handlers import Timer - MB = 1024.0 * 1024.0 +__all__ = ["FBResearchLogger"] + class FBResearchLogger: """Logs training and validation metrics for research purposes. @@ -98,7 +98,13 @@ def __init__(self, logger: Any, delimiter: str = " ", show_output: bool = False self.show_output: bool = show_output def attach( - self, engine: Engine, name: str, every: int = 1, optimizer: Optional[torch.optim.Optimizer] = None + self, + engine: Engine, + name: str, + every: int = 1, + output_transform: Optional[Callable] = None, + state_attributes: Optional[List[str]] = None, + optimizer: Optional[torch.optim.Optimizer] = None, ) -> None: """Attaches all the logging handlers to the given engine. @@ -106,8 +112,13 @@ def attach( engine: The engine to attach the logging handlers to. name: The name of the engine (e.g., "Train", "Validate") to include in log messages. every: Frequency of iterations to log information. Logs are generated every 'every' iterations. + output_transform: A function to select the value to log. + state_attributes: A list of attributes to log. optimizer: The optimizer used during training to log current learning rates. 
""" + self.name = name + self.output_transform = output_transform + self.state_attributes = state_attributes engine.add_event_handler(Events.EPOCH_STARTED, self.log_epoch_started, engine, name) engine.add_event_handler(Events.ITERATION_COMPLETED(every=every), self.log_every, engine, optimizer=optimizer) engine.add_event_handler(Events.EPOCH_COMPLETED, self.log_epoch_completed, engine, name) @@ -151,10 +162,9 @@ def log_every(self, engine: Engine, optimizer: Optional[torch.optim.Optimizer] = outputs = [] if self.show_output and engine.state.output is not None: output = engine.state.output - if isinstance(output, dict): - outputs += [f"{k}: {v:.4f}" for k, v in output.items()] - else: - outputs += [f"{v:.4f}" if isinstance(v, float) else f"{v}" for v in output] # type: ignore + if self.output_transform is not None: + output = self.output_transform(output) + outputs = utils._to_str_list(output) lrs = "" if optimizer is not None: @@ -164,6 +174,11 @@ def log_every(self, engine: Engine, optimizer: Optional[torch.optim.Optimizer] = for i, g in enumerate(optimizer.param_groups): lrs += f"lr [g{i}]: {g['lr']:.5f}" + state_attrs = [] + if self.state_attributes is not None: + state_attrs = utils._to_str_list( + {name: getattr(engine.state, name, None) for name in self.state_attributes} + ) msg = self.delimiter.join( [ f"Epoch [{engine.state.epoch}/{engine.state.max_epochs}]", @@ -172,6 +187,7 @@ def log_every(self, engine: Engine, optimizer: Optional[torch.optim.Optimizer] = f"{lrs}", ] + outputs + + [" ".join(state_attrs)] + [ f"Iter time: {iter_avg_time:.4f} s", f"Data prep time: {self.data_timer.value():.4f} s", diff --git a/ignite/metrics/maximum_mean_discrepancy.py b/ignite/metrics/maximum_mean_discrepancy.py index 24faf5758c63..586aa94ffb79 100644 --- a/ignite/metrics/maximum_mean_discrepancy.py +++ b/ignite/metrics/maximum_mean_discrepancy.py @@ -29,7 +29,7 @@ class MaximumMeanDiscrepancy(Metric): More details can be found in `Gretton et al. 2012`__. - __ https://jmlr.csail.mit.edu/papers/v13/gretton12a.html + __ https://www.jmlr.org/papers/volume13/gretton12a/gretton12a.pdf - ``update`` must receive output of the form ``(x, y)``. - ``x`` and ``y`` are expected to be in the same shape :math:`(B, \ldots)`. diff --git a/ignite/utils.py b/ignite/utils.py index 6e5b2176d6a4..1345e2bb0d86 100644 --- a/ignite/utils.py +++ b/ignite/utils.py @@ -2,6 +2,7 @@ import functools import hashlib import logging +import numbers import random import shutil import warnings @@ -14,6 +15,7 @@ "convert_tensor", "apply_to_tensor", "apply_to_type", + "_to_str_list", "to_onehot", "setup_logger", "manual_seed", @@ -90,6 +92,82 @@ def _tree_map( return func(x, key=key) +def _to_str_list(data: Any) -> List[str]: + """ + Recursively flattens and formats complex data structures, including keys for + dictionaries, into a list of human-readable strings. + + This function processes nested dictionaries, lists, tuples, numbers, and + PyTorch tensors, formatting numbers to four decimal places and handling + tensors with special formatting rules. It's particularly useful for logging, + debugging, or any scenario where a human-readable representation of complex, + nested data structures is required. + + The function handles the following types: + + - Numbers: Formatted to four decimal places. + - PyTorch tensors: + - Scalars are formatted to four decimal places. + - 1D tensors with more than 10 elements show the first 10 elements + followed by an ellipsis. + - 1D tensors with 10 or fewer elements are fully listed. 
+ - Multi-dimensional tensors display their shape. + - Dictionaries: Each key-value pair is included in the output with the key + as a prefix. + - Lists and tuples: Flattened and included in the output. Empty lists/tuples are represented + by an empty string. + - None values: Represented by an empty string. + + Args: + data: The input data to be flattened and formatted. It can be a nested + combination of dictionaries, lists, tuples, numbers, and PyTorch + tensors. + + Returns: + A list of formatted strings, each representing a part of the input data + structure. + """ + formatted_items: List[str] = [] + + def format_item(item: Any, prefix: str = "") -> Optional[str]: + if isinstance(item, numbers.Number): + return f"{prefix}{item:.4f}" + elif torch.is_tensor(item): + if item.dim() == 0: + return f"{prefix}{item.item():.4f}" # Format scalar tensor without brackets + elif item.dim() == 1 and item.size(0) > 10: + return f"{prefix}[" + ", ".join(f"{x.item():.4f}" for x in item[:10]) + ", ...]" + elif item.dim() == 1: + return f"{prefix}[" + ", ".join(f"{x.item():.4f}" for x in item) + "]" + else: + return f"{prefix}Shape{list(item.shape)}" + elif isinstance(item, dict): + for key, value in item.items(): + formatted_value = format_item(value, f"{key}: ") + if formatted_value is not None: + formatted_items.append(formatted_value) + elif isinstance(item, (list, tuple)): + if not item: + if prefix: + formatted_items.append(f"{prefix}") + else: + values = [format_item(x) for x in item] + values_str = [v for v in values if v is not None] + if values_str: + formatted_items.append(f"{prefix}" + ", ".join(values_str)) + elif item is None: + if prefix: + formatted_items.append(f"{prefix}") + return None + + # Directly handle single numeric values + if isinstance(data, numbers.Number): + return [f"{data:.4f}"] + + format_item(data) + return formatted_items + + class _CollectionItem: types_as_collection_item: Tuple = (int, float, torch.Tensor) diff --git a/tests/ignite/handlers/test_fbresearch_logger.py b/tests/ignite/handlers/test_fbresearch_logger.py index b85bdcf2794e..728c97870e09 100644 --- a/tests/ignite/handlers/test_fbresearch_logger.py +++ b/tests/ignite/handlers/test_fbresearch_logger.py @@ -3,9 +3,13 @@ from unittest.mock import MagicMock import pytest +import torch +import torch.nn as nn +import torch.optim as optim -from ignite.engine import Engine, Events -from ignite.handlers.fbresearch_logger import FBResearchLogger # Adjust the import path as necessary +from ignite.engine import create_supervised_trainer, Engine, Events +from ignite.handlers.fbresearch_logger import FBResearchLogger +from ignite.utils import setup_logger @pytest.fixture @@ -56,3 +60,47 @@ def test_output_formatting(mock_engine, fb_research_logger, output, expected_pat actual_output = fb_research_logger.logger.info.call_args_list[0].args[0] assert re.search(expected_pattern, actual_output) + + +def test_logger_type_support(): + model = nn.Linear(10, 5) + opt = optim.SGD(model.parameters(), lr=0.001) + criterion = nn.CrossEntropyLoss() + + data = [(torch.rand(4, 10), torch.randint(0, 5, size=(4,))) for _ in range(100)] + + trainer = create_supervised_trainer(model, opt, criterion) + + logger = setup_logger("trainer", level=logging.INFO) + logger = FBResearchLogger(logger=logger, show_output=True) + logger.attach(trainer, name="Train", every=20, optimizer=opt) + + trainer.run(data, max_epochs=4) + trainer.state.output = {"loss": 4.2} + trainer.fire_event(Events.ITERATION_COMPLETED) + trainer.state.output = "4.2" + 
trainer.fire_event(Events.ITERATION_COMPLETED) + trainer.state.output = [4.2, 4.2] + trainer.fire_event(Events.ITERATION_COMPLETED) + trainer.state.output = (4.2, 4.2) + trainer.fire_event(Events.ITERATION_COMPLETED) + + +def test_fbrlogger_with_output_transform(mock_logger): + trainer = Engine(lambda e, b: 42) + fbr = FBResearchLogger(logger=mock_logger, show_output=True) + fbr.attach(trainer, "Training", output_transform=lambda x: {"loss": x}) + trainer.run(data=[10], epoch_length=1, max_epochs=1) + assert "loss: 42.0000" in fbr.logger.info.call_args_list[-2].args[0] + + +def test_fbrlogger_with_state_attrs(mock_logger): + trainer = Engine(lambda e, b: 42) + fbr = FBResearchLogger(logger=mock_logger, show_output=True) + fbr.attach(trainer, "Training", state_attributes=["alpha", "beta", "gamma"]) + trainer.state.alpha = 3.899 + trainer.state.beta = torch.tensor(12.21) + trainer.state.gamma = torch.tensor([21.0, 6.0]) + trainer.run(data=[10], epoch_length=1, max_epochs=1) + attrs = "alpha: 3.8990 beta: 12.2100 gamma: [21.0000, 6.0000]" + assert attrs in fbr.logger.info.call_args_list[-2].args[0] diff --git a/tests/ignite/test_utils.py b/tests/ignite/test_utils.py index 828533ce2019..4b00fb8c67ab 100644 --- a/tests/ignite/test_utils.py +++ b/tests/ignite/test_utils.py @@ -8,7 +8,7 @@ from packaging.version import Version from ignite.engine import Engine, Events -from ignite.utils import convert_tensor, deprecated, hash_checkpoint, setup_logger, to_onehot +from ignite.utils import _to_str_list, convert_tensor, deprecated, hash_checkpoint, setup_logger, to_onehot def test_convert_tensor(): @@ -55,6 +55,29 @@ def test_convert_tensor(): convert_tensor(12345) +@pytest.mark.parametrize( + "input_data,expected", + [ + (42, ["42.0000"]), + ([{"a": 15, "b": torch.tensor([2.0])}], ["a: 15.0000", "b: [2.0000]"]), + ({"a": 10, "b": 2.33333}, ["a: 10.0000", "b: 2.3333"]), + ({"x": torch.tensor(0.1234), "y": [1, 2.3567]}, ["x: 0.1234", "y: 1.0000, 2.3567"]), + (({"nested": [3.1415, torch.tensor(0.0001)]},), ["nested: 3.1415, 0.0001"]), + ( + {"large_vector": torch.tensor(range(20))}, + ["large_vector: [0.0000, 1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, ...]"], + ), + ({"large_matrix": torch.randn(5, 5)}, ["large_matrix: Shape[5, 5]"]), + ({"empty": []}, ["empty: "]), + ([], []), + ({"none": None}, ["none: "]), + ({1: 100, 2: 200}, ["1: 100.0000", "2: 200.0000"]), + ], +) +def test__to_str_list(input_data, expected): + assert _to_str_list(input_data) == expected + + def test_to_onehot(): indices = torch.tensor([0, 1, 2, 3], dtype=torch.long) actual = to_onehot(indices, 4) From d715807462506d6b4358b66af9be8d03b102653e Mon Sep 17 00:00:00 2001 From: Simeet Nayan <78461155+simeetnayan81@users.noreply.github.com> Date: Tue, 2 Jul 2024 00:09:52 +0530 Subject: [PATCH 22/33] Introduce a variable skip_unrolling in class Metric (#3258) * Introduce a variable skip_unrolling in class Metric * Add docstring for skip_unrolling, modify skip_unrolling clause * Modify docstring Co-authored-by: vfdev * Apply suggestions from code review Co-authored-by: vfdev * Modify docstring, revert version tag * Add test_skip_unrolling, DummyMetric5 class * Add example usage of skip unrolling in Metric, Update Loss class with skip_unrolling arg * Fix doc * Add test for skip_unrolling in Loss * Apply suggestions from code review * Update ignite/metrics/metric.py * Update docstring * fix test_loss.py for python below 3.9 --------- Co-authored-by: vfdev --- ignite/metrics/loss.py | 8 +++- 
 ignite/metrics/metric.py | 68 ++++++++++++++++++++++++++++-
 tests/ignite/metrics/test_loss.py | 50 ++++++++++++++++++++-
 tests/ignite/metrics/test_metric.py | 30 +++++++++++++
 4 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/ignite/metrics/loss.py b/ignite/metrics/loss.py
index 7182e7033d54..2be0a7d2387f 100644
--- a/ignite/metrics/loss.py
+++ b/ignite/metrics/loss.py
@@ -29,6 +29,9 @@ class Loss(Metric):
         device: specifies which device updates are accumulated on. Setting the metric's
             device to be the same as your ``update`` arguments ensures the ``update``
             method is non-blocking. By default, CPU.
+        skip_unrolling: specifies whether input should be unrolled or not before it is passed to loss_fn.
+            Should be true for multi-output model, for example, if ``y_pred`` contains multi-output as
+            ``(y_pred_a, y_pred_b)``

     Attributes:
         required_output_keys: dictionary defines required keys to be found in ``engine.state.output`` if the
@@ -62,6 +65,8 @@ class Loss(Metric):

         -0.3499999...

+    .. versionchanged:: 0.5.1
+        ``skip_unrolling`` argument is added.
     """

     required_output_keys = ("y_pred", "y", "criterion_kwargs")
@@ -73,8 +78,9 @@ def __init__(
         output_transform: Callable = lambda x: x,
         batch_size: Callable = len,
         device: Union[str, torch.device] = torch.device("cpu"),
+        skip_unrolling: bool = False,
     ):
-        super(Loss, self).__init__(output_transform, device=device)
+        super(Loss, self).__init__(output_transform, device=device, skip_unrolling=skip_unrolling)

         self._loss_fn = loss_fn
         self._batch_size = batch_size
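What ``skip_unrolling`` turns off, in brief: by default ``Metric.iteration_completed`` treats an output made of two equal-length lists as a batch of ``(y_pred, y)`` pairs and unrolls it into one ``update`` call per pair, which misreads a multi-output model whose heads are packed into lists. A simplified sketch of the two paths, illustrative only — the real check lives in the metric.py diff below and uses ``_is_list_of_tensors_or_numbers``:

.. code-block:: python

    from collections.abc import Sequence

    import torch


    def feed_update(update, output, skip_unrolling=False):
        # Simplified from Metric.iteration_completed: two equal-length lists
        # are unrolled into per-item (y_pred, y) pairs unless skip_unrolling is set.
        is_pair_of_lists = (
            isinstance(output, Sequence)
            and len(output) == 2
            and isinstance(output[0], (list, tuple))
            and isinstance(output[1], (list, tuple))
            and len(output[0]) == len(output[1])
        )
        if not skip_unrolling and is_pair_of_lists:
            for y_pred, y in zip(output[0], output[1]):
                update((y_pred, y))  # one call per unrolled pair
        else:
            update(output)  # single call, output passed through as-is


    # Two heads of a multi-output model, packed into lists:
    y_pred = [torch.rand(8, 1), torch.rand(8, 2)]
    y_true = [torch.rand(8, 1), torch.rand(8, 2)]

    feed_update(lambda o: print("update:", o[0].shape), (y_pred, y_true))
    # -> two update calls, one per head: wrong for a multi-output loss
    feed_update(lambda o: print("update:", len(o[0]), "heads"), (y_pred, y_true), skip_unrolling=True)
    # -> one update call with both heads intact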
diff --git a/ignite/metrics/metric.py b/ignite/metrics/metric.py
index 39e5cb745222..4ccfd8ea7af7 100644
--- a/ignite/metrics/metric.py
+++ b/ignite/metrics/metric.py
@@ -233,6 +233,59 @@ class Metric(Serializable, metaclass=ABCMeta):
         device: specifies which device updates are accumulated on. Setting the metric's
             device to be the same as your ``update`` arguments ensures the ``update`` method is
             non-blocking. By default, CPU.
+        skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be
+            true for multi-output model, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``
+            Alternatively, ``output_transform`` can be used to handle this.
+
+    Examples:
+        The following example shows a custom loss metric that expects input from a multi-output model.
+
+        .. code-block:: python
+
+            import torch
+            import torch.nn as nn
+            import torch.nn.functional as F
+
+            from ignite.engine import create_supervised_evaluator
+            from ignite.metrics import Loss
+
+            class MyLoss(nn.Module):
+                def __init__(self, ca: float = 1.0, cb: float = 1.0) -> None:
+                    super().__init__()
+                    self.ca = ca
+                    self.cb = cb
+
+                def forward(self,
+                            y_pred: Tuple[torch.Tensor, torch.Tensor],
+                            y_true: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
+                    a_true, b_true = y_true
+                    a_pred, b_pred = y_pred
+                    return self.ca * F.mse_loss(a_pred, a_true) + self.cb * F.cross_entropy(b_pred, b_true)
+
+
+            def prepare_batch(batch, device, non_blocking):
+                return torch.rand(4, 1), (torch.rand(4, 1), torch.rand(4, 2))
+
+
+            class MyModel(nn.Module):
+
+                def forward(self, x):
+                    return torch.rand(4, 1), torch.rand(4, 2)
+
+
+            model = MyModel()
+
+            device = "cpu"
+            loss = MyLoss(0.5, 1.0)
+            metrics = {
+                "Loss": Loss(loss, skip_unrolling=True)
+            }
+            train_evaluator = create_supervised_evaluator(model, metrics, device, prepare_batch=prepare_batch)
+
+
+            data = range(10)
+            train_evaluator.run(data)
+            train_evaluator.state.metrics["Loss"]

     Attributes:
         required_output_keys: dictionary defines required keys to be found in ``engine.state.output`` if the
@@ -292,6 +345,9 @@ def compute(self):

     .. versionchanged:: 0.4.2
         ``required_output_keys`` became public attribute.
+
+    .. versionchanged:: 0.5.1
+        ``skip_unrolling`` argument is added.
     """

     # public class attribute
@@ -300,7 +356,10 @@
     _required_output_keys = required_output_keys

     def __init__(
-        self, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu")
+        self,
+        output_transform: Callable = lambda x: x,
+        device: Union[str, torch.device] = torch.device("cpu"),
+        skip_unrolling: bool = False,
     ):
         self._output_transform = output_transform

@@ -309,6 +368,7 @@ def __init__(
             raise ValueError("Cannot create metric on an XLA device. Use device='cpu' instead.")

         self._device = torch.device(device)
+        self._skip_unrolling = skip_unrolling
         self.reset()

     @abstractmethod
@@ -390,7 +450,11 @@ def iteration_completed(self, engine: Engine) -> None:
                 )
             output = tuple(output[k] for k in self.required_output_keys)

-        if isinstance(output, Sequence) and all([_is_list_of_tensors_or_numbers(o) for o in output]):
+        if (
+            (not self._skip_unrolling)
+            and isinstance(output, Sequence)
+            and all([_is_list_of_tensors_or_numbers(o) for o in output])
+        ):
             if not (len(output) == 2 and len(output[0]) == len(output[1])):
                 raise ValueError(
                     f"Output should have 2 items of the same length, "
diff --git a/tests/ignite/metrics/test_loss.py b/tests/ignite/metrics/test_loss.py
index 19cc68cd45cc..0e945bec58cf 100644
--- a/tests/ignite/metrics/test_loss.py
+++ b/tests/ignite/metrics/test_loss.py
@@ -1,11 +1,12 @@
 import os
+from typing import Tuple
 from unittest.mock import MagicMock

 import pytest
 import torch
 from numpy.testing import assert_almost_equal
 from torch import nn
-from torch.nn.functional import nll_loss
+from torch.nn.functional import mse_loss, nll_loss

 import ignite.distributed as idist
 from ignite.engine import State
@@ -314,3 +315,50 @@ def compute(self):
         (torch.rand(4, 10), torch.randint(0, 3, size=(4,))),
     ]
     evaluator.run(data)
+
+
+class CustomMultiMSELoss(nn.Module):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def forward(
+        self, y_pred: Tuple[torch.Tensor, torch.Tensor], y_true: Tuple[torch.Tensor, torch.Tensor]
+    ) -> torch.Tensor:
+        a_true, b_true = y_true
+        a_pred, b_pred = y_pred
+        return mse_loss(a_pred, a_true) + mse_loss(b_pred, b_true)
+
+
+class DummyLoss3(Loss):
+    def __init__(self, loss_fn, expected_loss, output_transform=lambda x: x, skip_unrolling=False):
+        super(DummyLoss3, self).__init__(loss_fn, output_transform=output_transform, skip_unrolling=skip_unrolling)
+        self._expected_loss = expected_loss
+        self._loss_fn = loss_fn
+
+    def reset(self):
+        pass
+
+    def compute(self):
+        pass
+
+    def update(self, output):
+        y_pred, y_true = output
+        calculated_loss = self._loss_fn(y_pred=y_pred, y_true=y_true)
+        assert calculated_loss == self._expected_loss
+
+
+def test_skip_unrolling_loss():
+    a_pred = torch.rand(8, 1)
+    b_pred = torch.rand(8, 1)
+    y_pred = [a_pred, b_pred]
+    a_true = torch.rand(8, 1)
+    b_true = torch.rand(8, 1)
+    y_true = [a_true, b_true]
+
+    multi_output_mse_loss = CustomMultiMSELoss()
+    expected_loss = multi_output_mse_loss(y_pred=y_pred, y_true=y_true)
+
+    loss_metric = DummyLoss3(loss_fn=multi_output_mse_loss, expected_loss=expected_loss, skip_unrolling=True)
+    state = State(output=(y_pred, y_true))
+    engine = MagicMock(state=state)
+    loss_metric.iteration_completed(engine)
diff --git a/tests/ignite/metrics/test_metric.py b/tests/ignite/metrics/test_metric.py
index f9db11b1a370..96c19d668d74 100644
--- a/tests/ignite/metrics/test_metric.py
+++ b/tests/ignite/metrics/test_metric.py
@@ -1416,3 +1416,33 @@ def wrapper(x, **kwargs):
         assert (output == expected).all(), (output, expected)
     else:
         assert output == expected, (output, expected)
+
+
+class DummyMetric5(Metric):
+    def __init__(self, true_output, output_transform=lambda x: x, skip_unrolling=False):
+        super(DummyMetric5, self).__init__(output_transform=output_transform, skip_unrolling=skip_unrolling)
+        self.true_output = true_output
+
+    def reset(self):
+        pass
+
+    def compute(self):
+        pass
+
+    def update(self, output):
+        assert output == self.true_output
+
+
+def test_skip_unrolling():
+    # y_pred and y are outputs received from a multi_output
model + a_pred = torch.rand(8, 1) + b_pred = torch.rand(8, 1) + y_pred = [a_pred, b_pred] + a_true = torch.rand(8, 1) + b_true = torch.rand(8, 1) + y_true = [a_true, b_true] + + metric = DummyMetric5(true_output=(y_pred, y_true), skip_unrolling=True) + state = State(output=(y_pred, y_true)) + engine = MagicMock(state=state) + metric.iteration_completed(engine) From 6b6b16961073c5fcfe05714d35ba06d949150912 Mon Sep 17 00:00:00 2001 From: Simeet Nayan <78461155+simeetnayan81@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:54:54 +0530 Subject: [PATCH 23/33] Skip unrolling follow up (#3260) * Update accuracy.py, test_accuracy.py with skip_unrolling * change test_accuracy.py * update average_precision, epoch_metric, test_epoch_metric, test_average_precision * Update chohen_kappa.py, test_cohen_kappa.py * update confusion_matrix, cosine_similarity, test_confusion_matrix, test_cosine_similarity * Update docstring for js_divergence, kl_divergence and entropy * update maximum_mean_discrepancy.py, mean_absolute_error.py and test_maximum_mean_discrepancy.py * Update mean_pairwise_distance.py, metrics/test_maximum_mean_discrepancy.py and test_mean_pairwise_distance.py * Update mean_squared_error.py, mutual_information.py, multilabel_consfusion_matrix.py, test_multilabel_confusion_matrix.py * Update precision_recall_curve, test_precision_recall_curve * update precision, psnr, recall, root_mean_square and add tests * Remove unwanted tests, update roc_auc, update docstring for mpd, average_precision * update running_average, ssim, top_k_categorical_accuracy * update frequency.py * update accumulation.py, fix mean_pairwise_distance --- ignite/metrics/accumulation.py | 40 ++++++++++++++++--- ignite/metrics/accuracy.py | 16 +++++++- ignite/metrics/average_precision.py | 7 ++++ ignite/metrics/cohen_kappa.py | 7 ++++ ignite/metrics/confusion_matrix.py | 11 ++++- ignite/metrics/cosine_similarity.py | 9 ++++- ignite/metrics/entropy.py | 6 +++ ignite/metrics/epoch_metric.py | 8 +++- ignite/metrics/frequency.py | 22 +++++++++- ignite/metrics/js_divergence.py | 6 +++ ignite/metrics/kl_divergence.py | 6 +++ ignite/metrics/maximum_mean_discrepancy.py | 14 ++++++- ignite/metrics/mean_absolute_error.py | 6 +++ ignite/metrics/mean_pairwise_distance.py | 9 ++++- ignite/metrics/mean_squared_error.py | 6 +++ ignite/metrics/multilabel_confusion_matrix.py | 10 ++++- ignite/metrics/mutual_information.py | 6 +++ ignite/metrics/precision.py | 9 ++++- ignite/metrics/precision_recall_curve.py | 7 ++++ ignite/metrics/psnr.py | 9 ++++- ignite/metrics/recall.py | 6 +++ ignite/metrics/roc_auc.py | 21 +++++++++- ignite/metrics/root_mean_squared_error.py | 6 +++ ignite/metrics/running_average.py | 11 ++++- ignite/metrics/ssim.py | 9 ++++- ignite/metrics/top_k_categorical_accuracy.py | 9 ++++- tests/ignite/metrics/test_accuracy.py | 35 +++++++++++++++- tests/ignite/metrics/test_epoch_metric.py | 20 ++++++++++ 28 files changed, 308 insertions(+), 23 deletions(-) diff --git a/ignite/metrics/accumulation.py b/ignite/metrics/accumulation.py index 426b35a21abc..4457917c5beb 100644 --- a/ignite/metrics/accumulation.py +++ b/ignite/metrics/accumulation.py @@ -34,7 +34,12 @@ class VariableAccumulation(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. 
Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ required_output_keys = None @@ -45,13 +50,16 @@ def __init__( op: Callable, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): if not callable(op): raise TypeError(f"Argument op should be a callable, but given {type(op)}") self._op = op - super(VariableAccumulation, self).__init__(output_transform=output_transform, device=device) + super(VariableAccumulation, self).__init__( + output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) @reinit__is_reduced def reset(self) -> None: @@ -110,6 +118,9 @@ class Average(VariableAccumulation): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: @@ -164,17 +175,25 @@ class Average(VariableAccumulation): .. testoutput:: tensor([1.5000, 1.5000, 1.5000], dtype=torch.float64) + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def __init__( - self, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu") + self, + output_transform: Callable = lambda x: x, + device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): def _mean_op(a: Union[float, torch.Tensor], x: Union[float, torch.Tensor]) -> Union[float, torch.Tensor]: if isinstance(x, torch.Tensor) and x.ndim > 1: x = x.sum(dim=0) return a + x - super(Average, self).__init__(op=_mean_op, output_transform=output_transform, device=device) + super(Average, self).__init__( + op=_mean_op, output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) @sync_all_reduce("accumulator", "num_examples") def compute(self) -> Union[float, torch.Tensor]: @@ -200,6 +219,9 @@ class GeometricAverage(VariableAccumulation): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Note: @@ -267,10 +289,16 @@ class GeometricAverage(VariableAccumulation): .. testoutput:: tensor([2.2134, 2.2134, 2.2134], dtype=torch.float64) + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. 
""" def __init__( - self, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu") + self, + output_transform: Callable = lambda x: x, + device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): def _geom_op(a: torch.Tensor, x: Union[float, torch.Tensor]) -> torch.Tensor: if not isinstance(x, torch.Tensor): @@ -280,7 +308,9 @@ def _geom_op(a: torch.Tensor, x: Union[float, torch.Tensor]) -> torch.Tensor: x = x.sum(dim=0) return a + x - super(GeometricAverage, self).__init__(op=_geom_op, output_transform=output_transform, device=device) + super(GeometricAverage, self).__init__( + op=_geom_op, output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) @sync_all_reduce("accumulator", "num_examples") def compute(self) -> Union[float, torch.Tensor]: diff --git a/ignite/metrics/accuracy.py b/ignite/metrics/accuracy.py index 0bfe62b85b7b..c205008e24c4 100644 --- a/ignite/metrics/accuracy.py +++ b/ignite/metrics/accuracy.py @@ -14,11 +14,14 @@ def __init__( output_transform: Callable = lambda x: x, is_multilabel: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): self._is_multilabel = is_multilabel self._type: Optional[str] = None self._num_classes: Optional[int] = None - super(_BaseClassification, self).__init__(output_transform=output_transform, device=device) + super(_BaseClassification, self).__init__( + output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) def reset(self) -> None: self._type = None @@ -114,6 +117,9 @@ class Accuracy(_BaseClassification): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: @@ -206,6 +212,9 @@ def thresholded_output_transform(output): .. testoutput:: 4 0.6666... + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_num_correct", "_num_examples") @@ -215,8 +224,11 @@ def __init__( output_transform: Callable = lambda x: x, is_multilabel: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): - super(Accuracy, self).__init__(output_transform=output_transform, is_multilabel=is_multilabel, device=device) + super(Accuracy, self).__init__( + output_transform=output_transform, is_multilabel=is_multilabel, device=device, skip_unrolling=skip_unrolling + ) @reinit__is_reduced def reset(self) -> None: diff --git a/ignite/metrics/average_precision.py b/ignite/metrics/average_precision.py index e2dab8b09abc..03d53de4e29b 100644 --- a/ignite/metrics/average_precision.py +++ b/ignite/metrics/average_precision.py @@ -28,6 +28,9 @@ class AveragePrecision(EpochMetric): #sklearn.metrics.average_precision_score>`_ is run on the first batch of data to ensure there are no issues. User will be warned in case there are any issues computing the function. device: optional device specification for internal storage. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. 
Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Note: AveragePrecision expects y to be comprised of 0's and 1's. y_pred must either be probability estimates or @@ -60,6 +63,8 @@ def activated_output_transform(output): 0.9166... + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def __init__( @@ -67,6 +72,7 @@ def __init__( output_transform: Callable = lambda x: x, check_compute_fn: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): try: from sklearn.metrics import average_precision_score # noqa: F401 @@ -78,4 +84,5 @@ def __init__( output_transform=output_transform, check_compute_fn=check_compute_fn, device=device, + skip_unrolling=skip_unrolling, ) diff --git a/ignite/metrics/cohen_kappa.py b/ignite/metrics/cohen_kappa.py index 92d9b07aa4a6..15cb0222c250 100644 --- a/ignite/metrics/cohen_kappa.py +++ b/ignite/metrics/cohen_kappa.py @@ -23,6 +23,9 @@ class CohenKappa(EpochMetric): is run on the first batch of data to ensure there are no issues. User will be warned in case there are any issues computing the function. device: optional device specification for internal storage. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -46,6 +49,8 @@ class CohenKappa(EpochMetric): 0.4285... + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def __init__( @@ -54,6 +59,7 @@ def __init__( weights: Optional[str] = None, check_compute_fn: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): try: from sklearn.metrics import cohen_kappa_score # noqa: F401 @@ -72,6 +78,7 @@ def __init__( output_transform=output_transform, check_compute_fn=check_compute_fn, device=device, + skip_unrolling=skip_unrolling, ) def get_cohen_kappa_fn(self) -> Callable[[torch.Tensor, torch.Tensor], float]: diff --git a/ignite/metrics/confusion_matrix.py b/ignite/metrics/confusion_matrix.py index 75a9f9848a29..95fd06897ecf 100644 --- a/ignite/metrics/confusion_matrix.py +++ b/ignite/metrics/confusion_matrix.py @@ -34,6 +34,9 @@ class ConfusionMatrix(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Note: The confusion matrix is formatted such that columns are predictions and rows are targets. @@ -98,6 +101,9 @@ def binary_one_hot_output_transform(output): tensor([[2, 1], [1, 1]]) + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. 
""" _state_dict_all_req_keys = ("confusion_matrix", "_num_examples") @@ -108,6 +114,7 @@ def __init__( average: Optional[str] = None, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = True, ): if average is not None and average not in ("samples", "recall", "precision"): raise ValueError("Argument average can None or one of 'samples', 'recall', 'precision'") @@ -118,7 +125,9 @@ def __init__( self.num_classes = num_classes self._num_examples = 0 self.average = average - super(ConfusionMatrix, self).__init__(output_transform=output_transform, device=device) + super(ConfusionMatrix, self).__init__( + output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) @reinit__is_reduced def reset(self) -> None: diff --git a/ignite/metrics/cosine_similarity.py b/ignite/metrics/cosine_similarity.py index a9760530ea7b..9b9e44d90655 100644 --- a/ignite/metrics/cosine_similarity.py +++ b/ignite/metrics/cosine_similarity.py @@ -29,6 +29,9 @@ class CosineSimilarity(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -65,6 +68,9 @@ class CosineSimilarity(Metric): .. testoutput:: 0.5080491304397583 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def __init__( @@ -72,8 +78,9 @@ def __init__( eps: float = 1e-8, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): - super().__init__(output_transform, device) + super().__init__(output_transform, device, skip_unrolling=skip_unrolling) self.eps = eps diff --git a/ignite/metrics/entropy.py b/ignite/metrics/entropy.py index 4208bf205b3e..9aa75be54f20 100644 --- a/ignite/metrics/entropy.py +++ b/ignite/metrics/entropy.py @@ -30,6 +30,9 @@ class Entropy(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -58,6 +61,9 @@ class Entropy(Metric): .. testoutput:: 0.8902875582377116 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_sum_of_entropies", "_num_examples") diff --git a/ignite/metrics/epoch_metric.py b/ignite/metrics/epoch_metric.py index 116a841e49ff..5918b6428120 100644 --- a/ignite/metrics/epoch_metric.py +++ b/ignite/metrics/epoch_metric.py @@ -65,6 +65,9 @@ def mse_fn(y_preds, y_targets): Warnings: EpochMetricWarning: User is warned that there are issues with ``compute_fn`` on a batch of data processed. 
To disable the warning, set ``check_compute_fn=False``. + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_predictions", "_targets") @@ -75,6 +78,7 @@ def __init__( output_transform: Callable = lambda x: x, check_compute_fn: bool = True, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ) -> None: if not callable(compute_fn): raise TypeError("Argument compute_fn should be callable.") @@ -82,7 +86,9 @@ def __init__( self.compute_fn = compute_fn self._check_compute_fn = check_compute_fn - super(EpochMetric, self).__init__(output_transform=output_transform, device=device) + super(EpochMetric, self).__init__( + output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) @reinit__is_reduced def reset(self) -> None: diff --git a/ignite/metrics/frequency.py b/ignite/metrics/frequency.py index 52f02565ac42..921471bdeba2 100644 --- a/ignite/metrics/frequency.py +++ b/ignite/metrics/frequency.py @@ -11,6 +11,18 @@ class Frequency(Metric): """Provides metrics for the number of examples processed per second. + Args: + output_transform: a callable that is used to transform the + :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the + form expected by the metric. This can be useful if, for example, you have a multi-output model and + you want to compute the metric with respect to one of the outputs. + device: specifies which device updates are accumulated on. Setting the metric's + device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By + default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. + Examples: For more information on how metric works with :class:`~ignite.engine.engine.Engine`, visit :ref:`attach-engine`. @@ -36,12 +48,18 @@ class Frequency(Metric): ProgressBar(persist=True).attach(trainer, metric_names=['wps']) # Progress bar will look like # Epoch [2/10]: [50/100] 50%|█████ , wps=400 [00:17<00:35] + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def __init__( - self, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu") + self, + output_transform: Callable = lambda x: x, + device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ) -> None: - super(Frequency, self).__init__(output_transform=output_transform, device=device) + super(Frequency, self).__init__(output_transform=output_transform, device=device, skip_unrolling=skip_unrolling) @reinit__is_reduced def reset(self) -> None: diff --git a/ignite/metrics/js_divergence.py b/ignite/metrics/js_divergence.py index ee223014061d..204995dd0ae8 100644 --- a/ignite/metrics/js_divergence.py +++ b/ignite/metrics/js_divergence.py @@ -39,6 +39,9 @@ class JSDivergence(KLDivergence): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. 
Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -71,6 +74,9 @@ class JSDivergence(KLDivergence): .. testoutput:: 0.16266516844431558 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def _update(self, y_pred: torch.Tensor, y: torch.Tensor) -> None: diff --git a/ignite/metrics/kl_divergence.py b/ignite/metrics/kl_divergence.py index 93f6d5a85282..4f285ea797a9 100644 --- a/ignite/metrics/kl_divergence.py +++ b/ignite/metrics/kl_divergence.py @@ -33,6 +33,9 @@ class KLDivergence(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -65,6 +68,9 @@ class KLDivergence(Metric): .. testoutput:: 0.7220296859741211 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_sum_of_kl", "_num_examples") diff --git a/ignite/metrics/maximum_mean_discrepancy.py b/ignite/metrics/maximum_mean_discrepancy.py index 586aa94ffb79..8a5d04a2c858 100644 --- a/ignite/metrics/maximum_mean_discrepancy.py +++ b/ignite/metrics/maximum_mean_discrepancy.py @@ -44,6 +44,9 @@ class MaximumMeanDiscrepancy(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -76,15 +79,22 @@ class MaximumMeanDiscrepancy(Metric): .. testoutput:: 1.072697639465332 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_xx_sum", "_yy_sum", "_xy_sum", "_num_batches") def __init__( - self, var: float = 1.0, output_transform: Callable = lambda x: x, device: torch.device = torch.device("cpu") + self, + var: float = 1.0, + output_transform: Callable = lambda x: x, + device: torch.device = torch.device("cpu"), + skip_unrolling: bool = False, ): self.var = var - super().__init__(output_transform, device) + super().__init__(output_transform, device, skip_unrolling=skip_unrolling) @reinit__is_reduced def reset(self) -> None: diff --git a/ignite/metrics/mean_absolute_error.py b/ignite/metrics/mean_absolute_error.py index eb90d3aa3c24..12fee3f12327 100644 --- a/ignite/metrics/mean_absolute_error.py +++ b/ignite/metrics/mean_absolute_error.py @@ -26,6 +26,9 @@ class MeanAbsoluteError(Metric): device: specifies which device updates are accumulated on. 
Setting the metric's
            device to be the same as your ``update`` arguments ensures the ``update`` method is
            non-blocking. By default, CPU.
+        skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be
+            true for multi-output model, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``
+            Alternatively, ``output_transform`` can be used to handle this.

     Examples:
         To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine.
@@ -57,6 +60,9 @@ class MeanAbsoluteError(Metric):
     .. testoutput::

         2.9375
+
+    .. versionchanged:: 0.5.1
+        ``skip_unrolling`` argument is added.
     """

     _state_dict_all_req_keys = ("_sum_of_absolute_errors", "_num_examples")
diff --git a/ignite/metrics/mean_pairwise_distance.py b/ignite/metrics/mean_pairwise_distance.py
index 79676564e5fb..dd6910347e9b 100644
--- a/ignite/metrics/mean_pairwise_distance.py
+++ b/ignite/metrics/mean_pairwise_distance.py
@@ -26,6 +26,9 @@ class MeanPairwiseDistance(Metric):
         device: specifies which device updates are accumulated on. Setting the metric's
             device to be the same as your ``update`` arguments ensures the ``update`` method is
             non-blocking. By default, CPU.
+        skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be
+            true for multi-output model, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``
+            Alternatively, ``output_transform`` can be used to handle this.

     Examples:
         To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine.
@@ -57,6 +60,9 @@ class MeanPairwiseDistance(Metric):
     .. testoutput::

         1.5955...
+
+    .. versionchanged:: 0.5.1
+        ``skip_unrolling`` argument is added.
     """

     _state_dict_all_req_keys = ("_sum_of_distances", "_num_examples")
@@ -67,8 +73,9 @@ def __init__(
         eps: float = 1e-6,
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = torch.device("cpu"),
+        skip_unrolling: bool = False,
     ) -> None:
-        super(MeanPairwiseDistance, self).__init__(output_transform, device=device)
+        super(MeanPairwiseDistance, self).__init__(output_transform, device=device, skip_unrolling=skip_unrolling)
         self._p = p
         self._eps = eps
diff --git a/ignite/metrics/mean_squared_error.py b/ignite/metrics/mean_squared_error.py
index 3407b4adcb70..97630f6ce7c9 100644
--- a/ignite/metrics/mean_squared_error.py
+++ b/ignite/metrics/mean_squared_error.py
@@ -26,6 +26,9 @@ class MeanSquaredError(Metric):
         device: specifies which device updates are accumulated on. Setting the metric's
             device to be the same as your ``update`` arguments ensures the ``update`` method is
             non-blocking. By default, CPU.
+        skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be
+            true for multi-output model, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``
+            Alternatively, ``output_transform`` can be used to handle this.

     Examples:
         To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine.
@@ -57,6 +60,9 @@ class MeanSquaredError(Metric):
     .. testoutput::

         3.828125
+
+    .. versionchanged:: 0.5.1
+        ``skip_unrolling`` argument is added.
""" _state_dict_all_req_keys = ("_sum_of_squared_errors", "_num_examples") diff --git a/ignite/metrics/multilabel_confusion_matrix.py b/ignite/metrics/multilabel_confusion_matrix.py index 2a7b25d68c67..e4da5ea5c70d 100644 --- a/ignite/metrics/multilabel_confusion_matrix.py +++ b/ignite/metrics/multilabel_confusion_matrix.py @@ -37,6 +37,9 @@ class MultiLabelConfusionMatrix(Metric): device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. normalized: whether to normalize confusion matrix by its sum or not. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Example: @@ -79,6 +82,8 @@ class MultiLabelConfusionMatrix(Metric): .. versionadded:: 0.4.5 + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("confusion_matrix", "_num_examples") @@ -89,6 +94,7 @@ def __init__( output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu"), normalized: bool = False, + skip_unrolling: bool = False, ): if num_classes <= 1: raise ValueError("Argument num_classes needs to be > 1") @@ -96,7 +102,9 @@ def __init__( self.num_classes = num_classes self._num_examples = 0 self.normalized = normalized - super(MultiLabelConfusionMatrix, self).__init__(output_transform=output_transform, device=device) + super(MultiLabelConfusionMatrix, self).__init__( + output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) @reinit__is_reduced def reset(self) -> None: diff --git a/ignite/metrics/mutual_information.py b/ignite/metrics/mutual_information.py index 2cca768ce43b..dfacd29527d0 100644 --- a/ignite/metrics/mutual_information.py +++ b/ignite/metrics/mutual_information.py @@ -38,6 +38,9 @@ class MutualInformation(Entropy): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -66,6 +69,9 @@ class MutualInformation(Entropy): .. testoutput:: 0.18599730730056763 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. 
""" _state_dict_all_req_keys = ("_sum_of_probabilities",) diff --git a/ignite/metrics/precision.py b/ignite/metrics/precision.py index 31fbd42b19b4..b25154eae358 100644 --- a/ignite/metrics/precision.py +++ b/ignite/metrics/precision.py @@ -21,6 +21,7 @@ def __init__( average: Optional[Union[bool, str]] = False, is_multilabel: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): if not (average is None or isinstance(average, bool) or average in ["macro", "micro", "weighted", "samples"]): raise ValueError( @@ -35,7 +36,7 @@ def __init__( self.eps = 1e-20 self._updated = False super(_BasePrecisionRecall, self).__init__( - output_transform=output_transform, is_multilabel=is_multilabel, device=device + output_transform=output_transform, is_multilabel=is_multilabel, device=device, skip_unrolling=skip_unrolling ) def _check_type(self, output: Sequence[torch.Tensor]) -> None: @@ -241,6 +242,9 @@ class Precision(_BasePrecisionRecall): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: @@ -371,6 +375,9 @@ def thresholded_output_transform(output): .. versionchanged:: 0.4.10 Some new options were added to `average` parameter. + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ @reinit__is_reduced diff --git a/ignite/metrics/precision_recall_curve.py b/ignite/metrics/precision_recall_curve.py index 29b3710b58c0..5b9ece27545c 100644 --- a/ignite/metrics/precision_recall_curve.py +++ b/ignite/metrics/precision_recall_curve.py @@ -33,6 +33,9 @@ class PrecisionRecallCurve(EpochMetric): `_ is run on the first batch of data to ensure there are no issues. User will be warned in case there are any issues computing the function. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Note: PrecisionRecallCurve expects y to be comprised of 0's and 1's. y_pred must either be probability estimates @@ -69,6 +72,8 @@ def sigmoid_output_transform(output): Recall [1.0, 1.0, 1.0, 0.5, 0.0] Thresholds [0.0474, 0.5987, 0.7109, 0.9997] + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def __init__( @@ -76,12 +81,14 @@ def __init__( output_transform: Callable = lambda x: x, check_compute_fn: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ) -> None: super(PrecisionRecallCurve, self).__init__( precision_recall_curve_compute_fn, # type: ignore[arg-type] output_transform=output_transform, check_compute_fn=check_compute_fn, device=device, + skip_unrolling=skip_unrolling, ) def compute(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: # type: ignore[override] diff --git a/ignite/metrics/psnr.py b/ignite/metrics/psnr.py index 4251a24f8f13..be9dcb2b0b36 100644 --- a/ignite/metrics/psnr.py +++ b/ignite/metrics/psnr.py @@ -30,6 +30,9 @@ class PSNR(Metric): device: specifies which device updates are accumulated on. 
Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output models, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``. + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -79,6 +82,9 @@ def get_y_channel(output): 16.7027966... .. versionadded:: 0.4.3 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_sum_of_batchwise_psnr", "_num_examples") @@ -88,8 +94,9 @@ def __init__( self, data_range: Union[int, float], output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): - super().__init__(output_transform=output_transform, device=device) + super().__init__(output_transform=output_transform, device=device, skip_unrolling=skip_unrolling) self.data_range = data_range def _check_shape_dtype(self, output: Sequence[torch.Tensor]) -> None: diff --git a/ignite/metrics/recall.py b/ignite/metrics/recall.py index b570951e291f..46331decc058 100644 --- a/ignite/metrics/recall.py +++ b/ignite/metrics/recall.py @@ -94,6 +94,9 @@ class Recall(_BasePrecisionRecall): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output models, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``. + Alternatively, ``output_transform`` can be used to handle this. Examples: @@ -212,6 +215,9 @@ def thresholded_output_transform(output): .. versionchanged:: 0.4.10 Some new options were added to `average` parameter. + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ @reinit__is_reduced diff --git a/ignite/metrics/roc_auc.py b/ignite/metrics/roc_auc.py index a4ff51a09a98..4cf2f1cdd1c1 100644 --- a/ignite/metrics/roc_auc.py +++ b/ignite/metrics/roc_auc.py @@ -39,6 +39,9 @@ class ROC_AUC(EpochMetric): sklearn.metrics.roc_auc_score>`_ is run on the first batch of data to ensure there are no issues. User will be warned in case there are any issues computing the function. device: optional device specification for internal storage. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output models, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``. + Alternatively, ``output_transform`` can be used to handle this. Note: @@ -71,6 +74,9 @@ def sigmoid_output_transform(output): .. testoutput:: 0.6666... + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added.
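A minimal sketch of the ``output_transform`` alternative these docstrings mention, selecting a single head from a multi-output prediction so that the default unrolling never sees a grouped tuple; the head layout below is an assumption, not part of the patch:

.. code-block:: python

    from ignite.metrics import ROC_AUC

    def select_first_head(output):
        # assumed engine output layout: ((y_pred_a, y_pred_b), y)
        (y_pred_a, y_pred_b), y = output
        return y_pred_a, y

    # scores only the first head, so skip_unrolling is not needed
    roc_auc = ROC_AUC(output_transform=select_first_head)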
""" def __init__( @@ -78,6 +84,7 @@ def __init__( output_transform: Callable = lambda x: x, check_compute_fn: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): try: from sklearn.metrics import roc_auc_score # noqa: F401 @@ -85,7 +92,11 @@ def __init__( raise ModuleNotFoundError("This contrib module requires scikit-learn to be installed.") super(ROC_AUC, self).__init__( - roc_auc_compute_fn, output_transform=output_transform, check_compute_fn=check_compute_fn, device=device + roc_auc_compute_fn, + output_transform=output_transform, + check_compute_fn=check_compute_fn, + device=device, + skip_unrolling=skip_unrolling, ) @@ -105,6 +116,9 @@ class RocCurve(EpochMetric): sklearn.metrics.roc_curve>`_ is run on the first batch of data to ensure there are no issues. User will be warned in case there are any issues computing the function. device: optional device specification for internal storage. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Note: RocCurve expects y to be comprised of 0's and 1's. y_pred must either be probability estimates or confidence @@ -143,6 +157,9 @@ def sigmoid_output_transform(output): .. versionchanged:: 0.4.11 added `device` argument + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def __init__( @@ -150,6 +167,7 @@ def __init__( output_transform: Callable = lambda x: x, check_compute_fn: bool = False, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ) -> None: try: from sklearn.metrics import roc_curve # noqa: F401 @@ -161,6 +179,7 @@ def __init__( output_transform=output_transform, check_compute_fn=check_compute_fn, device=device, + skip_unrolling=skip_unrolling, ) def compute(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: # type: ignore[override] diff --git a/ignite/metrics/root_mean_squared_error.py b/ignite/metrics/root_mean_squared_error.py index ab2218d2372c..22605123bde5 100644 --- a/ignite/metrics/root_mean_squared_error.py +++ b/ignite/metrics/root_mean_squared_error.py @@ -26,6 +26,9 @@ class RootMeanSquaredError(MeanSquaredError): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output model, for example, if ``y_pred`` contains multi-ouput as ``(y_pred_a, y_pred_b)`` + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -57,6 +60,9 @@ class RootMeanSquaredError(MeanSquaredError): .. testoutput:: 1.956559480312316 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ def compute(self) -> Union[torch.Tensor, float]: diff --git a/ignite/metrics/running_average.py b/ignite/metrics/running_average.py index 9b3b4efb4f3f..a622558b5abd 100644 --- a/ignite/metrics/running_average.py +++ b/ignite/metrics/running_average.py @@ -27,6 +27,9 @@ class RunningAverage(Metric): None when ``src`` is an instance of :class:`~ignite.metrics.metric.Metric`, as the running average will use the ``src``'s device. 
Otherwise, defaults to CPU. Only applicable when the computed value from the metric is a tensor. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output models, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``. + Alternatively, ``output_transform`` can be used to handle this. Examples: @@ -84,6 +87,9 @@ def log_running_avg_metrics(): 0.039208... 0.038423... 0.057655... + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ required_output_keys = None @@ -96,6 +102,7 @@ def __init__( output_transform: Optional[Callable] = None, epoch_bound: Optional[bool] = None, device: Optional[Union[str, torch.device]] = None, + skip_unrolling: bool = False, ): if not (isinstance(src, Metric) or src is None): raise TypeError("Argument src should be a Metric or None.") @@ -131,7 +138,9 @@ def output_transform(x: Any) -> Any: ) self.epoch_bound = epoch_bound self.alpha = alpha - super(RunningAverage, self).__init__(output_transform=output_transform, device=device) + super(RunningAverage, self).__init__( + output_transform=output_transform, device=device, skip_unrolling=skip_unrolling + ) @reinit__is_reduced def reset(self) -> None: diff --git a/ignite/metrics/ssim.py b/ignite/metrics/ssim.py index 6824c0b3f374..a662fb6f5413 100644 --- a/ignite/metrics/ssim.py +++ b/ignite/metrics/ssim.py @@ -33,6 +33,9 @@ class SSIM(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be + true for multi-output models, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``. + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -62,6 +65,9 @@ class SSIM(Metric): 0.9218971... .. versionadded:: 0.4.2 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_sum_of_ssim", "_num_examples", "_kernel") @@ -76,6 +82,7 @@ def __init__( self, gaussian: bool = True, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ): if isinstance(kernel_size, int): self.kernel_size: Sequence[int] = [kernel_size, kernel_size] @@ -97,7 +104,7 @@ def __init__( if any(y <= 0 for y in self.sigma): raise ValueError(f"Expected sigma to have positive number. Got {sigma}.") - super(SSIM, self).__init__(output_transform=output_transform, device=device) + super(SSIM, self).__init__(output_transform=output_transform, device=device, skip_unrolling=skip_unrolling) self.gaussian = gaussian self.data_range = data_range self.c1 = (k1 * data_range) ** 2 diff --git a/ignite/metrics/top_k_categorical_accuracy.py b/ignite/metrics/top_k_categorical_accuracy.py index 87da4c868731..611013c7905d 100644 --- a/ignite/metrics/top_k_categorical_accuracy.py +++ b/ignite/metrics/top_k_categorical_accuracy.py @@ -24,6 +24,9 @@ class TopKCategoricalAccuracy(Metric): device: specifies which device updates are accumulated on. Setting the metric's device to be the same as your ``update`` arguments ensures the ``update`` method is non-blocking. By default, CPU. + skip_unrolling: specifies whether output should be unrolled before being fed to update method.
Should be + true for multi-output models, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``. + Alternatively, ``output_transform`` can be used to handle this. Examples: To use with ``Engine`` and ``process_function``, simply attach the metric instance to the engine. @@ -71,6 +74,9 @@ def one_hot_to_binary_output_transform(output): .. testoutput:: 0.75 + + .. versionchanged:: 0.5.1 + ``skip_unrolling`` argument is added. """ _state_dict_all_req_keys = ("_num_correct", "_num_examples") @@ -80,8 +86,9 @@ def __init__( self, k: int = 5, output_transform: Callable = lambda x: x, device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, ) -> None: - super(TopKCategoricalAccuracy, self).__init__(output_transform, device=device) + super(TopKCategoricalAccuracy, self).__init__(output_transform, device=device, skip_unrolling=skip_unrolling) self._k = k @reinit__is_reduced diff --git a/tests/ignite/metrics/test_accuracy.py b/tests/ignite/metrics/test_accuracy.py index a7954e6afa30..35631b2b47e7 100644 --- a/tests/ignite/metrics/test_accuracy.py +++ b/tests/ignite/metrics/test_accuracy.py @@ -1,4 +1,6 @@ import os +from typing import Callable, Union +from unittest.mock import MagicMock import pytest import torch @@ -6,7 +8,7 @@ from sklearn.metrics import accuracy_score import ignite.distributed as idist -from ignite.engine import Engine +from ignite.engine import Engine, State from ignite.exceptions import NotComputableError from ignite.metrics import Accuracy @@ -638,3 +640,34 @@ def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): _test_distrib_integration_multilabel(device) _test_distrib_accumulator_device(device) _test_distrib_integration_list_of_tensors_or_numbers(device) + + +def test_skip_unrolling(): + class DummyAcc(Accuracy): + def __init__( + self, + true_output, + output_transform: Callable = lambda x: x, + is_multilabel: bool = False, + device: Union[str, torch.device] = torch.device("cpu"), + skip_unrolling: bool = False, + ): + super(DummyAcc, self).__init__( + output_transform=output_transform, is_multilabel=is_multilabel, device=device, skip_unrolling=skip_unrolling + ) + self.true_output = true_output + + def update(self, output): + assert output == self.true_output + + a_pred = torch.randint(0, 2, size=(8, 1)) + b_pred = torch.randint(0, 2, size=(8, 1)) + y_pred = [a_pred, b_pred] + a_true = torch.randint(0, 2, size=(8, 1)) + b_true = torch.randint(0, 2, size=(8, 1)) + y_true = [a_true, b_true] + + acc = DummyAcc(true_output=(y_pred, y_true), skip_unrolling=True) + state = State(output=(y_pred, y_true)) + engine = MagicMock(state=state) + acc.iteration_completed(engine) diff --git a/tests/ignite/metrics/test_epoch_metric.py b/tests/ignite/metrics/test_epoch_metric.py index d82168266b1d..5c42957cf57d 100644 --- a/tests/ignite/metrics/test_epoch_metric.py +++ b/tests/ignite/metrics/test_epoch_metric.py @@ -186,3 +186,23 @@ def assert_data_fn(all_preds, all_targets): assert engine.state.metrics["epm"] == ep_metric_true assert ep_metric.compute() == ep_metric_true + + +def test_skip_unrolling(): + def compute_fn(y_preds, y_targets): + return 0.0 + + em = EpochMetric(compute_fn, skip_unrolling=True) + + em.reset() + output1 = (torch.rand(4, 2), torch.randint(0, 2, size=(4, 2), dtype=torch.long)) + em.update(output1) + output2 = (torch.rand(4, 2), torch.randint(0, 2, size=(4, 2), dtype=torch.long)) + em.update(output2) + + assert all([t.device.type == "cpu" for t in em._predictions + em._targets]) + assert
torch.equal(em._predictions[0], output1[0]) + assert torch.equal(em._predictions[1], output2[0]) + assert torch.equal(em._targets[0], output1[1]) + assert torch.equal(em._targets[1], output2[1]) + assert em.compute() == 0.0 From edd5025e7d597a6e5fe45c5173487c37d3f9d1df Mon Sep 17 00:00:00 2001 From: Sadra Barikbin Date: Wed, 17 Jul 2024 13:27:50 +0330 Subject: [PATCH 24/33] Exclude special attributes from causing `MetricsLambda` creation (#3263) * Fix the problem * Add a test --- ignite/metrics/metric.py | 3 +++ tests/ignite/metrics/test_metric.py | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ignite/metrics/metric.py b/ignite/metrics/metric.py index 4ccfd8ea7af7..453fb1291e94 100644 --- a/ignite/metrics/metric.py +++ b/ignite/metrics/metric.py @@ -768,6 +768,9 @@ def __floordiv__(self, other: Any) -> "MetricsLambda": def __getattr__(self, attr: str) -> Callable: from ignite.metrics.metrics_lambda import MetricsLambda + if attr.startswith("__") and attr.endswith("__"): + return object.__getattribute__(self, attr) + def fn(x: Metric, *args: Any, **kwargs: Any) -> Any: return getattr(x, attr)(*args, **kwargs) diff --git a/tests/ignite/metrics/test_metric.py b/tests/ignite/metrics/test_metric.py index 96c19d668d74..645a40b7ac69 100644 --- a/tests/ignite/metrics/test_metric.py +++ b/tests/ignite/metrics/test_metric.py @@ -1446,3 +1446,25 @@ def test_skip_unrolling(): state = State(output=(y_pred, y_true)) engine = MagicMock(state=state) metric.iteration_completed(engine) + + +class DummyMetric6(Metric): + def reset(self): + pass + + def compute(self): + pass + + def update(self, output): + pass + + def __call__(self, value): + pass + + +def test_access_to_metric_dunder_attributes(): + metric = DummyMetric6() + import inspect + + # `inspect.signature` accesses `__signature__` attribute of the metric. + assert "value" in inspect.signature(metric).parameters.keys() From aab39d6e858371c261bced41a82901aa2b30d548 Mon Sep 17 00:00:00 2001 From: Jean Schmidt <4520845+jeanschmidt@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:23:40 +0200 Subject: [PATCH 25/33] Replace runners prefix amz2023. 
(#3265) --- .github/workflows/docker-build.yml | 8 ++-- .github/workflows/gpu-hvd-tests.yml | 2 +- .github/workflows/gpu-tests.yml | 2 +- docs/make.bat | 72 ++++++++++++++--------------- 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index aadccaafb334..54db85d7af13 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -56,7 +56,7 @@ jobs: env: REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - runs-on: linux.4xlarge + runs-on: amz2023.linux.4xlarge steps: - name: Clean workspace run: | @@ -116,7 +116,7 @@ jobs: env: REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - runs-on: linux.12xlarge + runs-on: amz2023.linux.12xlarge steps: - name: Clean workspace run: | @@ -176,7 +176,7 @@ jobs: env: REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - runs-on: linux.4xlarge + runs-on: amz2023.linux.4xlarge steps: - name: Clean workspace run: | @@ -236,7 +236,7 @@ jobs: env: REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - runs-on: linux.12xlarge + runs-on: amz2023.linux.12xlarge steps: - name: Clean workspace run: | diff --git a/.github/workflows/gpu-hvd-tests.yml b/.github/workflows/gpu-hvd-tests.yml index 2017cf8acdad..3f0dd5ad2c68 100644 --- a/.github/workflows/gpu-hvd-tests.yml +++ b/.github/workflows/gpu-hvd-tests.yml @@ -28,7 +28,7 @@ jobs: DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1" REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - runs-on: linux.8xlarge.nvidia.gpu + runs-on: amz2023.linux.8xlarge.nvidia.gpu timeout-minutes: 60 steps: diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml index 81862e1f67bd..094f20ed2257 100644 --- a/.github/workflows/gpu-tests.yml +++ b/.github/workflows/gpu-tests.yml @@ -28,7 +28,7 @@ jobs: DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1" REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - runs-on: linux.8xlarge.nvidia.gpu + runs-on: amz2023.linux.8xlarge.nvidia.gpu timeout-minutes: 85 steps: diff --git a/docs/make.bat b/docs/make.bat index 3bf02ee70066..e552da3d300b 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,36 +1,36 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build -set SPHINXPROJ=ignite - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build +set SPHINXPROJ=ignite + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd From 2ac34be3a385b88f7f4f62dfa2d49e85c246b1e1 Mon Sep 17 00:00:00 2001 From: Simeet Nayan <78461155+simeetnayan81@users.noreply.github.com> Date: Tue, 30 Jul 2024 17:23:37 +0530 Subject: [PATCH 26/33] Fix failing Pytorch version tests CI (#3267) * Update requirement for nltk * Update requirement for nltk --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index bf60639e75c3..d475e556cdff 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -27,7 +27,7 @@ scikit-image py-rouge # temporary fix for python=3.12 and v3.8.1 # nltk -git+https://github.com/nltk/nltk +git+https://github.com/nltk/nltk@aba99c8 # Examples dependencies pandas gymnasium From 65352ad9d40c0ab614137110cb3719a1253c610e Mon Sep 17 00:00:00 2001 From: vfdev Date: Wed, 31 Jul 2024 05:53:18 +0200 Subject: [PATCH 27/33] Fix failing tpu tests (#3261) * Update tpu-tests.yml * Update tpu-tests.yml --- .github/workflows/tpu-tests.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tpu-tests.yml b/.github/workflows/tpu-tests.yml index ab14ad3c1de0..cc330de0c279 100644 --- a/.github/workflows/tpu-tests.yml +++ b/.github/workflows/tpu-tests.yml @@ -36,10 +36,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v4 with: - python-version: "3.9" + python-version: "3.10" architecture: "x64" - name: Get year & week number @@ -50,7 +50,7 @@ jobs: - name: Get pip cache dir id: pip-cache run: | - pip3 install -U pip + pip3 install -U "pip<24" echo "pip_cache=$(pip cache dir)" >> $GITHUB_OUTPUT shell: bash -l {0} @@ -70,10 +70,9 @@ jobs: pip install mkl==2021.4.0 ## Install torch & xla and torchvision - pip install --pre https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch-nightly-cp39-cp39-linux_x86_64.whl - pip install --pre https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-nightly-cp39-cp39-linux_x86_64.whl - pip install --pre https://storage.googleapis.com/tpu-pytorch/wheels/colab/torchvision-nightly-cp39-cp39-linux_x86_64.whl - + pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu + pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl + # Check installation python -c "import torch" From 4c93282385353b585b4536c4553589af7dd58af0 Mon Sep 17 00:00:00 2001 From: Sadra Barikbin Date: Thu, 1 Aug 2024 22:03:38 +0330 Subject: [PATCH 28/33] Add `MetricGroup` feature (#3266) * Initial commit * Add tests * Fix two typos * Fix Mypy * Fix engine mypy issue * Fix docstring * Fix another problem in docstring --------- Co-authored-by: vfdev --- docs/source/metrics.rst | 1 + ignite/engine/engine.py | 4 +- ignite/metrics/__init__.py | 2 + ignite/metrics/metric_group.py | 54 ++++++++++ tests/ignite/metrics/test_metric_group.py | 118 ++++++++++++++++++++++ 5 files changed, 177 insertions(+), 2 deletions(-) create mode 100644 
ignite/metrics/metric_group.py create mode 100644 tests/ignite/metrics/test_metric_group.py diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst index ef1250314811..0e4979f82a1c 100644 --- a/docs/source/metrics.rst +++ b/docs/source/metrics.rst @@ -335,6 +335,7 @@ Complete list of metrics MeanPairwiseDistance MeanSquaredError metric.Metric + metric_group.MetricGroup metrics_lambda.MetricsLambda MultiLabelConfusionMatrix MutualInformation diff --git a/ignite/engine/engine.py b/ignite/engine/engine.py index 865218af3599..27a949cacca2 100644 --- a/ignite/engine/engine.py +++ b/ignite/engine/engine.py @@ -157,7 +157,7 @@ def __init__(self, process_function: Callable[["Engine", Any], Any]): _check_signature(process_function, "process_function", self, None) # generator provided by self._internal_run_as_gen - self._internal_run_generator: Optional[Generator] = None + self._internal_run_generator: Optional[Generator[Any, None, State]] = None def register_events( self, *event_names: Union[List[str], List[EventEnum]], event_to_attr: Optional[dict] = None @@ -951,7 +951,7 @@ def _internal_run(self) -> State: self._internal_run_generator = None return out.value - def _internal_run_as_gen(self) -> Generator: + def _internal_run_as_gen(self) -> Generator[Any, None, State]: self.should_terminate = self.should_terminate_single_epoch = self.should_interrupt = False self._init_timers(self.state) try: diff --git a/ignite/metrics/__init__.py b/ignite/metrics/__init__.py index e4f4e24337c5..142a13e5934e 100644 --- a/ignite/metrics/__init__.py +++ b/ignite/metrics/__init__.py @@ -22,6 +22,7 @@ from ignite.metrics.mean_pairwise_distance import MeanPairwiseDistance from ignite.metrics.mean_squared_error import MeanSquaredError from ignite.metrics.metric import BatchFiltered, BatchWise, EpochWise, Metric, MetricUsage +from ignite.metrics.metric_group import MetricGroup from ignite.metrics.metrics_lambda import MetricsLambda from ignite.metrics.multilabel_confusion_matrix import MultiLabelConfusionMatrix from ignite.metrics.mutual_information import MutualInformation @@ -41,6 +42,7 @@ "Metric", "Accuracy", "Loss", + "MetricGroup", "MetricsLambda", "MeanAbsoluteError", "MeanPairwiseDistance", diff --git a/ignite/metrics/metric_group.py b/ignite/metrics/metric_group.py new file mode 100644 index 000000000000..58a52f658ae1 --- /dev/null +++ b/ignite/metrics/metric_group.py @@ -0,0 +1,54 @@ +from typing import Any, Callable, Dict, Sequence + +import torch + +from ignite.metrics import Metric + + +class MetricGroup(Metric): + """ + A class for grouping metrics so that the user can manage them more easily. + + Args: + metrics: a dictionary of names to metric instances. + output_transform: a callable that is used to transform the + :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the + form expected by the metric. `output_transform` of each metric in the group is also + called upon its update. + + Examples: + We construct a group of metrics, attach them to the engine at once and retrieve their result. + + ..
code-block:: python + + import torch + + metric_group = MetricGroup({'acc': Accuracy(), 'precision': Precision(), 'loss': Loss(nn.NLLLoss())}) + metric_group.attach(default_evaluator, "eval_metrics") + y_true = torch.tensor([1, 0, 1, 1, 0, 1]) + y_pred = torch.tensor([1, 0, 1, 0, 1, 1]) + state = default_evaluator.run([[y_pred, y_true]]) + + # Metrics individually available in `state.metrics` + state.metrics["acc"], state.metrics["precision"], state.metrics["loss"] + + # And also altogether + state.metrics["eval_metrics"] + """ + + _state_dict_all_req_keys = ("metrics",) + + def __init__(self, metrics: Dict[str, Metric], output_transform: Callable = lambda x: x): + self.metrics = metrics + super(MetricGroup, self).__init__(output_transform=output_transform) + + def reset(self) -> None: + for m in self.metrics.values(): + m.reset() + + def update(self, output: Sequence[torch.Tensor]) -> None: + for m in self.metrics.values(): + m.update(m._output_transform(output)) + + def compute(self) -> Dict[str, Any]: + return {k: m.compute() for k, m in self.metrics.items()} diff --git a/tests/ignite/metrics/test_metric_group.py b/tests/ignite/metrics/test_metric_group.py new file mode 100644 index 000000000000..237df966e059 --- /dev/null +++ b/tests/ignite/metrics/test_metric_group.py @@ -0,0 +1,118 @@ +import pytest +import torch + +from ignite import distributed as idist +from ignite.engine import Engine +from ignite.metrics import Accuracy, MetricGroup, Precision + +torch.manual_seed(41) + + +def test_update(): + precision = Precision() + accuracy = Accuracy() + + group = MetricGroup({"precision": Precision(), "accuracy": Accuracy()}) + + y_pred = torch.randint(0, 2, (100,)) + y = torch.randint(0, 2, (100,)) + + precision.update((y_pred, y)) + accuracy.update((y_pred, y)) + group.update((y_pred, y)) + + assert precision.state_dict() == group.metrics["precision"].state_dict() + assert accuracy.state_dict() == group.metrics["accuracy"].state_dict() + + +def test_output_transform(): + def drop_first(output): + y_pred, y = output + return (y_pred[1:], y[1:]) + + precision = Precision(output_transform=drop_first) + accuracy = Accuracy(output_transform=drop_first) + + group = MetricGroup( + {"precision": Precision(output_transform=drop_first), "accuracy": Accuracy(output_transform=drop_first)} + ) + + y_pred = torch.randint(0, 2, (100,)) + y = torch.randint(0, 2, (100,)) + + precision.update(drop_first(drop_first((y_pred, y)))) + accuracy.update(drop_first(drop_first((y_pred, y)))) + group.update(drop_first((y_pred, y))) + + assert precision.state_dict() == group.metrics["precision"].state_dict() + assert accuracy.state_dict() == group.metrics["accuracy"].state_dict() + + +def test_compute(): + precision = Precision() + accuracy = Accuracy() + + group = MetricGroup({"precision": Precision(), "accuracy": Accuracy()}) + + for _ in range(3): + y_pred = torch.randint(0, 2, (100,)) + y = torch.randint(0, 2, (100,)) + + precision.update((y_pred, y)) + accuracy.update((y_pred, y)) + group.update((y_pred, y)) + + assert group.compute() == {"precision": precision.compute(), "accuracy": accuracy.compute()} + + precision.reset() + accuracy.reset() + group.reset() + + assert precision.state_dict() == group.metrics["precision"].state_dict() + assert accuracy.state_dict() == group.metrics["accuracy"].state_dict() + + +@pytest.mark.usefixtures("distributed") +class TestDistributed: + def test_integration(self): + rank = idist.get_rank() + torch.manual_seed(12 + rank) + + n_epochs = 3 + n_iters = 5 + batch_size = 10 + 
device = idist.device() + + y_true = torch.randint(0, 2, size=(n_iters * batch_size,)).to(device) + y_pred = torch.randint(0, 2, (n_iters * batch_size,)).to(device) + + def update(_, i): + return ( + y_pred[i * batch_size : (i + 1) * batch_size], + y_true[i * batch_size : (i + 1) * batch_size], + ) + + engine = Engine(update) + + precision = Precision() + precision.attach(engine, "precision") + + accuracy = Accuracy() + accuracy.attach(engine, "accuracy") + + group = MetricGroup({"eval_metrics.accuracy": Accuracy(), "eval_metrics.precision": Precision()}) + group.attach(engine, "eval_metrics") + + data = list(range(n_iters)) + engine.run(data=data, max_epochs=n_epochs) + + assert "eval_metrics" in engine.state.metrics + assert "eval_metrics.accuracy" in engine.state.metrics + assert "eval_metrics.precision" in engine.state.metrics + + assert engine.state.metrics["eval_metrics"] == { + "eval_metrics.accuracy": engine.state.metrics["accuracy"], + "eval_metrics.precision": engine.state.metrics["precision"], + } + assert engine.state.metrics["eval_metrics.accuracy"] == engine.state.metrics["accuracy"] + assert engine.state.metrics["eval_metrics.precision"] == engine.state.metrics["precision"] From 8be85b42e7c5990ce1d11ff66467d3b340a5807e Mon Sep 17 00:00:00 2001 From: zubatyuk Date: Sat, 3 Aug 2024 02:01:21 -0400 Subject: [PATCH 29/33] Fix WandBLogger init (#3273) * Fix WandBLogger init * fix kwargs.pop in WandBLogger * fix quotes --- ignite/handlers/wandb_logger.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ignite/handlers/wandb_logger.py b/ignite/handlers/wandb_logger.py index 3f8e44840c71..621ff9d3ebd6 100644 --- a/ignite/handlers/wandb_logger.py +++ b/ignite/handlers/wandb_logger.py @@ -134,6 +134,7 @@ def __init__(self, *args: Any, **kwargs: Any): "You may install wandb with the command:\n pip install wandb\n" ) if kwargs.get("init", True): + kwargs.pop("init", None) wandb.init(*args, **kwargs) def __getattr__(self, attr: Any) -> Any: From e58039e81bf7241f721b641d19dae7bc0b384dab Mon Sep 17 00:00:00 2001 From: vfdev Date: Mon, 5 Aug 2024 21:59:17 +0200 Subject: [PATCH 30/33] Fix mps ci (#3274) --- tests/ignite/engine/test_create_supervised.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py index 6f7e0a2b2187..54938167601a 100644 --- a/tests/ignite/engine/test_create_supervised.py +++ b/tests/ignite/engine/test_create_supervised.py @@ -316,7 +316,8 @@ def _test_create_supervised_evaluator( # This is broken in 1.6.0 but will be probably fixed with 1.7.0 err_msg_1 = "Expected all tensors to be on the same device" err_msg_2 = "Placeholder storage has not been allocated on MPS device" - with pytest.raises(RuntimeError, match=f"({err_msg_1}|{err_msg_2})"): + err_msg_3 = "Tensor for argument weight is on cpu but expected on mps" + with pytest.raises(RuntimeError, match=f"({err_msg_1}|{err_msg_2}|{err_msg_3})"): evaluator.run(data) From aa3e3e13c214fe6cf72e941a46f13378911c8894 Mon Sep 17 00:00:00 2001 From: vfdev Date: Tue, 6 Aug 2024 02:07:02 +0200 Subject: [PATCH 31/33] Updated pytorch versions in CI and docker (#3275) * Updated pytorch versions in CI and docker * Removed torchtext as the project has stopped releasing new torch compatible versions --- .github/workflows/pytorch-version-tests.yml | 2 +- docker/docker.cfg | 2 +- docker/hvd/Dockerfile.hvd-apex-nlp | 3 +-- docker/hvd/Dockerfile.hvd-nlp | 3 +-- docker/main/Dockerfile.apex-nlp | 3 +-- docker/main/Dockerfile.nlp | 3
+-- docker/test_image.py | 1 - 7 files changed, 6 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml index a703e72333e2..f268669158e9 100644 --- a/.github/workflows/pytorch-version-tests.yml +++ b/.github/workflows/pytorch-version-tests.yml @@ -17,7 +17,7 @@ jobs: matrix: python-version: [3.8, 3.9, "3.10"] pytorch-version: - [2.2.2, 2.1.2, 2.0.1, 1.13.1, 1.12.1, 1.11.0, 1.10.0, 1.8.1, 1.5.1] + [2.3.1, 2.2.2, 2.1.2, 2.0.1, 1.13.1, 1.12.1, 1.10.0, 1.8.1, 1.5.1] exclude: - pytorch-version: 1.5.1 python-version: 3.9 diff --git a/docker/docker.cfg b/docker/docker.cfg index dd43c35e7df8..6b9769333471 100644 --- a/docker/docker.cfg +++ b/docker/docker.cfg @@ -1,4 +1,4 @@ [DEFAULT] -build_docker_image_pytorch_version = 2.3.1-cuda12.1-cudnn8 +build_docker_image_pytorch_version = 2.4.0-cuda12.4-cudnn9 build_docker_image_hvd_version = v0.28.1 build_docker_image_msdp_version = v0.14.0 diff --git a/docker/hvd/Dockerfile.hvd-apex-nlp b/docker/hvd/Dockerfile.hvd-apex-nlp index 6379490c4966..a8f51988baa7 100644 --- a/docker/hvd/Dockerfile.hvd-apex-nlp +++ b/docker/hvd/Dockerfile.hvd-apex-nlp @@ -4,5 +4,4 @@ FROM pytorchignite/hvd-apex:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk \ - torchtext + nltk diff --git a/docker/hvd/Dockerfile.hvd-nlp b/docker/hvd/Dockerfile.hvd-nlp index db4ca4c3ebc1..84da0230b9e9 100644 --- a/docker/hvd/Dockerfile.hvd-nlp +++ b/docker/hvd/Dockerfile.hvd-nlp @@ -4,5 +4,4 @@ FROM pytorchignite/hvd-base:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk \ - torchtext + nltk diff --git a/docker/main/Dockerfile.apex-nlp b/docker/main/Dockerfile.apex-nlp index ad7507df777c..b9be5acd6d9f 100644 --- a/docker/main/Dockerfile.apex-nlp +++ b/docker/main/Dockerfile.apex-nlp @@ -4,5 +4,4 @@ FROM pytorchignite/apex:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk \ - torchtext + nltk diff --git a/docker/main/Dockerfile.nlp b/docker/main/Dockerfile.nlp index e5ef45cfef0f..7826caa03ef8 100644 --- a/docker/main/Dockerfile.nlp +++ b/docker/main/Dockerfile.nlp @@ -4,5 +4,4 @@ FROM pytorchignite/base:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk \ - torchtext + nltk \ No newline at end of file diff --git a/docker/test_image.py b/docker/test_image.py index 0bfdc6c4697b..ebb706c303fb 100644 --- a/docker/test_image.py +++ b/docker/test_image.py @@ -54,7 +54,6 @@ def check_package(package_name, expected_version=None): check_package("cv2") if "nlp" in image_type: - check_package("torchtext") check_package("transformers") if "apex" in image_type: From 3e67049226d5e8129b54fb9430825a9a842f119d Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Mon, 5 Aug 2024 21:57:15 +0200 Subject: [PATCH 32/33] Bump version to 0.5.1 --- ignite/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ignite/__init__.py b/ignite/__init__.py index d804675afa5b..3c926af9124d 100644 --- a/ignite/__init__.py +++ b/ignite/__init__.py @@ -6,4 +6,4 @@ import ignite.metrics import ignite.utils -__version__ = "0.6.0" +__version__ = "0.5.1" From 2d11cd7fa98225e4bada7b833d8ec0072ddbfd4e Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Sun, 31 Mar 2024 23:43:58 +0200 Subject: [PATCH 33/33] Revert "Adding max_iters as an optional arg in Engine run (#1381)" --- ignite/engine/engine.py | 53 ++++--------------------- 
ignite/engine/events.py | 2 - ignite/handlers/lr_finder.py | 1 - tests/ignite/engine/test_engine.py | 41 ------------------- tests/ignite/handlers/test_lr_finder.py | 2 +- 5 files changed, 9 insertions(+), 90 deletions(-) diff --git a/ignite/engine/engine.py b/ignite/engine/engine.py index 27a949cacca2..24e7f885ec8d 100644 --- a/ignite/engine/engine.py +++ b/ignite/engine/engine.py @@ -1,6 +1,5 @@ import functools import logging -import math import time import warnings import weakref @@ -731,14 +730,13 @@ def load_state_dict(self, state_dict: Mapping) -> None: @staticmethod def _is_done(state: State) -> bool: - is_done_iters = state.max_iters is not None and state.iteration >= state.max_iters is_done_count = ( state.epoch_length is not None and state.max_epochs is not None and state.iteration >= state.epoch_length * state.max_epochs ) is_done_epochs = state.max_epochs is not None and state.epoch >= state.max_epochs - return is_done_iters or is_done_count or is_done_epochs + return is_done_count or is_done_epochs def set_data(self, data: Union[Iterable, DataLoader]) -> None: """Method to set data. After calling the method the next batch passed to `processing_function` is @@ -780,14 +778,13 @@ def run( self, data: Optional[Iterable] = None, max_epochs: Optional[int] = None, - max_iters: Optional[int] = None, epoch_length: Optional[int] = None, ) -> State: """Runs the ``process_function`` over the passed data. Engine has a state and the following logic is applied in this function: - - At the first call, new state is defined by `max_epochs`, `max_iters`, `epoch_length`, if provided. + - At the first call, new state is defined by `max_epochs`, `epoch_length`, if provided. A timer for total and per-epoch time is initialized when Events.STARTED is handled. - If state is already defined such that there are iterations to run until `max_epochs` and no input arguments provided, state is kept and used in the function. @@ -805,9 +802,6 @@ def run( `len(data)`. If `data` is an iterator and `epoch_length` is not set, then it will be automatically determined as the iteration on which data iterator raises `StopIteration`. This argument should not change if run is resuming from a state. - max_iters: Number of iterations to run for. - `max_iters` and `max_epochs` are mutually exclusive; only one of the two arguments should be provided. - Returns: State: output state. @@ -858,6 +852,8 @@ def switch_batch(engine): if self.state.max_epochs is None or (self._is_done(self.state) and self._internal_run_generator is None): # Create new state + if max_epochs is None: + max_epochs = 1 if epoch_length is None: if data is None: raise ValueError("epoch_length should be provided if data is None") @@ -866,22 +862,9 @@ def switch_batch(engine): if epoch_length is not None and epoch_length < 1: raise ValueError("Input data has zero size. Please provide non-empty data") - if max_iters is None: - if max_epochs is None: - max_epochs = 1 - else: - if max_epochs is not None: - raise ValueError( - "Arguments max_iters and max_epochs are mutually exclusive." - "Please provide only max_epochs or max_iters." 
- ) - if epoch_length is not None: - max_epochs = math.ceil(max_iters / epoch_length) - self.state.iteration = 0 self.state.epoch = 0 self.state.max_epochs = max_epochs - self.state.max_iters = max_iters self.state.epoch_length = epoch_length # Reset generator if previously used self._internal_run_generator = None @@ -1062,18 +1045,12 @@ def _run_once_on_dataset_as_gen(self) -> Generator[State, None, float]: if self.state.epoch_length is None: # Define epoch length and stop the epoch self.state.epoch_length = iter_counter - if self.state.max_iters is not None: - self.state.max_epochs = math.ceil(self.state.max_iters / self.state.epoch_length) break # Should exit while loop if we can not iterate if should_exit: - if not self._is_done(self.state): - total_iters = ( - self.state.epoch_length * self.state.max_epochs - if self.state.max_epochs is not None - else self.state.max_iters - ) + if not self._is_done(self.state) and self.state.max_epochs is not None: + total_iters = self.state.epoch_length * self.state.max_epochs warnings.warn( "Data iterator can not provide data anymore but required total number of " @@ -1104,10 +1081,6 @@ def _run_once_on_dataset_as_gen(self) -> Generator[State, None, float]: if self.state.epoch_length is not None and iter_counter == self.state.epoch_length: break - if self.state.max_iters is not None and self.state.iteration == self.state.max_iters: - self.should_terminate = True - raise _EngineTerminateException() - except _EngineTerminateSingleEpochException: self._fire_event(Events.TERMINATE_SINGLE_EPOCH, iter_counter=iter_counter) self.should_terminate_single_epoch = False @@ -1229,18 +1202,12 @@ def _run_once_on_dataset_legacy(self) -> float: if self.state.epoch_length is None: # Define epoch length and stop the epoch self.state.epoch_length = iter_counter - if self.state.max_iters is not None: - self.state.max_epochs = math.ceil(self.state.max_iters / self.state.epoch_length) break # Should exit while loop if we can not iterate if should_exit: - if not self._is_done(self.state): - total_iters = ( - self.state.epoch_length * self.state.max_epochs - if self.state.max_epochs is not None - else self.state.max_iters - ) + if not self._is_done(self.state) and self.state.max_epochs is not None: + total_iters = self.state.epoch_length * self.state.max_epochs warnings.warn( "Data iterator can not provide data anymore but required total number of " @@ -1271,10 +1238,6 @@ def _run_once_on_dataset_legacy(self) -> float: if self.state.epoch_length is not None and iter_counter == self.state.epoch_length: break - if self.state.max_iters is not None and self.state.iteration == self.state.max_iters: - self.should_terminate = True - raise _EngineTerminateException() - except _EngineTerminateSingleEpochException: self._fire_event(Events.TERMINATE_SINGLE_EPOCH, iter_counter=iter_counter) self.should_terminate_single_epoch = False diff --git a/ignite/engine/events.py b/ignite/engine/events.py index 9dd99348492b..aebffdfe058a 100644 --- a/ignite/engine/events.py +++ b/ignite/engine/events.py @@ -443,7 +443,6 @@ class State: state.dataloader # data passed to engine state.epoch_length # optional length of an epoch state.max_epochs # number of epochs to run - state.max_iters # number of iterations to run state.batch # batch passed to `process_function` state.output # output of `process_function` after a single iteration state.metrics # dictionary with defined metrics if any @@ -470,7 +469,6 @@ def __init__(self, **kwargs: Any) -> None: self.epoch = 0 self.epoch_length: Optional[int] = 
None self.max_epochs: Optional[int] = None - self.max_iters: Optional[int] = None self.output: Optional[int] = None self.batch: Optional[int] = None self.metrics: Dict[str, Any] = {} diff --git a/ignite/handlers/lr_finder.py b/ignite/handlers/lr_finder.py index e3840d5da7d3..3643709a1b61 100644 --- a/ignite/handlers/lr_finder.py +++ b/ignite/handlers/lr_finder.py @@ -105,7 +105,6 @@ def _run( max_iter = trainer.state.epoch_length * trainer.state.max_epochs # type: ignore[operator] if max_iter < num_iter: max_iter = num_iter - trainer.state.max_iters = num_iter trainer.state.max_epochs = ceil(num_iter / trainer.state.epoch_length) # type: ignore[operator] if not trainer.has_event_handler(self._reached_num_iterations): diff --git a/tests/ignite/engine/test_engine.py b/tests/ignite/engine/test_engine.py index 130212426504..d1cc017bf916 100644 --- a/tests/ignite/engine/test_engine.py +++ b/tests/ignite/engine/test_engine.py @@ -1026,47 +1026,6 @@ def switch_dataloader(): trainer.run(data1, max_epochs=10) - def test_run_with_max_iters(self): - max_iters = 8 - engine = Engine(lambda e, b: 1) - engine.run([0] * 20, max_iters=max_iters) - assert engine.state.iteration == max_iters - assert engine.state.max_iters == max_iters - - def test_run_with_max_iters_greater_than_epoch_length(self): - max_iters = 73 - engine = Engine(lambda e, b: 1) - engine.run([0] * 20, max_iters=max_iters) - assert engine.state.iteration == max_iters - - def test_run_with_invalid_max_iters_and_max_epoch(self): - max_iters = 12 - max_epochs = 2 - engine = Engine(lambda e, b: 1) - with pytest.raises( - ValueError, - match=r"Arguments max_iters and max_epochs are mutually exclusive." - "Please provide only max_epochs or max_iters.", - ): - engine.run([0] * 20, max_iters=max_iters, max_epochs=max_epochs) - - def test_epoch_events_fired_max_iters(self): - max_iters = 32 - engine = Engine(lambda e, b: 1) - - @engine.on(Events.EPOCH_COMPLETED) - def fired_event(engine): - assert engine.state.iteration % engine.state.epoch_length == 0 - - engine.run([0] * 10, max_iters=max_iters) - - def test_is_done_with_max_iters(self): - state = State(iteration=100, epoch=1, max_epochs=3, epoch_length=100, max_iters=250) - assert not Engine._is_done(state) - - state = State(iteration=250, epoch=1, max_epochs=3, epoch_length=100, max_iters=250) - assert Engine._is_done(state) - @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_batch_is_released_before_new_one_is_loaded_on_cuda(self): torch.cuda.empty_cache() diff --git a/tests/ignite/handlers/test_lr_finder.py b/tests/ignite/handlers/test_lr_finder.py index 23b823d9ce47..b64b3ab8527b 100644 --- a/tests/ignite/handlers/test_lr_finder.py +++ b/tests/ignite/handlers/test_lr_finder.py @@ -357,7 +357,7 @@ def test_num_iter_is_not_enough(lr_finder, to_save, dummy_engine, dataloader): trainer_with_finder.run(dataloader) assert_output_sizes(lr_finder, dummy_engine) assert dummy_engine.state.iteration != len(dataloader) - assert dummy_engine.state.iteration == 150 + assert dummy_engine.state.iteration == 150 + 1 def test_detach_terminates(lr_finder, to_save, dummy_engine, dataloader):
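With this revert, ``max_iters`` is no longer accepted by ``Engine.run``; a run bounded by a raw iteration count can still be expressed through ``epoch_length`` and ``max_epochs``. A minimal sketch with an illustrative no-op engine:

.. code-block:: python

    from ignite.engine import Engine

    engine = Engine(lambda e, b: None)
    data = list(range(20))

    # previously: engine.run(data, max_iters=50)
    # after the revert: the engine cycles `data` to fill the epoch, so one
    # epoch of length 50 stops at exactly the same iteration count
    engine.run(data, max_epochs=1, epoch_length=50)
    assert engine.state.iteration == 50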