
Commit c1bd8c6

Author: Carlos Araya
Committed: Oct 9, 2019

    Merge remote-tracking branch 'upstream/master'

2 parents: adcbb0e + ea20256
24 files changed: +1288 -422 lines
 

README.md  (+26 -27)

@@ -120,16 +120,15 @@ class ToyModel(nn.Module):
         self.lin1 = nn.Linear(3, 3)
         self.relu = nn.ReLU()
         self.lin2 = nn.Linear(3, 2)
-        self.sigmoid = nn.Sigmoid()
 
         # initialize weights and biases
-        self.lin1.weight = nn.Parameter(torch.arange(0.0, 9.0).view(3, 3))
+        self.lin1.weight = nn.Parameter(torch.arange(-4.0, 5.0).view(3, 3))
         self.lin1.bias = nn.Parameter(torch.zeros(1,3))
-        self.lin2.weight = nn.Parameter(torch.arange(0.0, 6.0).view(2, 3))
+        self.lin2.weight = nn.Parameter(torch.arange(-3.0, 3.0).view(2, 3))
         self.lin2.bias = nn.Parameter(torch.ones(1,2))
 
     def forward(self, input):
-        return self.sigmoid(self.lin2(self.relu(self.lin1(input))))
+        return self.lin2(self.relu(self.lin1(input)))
 ```
 
 Let's create an instance of our model and set it to eval mode.
@@ -176,9 +175,9 @@ print('IG Attributions: ', attributions, ' Convergence Delta: ', delta)
 ```
 Output:
 ```
-IG Attributions: tensor([[0.0628, 0.1314, 0.0747],
-                         [0.0930, 0.0120, 0.1639]])
-Convergence Delta: tensor([0., 0.])
+IG Attributions: tensor([[-0.5922, -1.5497, -1.0067],
+                         [ 0.0000, -0.2219, -5.1991]])
+Convergence Delta: tensor([2.3842e-07, -4.7684e-07])
 ```
 The algorithm outputs an attribution score for each input element and a
 convergence delta. The lower the absolute value of the convergence delta the better
@@ -217,9 +216,9 @@ print('GradientShap Attributions: ', attributions, ' Convergence Delta: ', delta
 ```
 Output
 ```
-GradientShap Attributions: tensor([[ 0.0008, 0.0019, 0.0009],
-                                   [ 0.1892, -0.0045, 0.2445]])
-Convergence Delta: tensor([-0.2681, -0.2633, -0.2607, -0.2655, -0.2689, -0.2689, 1.4493, -0.2688])
+GradientShap Attributions: tensor([[-0.1542, -1.6229, -1.5835],
+                                   [-0.3916, -0.2836, -4.6851]])
+Convergence Delta: tensor([ 0.0000, -0.0005, -0.0029, -0.0084, -0.0087, -0.0405, 0.0000, -0.0084])
 
 ```
 Deltas are computed for each `n_samples * input.shape[0]` example. The user can,
@@ -243,8 +242,8 @@ print('DeepLift Attributions: ', attributions, ' Convergence Delta: ', delta)
 ```
 Output
 ```
-DeepLift Attributions: tensor([[0.0628, 0.1314, 0.0747],
-                               [0.0930, 0.0120, 0.1639]])
+DeepLift Attributions: tensor([[-0.5922, -1.5497, -1.0067],
+                               [ 0.0000, -0.2219, -5.1991]])
 Convergence Delta: tensor([0., 0.])
 ```
 DeepLift assigns similar attribution scores as Integrated Gradients to inputs,
@@ -269,12 +268,12 @@ print('DeepLiftSHAP Attributions: ', attributions, ' Convergence Delta: ', delta
 ```
 Output
 ```
-DeepLiftShap Attributions: tensor([[0.0627, 0.1313, 0.0747],
-                                   [0.0929, 0.0120, 0.1637]], grad_fn=<MeanBackward1>)
-Convergence Delta: tensor([-2.9802e-08, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
-                            0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.9802e-08,
-                            0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
-                            0.0000e+00, 0.0000e+00, 2.9802e-08, 0.0000e+00, 2.9802e-08])
+DeepLiftShap Attributions: tensor([[-5.9169e-01, -1.5491e+00, -1.0076e+00],
+                                   [-4.7101e-03, -2.2300e-01, -5.1926e+00]], grad_fn=<MeanBackward1>)
+Convergence Delta: tensor([-4.6120e-03, -1.6267e-03, -5.1045e-04, -1.4184e-03, -6.8886e-03,
+                           -2.2224e-02, 0.0000e+00, -2.8790e-02, -4.1285e-03, -2.7295e-02,
+                           -3.2349e-03, -1.6265e-03, -4.7684e-07, -1.4191e-03, -6.8889e-03,
+                           -2.2224e-02, 0.0000e+00, -2.4792e-02, -4.1289e-03, -2.7296e-02])
 ```
 `DeepLiftShap` uses `DeepLift` to compute attribution score for each
 input-baseline pair and averages it for each input across all baselines.
@@ -303,10 +302,10 @@ print('IG + SmoothGrad Attributions: ', attributions, ' Convergence Delta: ', de
 ```
 Output
 ```
-IG + SmoothGrad Attributions: tensor([[0.0631, 0.1335, 0.0723],
-                                      [0.0911, 0.0142, 0.1636]])
-Convergence Delta: tensor([ 1.4901e-07, -8.9407e-08, 1.1921e-07,
-                            1.4901e-07, 1.1921e-07, -1.7881e-07, -5.9605e-08, 5.9605e-08])
+IG + SmoothGrad Attributions: tensor([[-0.4574, -1.5493, -1.0893],
+                                      [ 0.0000, -0.2647, -5.1619]])
+Convergence Delta: tensor([ 0.0000e+00, 2.3842e-07, 0.0000e+00, -2.3842e-07, 0.0000e+00,
+                           -4.7684e-07, 0.0000e+00, -4.7684e-07])
 
 ```
 The number of elements in the `delta` tensor is equal to: `n_samples * input.shape[0]`
@@ -334,8 +333,8 @@ print('Neuron Attributions: ', attributions)
 ```
 Output
 ```
-Neuron Attributions: tensor([[0.0106, 0.0247, 0.0150],
-                             [0.0144, 0.0021, 0.0301]])
+Neuron Attributions: tensor([[ 0.0000, 0.0000, 0.0000],
+                             [ 1.3358, 0.0000, -1.6811]])
 ```
 
 Layer conductance shows the importance of neurons for a layer and given input.
@@ -351,9 +350,9 @@ print('Layer Attributions: ', attributions, ' Convergence Delta: ', delta)
 ```
 Outputs
 ```
-Layer Attributions: tensor([[0.0000, 0.0515, 0.1811],
-                            [0.0000, 0.0477, 0.1652]], grad_fn=<SumBackward1>)
-Convergence Delta: tensor([-0.0363, -0.0560])
+Layer Attributions: tensor([[ 0.0000, 0.0000, -3.0856],
+                            [ 0.0000, -0.3488, -4.9638]], grad_fn=<SumBackward1>)
+Convergence Delta: tensor([0.0630, 0.1084])
 ```
 
 Similar to other attribution algorithms that return convergence delta, LayerConductance
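For readers reproducing the updated README numbers, here is a minimal end-to-end sketch stitching the snippets above together. The model follows the new README exactly; the random seed and the `target=0` choice are illustrative assumptions, so printed values may differ from the diff.

```python
import torch
import torch.nn as nn
from captum.attr import IntegratedGradients

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(3, 3)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(3, 2)

        # initialize weights and biases; the new values straddle zero,
        # so attributions (and ReLU gating) can be negative
        self.lin1.weight = nn.Parameter(torch.arange(-4.0, 5.0).view(3, 3))
        self.lin1.bias = nn.Parameter(torch.zeros(1, 3))
        self.lin2.weight = nn.Parameter(torch.arange(-3.0, 3.0).view(2, 3))
        self.lin2.bias = nn.Parameter(torch.ones(1, 2))

    def forward(self, input):
        return self.lin2(self.relu(self.lin1(input)))

model = ToyModel()
model.eval()

torch.manual_seed(123)   # assumed seed, for repeatability only
input = torch.rand(2, 3)

ig = IntegratedGradients(model)
attributions, delta = ig.attribute(input, target=0, return_convergence_delta=True)
print('IG Attributions:', attributions, 'Convergence Delta:', delta)
```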

captum/attr/_core/deep_lift.py  (+48 -8)

@@ -101,11 +101,31 @@ def attribute(
                     If inputs is a tuple of tensors, baselines must also be
                     a tuple of tensors, with matching dimensions to inputs.
                     Default: zero tensor for each input tensor
-        target (int, optional): Output index for which gradient is computed
-                    (for classification cases, this is the target class).
+        target (int, tuple, tensor or list, optional): Output indices for
+                    which gradients are computed (for classification cases,
+                    this is usually the target class).
                     If the network returns a scalar value per example,
-                    no target index is necessary. (Note: Tuples for multi
-                    -dimensional output indices will be supported soon.)
+                    no target index is necessary.
+                    For general 2D outputs, targets can be either:
+
+                    - a single integer or a tensor containing a single
+                      integer, which is applied to all input examples
+
+                    - a list of integers or a 1D tensor, with length matching
+                      the number of examples in inputs (dim 0). Each integer
+                      is applied as the target for the corresponding example.
+
+                    For outputs with > 2 dimensions, targets can be either:
+
+                    - A single tuple, which contains #output_dims - 1
+                      elements. This target index is applied to all examples.
+
+                    - A list of tuples with length equal to the number of
+                      examples in inputs (dim 0), and each tuple containing
+                      #output_dims - 1 elements. Each tuple is applied as the
+                      target for the corresponding example.
+
+                    Default: None
         additional_forward_args (tuple, optional): If the forward function
                     requires additional arguments other than the inputs for
                     which attributions should not be computed, this argument

@@ -372,11 +392,31 @@ def attribute(
The identical `target` replacement is applied to the docstring of the second attribute() method in this file; its surrounding context ("first dimension. It is recommended that the number of samples in the baselines' tensors is larger than one. Default: zero tensor for each input tensor" before, the additional_forward_args entry after) is unchanged.
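Since every method below receives this same expanded `target` contract, a short hedged sketch of the accepted forms may help; the model, batch shapes, and index values here are illustrative assumptions, not part of the commit.

```python
import torch
import torch.nn as nn
from captum.attr import DeepLift

model = nn.Sequential(nn.Linear(3, 5))   # 2D output: (batch, 5)
dl = DeepLift(model)
inputs = torch.rand(4, 3)

# A single integer: class 2 is the target for every example in the batch.
attr = dl.attribute(inputs, target=2)

# A list of integers (or a 1D tensor) of length batch_size:
# one target class per example.
attr = dl.attribute(inputs, target=[0, 4, 1, 3])
attr = dl.attribute(inputs, target=torch.tensor([0, 4, 1, 3]))

# For outputs with more than 2 dimensions, e.g. (batch, d1, d2), the target
# indexes the non-batch dimensions instead:
#   dl.attribute(inputs, target=(1, 2))                            # one tuple for all
#   dl.attribute(inputs, target=[(0, 1), (1, 2), (0, 0), (2, 3)])  # per example
```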

captum/attr/_core/gradient_shap.py  (+24 -4)

@@ -98,11 +98,31 @@ def attribute(
The same expanded `target` docstring replacement as in deep_lift.py above, including the added "Default: None" line; the preceding baselines context ("corresponds to the input with the same index in the inputs tuple. Default: 0.0") and the following additional_forward_args entry are unchanged.

captum/attr/_core/input_x_gradient.py  (+24 -4)

@@ -31,11 +31,31 @@ def attribute(self, inputs, target=None, additional_forward_args=None):
The same expanded `target` docstring replacement as in deep_lift.py above, including the added "Default: None" line; the surrounding inputs and additional_forward_args context is unchanged.

captum/attr/_core/integrated_gradients.py  (+23 -4)

@@ -61,11 +61,30 @@ def attribute(
The same expanded `target` docstring replacement as in deep_lift.py above; here the "Default: None" line already existed as context, so only the description above it changes.

captum/attr/_core/internal_influence.py  (+23 -4)

@@ -74,11 +74,30 @@ def attribute(
The same expanded `target` docstring replacement as in deep_lift.py above; "Default: None" already existed as context.

captum/attr/_core/layer_conductance.py  (+23 -4)

@@ -80,11 +80,30 @@ def attribute(
The same expanded `target` docstring replacement as in deep_lift.py above; "Default: None" already existed as context.

captum/attr/_core/layer_gradient_x_activation.py  (+23 -4)

@@ -40,11 +40,30 @@ def attribute(self, inputs, target=None, additional_forward_args=None):
The same expanded `target` docstring replacement as in deep_lift.py above; "Default: None" already existed as context.

captum/attr/_core/neuron_conductance.py  (+23 -4)

@@ -79,11 +79,30 @@ def attribute(
The same expanded `target` docstring replacement as in deep_lift.py above; "Default: None" already existed as context.

captum/attr/_core/saliency.py  (+23 -4)

@@ -37,11 +37,30 @@ def attribute(self, inputs, target=None, abs=True, additional_forward_args=None)
The same expanded `target` docstring replacement as in deep_lift.py above; "Default: None" already existed as context, followed by the unchanged abs entry ("abs (bool, optional): Returns absolute value of gradients if set to True, otherwise returns the (signed) gradients if ...").

captum/attr/_utils/gradient.py  (+8 -4)

@@ -83,8 +83,10 @@ def compute_gradients(
     with torch.autograd.set_grad_enabled(True):
         # runs forward pass
         output = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
-        assert output[0].numel() == 1, "Target not provided when necessary, cannot"
-        "take gradient with respect to multiple outputs."
+        assert output[0].numel() == 1, (
+            "Target not provided when necessary, cannot"
+            " take gradient with respect to multiple outputs."
+        )
         # torch.unbind(forward_out) is a list of scalar tensor tuples and
         # contains batch_size * #steps elements
         grads = torch.autograd.grad(torch.unbind(output), inputs)
@@ -262,8 +264,10 @@ def forward_hook(module, inp, out):
 
     hook = layer.register_forward_hook(forward_hook)
     output = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
-    assert output[0].numel() == 1, "Target not provided when necessary, cannot"
-    "take gradient with respect to multiple outputs."
+    assert output[0].numel() == 1, (
+        "Target not provided when necessary, cannot"
+        " take gradient with respect to multiple outputs."
+    )
     # Remove unnecessary forward hook.
     hook.remove()
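The assert fix above addresses a classic Python pitfall: in the old code the second string literal sat on its own line, so it was a separate no-op expression and the assert message ended after "cannot". A standalone illustration (not Captum code):

```python
# Buggy: the assert statement ends at the newline, so the literal below is
# an unused expression and never becomes part of the message.
assert 2 + 2 == 4, "Target not provided when necessary, cannot"
"take gradient with respect to multiple outputs."  # dead code

# Fixed: parentheses make the two literals one implicitly concatenated
# message, and the leading space keeps "cannot take" from running together.
assert 2 + 2 == 4, (
    "Target not provided when necessary, cannot"
    " take gradient with respect to multiple outputs."
)
```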

captum/insights/api.py  (+126 -99)

@@ -10,11 +10,12 @@
 from torch import Tensor
 from torch.nn import Module
 
-PredictionScore = namedtuple("PredictionScore", "score label")
+OutputScore = namedtuple("OutputScore", "score index label")
 VisualizationOutput = namedtuple(
-    "VisualizationOutput", "feature_outputs actual predicted"
+    "VisualizationOutput", "feature_outputs actual predicted active_index"
 )
 Contribution = namedtuple("Contribution", "name percent")
+SampleCache = namedtuple("SampleCache", "inputs additional_forward_args label")
 
 
 class FilterConfig(NamedTuple):
@@ -44,6 +45,7 @@ def __init__(
         features: Union[List[BaseFeature], BaseFeature],
         dataset: Iterable[Data],
         score_func: Optional[Callable] = None,
+        use_label_for_attr: bool = True,
     ):
         if not isinstance(models, List):
             models = [models]
@@ -56,20 +58,34 @@ def __init__(
         self.features = features
         self.dataset = dataset
         self.score_func = score_func
+        self._outputs = []
         self._config = FilterConfig(steps=25, prediction="all", classes=[], count=4)
+        self._use_label_for_attr = use_label_for_attr
+
+    def _calculate_attribution_from_cache(
+        self, index: int, target: Optional[Tensor]
+    ) -> VisualizationOutput:
+        c = self._outputs[index][1]
+        return self._calculate_vis_output(
+            c.inputs, c.additional_forward_args, c.label, torch.tensor(target)
+        )
 
     def _calculate_attribution(
         self,
         net: Module,
         baselines: Optional[List[Tuple[Tensor, ...]]],
         data: Tuple[Tensor, ...],
         additional_forward_args: Optional[Tuple[Tensor, ...]],
-        label: Optional[Tensor],
+        label: Optional[Union[Tensor]],
     ) -> Tensor:
         ig = IntegratedGradients(net)
         # TODO support multiple baselines
         baseline = baselines[0] if len(baselines) > 0 else None
-        label = None if label is None or label.nelement() == 0 else label
+        label = (
+            None
+            if not self._use_label_for_attr or label is None or label.nelement() == 0
+            else label
+        )
         attr_ig = ig.attribute(
             data,
             baselines=baseline,
@@ -98,11 +114,11 @@ def render(self, blocking=False, debug=False):
 
     def _get_labels_from_scores(
         self, scores: Tensor, indices: Tensor
-    ) -> List[PredictionScore]:
+    ) -> List[OutputScore]:
         pred_scores = []
         for i in range(len(indices)):
-            score = scores[i].item()
-            pred_scores.append(PredictionScore(score, self.classes[indices[i]]))
+            score = scores[i]
+            pred_scores.append(OutputScore(score, indices[i], self.classes[indices[i]]))
         return pred_scores
 
     def _transform(
@@ -123,7 +139,7 @@ def _transform(
         transformed_inputs = transforms(transformed_inputs)
 
         if batch:
-            transformed_inputs.unsqueeze_(0)
+            transformed_inputs = transformed_inputs.unsqueeze(0)
 
         return transformed_inputs
 
@@ -141,22 +157,20 @@ def _calculate_net_contrib(self, attrs_per_input_feature: List[Tensor]):
         return net_contrib.tolist()
 
     def _predictions_matches_labels(
-        self,
-        predicted_scores: List[PredictionScore],
-        actual_labels: Union[str, List[str]],
+        self, predicted_scores: List[OutputScore], labels: Union[str, List[str]]
     ) -> bool:
         if len(predicted_scores) == 0:
             return False
 
         predicted_label = predicted_scores[0].label
 
-        if isinstance(actual_labels, List):
-            return predicted_label in actual_labels
+        if isinstance(labels, List):
+            return predicted_label in labels
 
-        return actual_labels == predicted_label
+        return labels == predicted_label
 
     def _should_keep_prediction(
-        self, predicted_scores: List[PredictionScore], actual_label: str
+        self, predicted_scores: List[OutputScore], actual_label: str
    ) -> bool:
         # filter by class
         if len(self._config.classes) != 0:
@@ -179,104 +193,117 @@ def _should_keep_prediction(
 
         return True
 
-    def _get_outputs(self) -> List[VisualizationOutput]:
-        batch_data = next(self.dataset)
+    def _calculate_vis_output(
+        self, inputs, additional_forward_args, label, target=None
+    ) -> Optional[VisualizationOutput]:
         net = self.models[0]  # TODO process multiple models
-        vis_outputs = []
 
-        for inputs, additional_forward_args, label in _batched_generator(
-            inputs=batch_data.inputs,
-            additional_forward_args=batch_data.additional_args,
-            target_ind=batch_data.labels,
-            internal_batch_size=1,  # should be 1 until we have batch label support
-        ):
-            # initialize baselines
-            baseline_transforms_len = len(self.features[0].baseline_transforms or [])
-            baselines = [
-                [None] * len(self.features) for _ in range(baseline_transforms_len)
-            ]
-            transformed_inputs = list(inputs)
-
-            for feature_i, feature in enumerate(self.features):
-                if feature.input_transforms is not None:
-                    transformed_inputs[feature_i] = self._transform(
-                        feature.input_transforms, transformed_inputs[feature_i], True
-                    )
-                if feature.baseline_transforms is not None:
-                    assert baseline_transforms_len == len(
-                        feature.baseline_transforms
-                    ), "Must have same number of baselines across all features"
-
-                    for baseline_i, baseline_transform in enumerate(
-                        feature.baseline_transforms
-                    ):
-                        baselines[baseline_i][feature_i] = self._transform(
-                            baseline_transform, transformed_inputs[feature_i], True
-                        )
-
-            outputs = _run_forward(
-                net, tuple(transformed_inputs), additional_forward_args
-            )
+        # initialize baselines
+        baseline_transforms_len = len(self.features[0].baseline_transforms or [])
+        baselines = [
+            [None] * len(self.features) for _ in range(baseline_transforms_len)
+        ]
+        transformed_inputs = list(inputs)
+
+        # transformed_inputs = list([i.clone() for i in inputs])
+        for feature_i, feature in enumerate(self.features):
+            if feature.input_transforms is not None:
+                transformed_inputs[feature_i] = self._transform(
+                    feature.input_transforms, transformed_inputs[feature_i], True
+                )
+            if feature.baseline_transforms is not None:
+                assert baseline_transforms_len == len(
+                    feature.baseline_transforms
+                ), "Must have same number of baselines across all features"
+
+                for baseline_i, baseline_transform in enumerate(
+                    feature.baseline_transforms
+                ):
+                    baselines[baseline_i][feature_i] = self._transform(
+                        baseline_transform, transformed_inputs[feature_i], True
+                    )
 
-            if self.score_func is not None:
-                outputs = self.score_func(outputs)
-
-            if outputs.nelement() == 1:
-                scores = outputs
-                predicted = scores.round().to(torch.int)
-            else:
-                scores, predicted = outputs.topk(min(4, outputs.shape[-1]))
-
-            scores = scores.cpu().squeeze(0)
-            predicted = predicted.cpu().squeeze(0)
-
-            actual_label = self.classes[label[0]] if label is not None else None
-            predicted_scores = self._get_labels_from_scores(scores, predicted)
-
-            # Filter based on UI configuration
-            if not self._should_keep_prediction(predicted_scores, actual_label):
-                continue
-
-            baselines = [tuple(b) for b in baselines]
-
-            # attributions are given per input*
-            # inputs given to the model are described via `self.features`
-            #
-            # *an input contains multiple features that represent it
-            # e.g. all the pixels that describe an image is an input
-            attrs_per_input_feature = self._calculate_attribution(
-                net,
-                baselines,
-                tuple(transformed_inputs),
-                additional_forward_args,
-                label,
+        outputs = _run_forward(net, tuple(transformed_inputs), additional_forward_args)
+
+        if self.score_func is not None:
+            outputs = self.score_func(outputs)
+
+        if outputs.nelement() == 1:
+            scores = outputs
+            predicted = scores.round().to(torch.int)
+        else:
+            scores, predicted = outputs.topk(min(4, outputs.shape[-1]))
+
+        scores = scores.cpu().squeeze(0)
+        predicted = predicted.cpu().squeeze(0)
+
+        if label is not None and len(label) > 0:
+            actual_label = OutputScore(
+                score=0, index=label[0], label=self.classes[label[0]]
             )
+        else:
+            actual_label = None
 
-            net_contrib = self._calculate_net_contrib(attrs_per_input_feature)
+        predicted_scores = self._get_labels_from_scores(scores, predicted)
 
-            # the features per input given
-            features_per_input = [
-                feature.visualize(attr, data, contrib)
-                for feature, attr, data, contrib in zip(
-                    self.features, attrs_per_input_feature, inputs, net_contrib
-                )
-            ]
+        # Filter based on UI configuration
+        if not self._should_keep_prediction(predicted_scores, actual_label):
+            return None
+
+        baselines = [tuple(b) for b in baselines]
+
+        if target is None:
+            target = predicted_scores[0].index if len(predicted_scores) > 0 else None
+
+        # attributions are given per input*
+        # inputs given to the model are described via `self.features`
+        #
+        # *an input contains multiple features that represent it
+        # e.g. all the pixels that describe an image is an input
+
+        attrs_per_input_feature = self._calculate_attribution(
+            net, baselines, tuple(transformed_inputs), additional_forward_args, target
+        )
+
+        net_contrib = self._calculate_net_contrib(attrs_per_input_feature)
 
-            output = VisualizationOutput(
-                feature_outputs=features_per_input,
-                actual=actual_label,
-                predicted=predicted_scores,
+        # the features per input given
+        features_per_input = [
+            feature.visualize(attr, data, contrib)
+            for feature, attr, data, contrib in zip(
+                self.features, attrs_per_input_feature, inputs, net_contrib
             )
+        ]
 
-            vis_outputs.append(output)
+        return VisualizationOutput(
+            feature_outputs=features_per_input,
+            actual=actual_label,
+            predicted=predicted_scores,
+            active_index=target if target is not None else actual_label.index,
+        )
+
+    def _get_outputs(self) -> List[VisualizationOutput]:
+        batch_data = next(self.dataset)
+        vis_outputs = []
+
+        for inputs, additional_forward_args, label in _batched_generator(
+            inputs=batch_data.inputs,
+            additional_forward_args=batch_data.additional_args,
+            target_ind=batch_data.labels,
+            internal_batch_size=1,  # should be 1 until we have batch label support
+        ):
+            output = self._calculate_vis_output(inputs, additional_forward_args, label)
+            if output is not None:
+                cache = SampleCache(inputs, additional_forward_args, label)
+                vis_outputs.append((output, cache))
 
         return vis_outputs
 
     def visualize(self):
-        output_list = []
-        while len(output_list) < self._config.count:
+        self._outputs = []
+        while len(self._outputs) < self._config.count:
             try:
-                output_list.extend(self._get_outputs())
+                self._outputs.extend(self._get_outputs())
             except StopIteration:
                 break
-        return output_list
+        return [o[0] for o in self._outputs]
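Taken together, the api.py changes cache each rendered sample next to its output so that a later click on a different label can recompute attributions without re-reading the dataset. A small runnable sketch of the new records and how they are used; the sample values ("deer", the shapes) are illustrative assumptions:

```python
from collections import namedtuple
import torch

# The new records introduced in this commit (mirrored here for illustration):
OutputScore = namedtuple("OutputScore", "score index label")
SampleCache = namedtuple("SampleCache", "inputs additional_forward_args label")

# _get_outputs() stores one (VisualizationOutput, SampleCache) pair per sample;
# visualize() returns [o[0] for o in self._outputs], i.e. only the outputs.
cache = SampleCache(inputs=(torch.rand(1, 3),), additional_forward_args=None,
                    label=torch.tensor([2]))

# A clicked label arrives as an index; _calculate_attribution_from_cache(i, t)
# looks up self._outputs[i][1] and replays _calculate_vis_output with target=t.
clicked = OutputScore(score=torch.tensor(0.83), index=4, label="deer")
print(cache.label, clicked.index)
```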

captum/insights/features.py  (+6 -6)

@@ -60,8 +60,8 @@ def visualization_type(self) -> str:
         return "image"
 
     def visualize(self, attribution, data, contribution_frac) -> FeatureOutput:
-        attribution.squeeze_()
-        data.squeeze_()
+        attribution = attribution.squeeze()
+        data = data.squeeze()
         data_t = np.transpose(data.cpu().detach().numpy(), (1, 2, 0))
         attribution_t = np.transpose(
             attribution.squeeze().cpu().detach().numpy(), (1, 2, 0)
@@ -111,8 +111,8 @@ def visualization_type(self) -> str:
     def visualize(self, attribution, data, contribution_frac) -> FeatureOutput:
         text = self.visualization_transform(data)
 
-        attribution.squeeze_(0)
-        data.squeeze_(0)
+        attribution = attribution.squeeze(0)
+        data = data.squeeze(0)
         attribution = attribution.sum(dim=1)
 
         # L-Infinity norm
@@ -142,8 +142,8 @@ def visualization_type(self) -> str:
         return "general"
 
     def visualize(self, attribution, data, contribution_frac) -> FeatureOutput:
-        attribution.squeeze_(0)
-        data.squeeze_(0)
+        attribution = attribution.squeeze(0)
+        data = data.squeeze(0)
 
         # L-2 norm
         normalized_attribution = attribution / attribution.norm()
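The switch from `squeeze_` to `squeeze` matters beyond style: the trailing underscore is PyTorch's in-place convention, so `attribution.squeeze_()` mutated the caller's tensor, which is unsafe now that inputs are cached and reused by the new `/attribute` flow. A quick demonstration:

```python
import torch

t = torch.zeros(1, 3)

# Out-of-place: returns a view with the size-1 dim removed; t keeps its shape.
s = t.squeeze()
print(s.shape, t.shape)  # torch.Size([3]) torch.Size([1, 3])

# In-place: mutates t itself, surprising any other code holding a reference.
t.squeeze_()
print(t.shape)  # torch.Size([3])
```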

captum/insights/frontend/public/index.html  (+1 -1)

@@ -3,7 +3,7 @@
 <head>
   <meta charset="utf-8" />
   <meta name="viewport" content="width=device-width, initial-scale=1" />
-  <title>Captum Visualization</title>
+  <title>Captum Insights</title>
 </head>
 <body>
   <noscript>You need to enable JavaScript to run this app.</noscript>

captum/insights/frontend/src/App.css  (+16 -0)

@@ -118,6 +118,7 @@ button {
   border: solid 1px #ee4c2c;
   text-align: center;
   font-weight: 600;
+  font-size: 1em;
   border-radius: 4px;
   padding: 6px 8px;
   display: inline-block;
@@ -150,13 +151,28 @@ button {
   display: block;
 }
 
+.loading {
+  margin-top: 150px;
+  position: absolute;
+  width: 100%;
+  align-items: center;
+  justify-content: center;
+  display: flex;
+}
+
 .panel {
   margin: 16px;
   padding: 24px;
   background: white;
   border-radius: 8px;
   display: flex;
   box-shadow: 0px 3px 6px 0px rgba(0, 0, 0, 0.18);
+  transition: opacity 0.2s; /* for loading */
+}
+
+.panel--loading {
+  opacity: 0.5;
+  pointer-events: none; /* disables all interactions inside panel */
 }
 
 .panel--center {

captum/insights/frontend/src/App.js  (+114 -35)

@@ -312,49 +312,104 @@ class Contributions extends React.Component {
   }
 }
 
+class LabelButton extends React.Component {
+  onClick = e => {
+    e.preventDefault();
+    this.props.onTargetClick(this.props.labelIndex, this.props.instance);
+  };
+
+  render() {
+    return (
+      <button
+        onClick={this.onClick}
+        className={cx({
+          btn: true,
+          "btn--solid": this.props.active,
+          "btn--outline": !this.props.active
+        })}
+      >
+        {this.props.children}
+      </button>
+    );
+  }
+}
+
 class Visualization extends React.Component {
+  constructor(props) {
+    super(props);
+    this.state = {
+      loading: false
+    };
+  }
+
+  onTargetClick = (labelIndex, instance) => {
+    this.setState({ loading: true });
+    this.props.onTargetClick(labelIndex, instance, () =>
+      this.setState({ loading: false })
+    );
+  };
+
   render() {
     const data = this.props.data;
     const features = data.feature_outputs.map(f => <Feature data={f} />);
 
     return (
-      <div className="panel panel--long">
-        <div className="panel__column">
-          <div className="panel__column__title">Predicted</div>
-          <div className="panel__column__body">
-            {data.predicted.map((p, i) => (
+      <>
+        {this.state.loading && (
+          <div className="loading">
+            <Spinner />
+          </div>
+        )}
+        <div
+          className={cx({
+            panel: true,
+            "panel--long": true,
+            "panel--loading": this.state.loading
+          })}
+        >
+          <div className="panel__column">
+            <div className="panel__column__title">Predicted</div>
+            <div className="panel__column__body">
+              {data.predicted.map(p => (
+                <div className="row row--padding">
+                  <LabelButton
+                    onTargetClick={this.onTargetClick}
+                    labelIndex={p.index}
+                    instance={this.props.instance}
+                    active={p.index === data.active_index}
+                  >
+                    {p.label} ({p.score.toFixed(3)})
+                  </LabelButton>
+                </div>
+              ))}
+            </div>
+          </div>
+          <div className="panel__column">
+            <div className="panel__column__title">Label</div>
+            <div className="panel__column__body">
               <div className="row row--padding">
-                <div
-                  className={cx({
-                    btn: true,
-                    "btn--solid": i === 0,
-                    "btn--outline": i !== 0
-                  })}
+                <LabelButton
+                  onTargetClick={this.onTargetClick}
+                  labelIndex={data.actual.index}
+                  instance={this.props.instance}
+                  active={data.actual.index === data.active_index}
                 >
-                  {p.label} ({p.score.toFixed(3)})
-                </div>
+                  {data.actual.label}
+                </LabelButton>
               </div>
-            ))}
-          </div>
-        </div>
-        <div className="panel__column">
-          <div className="panel__column__title">Label</div>
-          <div className="panel__column__body">
-            <div className="row row--padding">
-              <div className="btn btn--outline">{data.actual}</div>
             </div>
           </div>
-        </div>
-        <div className="panel__column">
-          <div className="panel__column__title">Contribution</div>
-          <div className="panel__column__body">
-            <div className="bar-chart">
-              <Contributions feature_outputs={data.feature_outputs} />
+          <div className="panel__column">
+            <div className="panel__column__title">Contribution</div>
+            <div className="panel__column__body">
+              <div className="bar-chart">
+                <Contributions feature_outputs={data.feature_outputs} />
+              </div>
             </div>
           </div>
+          <div className="panel__column panel__column--stretch">{features}</div>
         </div>
-        <div className="panel__column panel__column--stretch">{features}</div>
-      </div>
+      </>
     );
   }
 }
@@ -385,7 +440,12 @@ function Visualizations(props) {
   return (
     <div className="viz">
      {props.data.map((v, i) => (
-        <Visualization data={v} key={i} />
+        <Visualization
+          data={v}
+          instance={i}
+          key={i}
+          onTargetClick={props.onTargetClick}
+        />
       ))}
     </div>
   );
@@ -404,10 +464,8 @@ class App extends React.Component {
 
   _fetchInit = () => {
     fetch("/init")
-      .then(response => response.json())
-      .then(response => {
-        this.setState({ config: response });
-      });
+      .then(r => r.json())
+      .then(r => this.setState({ config: r }));
   };
 
   fetchData = filter_config => {
@@ -423,6 +481,23 @@
       .then(response => this.setState({ data: response, loading: false }));
   };
 
+  onTargetClick = (labelIndex, instance, callback) => {
+    fetch("/attribute", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json"
+      },
+      body: JSON.stringify({ labelIndex, instance })
+    })
+      .then(response => response.json())
+      .then(response => {
+        const data = Object.assign([], this.state.data);
+        data[instance] = response;
+        this.setState({ data });
+        callback();
+      });
+  };
+
   render() {
     return (
       <div className="app">
@@ -432,7 +507,11 @@
           config={this.state.config}
           key={this.state.config}
         />
-        <Visualizations data={this.state.data} loading={this.state.loading} />
+        <Visualizations
+          data={this.state.data}
+          loading={this.state.loading}
+          onTargetClick={this.onTargetClick}
+        />
       </div>
     );
   }

captum/insights/server.py  (+13 -0)

@@ -4,6 +4,7 @@
 from time import sleep
 from typing import Optional
 
+from torch import Tensor
 from flask import Flask, jsonify, render_template, request
 
 app = Flask(
@@ -14,6 +15,8 @@
 
 
 def namedtuple_to_dict(obj):
+    if isinstance(obj, Tensor):
+        return obj.item()
     if hasattr(obj, "_asdict"):  # detect namedtuple
         return dict(zip(obj._fields, (namedtuple_to_dict(item) for item in obj)))
     elif isinstance(obj, str):  # iterables - strings
@@ -28,6 +31,16 @@ def namedtuple_to_dict(obj):
     return obj
 
 
+@app.route("/attribute", methods=["POST"])
+def attribute():
+    r = request.json
+    return jsonify(
+        namedtuple_to_dict(
+            visualizer._calculate_attribution_from_cache(r["instance"], r["labelIndex"])
+        )
+    )
+
+
 @app.route("/fetch", methods=["POST"])
 def fetch():
     visualizer._update_config(request.json)
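The new `/attribute` endpoint can also be exercised directly. A hedged example using the `requests` library; the host and port are assumed Flask defaults and may differ in a real Insights deployment:

```python
import requests

# Recompute attributions for cached sample 0, using class 3 as the target.
# The JSON keys mirror what App.js sends from onTargetClick.
resp = requests.post(
    "http://localhost:5000/attribute",
    json={"labelIndex": 3, "instance": 0},
)
print(resp.json())  # a serialized VisualizationOutput for the new target
```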

docs/algorithms.md  (+143 -15)
Large diffs are not rendered by default.

tutorials/CIFAR_TorchVision_Interpret.ipynb  (+48 -17)
Large diffs are not rendered by default.

tutorials/IMDB_TorchText_Interpret.ipynb  (+57 -92)
Large diffs are not rendered by default.

tutorials/Multimodal_VQA_Interpret.ipynb  (+484 -66)
Large diffs are not rendered by default.

tutorials/Resnet_TorchVision_Interpret.ipynb  (+12 -20)
Large diffs are not rendered by default.
Two binary files changed (25.1 KB and 41.3 KB); previews not rendered.
