Commit c643290

vishwakftw authored and facebook-github-bot committed
Add derivative for cholesky_inverse (pytorch#26451)
Summary:
Changelog:
- Add derivative of cholesky_inverse. The equations are derived akin to the derivatives of the solve methods, using the technique detailed [here](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=2ahUKEwiXrOjIyM7kAhWstlkKHRxqCDgQFjAAegQIAhAC&url=https%3A%2F%2Fpeople.maths.ox.ac.uk%2Fgilesm%2Ffiles%2FNA-08-01.pdf&usg=AOvVaw0BNISOvM_I9KjPrl0xv1R_).

Pull Request resolved: pytorch#26451

Test Plan:
- Added tests for cholesky_inverse in test_autograd.py

Closes pytorch#4669.

Differential Revision: D17548526

Pulled By: ezyang

fbshipit-source-id: 51aa8b900a8dc4012b01a73d432606f216f62c9d
1 parent 7bdc0c1 commit c643290
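
For reference, a minimal sketch of the derivation for the lower-triangular case (not spelled out in the commit message), assuming `cholesky_inverse` computes \(P = (L L^{\top})^{-1}\) from a lower-triangular factor \(L\); the upper-triangular case, \(P = (U^{\top} U)^{-1}\), is analogous. With incoming gradient \(\bar{G} = \partial \ell / \partial P\):

\[
dP = -P\,\bigl(dL\,L^{\top} + L\,dL^{\top}\bigr)\,P,
\qquad
d\ell = \operatorname{tr}\bigl(\bar{G}^{\top}\,dP\bigr)
      = \operatorname{tr}\Bigl(\bigl(-P(\bar{G} + \bar{G}^{\top})P\,L\bigr)^{\top} dL\Bigr),
\]

so \(\bar{L} = -P(\bar{G} + \bar{G}^{\top})P\,L\) for the lower case and \(\bar{U} = -U\,P(\bar{G} + \bar{G}^{\top})P\) for the upper case, which is the `common_term` expression implemented in `cholesky_inverse_backward` below.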

File tree

3 files changed: +40, -1 lines changed

test/test_autograd.py

Lines changed: 23 additions & 0 deletions
@@ -2442,6 +2442,29 @@ def run_test(upper, dims):
         for upper, dims in product([True, False], [(3, 3), (5, 3, 3), (4, 3, 2, 2)]):
             run_test(upper, dims)
 
+    @skipIfNoLapack
+    def test_cholesky_inverse(self):
+        def _test_with_size(upper, dims):
+            # We need to create a Cholesky factor, which requires the diagonal elements to be positive.
+            # Initializing the diagonal with values that are too small could cause issues when it is
+            # perturbed to obtain the numerical Jacobian, leading to an inconsistent gradcheck.
+            A = torch.randn(*dims)
+            A.diagonal().uniform_(0.1, 5.0)
+            A.requires_grad_()
+
+            def func(A, upper):
+                if upper:
+                    root = A.triu()
+                else:
+                    root = A.tril()
+                return torch.cholesky_inverse(root, upper)
+
+            gradcheck(func, [A, upper])
+            gradgradcheck(func, [A, upper])
+
+        for upper, dims in product([True, False], [(3, 3), (5, 5)]):
+            _test_with_size(upper, dims)
+
     @skipIfNoLapack
     def test_triangular_solve(self):
         def _test_with_size(A_dims, B_dims):
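
The same check can be reproduced outside the test suite; a minimal standalone sketch (not part of the commit), using double precision since gradcheck's default tolerances are tuned for it:

```python
import torch
from torch.autograd import gradcheck, gradgradcheck

A = torch.randn(3, 3, dtype=torch.double)
A.diagonal().uniform_(0.1, 5.0)   # keep the diagonal well away from zero
A.requires_grad_()

def func(A, upper):
    root = A.triu() if upper else A.tril()
    return torch.cholesky_inverse(root, upper)

for upper in (False, True):
    assert gradcheck(lambda A: func(A, upper), (A,))
    assert gradgradcheck(lambda A: func(A, upper), (A,))
```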

tools/autograd/derivatives.yaml

Lines changed: 1 addition & 1 deletion
@@ -208,7 +208,7 @@
   self, input2: cholesky_solve_backward(grad, self, input2, result, upper)
 
 - name: cholesky_inverse(Tensor self, bool upper=False) -> Tensor
-  self: not_implemented("cholesky_inverse")
+  self: cholesky_inverse_backward(grad, self, upper, result)
 
 - name: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
   self: not_implemented("fbgemm_linear_int8_weight_fp32_activation only supported for inference")

tools/autograd/templates/Functions.cpp

Lines changed: 16 additions & 0 deletions
@@ -751,6 +751,22 @@ Tensor cholesky_backward(Tensor grad, bool upper, Tensor L) {
   return grad_input.add(grad_input.transpose(-1, -2)).mul_(0.5); // Symmetrizing the gradient
 }
 
+Tensor cholesky_inverse_backward(Tensor grad, Tensor L, bool upper, Tensor inverse) {
+  Tensor grad_L;
+  if (grad.defined()) {
+    Tensor common_term = grad + grad.transpose(-2, -1);
+    common_term = at::matmul(inverse, at::matmul(common_term, inverse));
+    if (upper) {
+      grad_L = -at::matmul(L, common_term);
+    } else {
+      grad_L = -at::matmul(common_term, L);
+    }
+  } else {
+    grad_L = at::zeros({1}, L.options()).expand_as(L);
+  }
+  return grad_L;
+}
+
 Tensor split_with_sizes_backward(const std::vector<torch::autograd::Variable> &grads,
     IntArrayRef split_sizes, int64_t dim, IntArrayRef sizes, const at::TensorOptions &options) {
   dim = at::maybe_wrap_dim(dim, sizes.size());
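
As a sanity check (not part of the commit), the closed-form gradient above can be compared against what autograd produces end to end; the `tril` in the graph mirrors how the test builds the factor:

```python
import torch

torch.manual_seed(0)
A = torch.randn(4, 4, dtype=torch.double)
A.diagonal().uniform_(0.1, 5.0)
A.requires_grad_()

L = A.tril()                               # lower-triangular factor, part of the graph
P = torch.cholesky_inverse(L)              # P = (L L^T)^{-1}
G = torch.randn_like(P)                    # incoming gradient \bar{G}
auto_grad, = torch.autograd.grad(P, A, grad_outputs=G)

with torch.no_grad():
    manual = -(P @ (G + G.T) @ P) @ L      # \bar{L} = -P (\bar{G} + \bar{G}^T) P L
    manual = manual.tril()                 # project through the tril used in the graph
print(torch.allclose(auto_grad, manual))   # expected: True
```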
