@@ -2,12 +2,10 @@
 interfaces bridging different backends
 """

-from typing import Any, Callable
+from typing import Any, Callable, Tuple

 import numpy as np
-from jax import numpy as jnp
 import torch
-import tensorflow as tf

 from .cons import backend
 from .backends import get_backend
@@ -19,6 +17,9 @@


 def tensor_to_numpy(t: Tensor) -> Array:
+    from jax import numpy as jnp
+    import tensorflow as tf
+
     if isinstance(t, torch.Tensor):
         return t.numpy()
     if isinstance(t, tf.Tensor) or isinstance(t, tf.Variable):
@@ -28,7 +29,7 @@ def tensor_to_numpy(t: Tensor) -> Array:
     return t


-def general_args_to_numpy(args: Any, same_pytree: bool = False) -> Any:
+def general_args_to_numpy(args: Any, same_pytree: bool = True) -> Any:
     res = []
     alone = False
     if not (isinstance(args, tuple) or isinstance(args, list)):
@@ -46,7 +47,7 @@ def general_args_to_numpy(args: Any, same_pytree: bool = False) -> Any:


 def numpy_args_to_backend(
-    args: Any, same_pytree: bool = False, dtype: Any = None, target_backend: Any = None
+    args: Any, same_pytree: bool = True, dtype: Any = None, target_backend: Any = None
 ) -> Any:
     # TODO(@refraction-ray): switch same_pytree default to True
     if target_backend is None:
@@ -82,13 +83,20 @@ def is_sequence(x: Any) -> bool:
     return False


-def torch_interface(fun: Callable[..., Any]) -> Callable[..., Any]:
+def torch_interface(fun: Callable[..., Any], jit: bool = False) -> Callable[..., Any]:
+    def vjp_fun(x: Tensor, v: Tensor) -> Tuple[Tensor, Tensor]:
+        return backend.vjp(fun, x, v)  # type: ignore
+
+    if jit is True:
+        fun = backend.jit(fun)
+        vjp_fun = backend.jit(vjp_fun)
+
     class F(torch.autograd.Function):  # type: ignore
         @staticmethod
         def forward(ctx: Any, *x: Any) -> Any:  # type: ignore
             ctx.xdtype = [xi.dtype for xi in x]
-            x = general_args_to_numpy(x, same_pytree=True)
-            x = numpy_args_to_backend(x, same_pytree=True)
+            x = general_args_to_numpy(x)
+            x = numpy_args_to_backend(x)
             y = fun(*x)
             if not is_sequence(y):
                 ctx.ydtype = [y.dtype]
@@ -99,25 +107,23 @@ def forward(ctx: Any, *x: Any) -> Any:  # type: ignore
             else:
                 ctx.x = x
             y = numpy_args_to_backend(
-                general_args_to_numpy(y, same_pytree=True),
-                same_pytree=True,
+                general_args_to_numpy(y),
                 target_backend="pytorch",
             )
             return y

         @staticmethod
         def backward(ctx: Any, *grad_y: Any) -> Any:
-            grad_y = general_args_to_numpy(grad_y, same_pytree=True)
+            grad_y = general_args_to_numpy(grad_y)
             grad_y = numpy_args_to_backend(
-                grad_y, dtype=[d for d in ctx.ydtype], same_pytree=True
+                grad_y, dtype=[d for d in ctx.ydtype]
             )  # backend.dtype
             if len(grad_y) == 1:
                 grad_y = grad_y[0]
-            _, g = backend.vjp(fun, ctx.x, grad_y)
+            _, g = vjp_fun(ctx.x, grad_y)
             # a redundency due to current vjp API
             r = numpy_args_to_backend(
-                general_args_to_numpy(g, same_pytree=True),
-                same_pytree=True,
+                general_args_to_numpy(g),
                 dtype=[d for d in ctx.xdtype],  # torchdtype
                 target_backend="pytorch",
             )
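
For context, a rough usage sketch of the new jit option. Nothing below is part of the diff: it assumes this module's global `backend` (imported above from .cons) points to a non-torch backend such as jax, that `torch_interface` is imported from this module, and that the toy function `f` and the backend ops it uses are purely illustrative.

import torch

def f(x):
    # hypothetical function written against the active (non-torch) backend
    return backend.sum(backend.sin(x) ** 2)

# jit=True now wraps both the forward function and its vjp with backend.jit
f_torch = torch_interface(f, jit=True)

x = torch.ones(3, requires_grad=True)
y = f_torch(x)   # forward: torch tensors -> numpy -> backend, run f, convert result back to torch
y.backward()     # backward: routed through the (jitted) vjp_fun defined inside torch_interface
print(x.grad)

Jitting vjp_fun alongside fun means repeated backward passes reuse the compiled gradient computation instead of re-tracing it on every call.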