5 | 5 | from functools import partial
6 | 6 | from typing import Any, Callable, Optional, Sequence, Union
7 | 7 |
| 8 | +import numpy as np
| 9 | +
8 | 10 | from .cons import backend, dtypestr
9 | 11 |
10 | 12 | Tensor = Any
@@ -202,3 +204,56 @@ def energy(params: Tensor) -> Tensor:
202 | 204 |         return backend.grad(energy)(params)
203 | 205 |
204 | 206 |     return wrapper
| 207 | +
| 208 | +
| 209 | +def parameter_shift_grad(
| 210 | +    f: Callable[..., Tensor],
| 211 | +    argnums: Union[int, Sequence[int]] = 0,
| 212 | +    jit: bool = False,
| 213 | +) -> Callable[..., Tensor]:
| 214 | +    """
| 215 | +    Similar to the `grad` function, but uses the parameter-shift rule internally
| 216 | +    instead of AD; `vmap` is utilized for the shifted evaluations, so the speed remains reasonable.
| 217 | +
| 218 | +    :param f: quantum function with weights in and expectation out
| 219 | +    :type f: Callable[..., Tensor]
| 220 | +    :param argnums: specify which positional args should be differentiated,
| 221 | +        defaults to 0
| 222 | +    :type argnums: Union[int, Sequence[int]], optional
| 223 | +    :param jit: whether to jit the original function `f` at the beginning,
| 224 | +        defaults to False
| 225 | +    :type jit: bool, optional
| 226 | +    :return: the grad function
| 227 | +    :rtype: Callable[..., Tensor]
| 228 | +    """
| 229 | +    if jit is True:
| 230 | +        f = backend.jit(f)
| 231 | +
| 232 | +    if isinstance(argnums, int):
| 233 | +        argnums = [argnums]
| 234 | +
| 235 | +    vfs = [backend.vmap(f, vectorized_argnums=i) for i in argnums]
| 236 | +
| 237 | +    def grad_f(*args: Any, **kws: Any) -> Any:
| 238 | +        grad_values = []
| 239 | +        for i in argnums:  # type: ignore
| 240 | +            shape = backend.shape_tuple(args[i])
| 241 | +            size = backend.sizen(args[i])
| 242 | +            onehot = backend.eye(size)
| 243 | +            onehot = backend.cast(onehot, args[i].dtype)
| 244 | +            onehot = backend.reshape(onehot, [size] + list(shape))
| 245 | +            onehot = np.pi / 2 * onehot
| 246 | +            nargs = list(args)
| 247 | +            arg = backend.reshape(args[i], [1] + list(shape))
| 248 | +            batched_arg = backend.tile(arg, [size] + [1 for _ in shape])
| 249 | +            nargs[i] = batched_arg + onehot
| 250 | +            nargs2 = list(args)
| 251 | +            nargs2[i] = batched_arg - onehot
| 252 | +            r = (vfs[i](*nargs, **kws) - vfs[i](*nargs2, **kws)) / 2.0
| 253 | +            r = backend.reshape(r, shape)
| 254 | +            grad_values.append(r)
| 255 | +        if len(argnums) > 1:  # type: ignore
| 256 | +            return tuple(grad_values)
| 257 | +        return grad_values[0]
| 258 | +
| 259 | +    return grad_f
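
The batched `onehot` construction above is the two-term parameter-shift rule applied to every entry of `args[i]` at once: for a gate generated by a Pauli rotation, d⟨H⟩/dθ = (⟨H⟩(θ + π/2) − ⟨H⟩(θ − π/2)) / 2. A standalone NumPy check of that rule on a single `rx` rotation (illustration only, not part of the commit):

```python
# Verify the pi/2 parameter-shift rule on <Z> after rx(theta)|0>,
# which is analytically cos(theta) with derivative -sin(theta).
import numpy as np


def expectation(theta: float) -> float:
    # rx(theta) = exp(-i * theta * X / 2) applied to |0>, then measure Z.
    rx = np.array(
        [[np.cos(theta / 2), -1j * np.sin(theta / 2)],
         [-1j * np.sin(theta / 2), np.cos(theta / 2)]]
    )
    psi = rx @ np.array([1.0, 0.0])
    z = np.diag([1.0, -1.0])
    return float(np.real(np.conj(psi) @ z @ psi))


theta = 0.7
shift_grad = (expectation(theta + np.pi / 2) - expectation(theta - np.pi / 2)) / 2.0
print(shift_grad, -np.sin(theta))  # both approximately -0.6442
```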
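A minimal usage sketch, assuming the diff above lands in TensorCircuit's `experimental` module and that the usual `tc.Circuit` / `expectation_ps` API is available; everything outside the diff (backend choice, circuit, parameter values) is illustrative:

```python
# Gradient of a variational expectation via parameter shift rather than AD.
import numpy as np
import tensorcircuit as tc
from tensorcircuit.experimental import parameter_shift_grad

K = tc.set_backend("tensorflow")
n = 3


def f(params):
    # One layer of rx rotations, measure <Z0>; every parameter enters through
    # a Pauli rotation, so the pi/2 shift rule is exact here.
    c = tc.Circuit(n)
    for i in range(n):
        c.rx(i, theta=params[i])
    return K.real(c.expectation_ps(z=[0]))


grad_f = parameter_shift_grad(f, argnums=0, jit=True)
params = K.convert_to_tensor(np.ones([n], dtype=np.float32))
print(grad_f(params))  # shape [3]; should match K.grad(f)(params) up to numerics
```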