{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# QML in PyTorch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Overview\n",
    "\n",
    "In this tutorial, we show the MNIST binary classification QML example with the same setup as [mnist_qml](mnist_qml.ipynb). This time, we use the PyTorch machine learning pipeline to build the QML model.\n",
    "Again, this note is not about the best QML practice or the best PyTorch pipeline practice, instead, it is just a demonstration of the integration between PyTorch and TensorCircuit."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import torch\n",
    "\n",
    "import tensorcircuit as tc\n",
    "\n",
    "K = tc.set_backend(\"tensorflow\")\n",
    "\n",
    "# Use TensorFlow as the backend, while wrapping the quantum function in the PyTorch interface"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(torch.Size([12163, 9]), torch.Size([12163]))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We load and preprocess the dataset as the previous notebook using tensorflow and jax backend\n",
    "\n",
    "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
    "x_train = x_train[..., np.newaxis] / 255.0\n",
    "\n",
    "\n",
    "def filter_pair(x, y, a, b):\n",
    "    keep = (y == a) | (y == b)\n",
    "    x, y = x[keep], y[keep]\n",
    "    y = y == a\n",
    "    return x, y\n",
    "\n",
    "\n",
    "x_train, y_train = filter_pair(x_train, y_train, 1, 5)\n",
    "x_train_small = tf.image.resize(x_train, (3, 3)).numpy()\n",
    "x_train_bin = np.array(x_train_small > 0.5, dtype=np.float32)\n",
    "x_train_bin = np.squeeze(x_train_bin).reshape([-1, 9])\n",
    "y_train_torch = torch.tensor(y_train, dtype=torch.float32)\n",
    "x_train_torch = torch.tensor(x_train_bin)\n",
    "x_train_torch.shape, y_train_torch.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Wrap Quantum Function Using ``torch_interface``"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 9\n",
    "nlayers = 3\n",
    "\n",
    "# We define the quantum function,\n",
    "# note how this function is running on tensorflow\n",
    "\n",
    "\n",
    "def qpred(x, weights):\n",
    "    c = tc.Circuit(n)\n",
    "    for i in range(n):\n",
    "        c.rx(i, theta=x[i])\n",
    "    for j in range(nlayers):\n",
    "        for i in range(n - 1):\n",
    "            c.cnot(i, i + 1)\n",
    "        for i in range(n):\n",
    "            c.rx(i, theta=weights[2 * j, i])\n",
    "            c.ry(i, theta=weights[2 * j + 1, i])\n",
    "    ypred = c.expectation_ps(z=[n // 2])\n",
    "    ypred = K.real(ypred)\n",
    "    return K.sigmoid(ypred)\n",
    "\n",
    "\n",
    "# Wrap the function into pytorch form but with tensorflow speed!\n",
    "qpred_torch = tc.interfaces.torch_interface(qpred, jit=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After we have the AD aware function in PyTorch format, we can further wrap it as a torch Module (network layer)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class QuantumNet(torch.nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.q_weights = torch.nn.Parameter(torch.randn([2 * nlayers, n]))\n",
    "\n",
    "    def forward(self, inputs):\n",
    "        ypred = qpred_torch(inputs, self.q_weights)\n",
    "        return ypred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.4539, grad_fn=<FunBackward>)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = QuantumNet()\n",
    "net(x_train_torch[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(0.7287, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.5947, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.5804, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.6358, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.6503, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "training time per step:  0.12587213516235352\n"
     ]
    }
   ],
   "source": [
    "criterion = torch.nn.BCELoss()\n",
    "opt = torch.optim.Adam(net.parameters(), lr=1e-2)\n",
    "nepochs = 500\n",
    "nbatch = 32\n",
    "times = []\n",
    "\n",
    "for epoch in range(nepochs):\n",
    "    index = np.random.randint(low=0, high=100, size=nbatch)\n",
    "    # index = np.arange(nbatch)\n",
    "    inputs, labels = x_train_torch[index], y_train_torch[index]\n",
    "    opt.zero_grad()\n",
    "\n",
    "    with torch.set_grad_enabled(True):\n",
    "        time0 = time.time()\n",
    "        yps = []\n",
    "        for i in range(nbatch):\n",
    "            yp = net(inputs[i])\n",
    "            yps.append(yp)\n",
    "        yps = torch.stack(yps)\n",
    "        loss = criterion(\n",
    "            torch.reshape(yps, [nbatch, 1]), torch.reshape(labels, [nbatch, 1])\n",
    "        )\n",
    "        loss.backward()\n",
    "        if epoch % 100 == 0:\n",
    "            print(loss)\n",
    "        opt.step()\n",
    "        time1 = time.time()\n",
    "        times.append(time1 - time0)\n",
    "\n",
    "print(\"training time per step: \", np.mean(time1 - time0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Batched Version\n",
    "\n",
    "Now let's try vectorized version to speed up the batch input processing. Note how intrisically, ``tf.vectorized_map`` helps in the batch pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "qpred_vmap = K.vmap(qpred, vectorized_argnums=0)\n",
    "\n",
    "# `qpred_vmap` is a tensorflow function with vectorization capacity\n",
    "\n",
    "qpred_batch = tc.interfaces.torch_interface(qpred_vmap, jit=True)\n",
    "\n",
    "# We further wrap the function as a PyTorch one"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-6.2068e-03, -3.0100e-05, -1.0997e-02, -1.8381e-02, -9.1800e-02,\n",
      "          1.2481e-01, -6.5200e-02,  1.1176e-08,  7.4506e-09],\n",
      "        [-3.2353e-03,  3.4989e-03, -1.1344e-02, -1.6136e-02,  1.9075e-02,\n",
      "          2.1119e-02,  2.6881e-02, -1.1176e-08,  0.0000e+00],\n",
      "        [-1.1777e-02, -1.1572e-03, -5.0570e-03,  6.4838e-03, -5.5077e-02,\n",
      "         -3.4250e-02, -7.4506e-09, -1.1176e-08,  3.7253e-09],\n",
      "        [-1.4748e-02, -2.3818e-02, -4.3567e-02, -4.7879e-02,  1.2331e-01,\n",
      "          1.4314e-01,  3.7253e-09,  1.1176e-08,  3.7253e-09],\n",
      "        [-3.7253e-09,  3.7253e-09,  0.0000e+00,  0.0000e+00, -2.7574e-02,\n",
      "          7.4506e-09,  7.4506e-09, -1.1176e-08,  0.0000e+00],\n",
      "        [ 3.7253e-09,  3.7253e-09,  1.4901e-08, -7.4506e-09,  7.1655e-02,\n",
      "         -7.4506e-09,  3.7253e-09,  1.4901e-08,  0.0000e+00]])\n"
     ]
    }
   ],
   "source": [
    "# Test the AD capacity of the PyTorch function\n",
    "\n",
    "w = torch.ones([2 * nlayers, n])\n",
    "w.requires_grad_()\n",
    "with torch.set_grad_enabled(True):\n",
    "    yps = qpred_batch(x_train_torch[:3], w)\n",
    "    loss = torch.sum(yps)\n",
    "    loss.backward()\n",
    "print(w.grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "class QuantumNetV2(torch.nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.q_weights = torch.nn.Parameter(torch.randn([2 * nlayers, n]))\n",
    "\n",
    "    def forward(self, inputs):\n",
    "        ypred = qpred_batch(inputs, self.q_weights)\n",
    "        return ypred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([0.4706, 0.4706, 0.4767], grad_fn=<FunBackward>)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net2 = QuantumNetV2()\n",
    "net2(x_train_torch[:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With the help of vmap infrastructure borrowed from TensorFlow, the performance of training is greatly improved!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(0.6973, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.6421, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.6419, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.6498, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.6466, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "training time per step:  0.009107916531916371\n"
     ]
    }
   ],
   "source": [
    "criterion = torch.nn.BCELoss()\n",
    "opt = torch.optim.Adam(net2.parameters(), lr=1e-2)\n",
    "nepochs = 500\n",
    "nbatch = 32\n",
    "times = []\n",
    "for epoch in range(nepochs):\n",
    "    index = np.random.randint(low=0, high=100, size=nbatch)\n",
    "    # index = np.arange(nbatch)\n",
    "    inputs, labels = x_train_torch[index], y_train_torch[index]\n",
    "    opt.zero_grad()\n",
    "\n",
    "    with torch.set_grad_enabled(True):\n",
    "        time0 = time.time()\n",
    "        yps = net2(inputs)\n",
    "        loss = criterion(\n",
    "            torch.reshape(yps, [nbatch, 1]), torch.reshape(labels, [nbatch, 1])\n",
    "        )\n",
    "        loss.backward()\n",
    "        if epoch % 100 == 0:\n",
    "            print(loss)\n",
    "        opt.step()\n",
    "        time1 = time.time()\n",
    "        times.append(time1 - time0)\n",
    "print(\"training time per step: \", np.mean(times[1:]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hybrid Model with Classical Post-processing\n",
    "\n",
    "We now build a quantum-classical hybrid machine learning model pipeline where the output measurement results are further fed into a classical fully connected layer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.2839,  0.3786,  0.0158,  0.1512,  0.1395,  0.1364,  0.1403,  0.1423,\n",
       "         -0.1285],\n",
       "        [ 0.2839,  0.3786,  0.0158,  0.1512,  0.1395,  0.1364,  0.1403,  0.1423,\n",
       "         -0.1285]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def qpreds(x, weights):\n",
    "    c = tc.Circuit(n)\n",
    "    for i in range(n):\n",
    "        c.rx(i, theta=x[i])\n",
    "    for j in range(nlayers):\n",
    "        for i in range(n - 1):\n",
    "            c.cnot(i, i + 1)\n",
    "        for i in range(n):\n",
    "            c.rx(i, theta=weights[2 * j, i])\n",
    "            c.ry(i, theta=weights[2 * j + 1, i])\n",
    "\n",
    "    return K.stack([K.real(c.expectation_ps(z=[i])) for i in range(n)])\n",
    "\n",
    "\n",
    "qpreds_vmap = K.vmap(qpreds, vectorized_argnums=0)\n",
    "qpreds_batch = tc.interfaces.torch_interface(qpreds_vmap, jit=True)\n",
    "\n",
    "qpreds_batch(x_train_torch[:2], torch.ones([2 * nlayers, n]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "class QuantumNetV3(torch.nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.q_weights = torch.nn.Parameter(torch.randn([2 * nlayers, n]))\n",
    "\n",
    "    def forward(self, inputs):\n",
    "        ypred = qpreds_batch(inputs, self.q_weights)\n",
    "        return ypred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.2931,  0.5393, -0.0369, -0.0450,  0.0511, -0.0121,  0.0156, -0.0406,\n",
       "         -0.1330],\n",
       "        [ 0.2931,  0.5393, -0.0369, -0.0450,  0.0511, -0.0121,  0.0156, -0.0406,\n",
       "         -0.1330]], grad_fn=<FunBackward>)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net3 = QuantumNetV3()\n",
    "net3(x_train_bin[:2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We now build a hybrid model with the quantum layer ``net3`` and append a Linear layer behind it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0.5500],\n",
       "        [0.5500]], grad_fn=<SigmoidBackward0>)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = torch.nn.Sequential(QuantumNetV3(), torch.nn.Linear(9, 1), torch.nn.Sigmoid())\n",
    "\n",
    "model(x_train_bin[:2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(0.6460, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.6086, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.5199, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.5697, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "tensor(0.5248, grad_fn=<BinaryCrossEntropyBackward0>)\n",
      "training time per step:  0.020270218113381304\n"
     ]
    }
   ],
   "source": [
    "criterion = torch.nn.BCELoss()\n",
    "opt = torch.optim.Adam(model.parameters(), lr=1e-2)\n",
    "nepochs = 500\n",
    "nbatch = 32\n",
    "times = []\n",
    "for epoch in range(nepochs):\n",
    "    index = np.random.randint(low=0, high=100, size=nbatch)\n",
    "    # index = np.arange(nbatch)\n",
    "    inputs, labels = x_train_torch[index], y_train_torch[index]\n",
    "    opt.zero_grad()\n",
    "\n",
    "    with torch.set_grad_enabled(True):\n",
    "        time0 = time.time()\n",
    "        yps = model(inputs)\n",
    "        loss = criterion(\n",
    "            torch.reshape(yps, [nbatch, 1]), torch.reshape(labels, [nbatch, 1])\n",
    "        )\n",
    "        loss.backward()\n",
    "        if epoch % 100 == 0:\n",
    "            print(loss)\n",
    "        opt.step()\n",
    "        time1 = time.time()\n",
    "        times.append(time1 - time0)\n",
    "print(\"training time per step: \", np.mean(times[1:]))"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "2d121f26c836fa50684100f1e49bcafe1050f1ef27bfc6ab5222e4b5b1a90afa"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}