diff --git a/NeuralNetwok/MNIST/processed/test.pt b/NeuralNetwok/MNIST/processed/test.pt new file mode 100644 index 0000000..a7bff97 Binary files /dev/null and b/NeuralNetwok/MNIST/processed/test.pt differ diff --git a/NeuralNetwok/MNIST/processed/training.pt b/NeuralNetwok/MNIST/processed/training.pt new file mode 100644 index 0000000..a01dc89 Binary files /dev/null and b/NeuralNetwok/MNIST/processed/training.pt differ diff --git a/NeuralNetwok/MNIST/raw/t10k-images-idx3-ubyte b/NeuralNetwok/MNIST/raw/t10k-images-idx3-ubyte new file mode 100644 index 0000000..1170b2c Binary files /dev/null and b/NeuralNetwok/MNIST/raw/t10k-images-idx3-ubyte differ diff --git a/NeuralNetwok/MNIST/raw/t10k-images-idx3-ubyte.gz b/NeuralNetwok/MNIST/raw/t10k-images-idx3-ubyte.gz new file mode 100644 index 0000000..5ace8ea Binary files /dev/null and b/NeuralNetwok/MNIST/raw/t10k-images-idx3-ubyte.gz differ diff --git a/NeuralNetwok/MNIST/raw/t10k-labels-idx1-ubyte b/NeuralNetwok/MNIST/raw/t10k-labels-idx1-ubyte new file mode 100644 index 0000000..d1c3a97 Binary files /dev/null and b/NeuralNetwok/MNIST/raw/t10k-labels-idx1-ubyte differ diff --git a/NeuralNetwok/MNIST/raw/t10k-labels-idx1-ubyte.gz b/NeuralNetwok/MNIST/raw/t10k-labels-idx1-ubyte.gz new file mode 100644 index 0000000..a7e1415 Binary files /dev/null and b/NeuralNetwok/MNIST/raw/t10k-labels-idx1-ubyte.gz differ diff --git a/NeuralNetwok/MNIST/raw/train-images-idx3-ubyte b/NeuralNetwok/MNIST/raw/train-images-idx3-ubyte new file mode 100644 index 0000000..bbce276 Binary files /dev/null and b/NeuralNetwok/MNIST/raw/train-images-idx3-ubyte differ diff --git a/NeuralNetwok/MNIST/raw/train-images-idx3-ubyte.gz b/NeuralNetwok/MNIST/raw/train-images-idx3-ubyte.gz new file mode 100644 index 0000000..b50e4b6 Binary files /dev/null and b/NeuralNetwok/MNIST/raw/train-images-idx3-ubyte.gz differ diff --git a/NeuralNetwok/MNIST/raw/train-labels-idx1-ubyte b/NeuralNetwok/MNIST/raw/train-labels-idx1-ubyte new file mode 100644 index 0000000..d6b4c5d Binary files /dev/null and b/NeuralNetwok/MNIST/raw/train-labels-idx1-ubyte differ diff --git a/NeuralNetwok/MNIST/raw/train-labels-idx1-ubyte.gz b/NeuralNetwok/MNIST/raw/train-labels-idx1-ubyte.gz new file mode 100644 index 0000000..707a576 Binary files /dev/null and b/NeuralNetwok/MNIST/raw/train-labels-idx1-ubyte.gz differ diff --git a/NeuralNetwok/Pytorch_automaticDiff.py b/NeuralNetwok/Pytorch_automaticDiff.py new file mode 100644 index 0000000..7c222e9 --- /dev/null +++ b/NeuralNetwok/Pytorch_automaticDiff.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 12 16:12:34 2019 + +@author: Qifan_17 +""" + +import torch + +x = torch.ones(2, 2, requires_grad=True) +print(x) + +y = x + 2 +print(y) + +print(y.grad_fn) + +z = y * y * 3 +out = z.mean() + +print(z) +print(out) + +############ +a = torch.randn(2, 2) +a = ((a * 3) / (a - 1)) +print(a.requires_grad) +a.requires_grad_(True) +print(a.requires_grad) +b = (a * a).sum() +print(b.grad_fn) + + +out.backward() +print(x.grad) + + +x = torch.randn(3, requires_grad=True) + +y = x * 2 +while y.data.norm() < 1000: + y = y * 2 + +print(y) + +v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float) +y.backward(v) + +print(x.grad) + +print(x.requires_grad) +print((x ** 2).requires_grad) + +with torch.no_grad(): + print((x ** 2).requires_grad) \ No newline at end of file diff --git a/NeuralNetwok/Pytorch_quitstart.py b/NeuralNetwok/Pytorch_quitstart.py new file mode 100644 index 0000000..ab0a0b7 --- /dev/null +++ b/NeuralNetwok/Pytorch_quitstart.py @@ -0,0 +1,87 
@@ +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 12 15:34:17 2019 + +@author: Qifan_17 +""" + + +#Basic operation + +from __future__ import print_function +import torch +import numpy as np + +x = torch.empty(5, 3) +print(x) + +x = torch.rand(5, 3) +print(x) + +x = torch.zeros(5, 3, dtype=torch.long) +print(x) + +x = torch.tensor([5.5, 3]) +print(x) + + +x = x.new_ones(5, 3, dtype=torch.double) # new_* methods take in sizes +print(x) + +x = torch.randn_like(x, dtype=torch.float) # override dtype! +print(x) # result has the same size + +print(x.size()) + +y = torch.rand(5, 3) +print(x + y) + +print(torch.add(x, y)) + +result = torch.empty(5, 3) +torch.add(x, y, out=result) +print(result) + +# adds x to y +y.add_(x) +print(y) + + +print(x[:, 1]) + +x = torch.randn(4, 4) +y = x.view(16) +z = x.view(-1, 8) # the size -1 is inferred from other dimensions +print(x.size(), y.size(), z.size()) + +x = torch.randn(1) +print(x) +print(x.item()) + +###NumPy Bridge +a = torch.ones(5) +print(a) +b = a.numpy() +print(b) +a.add_(1) +print(a) +print(b) + + + +a = np.ones(5) +b = torch.from_numpy(a) +np.add(a, 1, out=a) +print(a) +print(b) + + +# let us run this cell only if CUDA is available +# We will use ``torch.device`` objects to move tensors in and out of GPU +if torch.cuda.is_available(): + device = torch.device("cuda") # a CUDA device object + y = torch.ones_like(x, device=device) # directly create a tensor on GPU + x = x.to(device) # or just use strings ``.to("cuda")`` + z = x + y + print(z) + print(z.to("cpu", torch.double)) # ``.to`` can also change dtype together! \ No newline at end of file diff --git a/NeuralNetwok/Pytorch_vae.py b/NeuralNetwok/Pytorch_vae.py new file mode 100644 index 0000000..ecced73 --- /dev/null +++ b/NeuralNetwok/Pytorch_vae.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 12 17:06:38 2019 + +@author: Qifan_17 +""" + +import torch +from torch.autograd import Variable +import numpy as np +import torch.nn.functional as F +import torchvision +from torchvision import transforms +import torch.optim as optim +from torch import nn +import matplotlib.pyplot as plt + + +class Normal(object): + def __init__(self, mu, sigma, log_sigma, v=None, r=None): + self.mu = mu + self.sigma = sigma # either stdev diagonal itself, or stdev diagonal from decomposition + self.logsigma = log_sigma + dim = mu.get_shape() + if v is None: + v = torch.FloatTensor(*dim) + if r is None: + r = torch.FloatTensor(*dim) + self.v = v + self.r = r + + +class Encoder(torch.nn.Module): + def __init__(self, D_in, H, D_out): + super(Encoder, self).__init__() + self.linear1 = torch.nn.Linear(D_in, H) + self.linear2 = torch.nn.Linear(H, D_out) + + def forward(self, x): + x = F.relu(self.linear1(x)) + return F.relu(self.linear2(x)) + + +class Decoder(torch.nn.Module): + def __init__(self, D_in, H, D_out): + super(Decoder, self).__init__() + self.linear1 = torch.nn.Linear(D_in, H) + self.linear2 = torch.nn.Linear(H, D_out) + + def forward(self, x): + x = F.relu(self.linear1(x)) + return F.relu(self.linear2(x)) + + +class VAE(torch.nn.Module): + latent_dim = 8 + + def __init__(self, encoder, decoder): + super(VAE, self).__init__() + self.encoder = encoder + self.decoder = decoder + self._enc_mu = torch.nn.Linear(100, 8) + self._enc_log_sigma = torch.nn.Linear(100, 8) + + def _sample_latent(self, h_enc): + """ + Return the latent normal sample z ~ N(mu, sigma^2) + """ + mu = self._enc_mu(h_enc) + log_sigma = self._enc_log_sigma(h_enc) + sigma = torch.exp(log_sigma) + std_z = 
torch.from_numpy(np.random.normal(0, 1, size=sigma.size())).float() + + self.z_mean = mu + self.z_sigma = sigma + + return mu + sigma * Variable(std_z, requires_grad=False) # Reparameterization trick + + def forward(self, state): + h_enc = self.encoder(state) + z = self._sample_latent(h_enc) + return self.decoder(z) + + +def latent_loss(z_mean, z_stddev): + mean_sq = z_mean * z_mean + stddev_sq = z_stddev * z_stddev + return 0.5 * torch.mean(mean_sq + stddev_sq - torch.log(stddev_sq) - 1) + + +if __name__ == '__main__': + + input_dim = 28 * 28 + batch_size = 32 + + transform = transforms.Compose( + [transforms.ToTensor()]) + mnist = torchvision.datasets.MNIST('./', download=True, transform=transform) + + dataloader = torch.utils.data.DataLoader(mnist, batch_size=batch_size, + shuffle=True, num_workers=2) + + print('Number of samples: ', len(mnist)) + + encoder = Encoder(input_dim, 100, 100) + decoder = Decoder(8, 100, input_dim) + vae = VAE(encoder, decoder) + + criterion = nn.MSELoss() + + optimizer = optim.Adam(vae.parameters(), lr=0.0001) + l = None + for epoch in range(100): + for i, data in enumerate(dataloader, 0): + inputs, classes = data + inputs, classes = Variable(inputs.resize_(batch_size, input_dim)), Variable(classes) + optimizer.zero_grad() + dec = vae(inputs) + ll = latent_loss(vae.z_mean, vae.z_sigma) + loss = criterion(dec, inputs) + ll + loss.backward() + optimizer.step() + l = loss.data[0] + print(epoch, l) + + plt.imshow(vae(inputs).data[0].numpy().reshape(28, 28), cmap='gray') + plt.show(block=True) \ No newline at end of file diff --git a/NeuralNetwok/neural_networks_tutorial.ipynb b/NeuralNetwok/neural_networks_tutorial.ipynb new file mode 100644 index 0000000..ecaab78 --- /dev/null +++ b/NeuralNetwok/neural_networks_tutorial.ipynb @@ -0,0 +1,187 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\nNeural Networks\n===============\n\nNeural networks can be constructed using the ``torch.nn`` package.\n\nNow that you had a glimpse of ``autograd``, ``nn`` depends on\n``autograd`` to define models and differentiate them.\nAn ``nn.Module`` contains layers, and a method ``forward(input)``\\ that\nreturns the ``output``.\n\nFor example, look at this network that classifies digit images:\n\n.. figure:: /_static/img/mnist.png\n :alt: convnet\n\n convnet\n\nIt is a simple feed-forward network. 
It takes the input, feeds it\nthrough several layers one after the other, and then finally gives the\noutput.\n\nA typical training procedure for a neural network is as follows:\n\n- Define the neural network that has some learnable parameters (or\n weights)\n- Iterate over a dataset of inputs\n- Process input through the network\n- Compute the loss (how far is the output from being correct)\n- Propagate gradients back into the network\u2019s parameters\n- Update the weights of the network, typically using a simple update rule:\n ``weight = weight - learning_rate * gradient``\n\nDefine the network\n------------------\n\nLet\u2019s define this network:\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass Net(nn.Module):\n\n def __init__(self):\n super(Net, self).__init__()\n # 1 input image channel, 6 output channels, 3x3 square convolution\n # kernel\n self.conv1 = nn.Conv2d(1, 6, 3)\n self.conv2 = nn.Conv2d(6, 16, 3)\n # an affine operation: y = Wx + b\n self.fc1 = nn.Linear(16 * 6 * 6, 120) # 6*6 from image dimension \n self.fc2 = nn.Linear(120, 84)\n self.fc3 = nn.Linear(84, 10)\n\n def forward(self, x):\n # Max pooling over a (2, 2) window\n x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n # If the size is a square you can only specify a single number\n x = F.max_pool2d(F.relu(self.conv2(x)), 2)\n x = x.view(-1, self.num_flat_features(x))\n x = F.relu(self.fc1(x))\n x = F.relu(self.fc2(x))\n x = self.fc3(x)\n return x\n\n def num_flat_features(self, x):\n size = x.size()[1:] # all dimensions except the batch dimension\n num_features = 1\n for s in size:\n num_features *= s\n return num_features\n\n\nnet = Net()\nprint(net)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You just have to define the ``forward`` function, and the ``backward``\nfunction (where gradients are computed) is automatically defined for you\nusing ``autograd``.\nYou can use any of the Tensor operations in the ``forward`` function.\n\nThe learnable parameters of a model are returned by ``net.parameters()``\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "params = list(net.parameters())\nprint(len(params))\nprint(params[0].size()) # conv1's .weight" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let try a random 32x32 input.\nNote: expected input size of this net (LeNet) is 32x32. To use this net on\nMNIST dataset, please resize the images from the dataset to 32x32.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "input = torch.randn(1, 1, 32, 32)\nout = net(input)\nprint(out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zero the gradient buffers of all parameters and backprops with random\ngradients:\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "net.zero_grad()\nout.backward(torch.randn(1, 10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

.. Note::\n\n ``torch.nn`` only supports mini-batches. The entire ``torch.nn``\n package only supports inputs that are a mini-batch of samples, and not\n a single sample.\n\n For example, ``nn.Conv2d`` will take in a 4D Tensor of\n ``nSamples x nChannels x Height x Width``.\n\n If you have a single sample, just use ``input.unsqueeze(0)`` to add\n a fake batch dimension.
\n\nBefore proceeding further, let's recap all the classes you\u2019ve seen so far.\n\n**Recap:**\n - ``torch.Tensor`` - A *multi-dimensional array* with support for autograd\n operations like ``backward()``. Also *holds the gradient* w.r.t. the\n tensor.\n - ``nn.Module`` - Neural network module. *Convenient way of\n encapsulating parameters*, with helpers for moving them to GPU,\n exporting, loading, etc.\n - ``nn.Parameter`` - A kind of Tensor, that is *automatically\n registered as a parameter when assigned as an attribute to a*\n ``Module``.\n - ``autograd.Function`` - Implements *forward and backward definitions\n of an autograd operation*. Every ``Tensor`` operation creates at\n least a single ``Function`` node that connects to functions that\n created a ``Tensor`` and *encodes its history*.\n\n**At this point, we covered:**\n - Defining a neural network\n - Processing inputs and calling backward\n\n**Still Left:**\n - Computing the loss\n - Updating the weights of the network\n\nLoss Function\n-------------\nA loss function takes the (output, target) pair of inputs, and computes a\nvalue that estimates how far away the output is from the target.\n\nThere are several different\n`loss functions `_ under the\nnn package .\nA simple loss is: ``nn.MSELoss`` which computes the mean-squared error\nbetween the input and the target.\n\nFor example:\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "output = net(input)\ntarget = torch.randn(10) # a dummy target, for example\ntarget = target.view(1, -1) # make it the same shape as output\ncriterion = nn.MSELoss()\n\nloss = criterion(output, target)\nprint(loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, if you follow ``loss`` in the backward direction, using its\n``.grad_fn`` attribute, you will see a graph of computations that looks\nlike this:\n\n::\n\n input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d\n -> view -> linear -> relu -> linear -> relu -> linear\n -> MSELoss\n -> loss\n\nSo, when we call ``loss.backward()``, the whole graph is differentiated\nw.r.t. 
the loss, and all Tensors in the graph that has ``requires_grad=True``\nwill have their ``.grad`` Tensor accumulated with the gradient.\n\nFor illustration, let us follow a few steps backward:\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(loss.grad_fn) # MSELoss\nprint(loss.grad_fn.next_functions[0][0]) # Linear\nprint(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Backprop\n--------\nTo backpropagate the error all we have to do is to ``loss.backward()``.\nYou need to clear the existing gradients though, else gradients will be\naccumulated to existing gradients.\n\n\nNow we shall call ``loss.backward()``, and have a look at conv1's bias\ngradients before and after the backward.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "net.zero_grad() # zeroes the gradient buffers of all parameters\n\nprint('conv1.bias.grad before backward')\nprint(net.conv1.bias.grad)\n\nloss.backward()\n\nprint('conv1.bias.grad after backward')\nprint(net.conv1.bias.grad)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we have seen how to use loss functions.\n\n**Read Later:**\n\n The neural network package contains various modules and loss functions\n that form the building blocks of deep neural networks. A full list with\n documentation is `here `_.\n\n**The only thing left to learn is:**\n\n - Updating the weights of the network\n\nUpdate the weights\n------------------\nThe simplest update rule used in practice is the Stochastic Gradient\nDescent (SGD):\n\n ``weight = weight - learning_rate * gradient``\n\nWe can implement this using simple python code:\n\n.. code:: python\n\n learning_rate = 0.01\n for f in net.parameters():\n f.data.sub_(f.grad.data * learning_rate)\n\nHowever, as you use neural networks, you want to use various different\nupdate rules such as SGD, Nesterov-SGD, Adam, RMSProp, etc.\nTo enable this, we built a small package: ``torch.optim`` that\nimplements all these methods. Using it is very simple:\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import torch.optim as optim\n\n# create your optimizer\noptimizer = optim.SGD(net.parameters(), lr=0.01)\n\n# in your training loop:\noptimizer.zero_grad() # zero the gradient buffers\noutput = net(input)\nloss = criterion(output, target)\nloss.backward()\noptimizer.step() # Does the update" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ".. Note::\n\n Observe how gradient buffers had to be manually set to zero using\n ``optimizer.zero_grad()``. This is because gradients are accumulated\n as explained in `Backprop`_ section.\n\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file
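For reference, below is a minimal, self-contained sketch (not part of the diff) of the same reparameterization trick and KL term used in `NeuralNetwok/Pytorch_vae.py`, written with current PyTorch idioms: `torch.randn_like` instead of a NumPy sample wrapped in `Variable`, and `loss.item()` instead of the deprecated `loss.data[0]`. The `TinyVAE` class, its layer sizes, and the random stand-in batch are illustrative assumptions, not code from the patch.

```python
# Minimal sketch of the VAE pieces from Pytorch_vae.py using current idioms.
# TinyVAE and the random input batch are illustrative, not part of the diff.
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyVAE(nn.Module):
    def __init__(self, input_dim=784, hidden_dim=100, latent_dim=8):
        super().__init__()
        self.enc = nn.Linear(input_dim, hidden_dim)
        self.enc_mu = nn.Linear(hidden_dim, latent_dim)
        self.enc_log_sigma = nn.Linear(hidden_dim, latent_dim)
        self.dec = nn.Sequential(nn.Linear(latent_dim, hidden_dim), nn.ReLU(),
                                 nn.Linear(hidden_dim, input_dim))

    def forward(self, x):
        h = F.relu(self.enc(x))
        mu = self.enc_mu(h)
        sigma = torch.exp(self.enc_log_sigma(h))
        # Reparameterization trick: sample eps ~ N(0, 1) with randn_like so the
        # noise lives on the same device/dtype as sigma, then shift and scale.
        z = mu + sigma * torch.randn_like(sigma)
        self.z_mean, self.z_sigma = mu, sigma
        return self.dec(z)


def latent_loss(z_mean, z_stddev):
    # KL divergence between N(mu, sigma^2) and N(0, 1), averaged over the batch,
    # matching the latent_loss in the patch.
    return 0.5 * torch.mean(z_mean ** 2 + z_stddev ** 2
                            - torch.log(z_stddev ** 2) - 1)


if __name__ == "__main__":
    vae = TinyVAE()
    optimizer = torch.optim.Adam(vae.parameters(), lr=1e-4)
    x = torch.rand(32, 784)          # stand-in for a flattened MNIST batch
    optimizer.zero_grad()
    recon = vae(x)
    loss = F.mse_loss(recon, x) + latent_loss(vae.z_mean, vae.z_sigma)
    loss.backward()
    optimizer.step()
    print(loss.item())               # replaces the deprecated loss.data[0]
```

Using `torch.randn_like(sigma)` rather than converting a NumPy draw keeps the sampled noise on whatever device and dtype the model runs on, so the same sketch works unchanged on CPU or CUDA.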