diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..723ef36
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea
\ No newline at end of file
diff --git a/DL_demo/LICENSE.md b/DL_demo/LICENSE.md
new file mode 100755
index 0000000..abed9dd
--- /dev/null
+++ b/DL_demo/LICENSE.md
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 Koki Saitoh
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/DL_demo/README.md b/DL_demo/README.md
new file mode 100755
index 0000000..c05b227
--- /dev/null
+++ b/DL_demo/README.md
@@ -0,0 +1,51 @@
+# Deep Learning from Scratch
+
+## Directory layout
+
+|Folder |Description |
+|:-- |:-- |
+|ch01 |Source code used in Chapter 1 |
+|ch02 |Source code used in Chapter 2 |
+|... |... |
+|ch08 |Source code used in Chapter 8 |
+|common |Source code shared by all chapters |
+|dataset |Source code for the datasets |
+
+See the book for explanations of the source code.
+
+## Requirements
+Running the source code requires the following software to be installed:
+
+* Python 3.x
+* NumPy
+* Matplotlib
+
+Note: the code is written for Python 3.
+
+## How to run
+
+Change into a chapter's directory and run the script with Python:
+
+```
+$ cd ch01
+$ python man.py
+
+$ cd ../ch05
+$ python train_neuralnet.py
+```
+
+## License
+
+This source code is released under the [MIT License](http://www.opensource.org/licenses/MIT).
+It may be used freely, for commercial and non-commercial purposes alike.
+
+## Errata
+
+Errata for the book are published at the address below, where readers can view and submit corrections.
+
+http://www.ituring.com.cn/book/1921
+
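Every chapter script in this patch pulls shared code from `common/` and `dataset/` by appending the parent directory to the module search path, which is why the README says to run scripts from inside a chapter directory. A minimal sketch of that convention (assuming `load_mnist`'s default `flatten=True`):

```python
# Run from inside a chapter directory such as ch03/.
import sys, os
sys.path.append(os.pardir)             # make sibling packages (common/, dataset/) importable
from dataset.mnist import load_mnist   # resolves once the parent directory is on sys.path

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
print(x_train.shape)  # (60000, 784) with the default flatten=True
```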
diff --git a/DL_demo/__init__.py b/DL_demo/__init__.py
new file mode 100644
index 0000000..13a78a5
--- /dev/null
+++ b/DL_demo/__init__.py
@@ -0,0 +1,14 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/5 11:29 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong, don't ask me, I don't know why;  ===
+# If this runs right, thank god, and I don't know why. ===
+# Maybe the answer, my friend, is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: __init__.py
+# @Software: PyCharm
diff --git a/DL_demo/ch01/hungry.py b/DL_demo/ch01/hungry.py
new file mode 100755
index 0000000..a2cb2a8
--- /dev/null
+++ b/DL_demo/ch01/hungry.py
@@ -0,0 +1 @@
+print("I'm hungry!")
diff --git a/DL_demo/ch01/img_show.py b/DL_demo/ch01/img_show.py
new file mode 100755
index 0000000..4deeb72
--- /dev/null
+++ b/DL_demo/ch01/img_show.py
@@ -0,0 +1,8 @@
+# coding: utf-8
+import matplotlib.pyplot as plt
+from matplotlib.image import imread
+
+img = imread('../dataset/lena.png')  # load the image
+plt.imshow(img)
+
+plt.show()
\ No newline at end of file
diff --git a/DL_demo/ch01/man.py b/DL_demo/ch01/man.py
new file mode 100755
index 0000000..72a7587
--- /dev/null
+++ b/DL_demo/ch01/man.py
@@ -0,0 +1,17 @@
+# coding: utf-8
+class Man:
+    """Example class"""
+
+    def __init__(self, name):
+        self.name = name
+        print("Initialized!")
+
+    def hello(self):
+        print("Hello " + self.name + "!")
+
+    def goodbye(self):
+        print("Good-bye " + self.name + "!")
+
+m = Man("David")
+m.hello()
+m.goodbye()
\ No newline at end of file
diff --git a/DL_demo/ch01/simple_graph.py b/DL_demo/ch01/simple_graph.py
new file mode 100755
index 0000000..4ad11ae
--- /dev/null
+++ b/DL_demo/ch01/simple_graph.py
@@ -0,0 +1,11 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pyplot as plt
+
+# generate data
+x = np.arange(0, 6, 0.1)  # values from 0 to 6 in steps of 0.1
+y = np.sin(x)
+
+# draw the graph
+plt.plot(x, y)
+plt.show()
\ No newline at end of file
diff --git a/DL_demo/ch01/sin_cos_graph.py b/DL_demo/ch01/sin_cos_graph.py
new file mode 100755
index 0000000..4505714
--- /dev/null
+++ b/DL_demo/ch01/sin_cos_graph.py
@@ -0,0 +1,17 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pyplot as plt
+
+# generate data
+x = np.arange(0, 6, 0.1)  # values from 0 to 6 in steps of 0.1
+y1 = np.sin(x)
+y2 = np.cos(x)
+
+# draw the graph
+plt.plot(x, y1, label="sin")
+plt.plot(x, y2, linestyle="--", label="cos")
+plt.xlabel("x")  # label for the x-axis
+plt.ylabel("y")  # label for the y-axis
+plt.title('sin & cos')
+plt.legend()
+plt.show()
\ No newline at end of file
diff --git a/DL_demo/ch01/sin_graph.py b/DL_demo/ch01/sin_graph.py
new file mode 100755
index 0000000..88f5a01
--- /dev/null
+++ b/DL_demo/ch01/sin_graph.py
@@ -0,0 +1,11 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pyplot as plt
+
+# generate data
+x = np.arange(0, 6, 0.1)
+y = np.sin(x)
+
+# draw the graph
+plt.plot(x, y)
+plt.show()
diff --git a/DL_demo/ch02/and_gate.py b/DL_demo/ch02/and_gate.py
new file mode 100755
index 0000000..afe5504
--- /dev/null
+++ b/DL_demo/ch02/and_gate.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+import numpy as np
+
+
+def AND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.7
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = AND(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
diff --git a/DL_demo/ch02/nand_gate.py b/DL_demo/ch02/nand_gate.py
new file mode 100755
index 0000000..7305206
--- /dev/null
+++ b/DL_demo/ch02/nand_gate.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+import numpy as np
+
+
+def NAND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([-0.5, -0.5])
+    b = 0.7
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = NAND(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
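The ch02 gates all share one decision rule: output 1 exactly when w·x + b > 0; they differ only in the constants (w, b). A small sketch making that explicit (the generic `perceptron` helper is illustrative, not part of the repo):

```python
import numpy as np

def perceptron(x1, x2, w, b):
    # fires iff the weighted sum clears the threshold encoded by the bias
    return 1 if np.sum(np.array([x1, x2]) * w) + b > 0 else 0

AND  = lambda a, b: perceptron(a, b, np.array([0.5, 0.5]), -0.7)
NAND = lambda a, b: perceptron(a, b, np.array([-0.5, -0.5]), 0.7)
OR   = lambda a, b: perceptron(a, b, np.array([0.5, 0.5]), -0.2)

for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
    print(xs, AND(*xs), NAND(*xs), OR(*xs))  # truth tables of all three gates
```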
diff --git a/DL_demo/ch02/or_gate.py b/DL_demo/ch02/or_gate.py
new file mode 100755
index 0000000..ba43554
--- /dev/null
+++ b/DL_demo/ch02/or_gate.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+import numpy as np
+
+
+def OR(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.2
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = OR(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
\ No newline at end of file
diff --git a/DL_demo/ch02/xor_gate.py b/DL_demo/ch02/xor_gate.py
new file mode 100755
index 0000000..0a31449
--- /dev/null
+++ b/DL_demo/ch02/xor_gate.py
@@ -0,0 +1,16 @@
+# coding: utf-8
+from and_gate import AND
+from or_gate import OR
+from nand_gate import NAND
+
+
+def XOR(x1, x2):
+    s1 = NAND(x1, x2)
+    s2 = OR(x1, x2)
+    y = AND(s1, s2)
+    return y
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = XOR(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
\ No newline at end of file
diff --git a/DL_demo/ch03/mnist_show.py b/DL_demo/ch03/mnist_show.py
new file mode 100755
index 0000000..828fa8f
--- /dev/null
+++ b/DL_demo/ch03/mnist_show.py
@@ -0,0 +1,23 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+from dataset.mnist import load_mnist
+from PIL import Image
+
+
+def img_show(img):
+    pil_img = Image.fromarray(np.uint8(img))
+    pil_img.show()
+
+(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
+
+img = x_train[0]
+label = t_train[0]
+print(label)  # 5
+
+print(img.shape)  # (784,)
+img = img.reshape(28, 28)  # restore the image to its original shape
+print(img.shape)  # (28, 28)
+
+img_show(img)
diff --git a/DL_demo/ch03/neuralnet_mnist.py b/DL_demo/ch03/neuralnet_mnist.py
new file mode 100755
index 0000000..dee26fa
--- /dev/null
+++ b/DL_demo/ch03/neuralnet_mnist.py
@@ -0,0 +1,44 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+import pickle
+from dataset.mnist import load_mnist
+from common.functions import sigmoid, softmax
+
+
+def get_data():
+    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
+    return x_test, t_test
+
+
+def init_network():
+    with open("sample_weight.pkl", 'rb') as f:
+        network = pickle.load(f)
+    return network
+
+
+def predict(network, x):
+    W1, W2, W3 = network['W1'], network['W2'], network['W3']
+    b1, b2, b3 = network['b1'], network['b2'], network['b3']
+
+    a1 = np.dot(x, W1) + b1
+    z1 = sigmoid(a1)
+    a2 = np.dot(z1, W2) + b2
+    z2 = sigmoid(a2)
+    a3 = np.dot(z2, W3) + b3
+    y = softmax(a3)
+
+    return y
+
+
+x, t = get_data()
+network = init_network()
+accuracy_cnt = 0
+for i in range(len(x)):
+    y = predict(network, x[i])
+    p = np.argmax(y)  # index of the highest-probability element
+    if p == t[i]:
+        accuracy_cnt += 1
+
+print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
\ No newline at end of file
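The batch version that follows feeds many images through `predict` at once, so the scores come back as a 2-D array and `np.argmax` needs `axis=1` to pick the best class per row. A minimal sketch of that shape logic (random scores stand in for network output):

```python
import numpy as np

scores = np.random.rand(100, 10)    # one row of 10 class scores per image
preds = np.argmax(scores, axis=1)   # shape (100,): predicted class for each row
print(preds.shape, preds[:5])
```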
diff --git a/DL_demo/ch03/neuralnet_mnist_batch.py b/DL_demo/ch03/neuralnet_mnist_batch.py
new file mode 100755
index 0000000..3145414
--- /dev/null
+++ b/DL_demo/ch03/neuralnet_mnist_batch.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+import pickle
+from dataset.mnist import load_mnist
+from common.functions import sigmoid, softmax
+
+
+def get_data():
+    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
+    return x_test, t_test
+
+
+def init_network():
+    with open("sample_weight.pkl", 'rb') as f:
+        network = pickle.load(f)
+    return network
+
+
+def predict(network, x):
+    w1, w2, w3 = network['W1'], network['W2'], network['W3']
+    b1, b2, b3 = network['b1'], network['b2'], network['b3']
+
+    a1 = np.dot(x, w1) + b1
+    z1 = sigmoid(a1)
+    a2 = np.dot(z1, w2) + b2
+    z2 = sigmoid(a2)
+    a3 = np.dot(z2, w3) + b3
+    y = softmax(a3)
+
+    return y
+
+
+x, t = get_data()
+network = init_network()
+
+batch_size = 100  # number of images per batch
+accuracy_cnt = 0
+
+for i in range(0, len(x), batch_size):
+    x_batch = x[i:i+batch_size]
+    y_batch = predict(network, x_batch)
+    p = np.argmax(y_batch, axis=1)
+    accuracy_cnt += np.sum(p == t[i:i+batch_size])
+
+print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
diff --git a/DL_demo/ch03/relu.py b/DL_demo/ch03/relu.py
new file mode 100755
index 0000000..8c8d4cc
--- /dev/null
+++ b/DL_demo/ch03/relu.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def relu(x):
+    return np.maximum(0, x)
+
+x = np.arange(-5.0, 5.0, 0.1)
+y = relu(x)
+plt.plot(x, y)
+plt.ylim(-1.0, 5.5)
+plt.show()
diff --git a/DL_demo/ch03/sample_weight.pkl b/DL_demo/ch03/sample_weight.pkl
new file mode 100755
index 0000000..0e92475
Binary files /dev/null and b/DL_demo/ch03/sample_weight.pkl differ
diff --git a/DL_demo/ch03/sig_step_compare.py b/DL_demo/ch03/sig_step_compare.py
new file mode 100755
index 0000000..a4df829
--- /dev/null
+++ b/DL_demo/ch03/sig_step_compare.py
@@ -0,0 +1,20 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def step_function(x):
+    return np.array(x > 0, dtype=int)
+
+x = np.arange(-5.0, 5.0, 0.1)
+y1 = sigmoid(x)
+y2 = step_function(x)
+
+plt.plot(x, y1)
+plt.plot(x, y2, 'k--')
+plt.ylim(-0.1, 1.1)  # set the range of the y-axis
+plt.show()
diff --git a/DL_demo/ch03/sigmoid.py b/DL_demo/ch03/sigmoid.py
new file mode 100755
index 0000000..f863894
--- /dev/null
+++ b/DL_demo/ch03/sigmoid.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+X = np.arange(-5.0, 5.0, 0.1)
+Y = sigmoid(X)
+plt.plot(X, Y)
+plt.ylim(-0.1, 1.1)
+plt.show()
diff --git a/DL_demo/ch03/step_function.py b/DL_demo/ch03/step_function.py
new file mode 100755
index 0000000..5f42383
--- /dev/null
+++ b/DL_demo/ch03/step_function.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def step_function(x):
+    return np.array(x > 0, dtype=int)
+
+X = np.arange(-5.0, 5.0, 0.1)
+Y = step_function(X)
+plt.plot(X, Y)
+plt.ylim(-0.1, 1.1)  # set the range of the y-axis
+plt.show()
diff --git a/DL_demo/ch04/gradient_1d.py b/DL_demo/ch04/gradient_1d.py
new file mode 100755
index 0000000..378402f
--- /dev/null
+++ b/DL_demo/ch04/gradient_1d.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def numerical_diff(f, x):
+    h = 1e-4  # 0.0001
+    return (f(x+h) - f(x-h)) / (2*h)
+
+
+def function_1(x):
+    return 0.01*x**2 + 0.1*x
+
+
+def tangent_line(f, x):
+    d = numerical_diff(f, x)
+    print(d)
+    y = f(x) - d*x
+    return lambda t: d*t + y
+
+x = np.arange(0.0, 20.0, 0.1)
+y = function_1(x)
+plt.xlabel("x")
+plt.ylabel("f(x)")
+
+tf = tangent_line(function_1, 5)
+y2 = tf(x)
+
+plt.plot(x, y)
+plt.plot(x, y2)
+plt.show()
diff --git a/DL_demo/ch04/gradient_2d.py b/DL_demo/ch04/gradient_2d.py
new file mode 100755
index 0000000..80cc1c8
--- /dev/null
+++ b/DL_demo/ch04/gradient_2d.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+# cf. http://d.hatena.ne.jp/white_wheels/20100327/p3
+import numpy as np
+import matplotlib.pylab as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def 
_numerical_gradient_no_batch(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + for idx in range(x.size): + tmp_val = x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + + return grad + + +def numerical_gradient(f, X): + if X.ndim == 1: + return _numerical_gradient_no_batch(f, X) + else: + grad = np.zeros_like(X) + + for idx, x in enumerate(X): + grad[idx] = _numerical_gradient_no_batch(f, x) + + return grad + + +def function_2(x): + if x.ndim == 1: + return np.sum(x**2) + else: + return np.sum(x**2, axis=1) + + +def tangent_line(f, x): + d = numerical_gradient(f, x) + print(d) + y = f(x) - d*x + return lambda t: d*t + y + +if __name__ == '__main__': + x0 = np.arange(-2, 2.5, 0.25) + x1 = np.arange(-2, 2.5, 0.25) + X, Y = np.meshgrid(x0, x1) + + X = X.flatten() + Y = Y.flatten() + + grad = numerical_gradient(function_2, np.array([X, Y]) ) + print(grad) + + plt.figure() + plt.quiver(X, Y, -grad[0], -grad[1], angles="xy",color="#666666")#,headwidth=10,scale=40,color="#444444") + plt.xlim([-2, 2]) + plt.ylim([-2, 2]) + plt.xlabel('x0') + plt.ylabel('x1') + plt.grid() + plt.legend() + plt.draw() + plt.show() \ No newline at end of file diff --git a/DL_demo/ch04/gradient_method.py b/DL_demo/ch04/gradient_method.py new file mode 100755 index 0000000..a7fb030 --- /dev/null +++ b/DL_demo/ch04/gradient_method.py @@ -0,0 +1,37 @@ +# coding: utf-8 +import numpy as np +import matplotlib.pylab as plt +from gradient_2d import numerical_gradient + + +def gradient_descent(f, init_x, lr=0.01, step_num=100): + x = init_x + x_history = [] + + for i in range(step_num): + x_history.append( x.copy() ) + + grad = numerical_gradient(f, x) + x -= lr * grad + + return x, np.array(x_history) + + +def function_2(x): + return x[0]**2 + x[1]**2 + +init_x = np.array([-3.0, 4.0]) + +lr = 0.1 +step_num = 20 +x, x_history = gradient_descent(function_2, init_x, lr=lr, step_num=step_num) + +plt.plot( [-5, 5], [0,0], '--b') +plt.plot( [0,0], [-5, 5], '--b') +plt.plot(x_history[:,0], x_history[:,1], 'o') + +plt.xlim(-3.5, 3.5) +plt.ylim(-4.5, 4.5) +plt.xlabel("X0") +plt.ylabel("X1") +plt.show() diff --git a/DL_demo/ch04/gradient_simplenet.py b/DL_demo/ch04/gradient_simplenet.py new file mode 100755 index 0000000..35f6dd5 --- /dev/null +++ b/DL_demo/ch04/gradient_simplenet.py @@ -0,0 +1,31 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录中的文件而进行的设定 +import numpy as np +from common.functions import softmax, cross_entropy_error +from common.gradient import numerical_gradient + + +class simpleNet: + def __init__(self): + self.W = np.random.randn(2,3) + + def predict(self, x): + return np.dot(x, self.W) + + def loss(self, x, t): + z = self.predict(x) + y = softmax(z) + loss = cross_entropy_error(y, t) + + return loss + +x = np.array([0.6, 0.9]) +t = np.array([0, 0, 1]) + +net = simpleNet() + +f = lambda w: net.loss(x, t) +dW = numerical_gradient(f, net.W) + +print(dW) diff --git a/DL_demo/ch04/train_neuralnet.py b/DL_demo/ch04/train_neuralnet.py new file mode 100755 index 0000000..838d21a --- /dev/null +++ b/DL_demo/ch04/train_neuralnet.py @@ -0,0 +1,57 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from two_layer_net import TwoLayerNet + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = 
TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + +iters_num = 10000 # 适当设定循环的次数 +train_size = x_train.shape[0] +batch_size = 100 +learning_rate = 0.1 + +train_loss_list = [] +train_acc_list = [] +test_acc_list = [] + +iter_per_epoch = max(train_size / batch_size, 1) + +for i in range(iters_num): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + # 计算梯度 + #grad = network.numerical_gradient(x_batch, t_batch) + grad = network.gradient(x_batch, t_batch) + + # 更新参数 + for key in ('W1', 'b1', 'W2', 'b2'): + network.params[key] -= learning_rate * grad[key] + + loss = network.loss(x_batch, t_batch) + train_loss_list.append(loss) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + test_acc = network.accuracy(x_test, t_test) + train_acc_list.append(train_acc) + test_acc_list.append(test_acc) + print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc)) + +# 绘制图形 +markers = {'train': 'o', 'test': 's'} +x = np.arange(len(train_acc_list)) +plt.plot(x, train_acc_list, label='train acc') +plt.plot(x, test_acc_list, label='test acc', linestyle='--') +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() \ No newline at end of file diff --git a/DL_demo/ch04/two_layer_net.py b/DL_demo/ch04/two_layer_net.py new file mode 100755 index 0000000..c1b243c --- /dev/null +++ b/DL_demo/ch04/two_layer_net.py @@ -0,0 +1,78 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +from common.functions import * +from common.gradient import numerical_gradient + + +class TwoLayerNet: + + def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): + # 初始化权重 + self.params = {} + self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) + self.params['b1'] = np.zeros(hidden_size) + self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) + self.params['b2'] = np.zeros(output_size) + + def predict(self, x): + W1, W2 = self.params['W1'], self.params['W2'] + b1, b2 = self.params['b1'], self.params['b2'] + + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + a2 = np.dot(z1, W2) + b2 + y = softmax(a2) + + return y + + # x:输入数据, t:监督数据 + def loss(self, x, t): + y = self.predict(x) + + return cross_entropy_error(y, t) + + def accuracy(self, x, t): + y = self.predict(x) + y = np.argmax(y, axis=1) + t = np.argmax(t, axis=1) + + accuracy = np.sum(y == t) / float(x.shape[0]) + return accuracy + + # x:输入数据, t:监督数据 + def numerical_gradient(self, x, t): + loss_W = lambda W: self.loss(x, t) + + grads = {} + grads['W1'] = numerical_gradient(loss_W, self.params['W1']) + grads['b1'] = numerical_gradient(loss_W, self.params['b1']) + grads['W2'] = numerical_gradient(loss_W, self.params['W2']) + grads['b2'] = numerical_gradient(loss_W, self.params['b2']) + + return grads + + def gradient(self, x, t): + W1, W2 = self.params['W1'], self.params['W2'] + b1, b2 = self.params['b1'], self.params['b2'] + grads = {} + + batch_num = x.shape[0] + + # forward + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + a2 = np.dot(z1, W2) + b2 + y = softmax(a2) + + # backward + dy = (y - t) / batch_num + grads['W2'] = np.dot(z1.T, dy) + grads['b2'] = np.sum(dy, axis=0) + + da1 = np.dot(dy, W2.T) + dz1 = sigmoid_grad(a1) * da1 + grads['W1'] = np.dot(x.T, dz1) + grads['b1'] = np.sum(dz1, axis=0) + + return grads \ No newline at end of file diff --git a/DL_demo/ch05/buy_apple.py b/DL_demo/ch05/buy_apple.py new 
file mode 100755 index 0000000..8334755 --- /dev/null +++ b/DL_demo/ch05/buy_apple.py @@ -0,0 +1,24 @@ +# coding: utf-8 +from layer_naive import * + + +apple = 100 +apple_num = 2 +tax = 1.1 + +mul_apple_layer = MulLayer() +mul_tax_layer = MulLayer() + +# forward +apple_price = mul_apple_layer.forward(apple, apple_num) +price = mul_tax_layer.forward(apple_price, tax) + +# backward +dprice = 1 +dapple_price, dtax = mul_tax_layer.backward(dprice) +dapple, dapple_num = mul_apple_layer.backward(dapple_price) + +print("price:", int(price)) +print("dApple:", dapple) +print("dApple_num:", int(dapple_num)) +print("dTax:", dtax) diff --git a/DL_demo/ch05/buy_apple_orange.py b/DL_demo/ch05/buy_apple_orange.py new file mode 100755 index 0000000..9bdc6af --- /dev/null +++ b/DL_demo/ch05/buy_apple_orange.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from layer_naive import * + +apple = 100 +apple_num = 2 +orange = 150 +orange_num = 3 +tax = 1.1 + +# layer +mul_apple_layer = MulLayer() +mul_orange_layer = MulLayer() +add_apple_orange_layer = AddLayer() +mul_tax_layer = MulLayer() + +# forward +apple_price = mul_apple_layer.forward(apple, apple_num) # (1) +orange_price = mul_orange_layer.forward(orange, orange_num) # (2) +all_price = add_apple_orange_layer.forward(apple_price, orange_price) # (3) +price = mul_tax_layer.forward(all_price, tax) # (4) + +# backward +dprice = 1 +dall_price, dtax = mul_tax_layer.backward(dprice) # (4) +dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price) # (3) +dorange, dorange_num = mul_orange_layer.backward(dorange_price) # (2) +dapple, dapple_num = mul_apple_layer.backward(dapple_price) # (1) + +print("price:", int(price)) +print("dApple:", dapple) +print("dApple_num:", int(dapple_num)) +print("dOrange:", dorange) +print("dOrange_num:", int(dorange_num)) +print("dTax:", dtax) diff --git a/DL_demo/ch05/gradient_check.py b/DL_demo/ch05/gradient_check.py new file mode 100755 index 0000000..1471800 --- /dev/null +++ b/DL_demo/ch05/gradient_check.py @@ -0,0 +1,21 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from dataset.mnist import load_mnist +from two_layer_net import TwoLayerNet + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + +x_batch = x_train[:3] +t_batch = t_train[:3] + +grad_numerical = network.numerical_gradient(x_batch, t_batch) +grad_backprop = network.gradient(x_batch, t_batch) + +for key in grad_numerical.keys(): + diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) ) + print(key + ":" + str(diff)) \ No newline at end of file diff --git a/DL_demo/ch05/layer_naive.py b/DL_demo/ch05/layer_naive.py new file mode 100755 index 0000000..922336a --- /dev/null +++ b/DL_demo/ch05/layer_naive.py @@ -0,0 +1,36 @@ +# coding: utf-8 + + +class MulLayer: + def __init__(self): + self.x = None + self.y = None + + def forward(self, x, y): + self.x = x + self.y = y + out = x * y + + return out + + def backward(self, dout): + dx = dout * self.y + dy = dout * self.x + + return dx, dy + + +class AddLayer: + def __init__(self): + pass + + def forward(self, x, y): + out = x + y + + return out + + def backward(self, dout): + dx = dout * 1 + dy = dout * 1 + + return dx, dy diff --git a/DL_demo/ch05/train_neuralnet.py b/DL_demo/ch05/train_neuralnet.py new file mode 100755 index 0000000..3693057 --- /dev/null +++ b/DL_demo/ch05/train_neuralnet.py @@ -0,0 +1,46 @@ +# coding: utf-8 
+import sys, os +sys.path.append(os.pardir) + +import numpy as np +from dataset.mnist import load_mnist +from two_layer_net import TwoLayerNet + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + +iters_num = 10000 +train_size = x_train.shape[0] +batch_size = 100 +learning_rate = 0.1 + +train_loss_list = [] +train_acc_list = [] +test_acc_list = [] + +iter_per_epoch = max(train_size / batch_size, 1) + +for i in range(iters_num): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + # 梯度 + #grad = network.numerical_gradient(x_batch, t_batch) + grad = network.gradient(x_batch, t_batch) + + # 更新 + for key in ('W1', 'b1', 'W2', 'b2'): + network.params[key] -= learning_rate * grad[key] + + loss = network.loss(x_batch, t_batch) + train_loss_list.append(loss) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + test_acc = network.accuracy(x_test, t_test) + train_acc_list.append(train_acc) + test_acc_list.append(test_acc) + print(train_acc, test_acc) diff --git a/DL_demo/ch05/two_layer_net.py b/DL_demo/ch05/two_layer_net.py new file mode 100755 index 0000000..589ccf4 --- /dev/null +++ b/DL_demo/ch05/two_layer_net.py @@ -0,0 +1,77 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from common.layers import * +from common.gradient import numerical_gradient +from collections import OrderedDict + + +class TwoLayerNet: + + def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01): + # 初始化权重 + self.params = {} + self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) + self.params['b1'] = np.zeros(hidden_size) + self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) + self.params['b2'] = np.zeros(output_size) + + # 生成层 + self.layers = OrderedDict() + self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) + self.layers['Relu1'] = Relu() + self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) + + self.lastLayer = SoftmaxWithLoss() + + def predict(self, x): + for layer in self.layers.values(): + x = layer.forward(x) + + return x + + # x:输入数据, t:监督数据 + def loss(self, x, t): + y = self.predict(x) + return self.lastLayer.forward(y, t) + + def accuracy(self, x, t): + y = self.predict(x) + y = np.argmax(y, axis=1) + if t.ndim != 1 : t = np.argmax(t, axis=1) + + accuracy = np.sum(y == t) / float(x.shape[0]) + return accuracy + + # x:输入数据, t:监督数据 + def numerical_gradient(self, x, t): + loss_W = lambda W: self.loss(x, t) + + grads = {} + grads['W1'] = numerical_gradient(loss_W, self.params['W1']) + grads['b1'] = numerical_gradient(loss_W, self.params['b1']) + grads['W2'] = numerical_gradient(loss_W, self.params['W2']) + grads['b2'] = numerical_gradient(loss_W, self.params['b2']) + + return grads + + def gradient(self, x, t): + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.lastLayer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db + grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db + + return grads diff --git a/DL_demo/ch06/batch_norm_gradient_check.py b/DL_demo/ch06/batch_norm_gradient_check.py new file mode 100755 
index 0000000..33c5a86 --- /dev/null +++ b/DL_demo/ch06/batch_norm_gradient_check.py @@ -0,0 +1,23 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from dataset.mnist import load_mnist +from common.multi_layer_net_extend import MultiLayerNetExtend + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100], output_size=10, + use_batchnorm=True) + +x_batch = x_train[:1] +t_batch = t_train[:1] + +grad_backprop = network.gradient(x_batch, t_batch) +grad_numerical = network.numerical_gradient(x_batch, t_batch) + + +for key in grad_numerical.keys(): + diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) ) + print(key + ":" + str(diff)) \ No newline at end of file diff --git a/DL_demo/ch06/batch_norm_test.py b/DL_demo/ch06/batch_norm_test.py new file mode 100755 index 0000000..d08d446 --- /dev/null +++ b/DL_demo/ch06/batch_norm_test.py @@ -0,0 +1,87 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.multi_layer_net_extend import MultiLayerNetExtend +from common.optimizer import SGD, Adam + +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +# 减少学习数据 +x_train = x_train[:1000] +t_train = t_train[:1000] + +max_epochs = 20 +train_size = x_train.shape[0] +batch_size = 100 +learning_rate = 0.01 + + +def __train(weight_init_std): + bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10, + weight_init_std=weight_init_std, use_batchnorm=True) + network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10, + weight_init_std=weight_init_std) + optimizer = SGD(lr=learning_rate) + + train_acc_list = [] + bn_train_acc_list = [] + + iter_per_epoch = max(train_size / batch_size, 1) + epoch_cnt = 0 + + for i in range(1000000000): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + for _network in (bn_network, network): + grads = _network.gradient(x_batch, t_batch) + optimizer.update(_network.params, grads) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + bn_train_acc = bn_network.accuracy(x_train, t_train) + train_acc_list.append(train_acc) + bn_train_acc_list.append(bn_train_acc) + + print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc)) + + epoch_cnt += 1 + if epoch_cnt >= max_epochs: + break + + return train_acc_list, bn_train_acc_list + + +# 3.绘制图形========== +weight_scale_list = np.logspace(0, -4, num=16) +x = np.arange(max_epochs) + +for i, w in enumerate(weight_scale_list): + print( "============== " + str(i+1) + "/16" + " ==============") + train_acc_list, bn_train_acc_list = __train(w) + + plt.subplot(4,4,i+1) + plt.title("W:" + str(w)) + if i == 15: + plt.plot(x, bn_train_acc_list, label='Batch Normalization', markevery=2) + plt.plot(x, train_acc_list, linestyle = "--", label='Normal(without BatchNorm)', markevery=2) + else: + plt.plot(x, bn_train_acc_list, markevery=2) + plt.plot(x, train_acc_list, linestyle="--", markevery=2) + + plt.ylim(0, 1.0) + if i % 4: + plt.yticks([]) + else: + plt.ylabel("accuracy") + if i < 12: + plt.xticks([]) + else: + plt.xlabel("epochs") + plt.legend(loc='lower right') + +plt.show() \ No newline at end of file 
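batch_norm_test.py above compares training with and without Batch Normalization. The transform itself is small: normalize each feature over the mini-batch to zero mean and unit variance, then apply a learned scale and shift. A minimal forward-pass sketch (illustrative only, not the common/layers.py implementation; `gamma`, `beta`, and `eps` follow the usual naming):

```python
import numpy as np

def batchnorm_forward(x, gamma, beta, eps=1e-7):
    mu = x.mean(axis=0)                    # per-feature mean over the batch
    var = x.var(axis=0)                    # per-feature variance over the batch
    x_hat = (x - mu) / np.sqrt(var + eps)  # normalized activations
    return gamma * x_hat + beta            # learned scale and shift

x = 10 * np.random.randn(100, 5) + 3       # badly scaled activations
out = batchnorm_forward(x, gamma=np.ones(5), beta=np.zeros(5))
print(out.mean(axis=0).round(6), out.std(axis=0).round(6))  # ~0 and ~1 per feature
```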
diff --git a/DL_demo/ch06/hyperparameter_optimization.py b/DL_demo/ch06/hyperparameter_optimization.py
new file mode 100755
index 0000000..4347ea5
--- /dev/null
+++ b/DL_demo/ch06/hyperparameter_optimization.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+import matplotlib.pyplot as plt
+from dataset.mnist import load_mnist
+from common.multi_layer_net import MultiLayerNet
+from common.util import shuffle_dataset
+from common.trainer import Trainer
+
+(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
+
+# use fewer training samples so the search runs quickly
+x_train = x_train[:500]
+t_train = t_train[:500]
+
+# split off validation data
+validation_rate = 0.20
+validation_num = int(x_train.shape[0] * validation_rate)  # slice indices must be integers
+x_train, t_train = shuffle_dataset(x_train, t_train)
+x_val = x_train[:validation_num]
+t_val = t_train[:validation_num]
+x_train = x_train[validation_num:]
+t_train = t_train[validation_num:]
+
+
+def __train(lr, weight_decay, epochs=50):
+    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
+                            output_size=10, weight_decay_lambda=weight_decay)
+    trainer = Trainer(network, x_train, t_train, x_val, t_val,
+                      epochs=epochs, mini_batch_size=100,
+                      optimizer='sgd', optimizer_param={'lr': lr}, verbose=False)
+    trainer.train()
+
+    return trainer.test_acc_list, trainer.train_acc_list
+
+
+# random search over the hyperparameters ======================
+optimization_trial = 100
+results_val = {}
+results_train = {}
+for _ in range(optimization_trial):
+    # search ranges for the hyperparameters ===============
+    weight_decay = 10 ** np.random.uniform(-8, -4)
+    lr = 10 ** np.random.uniform(-6, -2)
+    # ================================================
+
+    val_acc_list, train_acc_list = __train(lr, weight_decay)
+    print("val acc:" + str(val_acc_list[-1]) + " | lr:" + str(lr) + ", weight decay:" + str(weight_decay))
+    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
+    results_val[key] = val_acc_list
+    results_train[key] = train_acc_list
+
+# plot the results ========================================================
+print("=========== Hyper-Parameter Optimization Result ===========")
+graph_draw_num = 20
+col_num = 5
+row_num = int(np.ceil(graph_draw_num / col_num))
+i = 0
+
+for key, val_acc_list in sorted(results_val.items(), key=lambda x: x[1][-1], reverse=True):
+    print("Best-" + str(i+1) + "(val acc:" + str(val_acc_list[-1]) + ") | " + key)
+
+    plt.subplot(row_num, col_num, i+1)
+    plt.title("Best-" + str(i+1))
+    plt.ylim(0.0, 1.0)
+    if i % 5: plt.yticks([])
+    plt.xticks([])
+    x = np.arange(len(val_acc_list))
+    plt.plot(x, val_acc_list)
+    plt.plot(x, results_train[key], "--")
+    i += 1
+
+    if i >= graph_draw_num:
+        break
+
+plt.show()
diff --git a/DL_demo/ch06/optimizer_compare_mnist.py b/DL_demo/ch06/optimizer_compare_mnist.py
new file mode 100755
index 0000000..58f5f7e
--- /dev/null
+++ b/DL_demo/ch06/optimizer_compare_mnist.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+import os
+import sys
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import matplotlib.pyplot as plt
+from dataset.mnist import load_mnist
+from common.util import smooth_curve
+from common.multi_layer_net import MultiLayerNet
+from common.optimizer import *
+
+
+# 0: load the MNIST data ==========
+(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
+
+train_size = x_train.shape[0]
+batch_size = 128
+max_iterations = 2000
+
+
+# 1: experiment setup ==========
+optimizers = {}
+optimizers['SGD'] = SGD()
+optimizers['Momentum'] = Momentum()
+optimizers['AdaGrad'] = AdaGrad()
+optimizers['Adam'] = Adam()
+#optimizers['RMSprop'] = RMSprop() + +networks = {} +train_loss = {} +for key in optimizers.keys(): + networks[key] = MultiLayerNet( + input_size=784, hidden_size_list=[100, 100, 100, 100], + output_size=10) + train_loss[key] = [] + + +# 2:开始训练========== +for i in range(max_iterations): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + for key in optimizers.keys(): + grads = networks[key].gradient(x_batch, t_batch) + optimizers[key].update(networks[key].params, grads) + + loss = networks[key].loss(x_batch, t_batch) + train_loss[key].append(loss) + + if i % 100 == 0: + print( "===========" + "iteration:" + str(i) + "===========") + for key in optimizers.keys(): + loss = networks[key].loss(x_batch, t_batch) + print(key + ":" + str(loss)) + + +# 3.绘制图形========== +markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"} +x = np.arange(max_iterations) +for key in optimizers.keys(): + plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key) +plt.xlabel("iterations") +plt.ylabel("loss") +plt.ylim(0, 1) +plt.legend() +plt.show() diff --git a/DL_demo/ch06/optimizer_compare_naive.py b/DL_demo/ch06/optimizer_compare_naive.py new file mode 100755 index 0000000..abe6d36 --- /dev/null +++ b/DL_demo/ch06/optimizer_compare_naive.py @@ -0,0 +1,70 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from collections import OrderedDict +from common.optimizer import * + + +def f(x, y): + return x**2 / 20.0 + y**2 + + +def df(x, y): + return x / 10.0, 2.0*y + +init_pos = (-7.0, 2.0) +params = {} +params['x'], params['y'] = init_pos[0], init_pos[1] +grads = {} +grads['x'], grads['y'] = 0, 0 + + +optimizers = OrderedDict() +optimizers["SGD"] = SGD(lr=0.95) +optimizers["Momentum"] = Momentum(lr=0.1) +optimizers["AdaGrad"] = AdaGrad(lr=1.5) +optimizers["Adam"] = Adam(lr=0.3) + +idx = 1 + +for key in optimizers: + optimizer = optimizers[key] + x_history = [] + y_history = [] + params['x'], params['y'] = init_pos[0], init_pos[1] + + for i in range(30): + x_history.append(params['x']) + y_history.append(params['y']) + + grads['x'], grads['y'] = df(params['x'], params['y']) + optimizer.update(params, grads) + + + x = np.arange(-10, 10, 0.01) + y = np.arange(-5, 5, 0.01) + + X, Y = np.meshgrid(x, y) + Z = f(X, Y) + + # for simple contour line + mask = Z > 7 + Z[mask] = 0 + + # plot + plt.subplot(2, 2, idx) + idx += 1 + plt.plot(x_history, y_history, 'o-', color="red") + plt.contour(X, Y, Z) + plt.ylim(-10, 10) + plt.xlim(-10, 10) + plt.plot(0, 0, '+') + #colorbar() + #spring() + plt.title(key) + plt.xlabel("x") + plt.ylabel("y") + +plt.show() \ No newline at end of file diff --git a/DL_demo/ch06/overfit_dropout.py b/DL_demo/ch06/overfit_dropout.py new file mode 100755 index 0000000..c09bb2d --- /dev/null +++ b/DL_demo/ch06/overfit_dropout.py @@ -0,0 +1,40 @@ +# coding: utf-8 +import os +import sys +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.multi_layer_net_extend import MultiLayerNetExtend +from common.trainer import Trainer + +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +# 为了再现过拟合,减少学习数据 +x_train = x_train[:300] +t_train = t_train[:300] + +# 设定是否使用Dropuout,以及比例 ======================== +use_dropout = True # 不使用Dropout的情况下为False +dropout_ratio = 0.2 +# 
==================================================== + +network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], + output_size=10, use_dropout=use_dropout, dropout_ration=dropout_ratio) +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=301, mini_batch_size=100, + optimizer='sgd', optimizer_param={'lr': 0.01}, verbose=True) +trainer.train() + +train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list + +# 绘制图形========== +markers = {'train': 'o', 'test': 's'} +x = np.arange(len(train_acc_list)) +plt.plot(x, train_acc_list, marker='o', label='train', markevery=10) +plt.plot(x, test_acc_list, marker='s', label='test', markevery=10) +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() \ No newline at end of file diff --git a/DL_demo/ch06/overfit_weight_decay.py b/DL_demo/ch06/overfit_weight_decay.py new file mode 100755 index 0000000..92c5a7b --- /dev/null +++ b/DL_demo/ch06/overfit_weight_decay.py @@ -0,0 +1,68 @@ +# coding: utf-8 +import os +import sys + +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.multi_layer_net import MultiLayerNet +from common.optimizer import SGD + +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +# 为了再现过拟合,减少学习数据 +x_train = x_train[:300] +t_train = t_train[:300] + +# weight decay(权值衰减)的设定 ======================= +#weight_decay_lambda = 0 # 不使用权值衰减的情况 +weight_decay_lambda = 0.1 +# ==================================================== + +network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], output_size=10, + weight_decay_lambda=weight_decay_lambda) +optimizer = SGD(lr=0.01) + +max_epochs = 201 +train_size = x_train.shape[0] +batch_size = 100 + +train_loss_list = [] +train_acc_list = [] +test_acc_list = [] + +iter_per_epoch = max(train_size / batch_size, 1) +epoch_cnt = 0 + +for i in range(1000000000): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + grads = network.gradient(x_batch, t_batch) + optimizer.update(network.params, grads) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + test_acc = network.accuracy(x_test, t_test) + train_acc_list.append(train_acc) + test_acc_list.append(test_acc) + + print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc)) + + epoch_cnt += 1 + if epoch_cnt >= max_epochs: + break + + +# 3.绘制图形========== +markers = {'train': 'o', 'test': 's'} +x = np.arange(max_epochs) +plt.plot(x, train_acc_list, marker='o', label='train', markevery=10) +plt.plot(x, test_acc_list, marker='s', label='test', markevery=10) +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() \ No newline at end of file diff --git a/DL_demo/ch06/weight_init_activation_histogram.py b/DL_demo/ch06/weight_init_activation_histogram.py new file mode 100755 index 0000000..933cf83 --- /dev/null +++ b/DL_demo/ch06/weight_init_activation_histogram.py @@ -0,0 +1,53 @@ +# coding: utf-8 +import numpy as np +import matplotlib.pyplot as plt + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +def ReLU(x): + return np.maximum(0, x) + + +def tanh(x): + return np.tanh(x) + +input_data = np.random.randn(1000, 100) # 1000个数据 +node_num = 100 # 各隐藏层的节点(神经元)数 +hidden_layer_size = 5 # 隐藏层有5层 +activations = {} # 
激活值的结果保存在这里 + +x = input_data + +for i in range(hidden_layer_size): + if i != 0: + x = activations[i-1] + + # 改变初始值进行实验! + w = np.random.randn(node_num, node_num) * 1 + # w = np.random.randn(node_num, node_num) * 0.01 + # w = np.random.randn(node_num, node_num) * np.sqrt(1.0 / node_num) + # w = np.random.randn(node_num, node_num) * np.sqrt(2.0 / node_num) + + + a = np.dot(x, w) + + + # 将激活函数的种类也改变,来进行实验! + z = sigmoid(a) + # z = ReLU(a) + # z = tanh(a) + + activations[i] = z + +# 绘制直方图 +for i, a in activations.items(): + plt.subplot(1, len(activations), i+1) + plt.title(str(i+1) + "-layer") + if i != 0: plt.yticks([], []) + # plt.xlim(0.1, 1) + # plt.ylim(0, 7000) + plt.hist(a.flatten(), 30, range=(0,1)) +plt.show() diff --git a/DL_demo/ch06/weight_init_compare.py b/DL_demo/ch06/weight_init_compare.py new file mode 100755 index 0000000..cf15609 --- /dev/null +++ b/DL_demo/ch06/weight_init_compare.py @@ -0,0 +1,63 @@ +# coding: utf-8 +import os +import sys + +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.util import smooth_curve +from common.multi_layer_net import MultiLayerNet +from common.optimizer import SGD + + +# 0:读入MNIST数据========== +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +train_size = x_train.shape[0] +batch_size = 128 +max_iterations = 2000 + + +# 1:进行实验的设置========== +weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'} +optimizer = SGD(lr=0.01) + +networks = {} +train_loss = {} +for key, weight_type in weight_init_types.items(): + networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100], + output_size=10, weight_init_std=weight_type) + train_loss[key] = [] + + +# 2:开始训练========== +for i in range(max_iterations): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + for key in weight_init_types.keys(): + grads = networks[key].gradient(x_batch, t_batch) + optimizer.update(networks[key].params, grads) + + loss = networks[key].loss(x_batch, t_batch) + train_loss[key].append(loss) + + if i % 100 == 0: + print("===========" + "iteration:" + str(i) + "===========") + for key in weight_init_types.keys(): + loss = networks[key].loss(x_batch, t_batch) + print(key + ":" + str(loss)) + + +# 3.绘制图形========== +markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'} +x = np.arange(max_iterations) +for key in weight_init_types.keys(): + plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key) +plt.xlabel("iterations") +plt.ylabel("loss") +plt.ylim(0, 2.5) +plt.legend() +plt.show() \ No newline at end of file diff --git a/DL_demo/ch07/apply_filter.py b/DL_demo/ch07/apply_filter.py new file mode 100755 index 0000000..488f208 --- /dev/null +++ b/DL_demo/ch07/apply_filter.py @@ -0,0 +1,54 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from simple_convnet import SimpleConvNet +from matplotlib.image import imread +from common.layers import Convolution + +def filter_show(filters, nx=4, show_num=16): + """ + c.f. 
https://gist.github.com/aidiary/07d530d5e08011832b12#file-draw_weight-py + """ + FN, C, FH, FW = filters.shape + ny = int(np.ceil(show_num / nx)) + + fig = plt.figure() + fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) + + for i in range(show_num): + ax = fig.add_subplot(4, 4, i+1, xticks=[], yticks=[]) + ax.imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest') + + +network = SimpleConvNet(input_dim=(1,28,28), + conv_param = {'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}, + hidden_size=100, output_size=10, weight_init_std=0.01) + +# 学习后的权重 +network.load_params("params.pkl") + +filter_show(network.params['W1'], 16) + +img = imread('../dataset/lena_gray.png') +img = img.reshape(1, 1, *img.shape) + +fig = plt.figure() + +w_idx = 1 + +for i in range(16): + w = network.params['W1'][i] + b = 0 # network.params['b1'][i] + + w = w.reshape(1, *w.shape) + #b = b.reshape(1, *b.shape) + conv_layer = Convolution(w, b) + out = conv_layer.forward(img) + out = out.reshape(out.shape[2], out.shape[3]) + + ax = fig.add_subplot(4, 4, i+1, xticks=[], yticks=[]) + ax.imshow(out, cmap=plt.cm.gray_r, interpolation='nearest') + +plt.show() \ No newline at end of file diff --git a/DL_demo/ch07/gradient_check.py b/DL_demo/ch07/gradient_check.py new file mode 100755 index 0000000..9d5cdff --- /dev/null +++ b/DL_demo/ch07/gradient_check.py @@ -0,0 +1,16 @@ +# coding: utf-8 +import numpy as np +from simple_convnet import SimpleConvNet + +network = SimpleConvNet(input_dim=(1,10, 10), + conv_param = {'filter_num':10, 'filter_size':3, 'pad':0, 'stride':1}, + hidden_size=10, output_size=10, weight_init_std=0.01) + +X = np.random.rand(100).reshape((1, 1, 10, 10)) +T = np.array([1]).reshape((1,1)) + +grad_num = network.numerical_gradient(X, T) +grad = network.gradient(X, T) + +for key, val in grad_num.items(): + print(key, np.abs(grad_num[key] - grad[key]).mean()) \ No newline at end of file diff --git a/DL_demo/ch07/params.pkl b/DL_demo/ch07/params.pkl new file mode 100755 index 0000000..7497eed Binary files /dev/null and b/DL_demo/ch07/params.pkl differ diff --git a/DL_demo/ch07/simple_convnet.py b/DL_demo/ch07/simple_convnet.py new file mode 100755 index 0000000..af0651b --- /dev/null +++ b/DL_demo/ch07/simple_convnet.py @@ -0,0 +1,160 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import pickle +import numpy as np +from collections import OrderedDict +from common.layers import * +from common.gradient import numerical_gradient + + +class SimpleConvNet: + """简单的ConvNet + + conv - relu - pool - affine - relu - affine - softmax + + Parameters + ---------- + input_size : 输入大小(MNIST的情况下为784) + hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100]) + output_size : 输出大小(MNIST的情况下为10) + activation : 'relu' or 'sigmoid' + weight_init_std : 指定权重的标准差(e.g. 
0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + """ + def __init__(self, input_dim=(1, 28, 28), + conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}, + hidden_size=100, output_size=10, weight_init_std=0.01): + filter_num = conv_param['filter_num'] + filter_size = conv_param['filter_size'] + filter_pad = conv_param['pad'] + filter_stride = conv_param['stride'] + input_size = input_dim[1] + conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1 + pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2)) + + # 初始化权重 + self.params = {} + self.params['W1'] = weight_init_std * \ + np.random.randn(filter_num, input_dim[0], filter_size, filter_size) + self.params['b1'] = np.zeros(filter_num) + self.params['W2'] = weight_init_std * \ + np.random.randn(pool_output_size, hidden_size) + self.params['b2'] = np.zeros(hidden_size) + self.params['W3'] = weight_init_std * \ + np.random.randn(hidden_size, output_size) + self.params['b3'] = np.zeros(output_size) + + # 生成层 + self.layers = OrderedDict() + self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], + conv_param['stride'], conv_param['pad']) + self.layers['Relu1'] = Relu() + self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) + self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) + self.layers['Relu2'] = Relu() + self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) + + self.last_layer = SoftmaxWithLoss() + + def predict(self, x): + for layer in self.layers.values(): + x = layer.forward(x) + + return x + + def loss(self, x, t): + """求损失函数 + 参数x是输入数据、t是教师标签 + """ + y = self.predict(x) + return self.last_layer.forward(y, t) + + def accuracy(self, x, t, batch_size=100): + if t.ndim != 1 : t = np.argmax(t, axis=1) + + acc = 0.0 + + for i in range(int(x.shape[0] / batch_size)): + tx = x[i*batch_size:(i+1)*batch_size] + tt = t[i*batch_size:(i+1)*batch_size] + y = self.predict(tx) + y = np.argmax(y, axis=1) + acc += np.sum(y == tt) + + return acc / x.shape[0] + + def numerical_gradient(self, x, t): + """求梯度(数值微分) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + loss_w = lambda w: self.loss(x, t) + + grads = {} + for idx in (1, 2, 3): + grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)]) + grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)]) + + return grads + + def gradient(self, x, t): + """求梯度(误差反向传播法) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db + grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db + grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db + + return grads + + def save_params(self, file_name="params.pkl"): + params = {} + for key, val in self.params.items(): + params[key] = val + with open(file_name, 'wb') as f: + pickle.dump(params, f) + + def load_params(self, file_name="params.pkl"): + with open(file_name, 'rb') as f: + params = 
pickle.load(f) + for key, val in params.items(): + self.params[key] = val + + for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']): + self.layers[key].W = self.params['W' + str(i+1)] + self.layers[key].b = self.params['b' + str(i+1)] \ No newline at end of file diff --git a/DL_demo/ch07/train_convnet.py b/DL_demo/ch07/train_convnet.py new file mode 100755 index 0000000..2596a9c --- /dev/null +++ b/DL_demo/ch07/train_convnet.py @@ -0,0 +1,42 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from simple_convnet import SimpleConvNet +from common.trainer import Trainer + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +# 处理花费时间较长的情况下减少数据 +#x_train, t_train = x_train[:5000], t_train[:5000] +#x_test, t_test = x_test[:1000], t_test[:1000] + +max_epochs = 20 + +network = SimpleConvNet(input_dim=(1,28,28), + conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}, + hidden_size=100, output_size=10, weight_init_std=0.01) + +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=max_epochs, mini_batch_size=100, + optimizer='Adam', optimizer_param={'lr': 0.001}, + evaluate_sample_num_per_epoch=1000) +trainer.train() + +# 保存参数 +network.save_params("params.pkl") +print("Saved Network Parameters!") + +# 绘制图形 +markers = {'train': 'o', 'test': 's'} +x = np.arange(max_epochs) +plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2) +plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2) +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() diff --git a/DL_demo/ch07/visualize_filter.py b/DL_demo/ch07/visualize_filter.py new file mode 100755 index 0000000..da53839 --- /dev/null +++ b/DL_demo/ch07/visualize_filter.py @@ -0,0 +1,28 @@ +# coding: utf-8 +import numpy as np +import matplotlib.pyplot as plt +from simple_convnet import SimpleConvNet + +def filter_show(filters, nx=8, margin=3, scale=10): + """ + c.f. https://gist.github.com/aidiary/07d530d5e08011832b12#file-draw_weight-py + """ + FN, C, FH, FW = filters.shape + ny = int(np.ceil(FN / nx)) + + fig = plt.figure() + fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) + + for i in range(FN): + ax = fig.add_subplot(ny, nx, i+1, xticks=[], yticks=[]) + ax.imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest') + plt.show() + + +network = SimpleConvNet() +# 随机进行初始化后的权重 +filter_show(network.params['W1']) + +# 学习后的权重 +network.load_params("params.pkl") +filter_show(network.params['W1']) \ No newline at end of file diff --git a/DL_demo/ch08/awesome_net.py b/DL_demo/ch08/awesome_net.py new file mode 100755 index 0000000..f23f101 --- /dev/null +++ b/DL_demo/ch08/awesome_net.py @@ -0,0 +1 @@ +# Create your awesome net!! 
\ No newline at end of file diff --git a/DL_demo/ch08/deep_convnet.py b/DL_demo/ch08/deep_convnet.py new file mode 100755 index 0000000..d974c7b --- /dev/null +++ b/DL_demo/ch08/deep_convnet.py @@ -0,0 +1,136 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import pickle +import numpy as np +from collections import OrderedDict +from common.layers import * + + +class DeepConvNet: + """识别率为99%以上的高精度的ConvNet + + 网络结构如下所示 + conv - relu - conv- relu - pool - + conv - relu - conv- relu - pool - + conv - relu - conv- relu - pool - + affine - relu - dropout - affine - dropout - softmax + """ + def __init__(self, input_dim=(1, 28, 28), + conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1}, + conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_6 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, + hidden_size=50, output_size=10): + # 初始化权重=========== + # 各层的神经元平均与前一层的几个神经元有连接(TODO:自动计算) + pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size]) + wight_init_scales = np.sqrt(2.0 / pre_node_nums) # 使用ReLU的情况下推荐的初始值 + + self.params = {} + pre_channel_num = input_dim[0] + for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]): + self.params['W' + str(idx+1)] = wight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size']) + self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num']) + pre_channel_num = conv_param['filter_num'] + self.params['W7'] = wight_init_scales[6] * np.random.randn(64*4*4, hidden_size) + self.params['b7'] = np.zeros(hidden_size) + self.params['W8'] = wight_init_scales[7] * np.random.randn(hidden_size, output_size) + self.params['b8'] = np.zeros(output_size) + + # 生成层=========== + self.layers = [] + self.layers.append(Convolution(self.params['W1'], self.params['b1'], + conv_param_1['stride'], conv_param_1['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W2'], self.params['b2'], + conv_param_2['stride'], conv_param_2['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Convolution(self.params['W3'], self.params['b3'], + conv_param_3['stride'], conv_param_3['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W4'], self.params['b4'], + conv_param_4['stride'], conv_param_4['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Convolution(self.params['W5'], self.params['b5'], + conv_param_5['stride'], conv_param_5['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W6'], self.params['b6'], + conv_param_6['stride'], conv_param_6['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Affine(self.params['W7'], self.params['b7'])) + self.layers.append(Relu()) + self.layers.append(Dropout(0.5)) + self.layers.append(Affine(self.params['W8'], self.params['b8'])) + self.layers.append(Dropout(0.5)) + + self.last_layer = SoftmaxWithLoss() + + def predict(self, x, train_flg=False): + for layer in self.layers: + if 
isinstance(layer, Dropout): + x = layer.forward(x, train_flg) + else: + x = layer.forward(x) + return x + + def loss(self, x, t): + y = self.predict(x, train_flg=True) + return self.last_layer.forward(y, t) + + def accuracy(self, x, t, batch_size=100): + if t.ndim != 1 : t = np.argmax(t, axis=1) + + acc = 0.0 + + for i in range(int(x.shape[0] / batch_size)): + tx = x[i*batch_size:(i+1)*batch_size] + tt = t[i*batch_size:(i+1)*batch_size] + y = self.predict(tx, train_flg=False) + y = np.argmax(y, axis=1) + acc += np.sum(y == tt) + + return acc / x.shape[0] + + def gradient(self, x, t): + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + tmp_layers = self.layers.copy() + tmp_layers.reverse() + for layer in tmp_layers: + dout = layer.backward(dout) + + # collect the gradients + grads = {} + for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): + grads['W' + str(i+1)] = self.layers[layer_idx].dW + grads['b' + str(i+1)] = self.layers[layer_idx].db + + return grads + + def save_params(self, file_name="params.pkl"): + params = {} + for key, val in self.params.items(): + params[key] = val + with open(file_name, 'wb') as f: + pickle.dump(params, f) + + def load_params(self, file_name="params.pkl"): + with open(file_name, 'rb') as f: + params = pickle.load(f) + for key, val in params.items(): + self.params[key] = val + + for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): + self.layers[layer_idx].W = self.params['W' + str(i+1)] + self.layers[layer_idx].b = self.params['b' + str(i+1)] diff --git a/DL_demo/ch08/deep_convnet_params.pkl b/DL_demo/ch08/deep_convnet_params.pkl new file mode 100755 index 0000000..7be9d9e Binary files /dev/null and b/DL_demo/ch08/deep_convnet_params.pkl differ diff --git a/DL_demo/ch08/half_float_network.py b/DL_demo/ch08/half_float_network.py new file mode 100755 index 0000000..f12c0ab --- /dev/null +++ b/DL_demo/ch08/half_float_network.py @@ -0,0 +1,28 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # setup for importing files from the parent directory +import numpy as np +import matplotlib.pyplot as plt +from deep_convnet import DeepConvNet +from dataset.mnist import load_mnist + + +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +network = DeepConvNet() +network.load_params("deep_convnet_params.pkl") + +sampled = 10000 # subsampled to speed things up +x_test = x_test[:sampled] +t_test = t_test[:sampled] + +print("calculate accuracy (float64) ... ") +print(network.accuracy(x_test, t_test)) + +# convert to float16 +x_test = x_test.astype(np.float16) +for param in network.params.values(): + param[...] = param.astype(np.float16) + +print("calculate accuracy (float16) ... ") +print(network.accuracy(x_test, t_test))
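half_float_network.py above shows that MNIST accuracy barely changes at half precision. One subtlety, shown in the illustrative snippet below (not part of the repository): `param[...] = param.astype(np.float16)` assigns back into the existing float64 arrays, so it rounds the stored values to float16 precision without changing their dtype, while `x_test.astype(np.float16)` really does produce float16 inputs.

```
# Illustrative sketch: cost of a float16 round-trip on float64 weights.
import numpy as np

w = np.random.randn(1000)                      # float64 weights
w16 = w.astype(np.float16).astype(np.float64)  # round to half precision and back
print(w16.dtype)                               # float64 -- dtype is unchanged
print(np.max(np.abs(w - w16)))                 # rounding error, roughly 1e-3 at most
```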
") +#sampled = 1000 +#x_test = x_test[:sampled] +#t_test = t_test[:sampled] + +classified_ids = [] + +acc = 0.0 +batch_size = 100 + +for i in range(int(x_test.shape[0] / batch_size)): + tx = x_test[i*batch_size:(i+1)*batch_size] + tt = t_test[i*batch_size:(i+1)*batch_size] + y = network.predict(tx, train_flg=False) + y = np.argmax(y, axis=1) + classified_ids.append(y) + acc += np.sum(y == tt) + +acc = acc / x_test.shape[0] +print("test accuracy:" + str(acc)) + +classified_ids = np.array(classified_ids) +classified_ids = classified_ids.flatten() + +max_view = 20 +current_view = 1 + +fig = plt.figure() +fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.2, wspace=0.2) + +mis_pairs = {} +for i, val in enumerate(classified_ids == t_test): + if not val: + ax = fig.add_subplot(4, 5, current_view, xticks=[], yticks=[]) + ax.imshow(x_test[i].reshape(28, 28), cmap=plt.cm.gray_r, interpolation='nearest') + mis_pairs[current_view] = (t_test[i], classified_ids[i]) + + current_view += 1 + if current_view > max_view: + break + +print("======= misclassified result =======") +print("{view index: (label, inference), ...}") +print(mis_pairs) + +plt.show() diff --git a/DL_demo/ch08/train_deepnet.py b/DL_demo/ch08/train_deepnet.py new file mode 100755 index 0000000..9cdf3fb --- /dev/null +++ b/DL_demo/ch08/train_deepnet.py @@ -0,0 +1,21 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from deep_convnet import DeepConvNet +from common.trainer import Trainer + +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +network = DeepConvNet() +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=20, mini_batch_size=100, + optimizer='Adam', optimizer_param={'lr':0.001}, + evaluate_sample_num_per_epoch=1000) +trainer.train() + +# 保存参数 +network.save_params("deep_convnet_params.pkl") +print("Saved Network Parameters!") diff --git a/DL_demo/common/__init__.py b/DL_demo/common/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/DL_demo/common/functions.py b/DL_demo/common/functions.py new file mode 100755 index 0000000..ec02dd0 --- /dev/null +++ b/DL_demo/common/functions.py @@ -0,0 +1,61 @@ +# coding: utf-8 +import numpy as np + + +def identity_function(x): + return x + + +def step_function(x): + return np.array(x > 0, dtype=np.int) + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +def sigmoid_grad(x): + return (1.0 - sigmoid(x)) * sigmoid(x) + + +def relu(x): + return np.maximum(0, x) + + +def relu_grad(x): + grad = np.zeros(x) + grad[x>=0] = 1 + return grad + + +def softmax(x): + if x.ndim == 2: + x = x.T + x = x - np.max(x, axis=0) + y = np.exp(x) / np.sum(np.exp(x), axis=0) + return y.T + + x = x - np.max(x) # 溢出对策 + return np.exp(x) / np.sum(np.exp(x)) + + +def mean_squared_error(y, t): + return 0.5 * np.sum((y-t)**2) + + +def cross_entropy_error(y, t): + if y.ndim == 1: + t = t.reshape(1, t.size) + y = y.reshape(1, y.size) + + # 监督数据是one-hot-vector的情况下,转换为正确解标签的索引 + if t.size == y.size: + t = t.argmax(axis=1) + + batch_size = y.shape[0] + return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size + + +def softmax_loss(X, t): + y = softmax(X) + return cross_entropy_error(y, t) diff --git a/DL_demo/common/gradient.py b/DL_demo/common/gradient.py new file mode 100755 index 0000000..31cb887 --- /dev/null +++ b/DL_demo/common/gradient.py @@ -0,0 +1,52 @@ +# coding: utf-8 +import numpy as np + +def 
_numerical_gradient_1d(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + for idx in range(x.size): + tmp_val = x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + + return grad + + +def numerical_gradient_2d(f, X): + if X.ndim == 1: + return _numerical_gradient_1d(f, X) + else: + grad = np.zeros_like(X) + + for idx, x in enumerate(X): + grad[idx] = _numerical_gradient_1d(f, x) + + return grad + + +def numerical_gradient(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) + while not it.finished: + idx = it.multi_index + tmp_val = x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + it.iternext() + + return grad \ No newline at end of file diff --git a/DL_demo/common/layers.py b/DL_demo/common/layers.py new file mode 100755 index 0000000..ae50d80 --- /dev/null +++ b/DL_demo/common/layers.py @@ -0,0 +1,284 @@ +# coding: utf-8 +import numpy as np +from common.functions import * +from common.util import im2col, col2im + + +class Relu: + def __init__(self): + self.mask = None + + def forward(self, x): + self.mask = (x <= 0) + out = x.copy() + out[self.mask] = 0 + + return out + + def backward(self, dout): + dout[self.mask] = 0 + dx = dout + + return dx + + +class Sigmoid: + def __init__(self): + self.out = None + + def forward(self, x): + out = sigmoid(x) + self.out = out + return out + + def backward(self, dout): + dx = dout * (1.0 - self.out) * self.out + + return dx + + +class Affine: + def __init__(self, W, b): + self.W =W + self.b = b + + self.x = None + self.original_x_shape = None + # 权重和偏置参数的导数 + self.dW = None + self.db = None + + def forward(self, x): + # 对应张量 + self.original_x_shape = x.shape + x = x.reshape(x.shape[0], -1) + self.x = x + + out = np.dot(self.x, self.W) + self.b + + return out + + def backward(self, dout): + dx = np.dot(dout, self.W.T) + self.dW = np.dot(self.x.T, dout) + self.db = np.sum(dout, axis=0) + + dx = dx.reshape(*self.original_x_shape) # 还原输入数据的形状(对应张量) + return dx + + +class SoftmaxWithLoss: + def __init__(self): + self.loss = None + self.y = None # softmax的输出 + self.t = None # 监督数据 + + def forward(self, x, t): + self.t = t + self.y = softmax(x) + self.loss = cross_entropy_error(self.y, self.t) + + return self.loss + + def backward(self, dout=1): + batch_size = self.t.shape[0] + if self.t.size == self.y.size: # 监督数据是one-hot-vector的情况 + dx = (self.y - self.t) / batch_size + else: + dx = self.y.copy() + dx[np.arange(batch_size), self.t] -= 1 + dx = dx / batch_size + + return dx + + +class Dropout: + """ + http://arxiv.org/abs/1207.0580 + """ + def __init__(self, dropout_ratio=0.5): + self.dropout_ratio = dropout_ratio + self.mask = None + + def forward(self, x, train_flg=True): + if train_flg: + self.mask = np.random.rand(*x.shape) > self.dropout_ratio + return x * self.mask + else: + return x * (1.0 - self.dropout_ratio) + + def backward(self, dout): + return dout * self.mask + + +class BatchNormalization: + """ + http://arxiv.org/abs/1502.03167 + """ + def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None): + self.gamma = gamma + self.beta = beta + self.momentum = momentum + self.input_shape = None # Conv层的情况下为4维,全连接层的情况下为2维 + + # 测试时使用的平均值和方差 + self.running_mean = running_mean + self.running_var = running_var + + # 
backward时使用的中间数据 + self.batch_size = None + self.xc = None + self.std = None + self.dgamma = None + self.dbeta = None + + def forward(self, x, train_flg=True): + self.input_shape = x.shape + if x.ndim != 2: + N, C, H, W = x.shape + x = x.reshape(N, -1) + + out = self.__forward(x, train_flg) + + return out.reshape(*self.input_shape) + + def __forward(self, x, train_flg): + if self.running_mean is None: + N, D = x.shape + self.running_mean = np.zeros(D) + self.running_var = np.zeros(D) + + if train_flg: + mu = x.mean(axis=0) + xc = x - mu + var = np.mean(xc**2, axis=0) + std = np.sqrt(var + 10e-7) + xn = xc / std + + self.batch_size = x.shape[0] + self.xc = xc + self.xn = xn + self.std = std + self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu + self.running_var = self.momentum * self.running_var + (1-self.momentum) * var + else: + xc = x - self.running_mean + xn = xc / ((np.sqrt(self.running_var + 10e-7))) + + out = self.gamma * xn + self.beta + return out + + def backward(self, dout): + if dout.ndim != 2: + N, C, H, W = dout.shape + dout = dout.reshape(N, -1) + + dx = self.__backward(dout) + + dx = dx.reshape(*self.input_shape) + return dx + + def __backward(self, dout): + dbeta = dout.sum(axis=0) + dgamma = np.sum(self.xn * dout, axis=0) + dxn = self.gamma * dout + dxc = dxn / self.std + dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0) + dvar = 0.5 * dstd / self.std + dxc += (2.0 / self.batch_size) * self.xc * dvar + dmu = np.sum(dxc, axis=0) + dx = dxc - dmu / self.batch_size + + self.dgamma = dgamma + self.dbeta = dbeta + + return dx + + +class Convolution: + def __init__(self, W, b, stride=1, pad=0): + self.W = W + self.b = b + self.stride = stride + self.pad = pad + + # 中间数据(backward时使用) + self.x = None + self.col = None + self.col_W = None + + # 权重和偏置参数的梯度 + self.dW = None + self.db = None + + def forward(self, x): + FN, C, FH, FW = self.W.shape + N, C, H, W = x.shape + out_h = 1 + int((H + 2*self.pad - FH) / self.stride) + out_w = 1 + int((W + 2*self.pad - FW) / self.stride) + + col = im2col(x, FH, FW, self.stride, self.pad) + col_W = self.W.reshape(FN, -1).T + + out = np.dot(col, col_W) + self.b + out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2) + + self.x = x + self.col = col + self.col_W = col_W + + return out + + def backward(self, dout): + FN, C, FH, FW = self.W.shape + dout = dout.transpose(0,2,3,1).reshape(-1, FN) + + self.db = np.sum(dout, axis=0) + self.dW = np.dot(self.col.T, dout) + self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW) + + dcol = np.dot(dout, self.col_W.T) + dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad) + + return dx + + +class Pooling: + def __init__(self, pool_h, pool_w, stride=1, pad=0): + self.pool_h = pool_h + self.pool_w = pool_w + self.stride = stride + self.pad = pad + + self.x = None + self.arg_max = None + + def forward(self, x): + N, C, H, W = x.shape + out_h = int(1 + (H - self.pool_h) / self.stride) + out_w = int(1 + (W - self.pool_w) / self.stride) + + col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad) + col = col.reshape(-1, self.pool_h*self.pool_w) + + arg_max = np.argmax(col, axis=1) + out = np.max(col, axis=1) + out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2) + + self.x = x + self.arg_max = arg_max + + return out + + def backward(self, dout): + dout = dout.transpose(0, 2, 3, 1) + + pool_size = self.pool_h * self.pool_w + dmax = np.zeros((dout.size, pool_size)) + dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten() + 
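# each row of dmax corresponds to one pooling window: the column chosen by arg_max in the forward pass receives the whole incoming gradient, every other position in the window stays zero
+        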
dmax = dmax.reshape(dout.shape + (pool_size,)) + + dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1) + dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad) + + return dx diff --git a/DL_demo/common/multi_layer_net.py b/DL_demo/common/multi_layer_net.py new file mode 100755 index 0000000..ff103b5 --- /dev/null +++ b/DL_demo/common/multi_layer_net.py @@ -0,0 +1,160 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from collections import OrderedDict +from common.layers import * +from common.gradient import numerical_gradient + + +class MultiLayerNet: + """全连接的多层神经网络 + + Parameters + ---------- + input_size : 输入大小(MNIST的情况下为784) + hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100]) + output_size : 输出大小(MNIST的情况下为10) + activation : 'relu' or 'sigmoid' + weight_init_std : 指定权重的标准差(e.g. 0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + weight_decay_lambda : Weight Decay(L2范数)的强度 + """ + def __init__(self, input_size, hidden_size_list, output_size, + activation='relu', weight_init_std='relu', weight_decay_lambda=0): + self.input_size = input_size + self.output_size = output_size + self.hidden_size_list = hidden_size_list + self.hidden_layer_num = len(hidden_size_list) + self.weight_decay_lambda = weight_decay_lambda + self.params = {} + + # 初始化权重 + self.__init_weight(weight_init_std) + + # 生成层 + activation_layer = {'sigmoid': Sigmoid, 'relu': Relu} + self.layers = OrderedDict() + for idx in range(1, self.hidden_layer_num+1): + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], + self.params['b' + str(idx)]) + self.layers['Activation_function' + str(idx)] = activation_layer[activation]() + + idx = self.hidden_layer_num + 1 + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], + self.params['b' + str(idx)]) + + self.last_layer = SoftmaxWithLoss() + + def __init_weight(self, weight_init_std): + """设定权重的初始值 + + Parameters + ---------- + weight_init_std : 指定权重的标准差(e.g. 
0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + """ + all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size] + for idx in range(1, len(all_size_list)): + scale = weight_init_std + if str(weight_init_std).lower() in ('relu', 'he'): + scale = np.sqrt(2.0 / all_size_list[idx - 1]) # 使用ReLU的情况下推荐的初始值 + elif str(weight_init_std).lower() in ('sigmoid', 'xavier'): + scale = np.sqrt(1.0 / all_size_list[idx - 1]) # 使用sigmoid的情况下推荐的初始值 + + self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx]) + self.params['b' + str(idx)] = np.zeros(all_size_list[idx]) + + def predict(self, x): + for layer in self.layers.values(): + x = layer.forward(x) + + return x + + def loss(self, x, t): + """求损失函数 + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 损失函数的值 + """ + y = self.predict(x) + + weight_decay = 0 + for idx in range(1, self.hidden_layer_num + 2): + W = self.params['W' + str(idx)] + weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W ** 2) + + return self.last_layer.forward(y, t) + weight_decay + + def accuracy(self, x, t): + y = self.predict(x) + y = np.argmax(y, axis=1) + if t.ndim != 1 : t = np.argmax(t, axis=1) + + accuracy = np.sum(y == t) / float(x.shape[0]) + return accuracy + + def numerical_gradient(self, x, t): + """求梯度(数值微分) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + loss_W = lambda W: self.loss(x, t) + + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)]) + grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)]) + + return grads + + def gradient(self, x, t): + """求梯度(误差反向传播法) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W + grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db + + return grads diff --git a/DL_demo/common/multi_layer_net_extend.py b/DL_demo/common/multi_layer_net_extend.py new file mode 100755 index 0000000..75f1c75 --- /dev/null +++ b/DL_demo/common/multi_layer_net_extend.py @@ -0,0 +1,163 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from collections import OrderedDict +from common.layers import * +from common.gradient import numerical_gradient + +class MultiLayerNetExtend: + """扩展版的全连接的多层神经网络 + + 具有Weiht Decay、Dropout、Batch Normalization的功能 + + Parameters + ---------- + input_size : 输入大小(MNIST的情况下为784) + hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100]) + output_size : 输出大小(MNIST的情况下为10) + activation : 'relu' or 'sigmoid' + weight_init_std : 指定权重的标准差(e.g. 
0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + weight_decay_lambda : Weight Decay(L2范数)的强度 + use_dropout: 是否使用Dropout + dropout_ration : Dropout的比例 + use_batchNorm: 是否使用Batch Normalization + """ + def __init__(self, input_size, hidden_size_list, output_size, + activation='relu', weight_init_std='relu', weight_decay_lambda=0, + use_dropout = False, dropout_ration = 0.5, use_batchnorm=False): + self.input_size = input_size + self.output_size = output_size + self.hidden_size_list = hidden_size_list + self.hidden_layer_num = len(hidden_size_list) + self.use_dropout = use_dropout + self.weight_decay_lambda = weight_decay_lambda + self.use_batchnorm = use_batchnorm + self.params = {} + + # 初始化权重 + self.__init_weight(weight_init_std) + + # 生成层 + activation_layer = {'sigmoid': Sigmoid, 'relu': Relu} + self.layers = OrderedDict() + for idx in range(1, self.hidden_layer_num+1): + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], + self.params['b' + str(idx)]) + if self.use_batchnorm: + self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx-1]) + self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx-1]) + self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)]) + + self.layers['Activation_function' + str(idx)] = activation_layer[activation]() + + if self.use_dropout: + self.layers['Dropout' + str(idx)] = Dropout(dropout_ration) + + idx = self.hidden_layer_num + 1 + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)]) + + self.last_layer = SoftmaxWithLoss() + + def __init_weight(self, weight_init_std): + """设定权重的初始值 + + Parameters + ---------- + weight_init_std : 指定权重的标准差(e.g. 0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + """ + all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size] + for idx in range(1, len(all_size_list)): + scale = weight_init_std + if str(weight_init_std).lower() in ('relu', 'he'): + scale = np.sqrt(2.0 / all_size_list[idx - 1]) # 使用ReLU的情况下推荐的初始值 + elif str(weight_init_std).lower() in ('sigmoid', 'xavier'): + scale = np.sqrt(1.0 / all_size_list[idx - 1]) # 使用sigmoid的情况下推荐的初始值 + self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx]) + self.params['b' + str(idx)] = np.zeros(all_size_list[idx]) + + def predict(self, x, train_flg=False): + for key, layer in self.layers.items(): + if "Dropout" in key or "BatchNorm" in key: + x = layer.forward(x, train_flg) + else: + x = layer.forward(x) + + return x + + def loss(self, x, t, train_flg=False): + """求损失函数 + 参数x是输入数据,t是教师标签 + """ + y = self.predict(x, train_flg) + + weight_decay = 0 + for idx in range(1, self.hidden_layer_num + 2): + W = self.params['W' + str(idx)] + weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2) + + return self.last_layer.forward(y, t) + weight_decay + + def accuracy(self, X, T): + Y = self.predict(X, train_flg=False) + Y = np.argmax(Y, axis=1) + if T.ndim != 1 : T = np.argmax(T, axis=1) + + accuracy = np.sum(Y == T) / float(X.shape[0]) + return accuracy + + def numerical_gradient(self, X, T): + """求梯度(数值微分) + + Parameters + ---------- + X : 输入数据 + T : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + loss_W = lambda W: self.loss(X, T, train_flg=True) + + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = 
numerical_gradient(loss_W, self.params['W' + str(idx)]) + grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)]) + + if self.use_batchnorm and idx != self.hidden_layer_num+1: + grads['gamma' + str(idx)] = numerical_gradient(loss_W, self.params['gamma' + str(idx)]) + grads['beta' + str(idx)] = numerical_gradient(loss_W, self.params['beta' + str(idx)]) + + return grads + + def gradient(self, x, t): + # forward + self.loss(x, t, train_flg=True) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)] + grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db + + if self.use_batchnorm and idx != self.hidden_layer_num+1: + grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma + grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta + + return grads \ No newline at end of file diff --git a/DL_demo/common/optimizer.py b/DL_demo/common/optimizer.py new file mode 100755 index 0000000..9c22bcd --- /dev/null +++ b/DL_demo/common/optimizer.py @@ -0,0 +1,130 @@ +# coding: utf-8 +import numpy as np + +class SGD: + + """随机梯度下降法(Stochastic Gradient Descent)""" + + def __init__(self, lr=0.01): + self.lr = lr + + def update(self, params, grads): + for key in params.keys(): + params[key] -= self.lr * grads[key] + + +class Momentum: + + """Momentum SGD""" + + def __init__(self, lr=0.01, momentum=0.9): + self.lr = lr + self.momentum = momentum + self.v = None + + def update(self, params, grads): + if self.v is None: + self.v = {} + for key, val in params.items(): + self.v[key] = np.zeros_like(val) + + for key in params.keys(): + self.v[key] = self.momentum*self.v[key] - self.lr*grads[key] + params[key] += self.v[key] + + +class Nesterov: + + """Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901)""" + + def __init__(self, lr=0.01, momentum=0.9): + self.lr = lr + self.momentum = momentum + self.v = None + + def update(self, params, grads): + if self.v is None: + self.v = {} + for key, val in params.items(): + self.v[key] = np.zeros_like(val) + + for key in params.keys(): + self.v[key] *= self.momentum + self.v[key] -= self.lr * grads[key] + params[key] += self.momentum * self.momentum * self.v[key] + params[key] -= (1 + self.momentum) * self.lr * grads[key] + + +class AdaGrad: + + """AdaGrad""" + + def __init__(self, lr=0.01): + self.lr = lr + self.h = None + + def update(self, params, grads): + if self.h is None: + self.h = {} + for key, val in params.items(): + self.h[key] = np.zeros_like(val) + + for key in params.keys(): + self.h[key] += grads[key] * grads[key] + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7) + + +class RMSprop: + + """RMSprop""" + + def __init__(self, lr=0.01, decay_rate = 0.99): + self.lr = lr + self.decay_rate = decay_rate + self.h = None + + def update(self, params, grads): + if self.h is None: + self.h = {} + for key, val in params.items(): + self.h[key] = np.zeros_like(val) + + for key in params.keys(): + self.h[key] *= self.decay_rate + self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key] + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7) + + +class Adam: + + """Adam (http://arxiv.org/abs/1412.6980v8)""" + + def __init__(self, lr=0.001, beta1=0.9, beta2=0.999): 
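+        # beta1 / beta2 are the decay rates of the first- and second-moment estimates m and v; iter counts update steps for the bias correction applied through lr_t in update()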
+        self.lr = lr + self.beta1 = beta1 + self.beta2 = beta2 + self.iter = 0 + self.m = None + self.v = None + + def update(self, params, grads): + if self.m is None: + self.m, self.v = {}, {} + for key, val in params.items(): + self.m[key] = np.zeros_like(val) + self.v[key] = np.zeros_like(val) + + self.iter += 1 + lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter) + + for key in params.keys(): + #self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key] + #self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2) + self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key]) + self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key]) + + params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7) + + #unbias_m += (1 - self.beta1) * (grads[key] - self.m[key]) # correct bias + #unbisa_b += (1 - self.beta2) * (grads[key]*grads[key] - self.v[key]) # correct bias + #params[key] += self.lr * unbias_m / (np.sqrt(unbisa_b) + 1e-7) diff --git a/DL_demo/common/trainer.py b/DL_demo/common/trainer.py new file mode 100755 index 0000000..1878105 --- /dev/null +++ b/DL_demo/common/trainer.py @@ -0,0 +1,78 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # setup for importing files from the parent directory +import numpy as np +from common.optimizer import * + +class Trainer: + """Class that trains a neural network + """ + def __init__(self, network, x_train, t_train, x_test, t_test, + epochs=20, mini_batch_size=100, + optimizer='SGD', optimizer_param={'lr':0.01}, + evaluate_sample_num_per_epoch=None, verbose=True): + self.network = network + self.verbose = verbose + self.x_train = x_train + self.t_train = t_train + self.x_test = x_test + self.t_test = t_test + self.epochs = epochs + self.batch_size = mini_batch_size + self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch + + # optimizer + optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov, + 'adagrad':AdaGrad, 'rmsprop':RMSprop, 'adam':Adam} + self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param) + + self.train_size = x_train.shape[0] + self.iter_per_epoch = max(self.train_size / mini_batch_size, 1) + self.max_iter = int(epochs * self.iter_per_epoch) + self.current_iter = 0 + self.current_epoch = 0 + + self.train_loss_list = [] + self.train_acc_list = [] + self.test_acc_list = [] + + def train_step(self): + batch_mask = np.random.choice(self.train_size, self.batch_size) + x_batch = self.x_train[batch_mask] + t_batch = self.t_train[batch_mask] + + grads = self.network.gradient(x_batch, t_batch) + self.optimizer.update(self.network.params, grads) + + loss = self.network.loss(x_batch, t_batch) + self.train_loss_list.append(loss) + if self.verbose: print("train loss:" + str(loss)) + + if self.current_iter % self.iter_per_epoch == 0: + self.current_epoch += 1 + + x_train_sample, t_train_sample = self.x_train, self.t_train + x_test_sample, t_test_sample = self.x_test, self.t_test + if not self.evaluate_sample_num_per_epoch is None: + t = self.evaluate_sample_num_per_epoch + x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t] + x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t] + + train_acc = self.network.accuracy(x_train_sample, t_train_sample) + test_acc = self.network.accuracy(x_test_sample, t_test_sample) + self.train_acc_list.append(train_acc) + self.test_acc_list.append(test_acc) + + if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===") + self.current_iter += 1 + 
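+    # train() below simply repeats train_step() max_iter times, then reports accuracy on the full test set
+    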
def train(self): + for i in range(self.max_iter): + self.train_step() + + test_acc = self.network.accuracy(self.x_test, self.t_test) + + if self.verbose: + print("=============== Final Test Accuracy ===============") + print("test acc:" + str(test_acc)) + diff --git a/DL_demo/common/util.py b/DL_demo/common/util.py new file mode 100755 index 0000000..9e0f0b3 --- /dev/null +++ b/DL_demo/common/util.py @@ -0,0 +1,99 @@ +# coding: utf-8 +import numpy as np + + +def smooth_curve(x): + """用于使损失函数的图形变圆滑 + + 参考:http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html + """ + window_len = 11 + s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]] + w = np.kaiser(window_len, 2) + y = np.convolve(w/w.sum(), s, mode='valid') + return y[5:len(y)-5] + + +def shuffle_dataset(x, t): + """打乱数据集 + + Parameters + ---------- + x : 训练数据 + t : 监督数据 + + Returns + ------- + x, t : 打乱的训练数据和监督数据 + """ + permutation = np.random.permutation(x.shape[0]) + x = x[permutation,:] if x.ndim == 2 else x[permutation,:,:,:] + t = t[permutation] + + return x, t + +def conv_output_size(input_size, filter_size, stride=1, pad=0): + return (input_size + 2*pad - filter_size) / stride + 1 + + +def im2col(input_data, filter_h, filter_w, stride=1, pad=0): + """ + + Parameters + ---------- + input_data : 由(数据量, 通道, 高, 长)的4维数组构成的输入数据 + filter_h : 滤波器的高 + filter_w : 滤波器的长 + stride : 步幅 + pad : 填充 + + Returns + ------- + col : 2维数组 + """ + N, C, H, W = input_data.shape + out_h = (H + 2*pad - filter_h)//stride + 1 + out_w = (W + 2*pad - filter_w)//stride + 1 + + img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant') + col = np.zeros((N, C, filter_h, filter_w, out_h, out_w)) + + for y in range(filter_h): + y_max = y + stride*out_h + for x in range(filter_w): + x_max = x + stride*out_w + col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride] + + col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1) + return col + + +def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0): + """ + + Parameters + ---------- + col : + input_shape : 输入数据的形状(例:(10, 1, 28, 28)) + filter_h : + filter_w + stride + pad + + Returns + ------- + + """ + N, C, H, W = input_shape + out_h = (H + 2*pad - filter_h)//stride + 1 + out_w = (W + 2*pad - filter_w)//stride + 1 + col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2) + + img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1)) + for y in range(filter_h): + y_max = y + stride*out_h + for x in range(filter_w): + x_max = x + stride*out_w + img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :] + + return img[:, :, pad:H + pad, pad:W + pad] \ No newline at end of file diff --git a/DL_demo/dataset/__init__.py b/DL_demo/dataset/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/DL_demo/dataset/lena.png b/DL_demo/dataset/lena.png new file mode 100755 index 0000000..768f4a5 Binary files /dev/null and b/DL_demo/dataset/lena.png differ diff --git a/DL_demo/dataset/lena_gray.png b/DL_demo/dataset/lena_gray.png new file mode 100755 index 0000000..fe2fd39 Binary files /dev/null and b/DL_demo/dataset/lena_gray.png differ diff --git a/DL_demo/dataset/mnist.py b/DL_demo/dataset/mnist.py new file mode 100755 index 0000000..0d53709 --- /dev/null +++ b/DL_demo/dataset/mnist.py @@ -0,0 +1,128 @@ +# coding: utf-8 +try: + import urllib.request +except ImportError: + raise ImportError('You should use Python 3.x') +import os.path +import gzip +import pickle +import os +import numpy as np + + 
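+# download source and the four gzip archives that make up MNIST (train/test images and labels)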
+url_base = 'http://yann.lecun.com/exdb/mnist/' +key_file = { + 'train_img':'train-images-idx3-ubyte.gz', + 'train_label':'train-labels-idx1-ubyte.gz', + 'test_img':'t10k-images-idx3-ubyte.gz', + 'test_label':'t10k-labels-idx1-ubyte.gz' +} + +dataset_dir = os.path.dirname(os.path.abspath(__file__)) +save_file = dataset_dir + "/mnist.pkl" + +train_num = 60000 +test_num = 10000 +img_dim = (1, 28, 28) +img_size = 784 + + +def _download(file_name): + file_path = dataset_dir + "/" + file_name + + if os.path.exists(file_path): + return + + print("Downloading " + file_name + " ... ") + urllib.request.urlretrieve(url_base + file_name, file_path) + print("Done") + +def download_mnist(): + for v in key_file.values(): + _download(v) + +def _load_label(file_name): + file_path = dataset_dir + "/" + file_name + + print("Converting " + file_name + " to NumPy Array ...") + with gzip.open(file_path, 'rb') as f: + labels = np.frombuffer(f.read(), np.uint8, offset=8) + print("Done") + + return labels + +def _load_img(file_name): + file_path = dataset_dir + "/" + file_name + + print("Converting " + file_name + " to NumPy Array ...") + with gzip.open(file_path, 'rb') as f: + data = np.frombuffer(f.read(), np.uint8, offset=16) + data = data.reshape(-1, img_size) + print("Done") + + return data + +def _convert_numpy(): + dataset = {} + dataset['train_img'] = _load_img(key_file['train_img']) + dataset['train_label'] = _load_label(key_file['train_label']) + dataset['test_img'] = _load_img(key_file['test_img']) + dataset['test_label'] = _load_label(key_file['test_label']) + + return dataset + +def init_mnist(): + download_mnist() + dataset = _convert_numpy() + print("Creating pickle file ...") + with open(save_file, 'wb') as f: + pickle.dump(dataset, f, -1) + print("Done!") + +def _change_one_hot_label(X): + T = np.zeros((X.size, 10)) + for idx, row in enumerate(T): + row[X[idx]] = 1 + + return T + + +def load_mnist(normalize=True, flatten=True, one_hot_label=False): + """读入MNIST数据集 + + Parameters + ---------- + normalize : 将图像的像素值正规化为0.0~1.0 + one_hot_label : + one_hot_label为True的情况下,标签作为one-hot数组返回 + one-hot数组是指[0,0,1,0,0,0,0,0,0,0]这样的数组 + flatten : 是否将图像展开为一维数组 + + Returns + ------- + (训练图像, 训练标签), (测试图像, 测试标签) + """ + if not os.path.exists(save_file): + init_mnist() + + with open(save_file, 'rb') as f: + dataset = pickle.load(f) + + if normalize: + for key in ('train_img', 'test_img'): + dataset[key] = dataset[key].astype(np.float32) + dataset[key] /= 255.0 + + if one_hot_label: + dataset['train_label'] = _change_one_hot_label(dataset['train_label']) + dataset['test_label'] = _change_one_hot_label(dataset['test_label']) + + if not flatten: + for key in ('train_img', 'test_img'): + dataset[key] = dataset[key].reshape(-1, 1, 28, 28) + + return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label']) + + +if __name__ == '__main__': + init_mnist() diff --git a/DL_step_by_step/__init__.py b/DL_step_by_step/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/DL_step_by_step/ch3/MNIST.py b/DL_step_by_step/ch3/MNIST.py new file mode 100644 index 0000000..57d50a7 --- /dev/null +++ b/DL_step_by_step/ch3/MNIST.py @@ -0,0 +1,91 @@ +import sys, os +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist +from DL_step_by_step.ch3.sigmoid_func import sigmoid +from DL_step_by_step.ch3.softmax_func import softmax + +import numpy as np +from PIL import Image +import pickle + + + + +def img_show(img): + pil_img = 
Image.fromarray(np.uint8(img)) + pil_img.show() + + + +# img = x_train[0] +# label = t_train[0] +# print(label) +# print(img.shape) +# img = img.reshape(28, 28) +# print(img.shape) +# img_show(img) + + +def get_data(): + (x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=True, one_hot_label=False) + print(x_train[0]) + print(x_train.shape) + print(t_train[0]) + print(t_train.shape) + print(x_test.shape) + print(t_test.shape) + return x_test, t_test + +def init_network(): + with open("sample_weight.pkl", 'rb') as f: + network = pickle.load(f) + + return network + +def predict(network, x): + W1, W2, W3 = network['W1'], network['W2'], network['W3'] + b1, b2, b3 = network['b1'], network['b2'], network['b3'] + + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + + a2 = np.dot(z1, W2) + b2 + z2 = sigmoid(a2) + + a3 = np.dot(z2, W3) + b3 + y = softmax(a3) + + return y + + +x, t = get_data() +network = init_network() +print(type(network)) + +batch_size = 100 + +for key in network: + print(key) + print(network[key].shape) + +accuracy_cnt = 0 +# for i in range(len(x)): +# y = predict(network, x[i]) +# # print(y) +# p = np.argmax(y) +# # print(p) +# if p == t[i]: +# accuracy_cnt += 1 + + +for i in range(0, len(x), batch_size): + x_batch = x [i: i + batch_size] + # print(x_batch.shape) + y_batch = predict(network, x_batch) + # print(y_batch.shape) + p = np.argmax(y_batch, axis=1) + # print(p.shape) + # input() + accuracy_cnt += np.sum(p == t[i:i+batch_size]) + +print("Accuracy : " +str(float(accuracy_cnt) /len(x))) \ No newline at end of file diff --git a/DL_step_by_step/ch3/ReLU_func.py b/DL_step_by_step/ch3/ReLU_func.py new file mode 100644 index 0000000..c742523 --- /dev/null +++ b/DL_step_by_step/ch3/ReLU_func.py @@ -0,0 +1,11 @@ +import numpy as np +import matplotlib.pyplot as plt + +def relu(x): + return np.maximum(0, x) + +x = np.arange(-5.0 , 5.0, 0.1) +y = relu(x) + +plt.plot(x, y) +plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch3/__init__.py b/DL_step_by_step/ch3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/DL_step_by_step/ch3/axis_test.py b/DL_step_by_step/ch3/axis_test.py new file mode 100644 index 0000000..5fab883 --- /dev/null +++ b/DL_step_by_step/ch3/axis_test.py @@ -0,0 +1,7 @@ +import numpy as np + +x = np.array([[0.1, 0.8, 0.1], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3], [0.8, 0.1, 0.1]]) + +print(x) +y = np.argmax(x, axis=0) +print(y) \ No newline at end of file diff --git a/DL_step_by_step/ch3/matrix_test.py b/DL_step_by_step/ch3/matrix_test.py new file mode 100644 index 0000000..b388819 --- /dev/null +++ b/DL_step_by_step/ch3/matrix_test.py @@ -0,0 +1,23 @@ +import numpy as np +import matplotlib.pyplot as plt + +# A = np.array([1, 2, 3, 4]) +# print(A) +# print(np.ndim(A)) +# print(A.shape) +# print(A.shape[0]) +# +# B = np.array([[1, 2], [3, 4], [5, 6]]) +# print(B) +# print(np.ndim(B)) +# print(B.shape) + +A = np.array([[1, 2],[3, 4]]) +print(A) +print(A.shape) + +B = np.array([[5, 6],[7, 8]]) +print(B) +print(B.shape) +z = np.dot(A, B) +print(z) \ No newline at end of file diff --git a/DL_step_by_step/ch3/sample_weight.pkl b/DL_step_by_step/ch3/sample_weight.pkl new file mode 100755 index 0000000..0e92475 Binary files /dev/null and b/DL_step_by_step/ch3/sample_weight.pkl differ diff --git a/DL_step_by_step/ch3/sigmoid_func.py b/DL_step_by_step/ch3/sigmoid_func.py new file mode 100644 index 0000000..6b3350e --- /dev/null +++ b/DL_step_by_step/ch3/sigmoid_func.py @@ -0,0 +1,18 @@ +import numpy as np +import matplotlib.pyplot 
as plt + + +def sigmoid(x): + return 1/(1 + np.exp(-x)) + + +if __name__ == '__main__': + print("") + x = np.arange(-10.0, 10.0, 0.1) + y = sigmoid(x) + y2 = x * 0 + 0.5 + + plt.plot(x, y) + plt.plot(x, y2) + plt.ylim(-0.1, 1.1) + plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch3/softmax_func.py b/DL_step_by_step/ch3/softmax_func.py new file mode 100644 index 0000000..6869e97 --- /dev/null +++ b/DL_step_by_step/ch3/softmax_func.py @@ -0,0 +1,41 @@ +import numpy as np + +a = np.array([0.3, 2.9, 4.0]) + +print(a) +exp_a = np.exp(a) +print(exp_a) + +sum_exp_a = np.sum(exp_a) +print(sum_exp_a) + +y = exp_a / sum_exp_a +print(y) + +def softmax(a): + c = np.max(a) + exp_a = np.exp(a - c) #溢出对策 + sum_exp_a = np.sum(exp_a) + y = exp_a / sum_exp_a + return y + + +print("a") +a = np.array([1010, 1000, 990]) +print(a) +# z1 = np.exp(a) / np.sum(np.exp(a)) +# print(z1) + +c = np.max(a) +print(c) +d = a - c +print(d) +z2 =np.exp(d) / np.sum(np.exp(d)) +print(z2) + +print("stage 2") + +a = np.array([0.3, 2.9, 4.0]) +y = softmax(a) +print(y) +print(np.sum(y)) \ No newline at end of file diff --git a/DL_step_by_step/ch3/step_func.py b/DL_step_by_step/ch3/step_func.py new file mode 100644 index 0000000..0a67aa7 --- /dev/null +++ b/DL_step_by_step/ch3/step_func.py @@ -0,0 +1,14 @@ +import numpy as np +import matplotlib.pyplot as plt + + +def step_function(x): + return np.array(x>0, dtype=np.int) + +x = np.arange(-5.0, 5.0, 0.1) +y = step_function(x) +print(x) +print(y) +plt.plot(x, y) +plt.ylim(-0.1, 1.1) +plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch3/threeLayers_net.py b/DL_step_by_step/ch3/threeLayers_net.py new file mode 100644 index 0000000..8538e14 --- /dev/null +++ b/DL_step_by_step/ch3/threeLayers_net.py @@ -0,0 +1,64 @@ +import numpy as np +import matplotlib.pyplot as plt + +from DL_step_by_step.ch3.sigmoid_func import sigmoid + +X = np.array([1.0, 0.5]) + +W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]) +B1 = np.array([0.1, 0.2 ,0.3]) + +print("X1 : ") +print(X) +print(X.shape) + +print("W1 : ") +print(W1) +print(W1.shape) + +print("B1 : ") +print(B1) +print(B1.shape) + +A1 = np.dot(X, W1) + B1 +print("A1 : ") +print(A1) +print(A1) + +Z1 = sigmoid(A1) +print("Z1 : ") +print(Z1) +print(Z1.shape) + +W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]) +B2 = np.array([0.1, 0.2]) + +print("W2 : ") +print(W2) +print(W2.shape) + +print("B2 : ") +print(B2) +print(B2.shape) + +A2 = np.dot(Z1, W2) + B2 +print("A2 : ") +print(A2) +print(A2.shape) + +Z2 = sigmoid(A2) +print("Z2 : ") +print(Z2) +print(Z2.shape) + + +def indentity_function(x): + return x + + +W3 = np.array([[0.1, 0.3], [0.2, 0.4]]) + +B3 = np.array([0.1, 0.2]) + +A3 = np.dot(Z2, W3) + B3 +Y = indentity_function(A3) diff --git a/DL_step_by_step/ch3/threeLayers_net_summary.py b/DL_step_by_step/ch3/threeLayers_net_summary.py new file mode 100644 index 0000000..cb63691 --- /dev/null +++ b/DL_step_by_step/ch3/threeLayers_net_summary.py @@ -0,0 +1,34 @@ +import numpy as np +from DL_step_by_step.ch3.sigmoid_func import sigmoid +from DL_step_by_step.ch3.threeLayers_net import indentity_function + +def init_network(): + network = {} + network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]) + network['b1'] = np.array([0.1, 0.2, 0.3]) + network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]) + network['b2'] = np.array([0.1, 0.2]) + network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]]) + network['b3'] = np.array([0.1, 0.2]) + + return network + +def forward(network, x): + W1, W2, W3 = network['W1'], 
network['W2'], network['W3'] + b1, b2, b3 = network['b1'], network['b2'], network['b3'] + + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + + a2 = np.dot(z1, W2) + b2 + z2 = sigmoid(a2) + + a3 = np.dot(z2, W3) + b3 + y = indentity_function(a3) + + return y + +network = init_network() +x = np.array([1.0, 0.5]) +y = forward(network, x) +print(y) diff --git a/DL_step_by_step/ch4/Q1.py b/DL_step_by_step/ch4/Q1.py new file mode 100644 index 0000000..df86a3f --- /dev/null +++ b/DL_step_by_step/ch4/Q1.py @@ -0,0 +1,68 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/7 上午11:15 +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: Q1.py +# @Software: PyCharm + +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.common.common_func import numerical_gradient +import matplotlib.pylab as plt + +import numpy as np + + + +def gradient_descent(f, init_x, lr=0.01, step_num=100): + x = init_x + x_history = [] + + for i in range(step_num): + x_history.append(x.copy()) + grad = numerical_gradient(f, x) + x-= lr*grad + + return x, np.array(x_history) + + + + + +def function_2(x): + return x[0] ** 2 + x[1] ** 2 + +init_x = np.array([-3.0, 4.0]) + +# print(init_x.ndim) +# grad,history = gradient_descent(function_2, init_x = init_x, lr = 0.1, step_num=100) +# +# print(grad) +# print(history) + +init_x = np.array([-3.0, 4.0]) + +lr = 0.1 +step_num = 100 +x, x_history = gradient_descent(function_2, init_x,lr = lr, step_num=step_num) + +print(x) + + +plt.plot( [-5, 5], [0, 0], '--b') +plt.plot([0, 0], [-5,5], '--b') +plt.plot(x_history[:, 0], x_history[:, 1], 'o') + +plt.xlim(-3.5, 3.5) +plt.ylim(-4.5, 4.5) +plt.xlabel("X0") +plt.ylabel("X1") +plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch4/__init__.py b/DL_step_by_step/ch4/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/DL_step_by_step/ch4/gradient_func.py b/DL_step_by_step/ch4/gradient_func.py new file mode 100644 index 0000000..dfefc37 --- /dev/null +++ b/DL_step_by_step/ch4/gradient_func.py @@ -0,0 +1,102 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/5 下午6:01 +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. 
=== +# ====================================================== +# @Project : Keras_start +# @FileName: gradient_func.py +# @Software: PyCharm + +import numpy as np +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.common.common_func import numerical_gradient +import matplotlib.pylab as plt +from DL_step_by_step.common.common_func import function_2 + + + +# def numerical_gradient_on_batch(f, x): +# h = 1e-4 +# grad = np.zeros_like(x) +# +# for index in range(x.size): +# tmp_val = x[index] +# x[index] = tmp_val + h +# fxh1 = f(x) +# +# x[index] = tmp_val - h +# fxh2 = f(x) +# +# grad[index] = (fxh1 - fxh2) / (2 * h) +# x[index] = tmp_val +# +# return grad +# +# +# print(numerical_gradient_on_batch(function_2, np.array([3.0, 4.0]))) + + +# gradient test +# x = np.array([3.0, 4.0]) +# f= function_2 +# h = 1e-4 +# grad = np.zeros_like(x) +# +# for index in range(x.size): +# +# tmp_val = x[index] +# +# x[index] = tmp_val + h +# fxh1 = f(x) +# +# x[index] = tmp_val - h +# fxh2 = f(x) +# +# grad[index] = (fxh1 - fxh2) / (2 * h) +# x[index] = tmp_val + + +if __name__ == '__main__': + x0 = np.arange(-2, 2.5, 0.25) + x1 = np.arange(-2, 2.5, 0.25) + + + + + X, Y = np.meshgrid(x0, x1) + + print(x0.shape) + print(x1.shape) + + + X = X.flatten() + Y = Y.flatten() + # plt.scatter(X,Y) + # plt.show + + + + z = np.array([X, Y]) + + print(z.shape) + + grad = numerical_gradient(function_2, z) + + print(grad) + plt.figure() + plt.quiver(X, Y, -grad[0], -grad[1], angles="xy", color="#666666") # ,headwidth=10,scale=40,color="#444444") + plt.xlim([-2, 2]) + plt.ylim([-2, 2]) + plt.xlabel('x0') + plt.ylabel('x1') + plt.grid() + plt.legend() + plt.draw() + plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch4/loss_func.py b/DL_step_by_step/ch4/loss_func.py new file mode 100644 index 0000000..14e5429 --- /dev/null +++ b/DL_step_by_step/ch4/loss_func.py @@ -0,0 +1,60 @@ +import numpy as np +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist + +def mean_squared_error(y, t): + """ + func: 计算均方误差 + + + 因为带了平方,后面要用梯度下降法,要求导,这样求导多出的乘2就和二分之一抵消了,一个简化后面计算的技巧 + + :param y: + :param t: + :return: + """ + return 0.5 * np.sum((y-t)**2) + + +# def cross_entropy_error(y, t): +# """ +# func: 计算交叉熵误差 +# +# :param y: +# :param t: +# :return: +# """ +# delta = 1e-7 +# return -np.sum(t*np.log(y+delta)) + + +def cross_entroy_error(y, t): + if y.ndim == 1: + t = t.reshape(1, t.size) + y = y.reshape(1, y.size) + + batch_size = y.shape[0] + + return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size + + + + + +if __name__ == '__main__': + (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + + print(x_train.shape) + print(t_train.shape) + + train_size = x_train.shape[0] + + batch_size = 10 + + batch_mask = np.random.choice(train_size, batch_size) + + x_batch = x_train[batch_mask] + + t_batch = t_train[batch_mask] + diff --git a/DL_step_by_step/ch4/nimi_batch.py b/DL_step_by_step/ch4/nimi_batch.py new file mode 100644 index 0000000..90de997 --- /dev/null +++ b/DL_step_by_step/ch4/nimi_batch.py @@ -0,0 +1,45 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/11 下午6:09 +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. 
=== +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: nimi_batch.py +# @Software: PyCharm + +import numpy as np +from DL_step_by_step.dataset.mnist import load_mnist +from DL_step_by_step.ch4.twoLayerNet import TwoLayerNet + +if __name__ == '__main__': + (x_train, t_train),(x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + train_loss_list = [] + + iters_num = 1000 + train_size = x_train.shape[0] + batch_size = 100 + learning_rate = 0.1 + + network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + + for i in range(iters_num): + batch_mask = np.random.choice(train_size,batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + # calculate the gradients + grad = network.numerical_gradient(x_batch, t_batch) + + for key in ('W1', 'b1', 'W2', 'b2'): + network.params[key] -= learning_rate * grad[key] + + loss = network.loss(x_batch, t_batch) + train_loss_list.append(loss) + + print(train_loss_list) \ No newline at end of file diff --git a/DL_step_by_step/ch4/numerical_diff.py b/DL_step_by_step/ch4/numerical_diff.py new file mode 100644 index 0000000..81cd5f3 --- /dev/null +++ b/DL_step_by_step/ch4/numerical_diff.py @@ -0,0 +1,44 @@ +import numpy as np +import matplotlib.pylab as plt +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist + + +def numerical_diff(f, x): + h = 1e-4 + return (f(x+h) - f(x-h)) / (2 * h) + + +def function_1(x): + return 0.01 * x ** 2 + 0.1 * x + + +def function_2(x): + return x[0] ** 2 + x[1] ** 2 + + +def tangent_line(f, x): + d = numerical_diff(f, x) + print(d) + y = f(x) - d*x + return lambda t: d*t + y + + +if __name__ == '__main__': + x = np.arange(0.0, 20.0, 0.1) + y = function_1(x) + f2 = tangent_line(function_1, 5) + y2 = f2(x) + + # plt.xlabel("x") + # plt.ylabel("f(x)") + plt.plot(x, y) + plt.plot(x, y2) + plt.show() + + print(numerical_diff(function_1, 5)) + print(numerical_diff(function_1, 10)) \ No newline at end of file
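numerical_diff.py uses the centered difference (f(x+h) - f(x-h)) / 2h with h = 1e-4. A quick sanity check (illustrative, not part of the repository) against the analytic derivative of function_1, which is f'(x) = 0.02x + 0.1:

```
# Illustrative check: centered difference vs. the exact derivative.
def numerical_diff(f, x):
    h = 1e-4
    return (f(x + h) - f(x - h)) / (2 * h)

f = lambda x: 0.01 * x ** 2 + 0.1 * x   # function_1 from numerical_diff.py
for x in (5.0, 10.0):
    # numerical vs. exact: agrees to about 1e-9 (roughly 0.2 and 0.3)
    print(numerical_diff(f, x), 0.02 * x + 0.1)
```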
diff --git a/DL_step_by_step/ch4/numerical_diff2.py b/DL_step_by_step/ch4/numerical_diff2.py new file mode 100644 index 0000000..3e86364 --- /dev/null +++ b/DL_step_by_step/ch4/numerical_diff2.py @@ -0,0 +1,55 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/5 11:38 AM +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: numerical_diff2.py +# @Software: PyCharm + +import numpy as np +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist +import matplotlib.pylab as plt +from mpl_toolkits.mplot3d import Axes3D # registers the '3d' projection on older matplotlib +from DL_step_by_step.ch4.numerical_diff import numerical_diff + + + +def function_2(x): + return x[0] ** 2 + x[1] ** 2 + +def function_3(X,Y): + return X ** 2 + Y ** 2 + +x0 = np.arange(-20.0, 20.0, 0.25) +x1 = np.arange(-20.0, 20.0, 0.25) + +X,Y = np.meshgrid(x0,x1) + +R = np.sqrt(X**2 + Y**2) + +Z = function_3(X,Y) + +fig = plt.figure() +ax = fig.add_subplot(111, projection='3d') + + +# Plot a basic wireframe. +ax.plot_wireframe(X, Y, Z, rstride=10, cstride=10) + +plt.show() + + +def function_tmp1(x0): + return x0*x0 + 4.0 ** 2.0 + + +print(numerical_diff(function_tmp1, 3.0)) + diff --git a/DL_step_by_step/ch4/simpleNet.py b/DL_step_by_step/ch4/simpleNet.py new file mode 100644 index 0000000..255480b --- /dev/null +++ b/DL_step_by_step/ch4/simpleNet.py @@ -0,0 +1,60 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/7 5:15 PM +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: simpleNet.py +# @Software: PyCharm + +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.common.common_func import * +import matplotlib.pylab as plt + + + +import numpy as np + +class simpleNet: + def __init__(self): + self.W = np.random.rand(2, 3) + + def predict(self, x): + return np.dot(x, self.W) + + def loss(self, x, t): + z = self.predict(x) + y = softmax(z) + loss = cross_entroy_error(y, t) + + return loss + + + +if __name__ == '__main__': + net = simpleNet() + print(net.W) + + x = np.array([0.6, 0.9]) + print(x.shape) + p = net.predict(x) + print(p) + + print(np.argmax(p)) + + t = np.array([0, 0, 1]) + + print(net.loss(x, t)) + + f = lambda w: net.loss(x, t) + + dW = numerical_gradient(f, net.W) + + print(dW) \ No newline at end of file diff --git a/DL_step_by_step/ch4/test.py b/DL_step_by_step/ch4/test.py new file mode 100644 index 0000000..d3c4d11 --- /dev/null +++ b/DL_step_by_step/ch4/test.py @@ -0,0 +1,24 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/11 5:53 PM +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: test.py +# @Software: PyCharm + +import numpy as np +z = np.arange(10) + + +t = [2,7,0,9,4] +y = np.arange(50) +y = y.reshape(5, 10) + +a = y[np.arange(5), t] \ No newline at end of file
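test.py above is probing NumPy integer-array indexing: y[np.arange(5), t] picks one element per row, which is exactly how the batched cross-entropy in loss_func.py gathers each sample's predicted probability for its correct class. A small illustrative run (the values follow directly from arange):

```
# Integer-array indexing: one element per row, column given by t.
import numpy as np

y = np.arange(50).reshape(5, 10)   # 5 "samples", 10 "classes"
t = [2, 7, 0, 9, 4]                # correct class index per sample
print(y[np.arange(5), t])          # -> [ 2 17 20 39 44 ]
```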
diff --git a/DL_step_by_step/ch4/twoLayerNet.py b/DL_step_by_step/ch4/twoLayerNet.py
new file mode 100644
index 0000000..f64f932
--- /dev/null
+++ b/DL_step_by_step/ch4/twoLayerNet.py
@@ -0,0 +1,109 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/7 5:37 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: twoLayerNet.py
+# @Software: PyCharm
+
+import sys
+sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start'])
+import numpy as np
+from DL_step_by_step.common.common_func import *
+
+
+class TwoLayerNet:
+    def __init__(self, input_size, hidden_size, output_size,
+                 weight_init_std=0.01):
+        self.params = {}
+        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
+        self.params['b1'] = np.zeros(hidden_size)
+        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
+        self.params['b2'] = np.zeros(output_size)
+
+    def predict(self, x):
+        W1, W2 = self.params['W1'], self.params['W2']
+        b1, b2 = self.params['b1'], self.params['b2']
+
+        a1 = np.dot(x, W1) + b1
+        z1 = sigmoid(a1)
+        a2 = np.dot(z1, W2) + b2
+        y = softmax(a2)
+
+        return y
+
+    def loss(self, x, t):
+        """
+        :param x: input data
+        :param t: supervised (label) data
+        :return: loss value
+        """
+        y = self.predict(x)
+        return cross_entropy_error(y, t)
+
+    def accuracy(self, x, t):
+        """
+        :param x: input data
+        :param t: supervised (label) data, one-hot
+        :return: classification accuracy
+        """
+        y = self.predict(x)
+        y = np.argmax(y, axis=1)
+        t = np.argmax(t, axis=1)
+
+        accuracy = np.sum(y == t) / float(x.shape[0])
+
+        return accuracy
+
+    def numerical_gradient(self, x, t):
+        # W is unused in the lambda; the numerical_gradient function below
+        # perturbs each parameter array in place while re-evaluating the loss
+        loss_W = lambda W: self.loss(x, t)
+
+        grads = {}
+        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
+        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
+        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
+        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
+
+        return grads
+
+    def print_params(self):
+        for key in ('W1', 'b1', 'W2', 'b2'):
+            print(key + " : ")
+            print(self.params[key].shape)
+            print(self.params[key])
+
+
+if __name__ == '__main__':
+    net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
+    net.print_params()
+
+    x = np.random.rand(100, 784)
+    t = np.random.rand(100, 10)
+
+    grads = net.numerical_gradient(x, t)
+    print(grads['W1'])
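+
+    # Illustrative single SGD step using the gradients above (a sketch added
+    # here, not in the original file; the learning rate is an arbitrary choice):
+    learning_rate = 0.1
+    for key in ('W1', 'b1', 'W2', 'b2'):
+        net.params[key] -= learning_rate * grads[key]
+    print(net.loss(x, t))  # loss after one update step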
diff --git a/DL_step_by_step/ch5/TwoLayerNet.py b/DL_step_by_step/ch5/TwoLayerNet.py
new file mode 100644
index 0000000..74dd044
--- /dev/null
+++ b/DL_step_by_step/ch5/TwoLayerNet.py
@@ -0,0 +1,32 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/21 11:34 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : DeepLearningDemo
+# @FileName: TwoLayerNet.py
+# @Software: PyCharm
+
+import sys, os
+sys.path.append(os.pardir)
+import numpy as np
+from DL_step_by_step.ch5.affine_demo import Affine
+from collections import OrderedDict
+
+
+class TwoLayerNet:
+    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
+        self.params = {}
+        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
+        self.params['b1'] = np.zeros(hidden_size)
+        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
+        self.params['b2'] = np.zeros(output_size)
+
+        # Early stub: only the first Affine layer is wired up so far; Affine
+        # needs its weights and bias at construction time
+        self.layers = OrderedDict()
+        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
diff --git a/DL_step_by_step/ch5/__init__.py b/DL_step_by_step/ch5/__init__.py
new file mode 100644
index 0000000..4ad9953
--- /dev/null
+++ b/DL_step_by_step/ch5/__init__.py
@@ -0,0 +1,14 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/12 6:03 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: __init__.py
+# @Software: PyCharm
diff --git a/DL_step_by_step/ch5/affine_demo.py b/DL_step_by_step/ch5/affine_demo.py
new file mode 100644
index 0000000..aea4668
--- /dev/null
+++ b/DL_step_by_step/ch5/affine_demo.py
@@ -0,0 +1,21 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/17 6:12 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: affine_demo.py
+# @Software: PyCharm
+
+import numpy as np
+
+class Affine:
+    def __init__(self, W, b):
+        self.W = W
+        self.b = b
+        self.x = None
+        self.dW = None
+        self.db = None
+
+    def forward(self, x):
+        # y = xW + b; cache x for the backward pass
+        self.x = x
+        return np.dot(x, self.W) + self.b
+
+    def backward(self, dout):
+        # Gradients of the affine transform; the bias gradient sums over the batch axis
+        dx = np.dot(dout, self.W.T)
+        self.dW = np.dot(self.x.T, dout)
+        self.db = np.sum(dout, axis=0)
+        return dx
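+
+# Minimal usage sketch (added for illustration; shapes are arbitrary):
+if __name__ == '__main__':
+    layer = Affine(np.random.randn(2, 3), np.random.randn(3))
+    x = np.random.randn(4, 2)              # batch of 4 two-dimensional inputs
+    out = layer.forward(x)                  # shape (4, 3)
+    dx = layer.backward(np.ones_like(out))  # upstream gradient of ones
+    print(out.shape, dx.shape, layer.dW.shape, layer.db.shape)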
diff --git a/DL_step_by_step/ch5/sigmoid_layer.py b/DL_step_by_step/ch5/sigmoid_layer.py
new file mode 100644
index 0000000..7acde0b
--- /dev/null
+++ b/DL_step_by_step/ch5/sigmoid_layer.py
@@ -0,0 +1,49 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/13 4:00 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: sigmoid_layer.py
+# @Software: PyCharm
+
+import numpy as np
+
+
+class Sigmoid:
+    def __init__(self):
+        self.out = None
+
+    def forward(self, x):
+        out = 1 / (1 + np.exp(-x))
+        self.out = out
+
+        return out
+
+    def backward(self, dout):
+        # The sigmoid derivative expressed through its own output: y(1 - y)
+        dx = dout * (1.0 - self.out) * self.out
+
+        return dx
+
+if __name__ == '__main__':
+    # Side experiment (unrelated to Sigmoid): how the Affine layer's bias is
+    # broadcast in forward and summed over the batch axis in backward
+    X_dot_W = np.array([[0, 0, 0], [10, 10, 10]])
+    B = np.array([1, 2, 3])
+
+    print(X_dot_W)
+    print(X_dot_W + B)
+
+    dY = np.array([[1, 2, 3], [4, 5, 6]])
+    print(dY)
+
+    dB = np.sum(dY, axis=0)
+    print(dB)
diff --git a/DL_step_by_step/ch5/simpleLayer.py b/DL_step_by_step/ch5/simpleLayer.py
new file mode 100644
index 0000000..81104d9
--- /dev/null
+++ b/DL_step_by_step/ch5/simpleLayer.py
@@ -0,0 +1,68 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/12 6:04 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: simpleLayer.py
+# @Software: PyCharm
+
+import numpy as np
+
+class MulLayer:
+    def __init__(self):
+        self.x = None
+        self.y = None
+
+    def forward(self, x, y):
+        self.x = x
+        self.y = y
+        out = x * y
+
+        return out
+
+    def backward(self, dout):
+        # Multiplication swaps the inputs in the backward pass
+        dx = dout * self.y
+        dy = dout * self.x
+
+        return dx, dy
+
+class AddLayer:
+    def __init__(self):
+        pass
+
+    def forward(self, x, y):
+        out = x + y
+        return out
+
+    def backward(self, dout):
+        # Addition passes the upstream gradient through unchanged
+        dx = dout * 1
+        dy = dout * 1
+        return dx, dy
+
+if __name__ == '__main__':
+    apple = 100
+    apple_num = 2
+
+    tax = 1.1
+
+    mul_apple_layer = MulLayer()
+    mul_tax_layer = MulLayer()
+
+    apple_price = mul_apple_layer.forward(apple, apple_num)
+    price = mul_tax_layer.forward(apple_price, tax)
+
+    print(price)
+    print(mul_apple_layer.x)
+
+    dprice = 1
+    dapple_price, dtax = mul_tax_layer.backward(dprice)
+    dapple, dapple_num = mul_apple_layer.backward(dapple_price)
+
+    print(dapple, dapple_num, dtax)
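+
+    # Expected output (added for reference, up to float rounding):
+    #   220.00000000000003           # price = 100 * 2 * 1.1
+    #   100                          # x cached by mul_apple_layer
+    #   2.2 110.00000000000001 200   # dapple, dapple_num, dtax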
diff --git a/DL_step_by_step/ch5/simpleLayer2.py b/DL_step_by_step/ch5/simpleLayer2.py
new file mode 100644
index 0000000..3ef78ec
--- /dev/null
+++ b/DL_step_by_step/ch5/simpleLayer2.py
@@ -0,0 +1,80 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/12 6:04 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: simpleLayer2.py
+# @Software: PyCharm
+
+import numpy as np
+
+class MulLayer:
+    def __init__(self):
+        self.x = None
+        self.y = None
+
+    def forward(self, x, y):
+        self.x = x
+        self.y = y
+        out = x * y
+
+        return out
+
+    def backward(self, dout):
+        dx = dout * self.y
+        dy = dout * self.x
+
+        return dx, dy
+
+class AddLayer:
+    def __init__(self):
+        pass
+
+    def forward(self, x, y):
+        out = x + y
+        return out
+
+    def backward(self, dout):
+        dx = dout * 1
+        dy = dout * 1
+        return dx, dy
+
+if __name__ == '__main__':
+    apple = 100
+    apple_num = 2
+    orange = 150
+    orange_num = 3
+    tax = 1.1
+
+    # layer
+    mul_apple_layer = MulLayer()
+    mul_orange_layer = MulLayer()
+    add_apple_orange_layer = AddLayer()
+    mul_tax_layer = MulLayer()
+
+    # forward
+    apple_price = mul_apple_layer.forward(apple, apple_num)
+    orange_price = mul_orange_layer.forward(orange, orange_num)
+    all_price = add_apple_orange_layer.forward(apple_price, orange_price)
+    price = mul_tax_layer.forward(all_price, tax)
+
+    # backward, unwinding the layers in reverse order
+    dprice = 1
+    dall_price, dtax = mul_tax_layer.backward(dprice)
+    dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
+    dorange, dorange_num = mul_orange_layer.backward(dorange_price)
+    dapple, dapple_num = mul_apple_layer.backward(dapple_price)
+
+    print(price)
+    print(dapple_num, dapple, dorange, dorange_num, dtax)
diff --git a/DL_step_by_step/ch5/simpleLayer3.py b/DL_step_by_step/ch5/simpleLayer3.py
new file mode 100644
index 0000000..2535c23
--- /dev/null
+++ b/DL_step_by_step/ch5/simpleLayer3.py
@@ -0,0 +1,50 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/13 10:36 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: simpleLayer3.py
+# @Software: PyCharm
+
+import numpy as np
+
+class Relu:
+    def __init__(self):
+        self.mask = None
+
+    def forward(self, x):
+        # Remember which entries were non-positive and zero them out
+        self.mask = (x <= 0)
+        out = x.copy()
+        out[self.mask] = 0
+
+        return out
+
+    def backward(self, dout):
+        # Gradient flows only where the forward input was positive
+        dout[self.mask] = 0
+        dx = dout
+
+        return dx
+
+
+if __name__ == '__main__':
+    x = np.array([[1.0, -0.5], [-2.0, 3.0]])
+    print(x)
+
+    mask = (x <= 0)
+    print(mask)
+
+    relu_layer = Relu()
+    signal_out = relu_layer.forward(x)
+    print(signal_out)
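+
+    # Backward sketch (added for illustration): with an upstream gradient of
+    # ones, entries masked in forward stay zero.
+    dout = np.ones_like(x)
+    print(relu_layer.backward(dout))  # [[1. 0.] [0. 1.]]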
diff --git a/DL_step_by_step/ch5/softmaxWithLoss.py b/DL_step_by_step/ch5/softmaxWithLoss.py
new file mode 100644
index 0000000..a513939
--- /dev/null
+++ b/DL_step_by_step/ch5/softmaxWithLoss.py
@@ -0,0 +1,37 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/20 4:29 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : DeepLearningDemo
+# @FileName: softmaxWithLoss.py
+# @Software: PyCharm
+
+import numpy as np
+from DL_step_by_step.common.common_func import *
+
+
+class SoftmaxWithLoss:
+    def __init__(self):
+        self.loss = None
+        self.y = None
+        self.t = None
+
+    def forward(self, x, t):
+        self.t = t
+        self.y = softmax(x)
+        self.loss = cross_entropy_error(self.y, self.t)
+
+        return self.loss
+
+    def backward(self, dout=1):
+        # Softmax combined with cross-entropy gives the clean gradient (y - t)
+        batch_size = self.t.shape[0]
+        dx = (self.y - self.t) / batch_size
+
+        return dx
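+
+# Minimal usage sketch (added for illustration; scores and label are arbitrary):
+if __name__ == '__main__':
+    layer = SoftmaxWithLoss()
+    x = np.array([[0.3, 2.9, 4.0]])   # raw scores for one sample
+    t = np.array([[0, 0, 1]])         # one-hot label
+    print(layer.forward(x, t))        # cross-entropy loss
+    print(layer.backward())           # (y - t) / batch_size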
diff --git a/DL_step_by_step/ch5/test.py b/DL_step_by_step/ch5/test.py
new file mode 100644
index 0000000..0f55370
--- /dev/null
+++ b/DL_step_by_step/ch5/test.py
@@ -0,0 +1,20 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/13 10:58 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: test.py
+# @Software: PyCharm
+
+# Rebinding x does not touch y: both names pointed at the same int object,
+# and x = 2 simply points x at a new one
+y = 1
+x = y
+x = 2
+print(y)  # still 1
diff --git a/DL_step_by_step/common/__init__.py b/DL_step_by_step/common/__init__.py
new file mode 100644
index 0000000..c6c5f6f
--- /dev/null
+++ b/DL_step_by_step/common/__init__.py
@@ -0,0 +1,14 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/5 6:11 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: __init__.py
+# @Software: PyCharm
diff --git a/DL_step_by_step/common/common_func.py b/DL_step_by_step/common/common_func.py
new file mode 100644
index 0000000..7e86ba5
--- /dev/null
+++ b/DL_step_by_step/common/common_func.py
@@ -0,0 +1,120 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/5 6:11 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: common_func.py
+# @Software: PyCharm
+
+import numpy as np
+
+
+def function_2(x):
+    if x.ndim == 1:
+        return np.sum(x**2)
+    else:
+        return np.sum(x**2, axis=1)
+
+
+def _numerical_gradient_no_batch(f, x):
+    h = 1e-4  # 0.0001
+    grad = np.zeros_like(x)
+
+    for idx in range(x.size):
+        tmp_val = x[idx]
+        x[idx] = float(tmp_val) + h
+        fxh1 = f(x)  # f(x+h)
+
+        x[idx] = tmp_val - h
+        fxh2 = f(x)  # f(x-h)
+        grad[idx] = (fxh1 - fxh2) / (2 * h)
+
+        x[idx] = tmp_val  # restore the original value
+
+    return grad
+
+
+def numerical_gradient(f, X):
+    if X.ndim == 1:
+        return _numerical_gradient_no_batch(f, X)
+    else:
+        grad = np.zeros_like(X)
+
+        for idx, x in enumerate(X):
+            grad[idx] = _numerical_gradient_no_batch(f, x)
+
+        return grad
+
+
+def gradient_descent(f, init_x, lr=0.01, step_num=100):
+    x = init_x
+    for i in range(step_num):
+        grad = numerical_gradient(f, x)
+        x -= lr * grad
+
+    return x
+
+
+def softmax(a):
+    # Handle batched input row by row; subtracting the per-row max guards
+    # against overflow in np.exp
+    if a.ndim == 2:
+        a = a - np.max(a, axis=1, keepdims=True)
+        exp_a = np.exp(a)
+        return exp_a / np.sum(exp_a, axis=1, keepdims=True)
+    c = np.max(a)
+    exp_a = np.exp(a - c)  # guard against overflow
+    sum_exp_a = np.sum(exp_a)
+    y = exp_a / sum_exp_a
+    return y
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def cross_entropy_error(y, t):
+    if y.ndim == 1:
+        t = t.reshape(1, t.size)
+        y = y.reshape(1, y.size)
+
+    # When the labels are one-hot vectors, convert them to class indices
+    if t.size == y.size:
+        t = t.argmax(axis=1)
+
+    batch_size = y.shape[0]
+    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
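+
+# Quick sanity check (sketch added for illustration): gradient descent on
+# f(x) = x0^2 + x1^2 should drive x toward the origin.
+if __name__ == '__main__':
+    init_x = np.array([-3.0, 4.0])
+    print(gradient_descent(function_2, init_x, lr=0.1, step_num=100))
+    # -> approximately [-6.1e-10  8.1e-10]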
diff --git a/DL_step_by_step/dataset/__init__.py b/DL_step_by_step/dataset/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/DL_step_by_step/dataset/lena.png b/DL_step_by_step/dataset/lena.png
new file mode 100755
index 0000000..768f4a5
Binary files /dev/null and b/DL_step_by_step/dataset/lena.png differ
diff --git a/DL_step_by_step/dataset/lena_gray.png b/DL_step_by_step/dataset/lena_gray.png
new file mode 100755
index 0000000..fe2fd39
Binary files /dev/null and b/DL_step_by_step/dataset/lena_gray.png differ
diff --git a/DL_step_by_step/dataset/mnist.pkl b/DL_step_by_step/dataset/mnist.pkl
new file mode 100644
index 0000000..8d88cf1
Binary files /dev/null and b/DL_step_by_step/dataset/mnist.pkl differ
diff --git a/DL_step_by_step/dataset/mnist.py b/DL_step_by_step/dataset/mnist.py
new file mode 100755
index 0000000..0d53709
--- /dev/null
+++ b/DL_step_by_step/dataset/mnist.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+try:
+    import urllib.request
+except ImportError:
+    raise ImportError('You should use Python 3.x')
+import os.path
+import gzip
+import pickle
+import os
+import numpy as np
+
+
+url_base = 'http://yann.lecun.com/exdb/mnist/'
+key_file = {
+    'train_img':'train-images-idx3-ubyte.gz',
+    'train_label':'train-labels-idx1-ubyte.gz',
+    'test_img':'t10k-images-idx3-ubyte.gz',
+    'test_label':'t10k-labels-idx1-ubyte.gz'
+}
+
+dataset_dir = os.path.dirname(os.path.abspath(__file__))
+save_file = dataset_dir + "/mnist.pkl"
+
+train_num = 60000
+test_num = 10000
+img_dim = (1, 28, 28)
+img_size = 784
+
+
+def _download(file_name):
+    file_path = dataset_dir + "/" + file_name
+
+    if os.path.exists(file_path):
+        return
+
+    print("Downloading " + file_name + " ... ")
+    urllib.request.urlretrieve(url_base + file_name, file_path)
+    print("Done")
+
+def download_mnist():
+    for v in key_file.values():
+        _download(v)
+
+def _load_label(file_name):
+    file_path = dataset_dir + "/" + file_name
+
+    print("Converting " + file_name + " to NumPy Array ...")
+    with gzip.open(file_path, 'rb') as f:
+        labels = np.frombuffer(f.read(), np.uint8, offset=8)
+    print("Done")
+
+    return labels
+
+def _load_img(file_name):
+    file_path = dataset_dir + "/" + file_name
+
+    print("Converting " + file_name + " to NumPy Array ...")
+    with gzip.open(file_path, 'rb') as f:
+        data = np.frombuffer(f.read(), np.uint8, offset=16)
+    data = data.reshape(-1, img_size)
+    print("Done")
+
+    return data
+
+def _convert_numpy():
+    dataset = {}
+    dataset['train_img'] = _load_img(key_file['train_img'])
+    dataset['train_label'] = _load_label(key_file['train_label'])
+    dataset['test_img'] = _load_img(key_file['test_img'])
+    dataset['test_label'] = _load_label(key_file['test_label'])
+
+    return dataset
+
+def init_mnist():
+    download_mnist()
+    dataset = _convert_numpy()
+    print("Creating pickle file ...")
+    with open(save_file, 'wb') as f:
+        pickle.dump(dataset, f, -1)
+    print("Done!")
+
+def _change_one_hot_label(X):
+    T = np.zeros((X.size, 10))
+    for idx, row in enumerate(T):
+        row[X[idx]] = 1
+
+    return T
+
+
+def load_mnist(normalize=True, flatten=True, one_hot_label=False):
+    """Load the MNIST dataset.
+
+    Parameters
+    ----------
+    normalize : normalize pixel values to the 0.0~1.0 range
+    one_hot_label :
+        if True, return each label as a one-hot array,
+        e.g. [0,0,1,0,0,0,0,0,0,0]
+    flatten : whether to flatten each image into a 1-D array
+
+    Returns
+    -------
+    (training images, training labels), (test images, test labels)
+    """
+    if not os.path.exists(save_file):
+        init_mnist()
+
+    with open(save_file, 'rb') as f:
+        dataset = pickle.load(f)
+
+    if normalize:
+        for key in ('train_img', 'test_img'):
+            dataset[key] = dataset[key].astype(np.float32)
+            dataset[key] /= 255.0
+
+    if one_hot_label:
+        dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
+        dataset['test_label'] = _change_one_hot_label(dataset['test_label'])
+
+    if not flatten:
+        for key in ('train_img', 'test_img'):
+            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)
+
+    return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])
+
+
+if __name__ == '__main__':
+    init_mnist()
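+
+    # Example usage (sketch added for illustration; assumes the dataset files
+    # are present or can be downloaded): load the flattened, normalized split
+    # and check the shapes.
+    (x_train, t_train), (x_test, t_test) = load_mnist()
+    print(x_train.shape, t_train.shape)  # (60000, 784) (60000,)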
diff --git a/DL_step_by_step/dataset/t10k-images-idx3-ubyte.gz b/DL_step_by_step/dataset/t10k-images-idx3-ubyte.gz
new file mode 100644
index 0000000..5ace8ea
Binary files /dev/null and b/DL_step_by_step/dataset/t10k-images-idx3-ubyte.gz differ
diff --git a/DL_step_by_step/dataset/t10k-labels-idx1-ubyte.gz b/DL_step_by_step/dataset/t10k-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..a7e1415
Binary files /dev/null and b/DL_step_by_step/dataset/t10k-labels-idx1-ubyte.gz differ
diff --git a/DL_step_by_step/dataset/train-images-idx3-ubyte.gz b/DL_step_by_step/dataset/train-images-idx3-ubyte.gz
new file mode 100644
index 0000000..b50e4b6
Binary files /dev/null and b/DL_step_by_step/dataset/train-images-idx3-ubyte.gz differ
diff --git a/DL_step_by_step/dataset/train-labels-idx1-ubyte.gz b/DL_step_by_step/dataset/train-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..707a576
Binary files /dev/null and b/DL_step_by_step/dataset/train-labels-idx1-ubyte.gz differ
diff --git a/DL_step_by_step/test/__init__.py b/DL_step_by_step/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/DL_step_by_step/test/test_class.py b/DL_step_by_step/test/test_class.py
new file mode 100644
index 0000000..4d31289
--- /dev/null
+++ b/DL_step_by_step/test/test_class.py
@@ -0,0 +1,17 @@
+class Man:
+    def __init__(self, name):
+        self.name = name
+        print("Initialized!")
+
+    def hello(self):
+        print("Hello " + self.name + "!")
+
+    def goodbye(self):
+        print("Good-bye " + self.name + "!")
+
+
+if __name__ == '__main__':
+    m = Man("David")
+    m.hello()
+    m.goodbye()
diff --git a/DL_step_by_step/test/test_matplotlib.py b/DL_step_by_step/test/test_matplotlib.py
new file mode 100644
index 0000000..cb96c1a
--- /dev/null
+++ b/DL_step_by_step/test/test_matplotlib.py
@@ -0,0 +1,20 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+x = np.arange(0, 6, 0.1)
+y1 = np.sin(x)
+y2 = np.cos(x)
+
+# Earlier experiment, kept for reference:
+# plt.plot(x, y1, label="sin")
+# plt.plot(x, y2, linestyle="--", label="cos")
+# plt.xlabel("x")
+# plt.ylabel("y")
+# plt.title('sin & cos')
+# plt.legend()
+# plt.show()
+
+from matplotlib.image import imread
+img = imread('../image/miao.png')
+plt.imshow(img)
+plt.show()
diff --git a/DL_step_by_step/test/test_numpy.py b/DL_step_by_step/test/test_numpy.py
new file mode 100644
index 0000000..bc66c29
--- /dev/null
+++ b/DL_step_by_step/test/test_numpy.py
@@ -0,0 +1,30 @@
+import numpy as np
+
+x = np.array([1.0, 2.0, 3.0])
+y = np.array([2.0, 4.0, 6.0])
+
+A = np.array([[1, 2], [3, 4]])
+print(A)
+print(A.shape)
+
+B = np.array([10, 20])
+print(A * B)  # B is broadcast across the rows of A
+
+X = np.array([[51, 55], [14, 19], [0, 4]])
+print(X)
+
+print(X[0])
+print(X[0][1])
+
+for row in X:
+    print(row)
+
+X = X.flatten()
+print(X[np.array([0, 2, 4])])
+
+print(X > 15)
+print(X[X > 15])
diff --git a/DL_step_by_step/test/test_perceptron.py b/DL_step_by_step/test/test_perceptron.py
new file mode 100644
index 0000000..055300f
--- /dev/null
+++ b/DL_step_by_step/test/test_perceptron.py
@@ -0,0 +1,47 @@
+import numpy as np
+x = np.array([0, 1])
+w = np.array([0.5, 0.5])
+b = -0.7
+print(w*x)
+print(np.sum(w*x))
+print(np.sum(w*x) + b)
+
+def AND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.7
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+def NAND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([-0.5, -0.5])
+    b = 0.7
+    tmp = np.sum(w * x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+def OR(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.2  # must be negative, otherwise OR(0, 0) would fire
+    tmp = np.sum(w * x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+def XOR(x1, x2):
+    # XOR is not linearly separable; stack NAND, OR and AND to realize it
+    s1 = NAND(x1, x2)
+    s2 = OR(x1, x2)
+    y = AND(s1, s2)
+    return y
+
+print(XOR(0, 1))
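+
+# Full truth table for XOR (sketch added for verification, in the same style
+# as the gate scripts in ch02):
+for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+    print(str(xs) + " -> " + str(XOR(xs[0], xs[1])))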