diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..723ef36
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea
\ No newline at end of file
diff --git a/DL_demo/LICENSE.md b/DL_demo/LICENSE.md
new file mode 100755
index 0000000..abed9dd
--- /dev/null
+++ b/DL_demo/LICENSE.md
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 Koki Saitoh
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/DL_demo/README.md b/DL_demo/README.md
new file mode 100755
index 0000000..c05b227
--- /dev/null
+++ b/DL_demo/README.md
@@ -0,0 +1,51 @@
+# Deep Learning from Scratch
+
+## Directory layout
+
+|Folder |Description |
+|:-- |:-- |
+|ch01 |Source code used in Chapter 1 |
+|ch02 |Source code used in Chapter 2 |
+|... |... |
+|ch08 |Source code used in Chapter 8 |
+|common |Source code shared by all chapters |
+|dataset |Source code for the datasets |
+
+See the book for explanations of the source code.
+
+## Requirements
+Running the source code requires the following software to be installed:
+
+* Python 3.x
+* NumPy
+* Matplotlib
+
+Note: the code is written for Python 3.
+
+## How to run
+
+Change into a chapter's directory and run the script with Python:
+
+```
+$ cd ch01
+$ python man.py
+
+$ cd ../ch05
+$ python train_neuralnet.py
+```
+
+## License
+
+This source code is released under the [MIT License](http://www.opensource.org/licenses/MIT).
+It may be used freely, for commercial and non-commercial purposes alike.
+
+## Errata
+
+Errata for the book are published at the address below, where readers can view and submit corrections.
+
+http://www.ituring.com.cn/book/1921
+
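Every chapter script in this patch pulls shared code from `common/` and `dataset/` by appending the parent directory to the module search path, which is why the README says to run scripts from inside a chapter directory. A minimal sketch of that convention (assuming `load_mnist`'s default `flatten=True`):

```python
# Run from inside a chapter directory such as ch03/.
import sys, os
sys.path.append(os.pardir)             # make sibling packages (common/, dataset/) importable
from dataset.mnist import load_mnist   # resolves once the parent directory is on sys.path

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
print(x_train.shape)  # (60000, 784) with the default flatten=True
```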
diff --git a/DL_demo/__init__.py b/DL_demo/__init__.py
new file mode 100644
index 0000000..13a78a5
--- /dev/null
+++ b/DL_demo/__init__.py
@@ -0,0 +1,14 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/5 11:29 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong, don't ask me, I don't know why;  ===
+# If this runs right, thank god, and I don't know why. ===
+# Maybe the answer, my friend, is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: __init__.py
+# @Software: PyCharm
diff --git a/DL_demo/ch01/hungry.py b/DL_demo/ch01/hungry.py
new file mode 100755
index 0000000..a2cb2a8
--- /dev/null
+++ b/DL_demo/ch01/hungry.py
@@ -0,0 +1 @@
+print("I'm hungry!")
diff --git a/DL_demo/ch01/img_show.py b/DL_demo/ch01/img_show.py
new file mode 100755
index 0000000..4deeb72
--- /dev/null
+++ b/DL_demo/ch01/img_show.py
@@ -0,0 +1,8 @@
+# coding: utf-8
+import matplotlib.pyplot as plt
+from matplotlib.image import imread
+
+img = imread('../dataset/lena.png')  # load the image
+plt.imshow(img)
+
+plt.show()
\ No newline at end of file
diff --git a/DL_demo/ch01/man.py b/DL_demo/ch01/man.py
new file mode 100755
index 0000000..72a7587
--- /dev/null
+++ b/DL_demo/ch01/man.py
@@ -0,0 +1,17 @@
+# coding: utf-8
+class Man:
+    """Example class"""
+
+    def __init__(self, name):
+        self.name = name
+        print("Initialized!")
+
+    def hello(self):
+        print("Hello " + self.name + "!")
+
+    def goodbye(self):
+        print("Good-bye " + self.name + "!")
+
+m = Man("David")
+m.hello()
+m.goodbye()
\ No newline at end of file
diff --git a/DL_demo/ch01/simple_graph.py b/DL_demo/ch01/simple_graph.py
new file mode 100755
index 0000000..4ad11ae
--- /dev/null
+++ b/DL_demo/ch01/simple_graph.py
@@ -0,0 +1,11 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pyplot as plt
+
+# generate data
+x = np.arange(0, 6, 0.1)  # values from 0 to 6 in steps of 0.1
+y = np.sin(x)
+
+# draw the graph
+plt.plot(x, y)
+plt.show()
\ No newline at end of file
diff --git a/DL_demo/ch01/sin_cos_graph.py b/DL_demo/ch01/sin_cos_graph.py
new file mode 100755
index 0000000..4505714
--- /dev/null
+++ b/DL_demo/ch01/sin_cos_graph.py
@@ -0,0 +1,17 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pyplot as plt
+
+# generate data
+x = np.arange(0, 6, 0.1)  # values from 0 to 6 in steps of 0.1
+y1 = np.sin(x)
+y2 = np.cos(x)
+
+# draw the graph
+plt.plot(x, y1, label="sin")
+plt.plot(x, y2, linestyle="--", label="cos")
+plt.xlabel("x")  # label for the x-axis
+plt.ylabel("y")  # label for the y-axis
+plt.title('sin & cos')
+plt.legend()
+plt.show()
\ No newline at end of file
diff --git a/DL_demo/ch01/sin_graph.py b/DL_demo/ch01/sin_graph.py
new file mode 100755
index 0000000..88f5a01
--- /dev/null
+++ b/DL_demo/ch01/sin_graph.py
@@ -0,0 +1,11 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pyplot as plt
+
+# generate data
+x = np.arange(0, 6, 0.1)
+y = np.sin(x)
+
+# draw the graph
+plt.plot(x, y)
+plt.show()
diff --git a/DL_demo/ch02/and_gate.py b/DL_demo/ch02/and_gate.py
new file mode 100755
index 0000000..afe5504
--- /dev/null
+++ b/DL_demo/ch02/and_gate.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+import numpy as np
+
+
+def AND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.7
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = AND(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
diff --git a/DL_demo/ch02/nand_gate.py b/DL_demo/ch02/nand_gate.py
new file mode 100755
index 0000000..7305206
--- /dev/null
+++ b/DL_demo/ch02/nand_gate.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+import numpy as np
+
+
+def NAND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([-0.5, -0.5])
+    b = 0.7
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = NAND(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
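The ch02 gates all share one decision rule: output 1 exactly when w·x + b > 0; they differ only in the constants (w, b). A small sketch making that explicit (the generic `perceptron` helper is illustrative, not part of the repo):

```python
import numpy as np

def perceptron(x1, x2, w, b):
    # fires iff the weighted sum clears the threshold encoded by the bias
    return 1 if np.sum(np.array([x1, x2]) * w) + b > 0 else 0

AND  = lambda a, b: perceptron(a, b, np.array([0.5, 0.5]), -0.7)
NAND = lambda a, b: perceptron(a, b, np.array([-0.5, -0.5]), 0.7)
OR   = lambda a, b: perceptron(a, b, np.array([0.5, 0.5]), -0.2)

for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
    print(xs, AND(*xs), NAND(*xs), OR(*xs))  # truth tables of all three gates
```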
diff --git a/DL_demo/ch02/or_gate.py b/DL_demo/ch02/or_gate.py
new file mode 100755
index 0000000..ba43554
--- /dev/null
+++ b/DL_demo/ch02/or_gate.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+import numpy as np
+
+
+def OR(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.2
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = OR(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
\ No newline at end of file
diff --git a/DL_demo/ch02/xor_gate.py b/DL_demo/ch02/xor_gate.py
new file mode 100755
index 0000000..0a31449
--- /dev/null
+++ b/DL_demo/ch02/xor_gate.py
@@ -0,0 +1,16 @@
+# coding: utf-8
+from and_gate import AND
+from or_gate import OR
+from nand_gate import NAND
+
+
+def XOR(x1, x2):
+    s1 = NAND(x1, x2)
+    s2 = OR(x1, x2)
+    y = AND(s1, s2)
+    return y
+
+if __name__ == '__main__':
+    for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+        y = XOR(xs[0], xs[1])
+        print(str(xs) + " -> " + str(y))
\ No newline at end of file
diff --git a/DL_demo/ch03/mnist_show.py b/DL_demo/ch03/mnist_show.py
new file mode 100755
index 0000000..828fa8f
--- /dev/null
+++ b/DL_demo/ch03/mnist_show.py
@@ -0,0 +1,23 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+from dataset.mnist import load_mnist
+from PIL import Image
+
+
+def img_show(img):
+    pil_img = Image.fromarray(np.uint8(img))
+    pil_img.show()
+
+(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
+
+img = x_train[0]
+label = t_train[0]
+print(label)  # 5
+
+print(img.shape)  # (784,)
+img = img.reshape(28, 28)  # restore the image to its original shape
+print(img.shape)  # (28, 28)
+
+img_show(img)
diff --git a/DL_demo/ch03/neuralnet_mnist.py b/DL_demo/ch03/neuralnet_mnist.py
new file mode 100755
index 0000000..dee26fa
--- /dev/null
+++ b/DL_demo/ch03/neuralnet_mnist.py
@@ -0,0 +1,44 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+import pickle
+from dataset.mnist import load_mnist
+from common.functions import sigmoid, softmax
+
+
+def get_data():
+    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
+    return x_test, t_test
+
+
+def init_network():
+    with open("sample_weight.pkl", 'rb') as f:
+        network = pickle.load(f)
+    return network
+
+
+def predict(network, x):
+    W1, W2, W3 = network['W1'], network['W2'], network['W3']
+    b1, b2, b3 = network['b1'], network['b2'], network['b3']
+
+    a1 = np.dot(x, W1) + b1
+    z1 = sigmoid(a1)
+    a2 = np.dot(z1, W2) + b2
+    z2 = sigmoid(a2)
+    a3 = np.dot(z2, W3) + b3
+    y = softmax(a3)
+
+    return y
+
+
+x, t = get_data()
+network = init_network()
+accuracy_cnt = 0
+for i in range(len(x)):
+    y = predict(network, x[i])
+    p = np.argmax(y)  # index of the highest-probability element
+    if p == t[i]:
+        accuracy_cnt += 1
+
+print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
\ No newline at end of file
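The batch version that follows feeds many images through `predict` at once, so the scores come back as a 2-D array and `np.argmax` needs `axis=1` to pick the best class per row. A minimal sketch of that shape logic (random scores stand in for network output):

```python
import numpy as np

scores = np.random.rand(100, 10)    # one row of 10 class scores per image
preds = np.argmax(scores, axis=1)   # shape (100,): predicted class for each row
print(preds.shape, preds[:5])
```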
diff --git a/DL_demo/ch03/neuralnet_mnist_batch.py b/DL_demo/ch03/neuralnet_mnist_batch.py
new file mode 100755
index 0000000..3145414
--- /dev/null
+++ b/DL_demo/ch03/neuralnet_mnist_batch.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+import pickle
+from dataset.mnist import load_mnist
+from common.functions import sigmoid, softmax
+
+
+def get_data():
+    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
+    return x_test, t_test
+
+
+def init_network():
+    with open("sample_weight.pkl", 'rb') as f:
+        network = pickle.load(f)
+    return network
+
+
+def predict(network, x):
+    w1, w2, w3 = network['W1'], network['W2'], network['W3']
+    b1, b2, b3 = network['b1'], network['b2'], network['b3']
+
+    a1 = np.dot(x, w1) + b1
+    z1 = sigmoid(a1)
+    a2 = np.dot(z1, w2) + b2
+    z2 = sigmoid(a2)
+    a3 = np.dot(z2, w3) + b3
+    y = softmax(a3)
+
+    return y
+
+
+x, t = get_data()
+network = init_network()
+
+batch_size = 100  # number of images per batch
+accuracy_cnt = 0
+
+for i in range(0, len(x), batch_size):
+    x_batch = x[i:i+batch_size]
+    y_batch = predict(network, x_batch)
+    p = np.argmax(y_batch, axis=1)
+    accuracy_cnt += np.sum(p == t[i:i+batch_size])
+
+print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
diff --git a/DL_demo/ch03/relu.py b/DL_demo/ch03/relu.py
new file mode 100755
index 0000000..8c8d4cc
--- /dev/null
+++ b/DL_demo/ch03/relu.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def relu(x):
+    return np.maximum(0, x)
+
+x = np.arange(-5.0, 5.0, 0.1)
+y = relu(x)
+plt.plot(x, y)
+plt.ylim(-1.0, 5.5)
+plt.show()
diff --git a/DL_demo/ch03/sample_weight.pkl b/DL_demo/ch03/sample_weight.pkl
new file mode 100755
index 0000000..0e92475
Binary files /dev/null and b/DL_demo/ch03/sample_weight.pkl differ
diff --git a/DL_demo/ch03/sig_step_compare.py b/DL_demo/ch03/sig_step_compare.py
new file mode 100755
index 0000000..a4df829
--- /dev/null
+++ b/DL_demo/ch03/sig_step_compare.py
@@ -0,0 +1,20 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def step_function(x):
+    return np.array(x > 0, dtype=int)
+
+x = np.arange(-5.0, 5.0, 0.1)
+y1 = sigmoid(x)
+y2 = step_function(x)
+
+plt.plot(x, y1)
+plt.plot(x, y2, 'k--')
+plt.ylim(-0.1, 1.1)  # set the range of the y-axis
+plt.show()
diff --git a/DL_demo/ch03/sigmoid.py b/DL_demo/ch03/sigmoid.py
new file mode 100755
index 0000000..f863894
--- /dev/null
+++ b/DL_demo/ch03/sigmoid.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+X = np.arange(-5.0, 5.0, 0.1)
+Y = sigmoid(X)
+plt.plot(X, Y)
+plt.ylim(-0.1, 1.1)
+plt.show()
diff --git a/DL_demo/ch03/step_function.py b/DL_demo/ch03/step_function.py
new file mode 100755
index 0000000..5f42383
--- /dev/null
+++ b/DL_demo/ch03/step_function.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def step_function(x):
+    return np.array(x > 0, dtype=int)
+
+X = np.arange(-5.0, 5.0, 0.1)
+Y = step_function(X)
+plt.plot(X, Y)
+plt.ylim(-0.1, 1.1)  # set the range of the y-axis
+plt.show()
diff --git a/DL_demo/ch04/gradient_1d.py b/DL_demo/ch04/gradient_1d.py
new file mode 100755
index 0000000..378402f
--- /dev/null
+++ b/DL_demo/ch04/gradient_1d.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+import numpy as np
+import matplotlib.pylab as plt
+
+
+def numerical_diff(f, x):
+    h = 1e-4  # 0.0001
+    return (f(x+h) - f(x-h)) / (2*h)
+
+
+def function_1(x):
+    return 0.01*x**2 + 0.1*x
+
+
+def tangent_line(f, x):
+    d = numerical_diff(f, x)
+    print(d)
+    y = f(x) - d*x
+    return lambda t: d*t + y
+
+x = np.arange(0.0, 20.0, 0.1)
+y = function_1(x)
+plt.xlabel("x")
+plt.ylabel("f(x)")
+
+tf = tangent_line(function_1, 5)
+y2 = tf(x)
+
+plt.plot(x, y)
+plt.plot(x, y2)
+plt.show()
diff --git a/DL_demo/ch04/gradient_2d.py b/DL_demo/ch04/gradient_2d.py
new file mode 100755
index 0000000..80cc1c8
--- /dev/null
+++ b/DL_demo/ch04/gradient_2d.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+# cf. http://d.hatena.ne.jp/white_wheels/20100327/p3
+import numpy as np
+import matplotlib.pylab as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def 
_numerical_gradient_no_batch(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + for idx in range(x.size): + tmp_val = x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + + return grad + + +def numerical_gradient(f, X): + if X.ndim == 1: + return _numerical_gradient_no_batch(f, X) + else: + grad = np.zeros_like(X) + + for idx, x in enumerate(X): + grad[idx] = _numerical_gradient_no_batch(f, x) + + return grad + + +def function_2(x): + if x.ndim == 1: + return np.sum(x**2) + else: + return np.sum(x**2, axis=1) + + +def tangent_line(f, x): + d = numerical_gradient(f, x) + print(d) + y = f(x) - d*x + return lambda t: d*t + y + +if __name__ == '__main__': + x0 = np.arange(-2, 2.5, 0.25) + x1 = np.arange(-2, 2.5, 0.25) + X, Y = np.meshgrid(x0, x1) + + X = X.flatten() + Y = Y.flatten() + + grad = numerical_gradient(function_2, np.array([X, Y]) ) + print(grad) + + plt.figure() + plt.quiver(X, Y, -grad[0], -grad[1], angles="xy",color="#666666")#,headwidth=10,scale=40,color="#444444") + plt.xlim([-2, 2]) + plt.ylim([-2, 2]) + plt.xlabel('x0') + plt.ylabel('x1') + plt.grid() + plt.legend() + plt.draw() + plt.show() \ No newline at end of file diff --git a/DL_demo/ch04/gradient_method.py b/DL_demo/ch04/gradient_method.py new file mode 100755 index 0000000..a7fb030 --- /dev/null +++ b/DL_demo/ch04/gradient_method.py @@ -0,0 +1,37 @@ +# coding: utf-8 +import numpy as np +import matplotlib.pylab as plt +from gradient_2d import numerical_gradient + + +def gradient_descent(f, init_x, lr=0.01, step_num=100): + x = init_x + x_history = [] + + for i in range(step_num): + x_history.append( x.copy() ) + + grad = numerical_gradient(f, x) + x -= lr * grad + + return x, np.array(x_history) + + +def function_2(x): + return x[0]**2 + x[1]**2 + +init_x = np.array([-3.0, 4.0]) + +lr = 0.1 +step_num = 20 +x, x_history = gradient_descent(function_2, init_x, lr=lr, step_num=step_num) + +plt.plot( [-5, 5], [0,0], '--b') +plt.plot( [0,0], [-5, 5], '--b') +plt.plot(x_history[:,0], x_history[:,1], 'o') + +plt.xlim(-3.5, 3.5) +plt.ylim(-4.5, 4.5) +plt.xlabel("X0") +plt.ylabel("X1") +plt.show() diff --git a/DL_demo/ch04/gradient_simplenet.py b/DL_demo/ch04/gradient_simplenet.py new file mode 100755 index 0000000..35f6dd5 --- /dev/null +++ b/DL_demo/ch04/gradient_simplenet.py @@ -0,0 +1,31 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录中的文件而进行的设定 +import numpy as np +from common.functions import softmax, cross_entropy_error +from common.gradient import numerical_gradient + + +class simpleNet: + def __init__(self): + self.W = np.random.randn(2,3) + + def predict(self, x): + return np.dot(x, self.W) + + def loss(self, x, t): + z = self.predict(x) + y = softmax(z) + loss = cross_entropy_error(y, t) + + return loss + +x = np.array([0.6, 0.9]) +t = np.array([0, 0, 1]) + +net = simpleNet() + +f = lambda w: net.loss(x, t) +dW = numerical_gradient(f, net.W) + +print(dW) diff --git a/DL_demo/ch04/train_neuralnet.py b/DL_demo/ch04/train_neuralnet.py new file mode 100755 index 0000000..838d21a --- /dev/null +++ b/DL_demo/ch04/train_neuralnet.py @@ -0,0 +1,57 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from two_layer_net import TwoLayerNet + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = 
TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + +iters_num = 10000 # 适当设定循环的次数 +train_size = x_train.shape[0] +batch_size = 100 +learning_rate = 0.1 + +train_loss_list = [] +train_acc_list = [] +test_acc_list = [] + +iter_per_epoch = max(train_size / batch_size, 1) + +for i in range(iters_num): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + # 计算梯度 + #grad = network.numerical_gradient(x_batch, t_batch) + grad = network.gradient(x_batch, t_batch) + + # 更新参数 + for key in ('W1', 'b1', 'W2', 'b2'): + network.params[key] -= learning_rate * grad[key] + + loss = network.loss(x_batch, t_batch) + train_loss_list.append(loss) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + test_acc = network.accuracy(x_test, t_test) + train_acc_list.append(train_acc) + test_acc_list.append(test_acc) + print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc)) + +# 绘制图形 +markers = {'train': 'o', 'test': 's'} +x = np.arange(len(train_acc_list)) +plt.plot(x, train_acc_list, label='train acc') +plt.plot(x, test_acc_list, label='test acc', linestyle='--') +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() \ No newline at end of file diff --git a/DL_demo/ch04/two_layer_net.py b/DL_demo/ch04/two_layer_net.py new file mode 100755 index 0000000..c1b243c --- /dev/null +++ b/DL_demo/ch04/two_layer_net.py @@ -0,0 +1,78 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +from common.functions import * +from common.gradient import numerical_gradient + + +class TwoLayerNet: + + def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): + # 初始化权重 + self.params = {} + self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) + self.params['b1'] = np.zeros(hidden_size) + self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) + self.params['b2'] = np.zeros(output_size) + + def predict(self, x): + W1, W2 = self.params['W1'], self.params['W2'] + b1, b2 = self.params['b1'], self.params['b2'] + + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + a2 = np.dot(z1, W2) + b2 + y = softmax(a2) + + return y + + # x:输入数据, t:监督数据 + def loss(self, x, t): + y = self.predict(x) + + return cross_entropy_error(y, t) + + def accuracy(self, x, t): + y = self.predict(x) + y = np.argmax(y, axis=1) + t = np.argmax(t, axis=1) + + accuracy = np.sum(y == t) / float(x.shape[0]) + return accuracy + + # x:输入数据, t:监督数据 + def numerical_gradient(self, x, t): + loss_W = lambda W: self.loss(x, t) + + grads = {} + grads['W1'] = numerical_gradient(loss_W, self.params['W1']) + grads['b1'] = numerical_gradient(loss_W, self.params['b1']) + grads['W2'] = numerical_gradient(loss_W, self.params['W2']) + grads['b2'] = numerical_gradient(loss_W, self.params['b2']) + + return grads + + def gradient(self, x, t): + W1, W2 = self.params['W1'], self.params['W2'] + b1, b2 = self.params['b1'], self.params['b2'] + grads = {} + + batch_num = x.shape[0] + + # forward + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + a2 = np.dot(z1, W2) + b2 + y = softmax(a2) + + # backward + dy = (y - t) / batch_num + grads['W2'] = np.dot(z1.T, dy) + grads['b2'] = np.sum(dy, axis=0) + + da1 = np.dot(dy, W2.T) + dz1 = sigmoid_grad(a1) * da1 + grads['W1'] = np.dot(x.T, dz1) + grads['b1'] = np.sum(dz1, axis=0) + + return grads \ No newline at end of file diff --git a/DL_demo/ch05/buy_apple.py b/DL_demo/ch05/buy_apple.py new 
file mode 100755 index 0000000..8334755 --- /dev/null +++ b/DL_demo/ch05/buy_apple.py @@ -0,0 +1,24 @@ +# coding: utf-8 +from layer_naive import * + + +apple = 100 +apple_num = 2 +tax = 1.1 + +mul_apple_layer = MulLayer() +mul_tax_layer = MulLayer() + +# forward +apple_price = mul_apple_layer.forward(apple, apple_num) +price = mul_tax_layer.forward(apple_price, tax) + +# backward +dprice = 1 +dapple_price, dtax = mul_tax_layer.backward(dprice) +dapple, dapple_num = mul_apple_layer.backward(dapple_price) + +print("price:", int(price)) +print("dApple:", dapple) +print("dApple_num:", int(dapple_num)) +print("dTax:", dtax) diff --git a/DL_demo/ch05/buy_apple_orange.py b/DL_demo/ch05/buy_apple_orange.py new file mode 100755 index 0000000..9bdc6af --- /dev/null +++ b/DL_demo/ch05/buy_apple_orange.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from layer_naive import * + +apple = 100 +apple_num = 2 +orange = 150 +orange_num = 3 +tax = 1.1 + +# layer +mul_apple_layer = MulLayer() +mul_orange_layer = MulLayer() +add_apple_orange_layer = AddLayer() +mul_tax_layer = MulLayer() + +# forward +apple_price = mul_apple_layer.forward(apple, apple_num) # (1) +orange_price = mul_orange_layer.forward(orange, orange_num) # (2) +all_price = add_apple_orange_layer.forward(apple_price, orange_price) # (3) +price = mul_tax_layer.forward(all_price, tax) # (4) + +# backward +dprice = 1 +dall_price, dtax = mul_tax_layer.backward(dprice) # (4) +dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price) # (3) +dorange, dorange_num = mul_orange_layer.backward(dorange_price) # (2) +dapple, dapple_num = mul_apple_layer.backward(dapple_price) # (1) + +print("price:", int(price)) +print("dApple:", dapple) +print("dApple_num:", int(dapple_num)) +print("dOrange:", dorange) +print("dOrange_num:", int(dorange_num)) +print("dTax:", dtax) diff --git a/DL_demo/ch05/gradient_check.py b/DL_demo/ch05/gradient_check.py new file mode 100755 index 0000000..1471800 --- /dev/null +++ b/DL_demo/ch05/gradient_check.py @@ -0,0 +1,21 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from dataset.mnist import load_mnist +from two_layer_net import TwoLayerNet + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + +x_batch = x_train[:3] +t_batch = t_train[:3] + +grad_numerical = network.numerical_gradient(x_batch, t_batch) +grad_backprop = network.gradient(x_batch, t_batch) + +for key in grad_numerical.keys(): + diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) ) + print(key + ":" + str(diff)) \ No newline at end of file diff --git a/DL_demo/ch05/layer_naive.py b/DL_demo/ch05/layer_naive.py new file mode 100755 index 0000000..922336a --- /dev/null +++ b/DL_demo/ch05/layer_naive.py @@ -0,0 +1,36 @@ +# coding: utf-8 + + +class MulLayer: + def __init__(self): + self.x = None + self.y = None + + def forward(self, x, y): + self.x = x + self.y = y + out = x * y + + return out + + def backward(self, dout): + dx = dout * self.y + dy = dout * self.x + + return dx, dy + + +class AddLayer: + def __init__(self): + pass + + def forward(self, x, y): + out = x + y + + return out + + def backward(self, dout): + dx = dout * 1 + dy = dout * 1 + + return dx, dy diff --git a/DL_demo/ch05/train_neuralnet.py b/DL_demo/ch05/train_neuralnet.py new file mode 100755 index 0000000..3693057 --- /dev/null +++ b/DL_demo/ch05/train_neuralnet.py @@ -0,0 +1,46 @@ +# coding: utf-8 
+import sys, os +sys.path.append(os.pardir) + +import numpy as np +from dataset.mnist import load_mnist +from two_layer_net import TwoLayerNet + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + +iters_num = 10000 +train_size = x_train.shape[0] +batch_size = 100 +learning_rate = 0.1 + +train_loss_list = [] +train_acc_list = [] +test_acc_list = [] + +iter_per_epoch = max(train_size / batch_size, 1) + +for i in range(iters_num): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + # 梯度 + #grad = network.numerical_gradient(x_batch, t_batch) + grad = network.gradient(x_batch, t_batch) + + # 更新 + for key in ('W1', 'b1', 'W2', 'b2'): + network.params[key] -= learning_rate * grad[key] + + loss = network.loss(x_batch, t_batch) + train_loss_list.append(loss) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + test_acc = network.accuracy(x_test, t_test) + train_acc_list.append(train_acc) + test_acc_list.append(test_acc) + print(train_acc, test_acc) diff --git a/DL_demo/ch05/two_layer_net.py b/DL_demo/ch05/two_layer_net.py new file mode 100755 index 0000000..589ccf4 --- /dev/null +++ b/DL_demo/ch05/two_layer_net.py @@ -0,0 +1,77 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from common.layers import * +from common.gradient import numerical_gradient +from collections import OrderedDict + + +class TwoLayerNet: + + def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01): + # 初始化权重 + self.params = {} + self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) + self.params['b1'] = np.zeros(hidden_size) + self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) + self.params['b2'] = np.zeros(output_size) + + # 生成层 + self.layers = OrderedDict() + self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) + self.layers['Relu1'] = Relu() + self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) + + self.lastLayer = SoftmaxWithLoss() + + def predict(self, x): + for layer in self.layers.values(): + x = layer.forward(x) + + return x + + # x:输入数据, t:监督数据 + def loss(self, x, t): + y = self.predict(x) + return self.lastLayer.forward(y, t) + + def accuracy(self, x, t): + y = self.predict(x) + y = np.argmax(y, axis=1) + if t.ndim != 1 : t = np.argmax(t, axis=1) + + accuracy = np.sum(y == t) / float(x.shape[0]) + return accuracy + + # x:输入数据, t:监督数据 + def numerical_gradient(self, x, t): + loss_W = lambda W: self.loss(x, t) + + grads = {} + grads['W1'] = numerical_gradient(loss_W, self.params['W1']) + grads['b1'] = numerical_gradient(loss_W, self.params['b1']) + grads['W2'] = numerical_gradient(loss_W, self.params['W2']) + grads['b2'] = numerical_gradient(loss_W, self.params['b2']) + + return grads + + def gradient(self, x, t): + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.lastLayer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db + grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db + + return grads diff --git a/DL_demo/ch06/batch_norm_gradient_check.py b/DL_demo/ch06/batch_norm_gradient_check.py new file mode 100755 
index 0000000..33c5a86 --- /dev/null +++ b/DL_demo/ch06/batch_norm_gradient_check.py @@ -0,0 +1,23 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from dataset.mnist import load_mnist +from common.multi_layer_net_extend import MultiLayerNetExtend + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + +network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100], output_size=10, + use_batchnorm=True) + +x_batch = x_train[:1] +t_batch = t_train[:1] + +grad_backprop = network.gradient(x_batch, t_batch) +grad_numerical = network.numerical_gradient(x_batch, t_batch) + + +for key in grad_numerical.keys(): + diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) ) + print(key + ":" + str(diff)) \ No newline at end of file diff --git a/DL_demo/ch06/batch_norm_test.py b/DL_demo/ch06/batch_norm_test.py new file mode 100755 index 0000000..d08d446 --- /dev/null +++ b/DL_demo/ch06/batch_norm_test.py @@ -0,0 +1,87 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.multi_layer_net_extend import MultiLayerNetExtend +from common.optimizer import SGD, Adam + +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +# 减少学习数据 +x_train = x_train[:1000] +t_train = t_train[:1000] + +max_epochs = 20 +train_size = x_train.shape[0] +batch_size = 100 +learning_rate = 0.01 + + +def __train(weight_init_std): + bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10, + weight_init_std=weight_init_std, use_batchnorm=True) + network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10, + weight_init_std=weight_init_std) + optimizer = SGD(lr=learning_rate) + + train_acc_list = [] + bn_train_acc_list = [] + + iter_per_epoch = max(train_size / batch_size, 1) + epoch_cnt = 0 + + for i in range(1000000000): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + for _network in (bn_network, network): + grads = _network.gradient(x_batch, t_batch) + optimizer.update(_network.params, grads) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + bn_train_acc = bn_network.accuracy(x_train, t_train) + train_acc_list.append(train_acc) + bn_train_acc_list.append(bn_train_acc) + + print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc)) + + epoch_cnt += 1 + if epoch_cnt >= max_epochs: + break + + return train_acc_list, bn_train_acc_list + + +# 3.绘制图形========== +weight_scale_list = np.logspace(0, -4, num=16) +x = np.arange(max_epochs) + +for i, w in enumerate(weight_scale_list): + print( "============== " + str(i+1) + "/16" + " ==============") + train_acc_list, bn_train_acc_list = __train(w) + + plt.subplot(4,4,i+1) + plt.title("W:" + str(w)) + if i == 15: + plt.plot(x, bn_train_acc_list, label='Batch Normalization', markevery=2) + plt.plot(x, train_acc_list, linestyle = "--", label='Normal(without BatchNorm)', markevery=2) + else: + plt.plot(x, bn_train_acc_list, markevery=2) + plt.plot(x, train_acc_list, linestyle="--", markevery=2) + + plt.ylim(0, 1.0) + if i % 4: + plt.yticks([]) + else: + plt.ylabel("accuracy") + if i < 12: + plt.xticks([]) + else: + plt.xlabel("epochs") + plt.legend(loc='lower right') + +plt.show() \ No newline at end of file 
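batch_norm_test.py above compares training with and without Batch Normalization. The transform itself is small: normalize each feature over the mini-batch to zero mean and unit variance, then apply a learned scale and shift. A minimal forward-pass sketch (illustrative only, not the common/layers.py implementation; `gamma`, `beta`, and `eps` follow the usual naming):

```python
import numpy as np

def batchnorm_forward(x, gamma, beta, eps=1e-7):
    mu = x.mean(axis=0)                    # per-feature mean over the batch
    var = x.var(axis=0)                    # per-feature variance over the batch
    x_hat = (x - mu) / np.sqrt(var + eps)  # normalized activations
    return gamma * x_hat + beta            # learned scale and shift

x = 10 * np.random.randn(100, 5) + 3       # badly scaled activations
out = batchnorm_forward(x, gamma=np.ones(5), beta=np.zeros(5))
print(out.mean(axis=0).round(6), out.std(axis=0).round(6))  # ~0 and ~1 per feature
```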
diff --git a/DL_demo/ch06/hyperparameter_optimization.py b/DL_demo/ch06/hyperparameter_optimization.py
new file mode 100755
index 0000000..4347ea5
--- /dev/null
+++ b/DL_demo/ch06/hyperparameter_optimization.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import numpy as np
+import matplotlib.pyplot as plt
+from dataset.mnist import load_mnist
+from common.multi_layer_net import MultiLayerNet
+from common.util import shuffle_dataset
+from common.trainer import Trainer
+
+(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
+
+# use fewer training samples so the search runs quickly
+x_train = x_train[:500]
+t_train = t_train[:500]
+
+# split off validation data
+validation_rate = 0.20
+validation_num = int(x_train.shape[0] * validation_rate)  # slice indices must be integers
+x_train, t_train = shuffle_dataset(x_train, t_train)
+x_val = x_train[:validation_num]
+t_val = t_train[:validation_num]
+x_train = x_train[validation_num:]
+t_train = t_train[validation_num:]
+
+
+def __train(lr, weight_decay, epochs=50):
+    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
+                            output_size=10, weight_decay_lambda=weight_decay)
+    trainer = Trainer(network, x_train, t_train, x_val, t_val,
+                      epochs=epochs, mini_batch_size=100,
+                      optimizer='sgd', optimizer_param={'lr': lr}, verbose=False)
+    trainer.train()
+
+    return trainer.test_acc_list, trainer.train_acc_list
+
+
+# random search over the hyperparameters ======================
+optimization_trial = 100
+results_val = {}
+results_train = {}
+for _ in range(optimization_trial):
+    # search ranges for the hyperparameters ===============
+    weight_decay = 10 ** np.random.uniform(-8, -4)
+    lr = 10 ** np.random.uniform(-6, -2)
+    # ================================================
+
+    val_acc_list, train_acc_list = __train(lr, weight_decay)
+    print("val acc:" + str(val_acc_list[-1]) + " | lr:" + str(lr) + ", weight decay:" + str(weight_decay))
+    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
+    results_val[key] = val_acc_list
+    results_train[key] = train_acc_list
+
+# plot the results ========================================================
+print("=========== Hyper-Parameter Optimization Result ===========")
+graph_draw_num = 20
+col_num = 5
+row_num = int(np.ceil(graph_draw_num / col_num))
+i = 0
+
+for key, val_acc_list in sorted(results_val.items(), key=lambda x: x[1][-1], reverse=True):
+    print("Best-" + str(i+1) + "(val acc:" + str(val_acc_list[-1]) + ") | " + key)
+
+    plt.subplot(row_num, col_num, i+1)
+    plt.title("Best-" + str(i+1))
+    plt.ylim(0.0, 1.0)
+    if i % 5: plt.yticks([])
+    plt.xticks([])
+    x = np.arange(len(val_acc_list))
+    plt.plot(x, val_acc_list)
+    plt.plot(x, results_train[key], "--")
+    i += 1
+
+    if i >= graph_draw_num:
+        break
+
+plt.show()
diff --git a/DL_demo/ch06/optimizer_compare_mnist.py b/DL_demo/ch06/optimizer_compare_mnist.py
new file mode 100755
index 0000000..58f5f7e
--- /dev/null
+++ b/DL_demo/ch06/optimizer_compare_mnist.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+import os
+import sys
+sys.path.append(os.pardir)  # make files in the parent directory importable
+import matplotlib.pyplot as plt
+from dataset.mnist import load_mnist
+from common.util import smooth_curve
+from common.multi_layer_net import MultiLayerNet
+from common.optimizer import *
+
+
+# 0: load the MNIST data ==========
+(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
+
+train_size = x_train.shape[0]
+batch_size = 128
+max_iterations = 2000
+
+
+# 1: experiment setup ==========
+optimizers = {}
+optimizers['SGD'] = SGD()
+optimizers['Momentum'] = Momentum()
+optimizers['AdaGrad'] = AdaGrad()
+optimizers['Adam'] = Adam()
+#optimizers['RMSprop'] = RMSprop() + +networks = {} +train_loss = {} +for key in optimizers.keys(): + networks[key] = MultiLayerNet( + input_size=784, hidden_size_list=[100, 100, 100, 100], + output_size=10) + train_loss[key] = [] + + +# 2:开始训练========== +for i in range(max_iterations): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + for key in optimizers.keys(): + grads = networks[key].gradient(x_batch, t_batch) + optimizers[key].update(networks[key].params, grads) + + loss = networks[key].loss(x_batch, t_batch) + train_loss[key].append(loss) + + if i % 100 == 0: + print( "===========" + "iteration:" + str(i) + "===========") + for key in optimizers.keys(): + loss = networks[key].loss(x_batch, t_batch) + print(key + ":" + str(loss)) + + +# 3.绘制图形========== +markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"} +x = np.arange(max_iterations) +for key in optimizers.keys(): + plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key) +plt.xlabel("iterations") +plt.ylabel("loss") +plt.ylim(0, 1) +plt.legend() +plt.show() diff --git a/DL_demo/ch06/optimizer_compare_naive.py b/DL_demo/ch06/optimizer_compare_naive.py new file mode 100755 index 0000000..abe6d36 --- /dev/null +++ b/DL_demo/ch06/optimizer_compare_naive.py @@ -0,0 +1,70 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from collections import OrderedDict +from common.optimizer import * + + +def f(x, y): + return x**2 / 20.0 + y**2 + + +def df(x, y): + return x / 10.0, 2.0*y + +init_pos = (-7.0, 2.0) +params = {} +params['x'], params['y'] = init_pos[0], init_pos[1] +grads = {} +grads['x'], grads['y'] = 0, 0 + + +optimizers = OrderedDict() +optimizers["SGD"] = SGD(lr=0.95) +optimizers["Momentum"] = Momentum(lr=0.1) +optimizers["AdaGrad"] = AdaGrad(lr=1.5) +optimizers["Adam"] = Adam(lr=0.3) + +idx = 1 + +for key in optimizers: + optimizer = optimizers[key] + x_history = [] + y_history = [] + params['x'], params['y'] = init_pos[0], init_pos[1] + + for i in range(30): + x_history.append(params['x']) + y_history.append(params['y']) + + grads['x'], grads['y'] = df(params['x'], params['y']) + optimizer.update(params, grads) + + + x = np.arange(-10, 10, 0.01) + y = np.arange(-5, 5, 0.01) + + X, Y = np.meshgrid(x, y) + Z = f(X, Y) + + # for simple contour line + mask = Z > 7 + Z[mask] = 0 + + # plot + plt.subplot(2, 2, idx) + idx += 1 + plt.plot(x_history, y_history, 'o-', color="red") + plt.contour(X, Y, Z) + plt.ylim(-10, 10) + plt.xlim(-10, 10) + plt.plot(0, 0, '+') + #colorbar() + #spring() + plt.title(key) + plt.xlabel("x") + plt.ylabel("y") + +plt.show() \ No newline at end of file diff --git a/DL_demo/ch06/overfit_dropout.py b/DL_demo/ch06/overfit_dropout.py new file mode 100755 index 0000000..c09bb2d --- /dev/null +++ b/DL_demo/ch06/overfit_dropout.py @@ -0,0 +1,40 @@ +# coding: utf-8 +import os +import sys +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.multi_layer_net_extend import MultiLayerNetExtend +from common.trainer import Trainer + +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +# 为了再现过拟合,减少学习数据 +x_train = x_train[:300] +t_train = t_train[:300] + +# 设定是否使用Dropuout,以及比例 ======================== +use_dropout = True # 不使用Dropout的情况下为False +dropout_ratio = 0.2 +# 
==================================================== + +network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], + output_size=10, use_dropout=use_dropout, dropout_ration=dropout_ratio) +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=301, mini_batch_size=100, + optimizer='sgd', optimizer_param={'lr': 0.01}, verbose=True) +trainer.train() + +train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list + +# 绘制图形========== +markers = {'train': 'o', 'test': 's'} +x = np.arange(len(train_acc_list)) +plt.plot(x, train_acc_list, marker='o', label='train', markevery=10) +plt.plot(x, test_acc_list, marker='s', label='test', markevery=10) +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() \ No newline at end of file diff --git a/DL_demo/ch06/overfit_weight_decay.py b/DL_demo/ch06/overfit_weight_decay.py new file mode 100755 index 0000000..92c5a7b --- /dev/null +++ b/DL_demo/ch06/overfit_weight_decay.py @@ -0,0 +1,68 @@ +# coding: utf-8 +import os +import sys + +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.multi_layer_net import MultiLayerNet +from common.optimizer import SGD + +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +# 为了再现过拟合,减少学习数据 +x_train = x_train[:300] +t_train = t_train[:300] + +# weight decay(权值衰减)的设定 ======================= +#weight_decay_lambda = 0 # 不使用权值衰减的情况 +weight_decay_lambda = 0.1 +# ==================================================== + +network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], output_size=10, + weight_decay_lambda=weight_decay_lambda) +optimizer = SGD(lr=0.01) + +max_epochs = 201 +train_size = x_train.shape[0] +batch_size = 100 + +train_loss_list = [] +train_acc_list = [] +test_acc_list = [] + +iter_per_epoch = max(train_size / batch_size, 1) +epoch_cnt = 0 + +for i in range(1000000000): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + grads = network.gradient(x_batch, t_batch) + optimizer.update(network.params, grads) + + if i % iter_per_epoch == 0: + train_acc = network.accuracy(x_train, t_train) + test_acc = network.accuracy(x_test, t_test) + train_acc_list.append(train_acc) + test_acc_list.append(test_acc) + + print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc)) + + epoch_cnt += 1 + if epoch_cnt >= max_epochs: + break + + +# 3.绘制图形========== +markers = {'train': 'o', 'test': 's'} +x = np.arange(max_epochs) +plt.plot(x, train_acc_list, marker='o', label='train', markevery=10) +plt.plot(x, test_acc_list, marker='s', label='test', markevery=10) +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() \ No newline at end of file diff --git a/DL_demo/ch06/weight_init_activation_histogram.py b/DL_demo/ch06/weight_init_activation_histogram.py new file mode 100755 index 0000000..933cf83 --- /dev/null +++ b/DL_demo/ch06/weight_init_activation_histogram.py @@ -0,0 +1,53 @@ +# coding: utf-8 +import numpy as np +import matplotlib.pyplot as plt + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +def ReLU(x): + return np.maximum(0, x) + + +def tanh(x): + return np.tanh(x) + +input_data = np.random.randn(1000, 100) # 1000个数据 +node_num = 100 # 各隐藏层的节点(神经元)数 +hidden_layer_size = 5 # 隐藏层有5层 +activations = {} # 
激活值的结果保存在这里 + +x = input_data + +for i in range(hidden_layer_size): + if i != 0: + x = activations[i-1] + + # 改变初始值进行实验! + w = np.random.randn(node_num, node_num) * 1 + # w = np.random.randn(node_num, node_num) * 0.01 + # w = np.random.randn(node_num, node_num) * np.sqrt(1.0 / node_num) + # w = np.random.randn(node_num, node_num) * np.sqrt(2.0 / node_num) + + + a = np.dot(x, w) + + + # 将激活函数的种类也改变,来进行实验! + z = sigmoid(a) + # z = ReLU(a) + # z = tanh(a) + + activations[i] = z + +# 绘制直方图 +for i, a in activations.items(): + plt.subplot(1, len(activations), i+1) + plt.title(str(i+1) + "-layer") + if i != 0: plt.yticks([], []) + # plt.xlim(0.1, 1) + # plt.ylim(0, 7000) + plt.hist(a.flatten(), 30, range=(0,1)) +plt.show() diff --git a/DL_demo/ch06/weight_init_compare.py b/DL_demo/ch06/weight_init_compare.py new file mode 100755 index 0000000..cf15609 --- /dev/null +++ b/DL_demo/ch06/weight_init_compare.py @@ -0,0 +1,63 @@ +# coding: utf-8 +import os +import sys + +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from common.util import smooth_curve +from common.multi_layer_net import MultiLayerNet +from common.optimizer import SGD + + +# 0:读入MNIST数据========== +(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) + +train_size = x_train.shape[0] +batch_size = 128 +max_iterations = 2000 + + +# 1:进行实验的设置========== +weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'} +optimizer = SGD(lr=0.01) + +networks = {} +train_loss = {} +for key, weight_type in weight_init_types.items(): + networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100], + output_size=10, weight_init_std=weight_type) + train_loss[key] = [] + + +# 2:开始训练========== +for i in range(max_iterations): + batch_mask = np.random.choice(train_size, batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + for key in weight_init_types.keys(): + grads = networks[key].gradient(x_batch, t_batch) + optimizer.update(networks[key].params, grads) + + loss = networks[key].loss(x_batch, t_batch) + train_loss[key].append(loss) + + if i % 100 == 0: + print("===========" + "iteration:" + str(i) + "===========") + for key in weight_init_types.keys(): + loss = networks[key].loss(x_batch, t_batch) + print(key + ":" + str(loss)) + + +# 3.绘制图形========== +markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'} +x = np.arange(max_iterations) +for key in weight_init_types.keys(): + plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key) +plt.xlabel("iterations") +plt.ylabel("loss") +plt.ylim(0, 2.5) +plt.legend() +plt.show() \ No newline at end of file diff --git a/DL_demo/ch07/apply_filter.py b/DL_demo/ch07/apply_filter.py new file mode 100755 index 0000000..488f208 --- /dev/null +++ b/DL_demo/ch07/apply_filter.py @@ -0,0 +1,54 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from simple_convnet import SimpleConvNet +from matplotlib.image import imread +from common.layers import Convolution + +def filter_show(filters, nx=4, show_num=16): + """ + c.f. 
https://gist.github.com/aidiary/07d530d5e08011832b12#file-draw_weight-py + """ + FN, C, FH, FW = filters.shape + ny = int(np.ceil(show_num / nx)) + + fig = plt.figure() + fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) + + for i in range(show_num): + ax = fig.add_subplot(4, 4, i+1, xticks=[], yticks=[]) + ax.imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest') + + +network = SimpleConvNet(input_dim=(1,28,28), + conv_param = {'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}, + hidden_size=100, output_size=10, weight_init_std=0.01) + +# 学习后的权重 +network.load_params("params.pkl") + +filter_show(network.params['W1'], 16) + +img = imread('../dataset/lena_gray.png') +img = img.reshape(1, 1, *img.shape) + +fig = plt.figure() + +w_idx = 1 + +for i in range(16): + w = network.params['W1'][i] + b = 0 # network.params['b1'][i] + + w = w.reshape(1, *w.shape) + #b = b.reshape(1, *b.shape) + conv_layer = Convolution(w, b) + out = conv_layer.forward(img) + out = out.reshape(out.shape[2], out.shape[3]) + + ax = fig.add_subplot(4, 4, i+1, xticks=[], yticks=[]) + ax.imshow(out, cmap=plt.cm.gray_r, interpolation='nearest') + +plt.show() \ No newline at end of file diff --git a/DL_demo/ch07/gradient_check.py b/DL_demo/ch07/gradient_check.py new file mode 100755 index 0000000..9d5cdff --- /dev/null +++ b/DL_demo/ch07/gradient_check.py @@ -0,0 +1,16 @@ +# coding: utf-8 +import numpy as np +from simple_convnet import SimpleConvNet + +network = SimpleConvNet(input_dim=(1,10, 10), + conv_param = {'filter_num':10, 'filter_size':3, 'pad':0, 'stride':1}, + hidden_size=10, output_size=10, weight_init_std=0.01) + +X = np.random.rand(100).reshape((1, 1, 10, 10)) +T = np.array([1]).reshape((1,1)) + +grad_num = network.numerical_gradient(X, T) +grad = network.gradient(X, T) + +for key, val in grad_num.items(): + print(key, np.abs(grad_num[key] - grad[key]).mean()) \ No newline at end of file diff --git a/DL_demo/ch07/params.pkl b/DL_demo/ch07/params.pkl new file mode 100755 index 0000000..7497eed Binary files /dev/null and b/DL_demo/ch07/params.pkl differ diff --git a/DL_demo/ch07/simple_convnet.py b/DL_demo/ch07/simple_convnet.py new file mode 100755 index 0000000..af0651b --- /dev/null +++ b/DL_demo/ch07/simple_convnet.py @@ -0,0 +1,160 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import pickle +import numpy as np +from collections import OrderedDict +from common.layers import * +from common.gradient import numerical_gradient + + +class SimpleConvNet: + """简单的ConvNet + + conv - relu - pool - affine - relu - affine - softmax + + Parameters + ---------- + input_size : 输入大小(MNIST的情况下为784) + hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100]) + output_size : 输出大小(MNIST的情况下为10) + activation : 'relu' or 'sigmoid' + weight_init_std : 指定权重的标准差(e.g. 
0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + """ + def __init__(self, input_dim=(1, 28, 28), + conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}, + hidden_size=100, output_size=10, weight_init_std=0.01): + filter_num = conv_param['filter_num'] + filter_size = conv_param['filter_size'] + filter_pad = conv_param['pad'] + filter_stride = conv_param['stride'] + input_size = input_dim[1] + conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1 + pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2)) + + # 初始化权重 + self.params = {} + self.params['W1'] = weight_init_std * \ + np.random.randn(filter_num, input_dim[0], filter_size, filter_size) + self.params['b1'] = np.zeros(filter_num) + self.params['W2'] = weight_init_std * \ + np.random.randn(pool_output_size, hidden_size) + self.params['b2'] = np.zeros(hidden_size) + self.params['W3'] = weight_init_std * \ + np.random.randn(hidden_size, output_size) + self.params['b3'] = np.zeros(output_size) + + # 生成层 + self.layers = OrderedDict() + self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], + conv_param['stride'], conv_param['pad']) + self.layers['Relu1'] = Relu() + self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) + self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) + self.layers['Relu2'] = Relu() + self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) + + self.last_layer = SoftmaxWithLoss() + + def predict(self, x): + for layer in self.layers.values(): + x = layer.forward(x) + + return x + + def loss(self, x, t): + """求损失函数 + 参数x是输入数据、t是教师标签 + """ + y = self.predict(x) + return self.last_layer.forward(y, t) + + def accuracy(self, x, t, batch_size=100): + if t.ndim != 1 : t = np.argmax(t, axis=1) + + acc = 0.0 + + for i in range(int(x.shape[0] / batch_size)): + tx = x[i*batch_size:(i+1)*batch_size] + tt = t[i*batch_size:(i+1)*batch_size] + y = self.predict(tx) + y = np.argmax(y, axis=1) + acc += np.sum(y == tt) + + return acc / x.shape[0] + + def numerical_gradient(self, x, t): + """求梯度(数值微分) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + loss_w = lambda w: self.loss(x, t) + + grads = {} + for idx in (1, 2, 3): + grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)]) + grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)]) + + return grads + + def gradient(self, x, t): + """求梯度(误差反向传播法) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db + grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db + grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db + + return grads + + def save_params(self, file_name="params.pkl"): + params = {} + for key, val in self.params.items(): + params[key] = val + with open(file_name, 'wb') as f: + pickle.dump(params, f) + + def load_params(self, file_name="params.pkl"): + with open(file_name, 'rb') as f: + params = 
pickle.load(f) + for key, val in params.items(): + self.params[key] = val + + for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']): + self.layers[key].W = self.params['W' + str(i+1)] + self.layers[key].b = self.params['b' + str(i+1)] \ No newline at end of file diff --git a/DL_demo/ch07/train_convnet.py b/DL_demo/ch07/train_convnet.py new file mode 100755 index 0000000..2596a9c --- /dev/null +++ b/DL_demo/ch07/train_convnet.py @@ -0,0 +1,42 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from simple_convnet import SimpleConvNet +from common.trainer import Trainer + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +# 处理花费时间较长的情况下减少数据 +#x_train, t_train = x_train[:5000], t_train[:5000] +#x_test, t_test = x_test[:1000], t_test[:1000] + +max_epochs = 20 + +network = SimpleConvNet(input_dim=(1,28,28), + conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}, + hidden_size=100, output_size=10, weight_init_std=0.01) + +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=max_epochs, mini_batch_size=100, + optimizer='Adam', optimizer_param={'lr': 0.001}, + evaluate_sample_num_per_epoch=1000) +trainer.train() + +# 保存参数 +network.save_params("params.pkl") +print("Saved Network Parameters!") + +# 绘制图形 +markers = {'train': 'o', 'test': 's'} +x = np.arange(max_epochs) +plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2) +plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2) +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() diff --git a/DL_demo/ch07/visualize_filter.py b/DL_demo/ch07/visualize_filter.py new file mode 100755 index 0000000..da53839 --- /dev/null +++ b/DL_demo/ch07/visualize_filter.py @@ -0,0 +1,28 @@ +# coding: utf-8 +import numpy as np +import matplotlib.pyplot as plt +from simple_convnet import SimpleConvNet + +def filter_show(filters, nx=8, margin=3, scale=10): + """ + c.f. https://gist.github.com/aidiary/07d530d5e08011832b12#file-draw_weight-py + """ + FN, C, FH, FW = filters.shape + ny = int(np.ceil(FN / nx)) + + fig = plt.figure() + fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) + + for i in range(FN): + ax = fig.add_subplot(ny, nx, i+1, xticks=[], yticks=[]) + ax.imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest') + plt.show() + + +network = SimpleConvNet() +# 随机进行初始化后的权重 +filter_show(network.params['W1']) + +# 学习后的权重 +network.load_params("params.pkl") +filter_show(network.params['W1']) \ No newline at end of file diff --git a/DL_demo/ch08/awesome_net.py b/DL_demo/ch08/awesome_net.py new file mode 100755 index 0000000..f23f101 --- /dev/null +++ b/DL_demo/ch08/awesome_net.py @@ -0,0 +1 @@ +# Create your awesome net!! 
\ No newline at end of file diff --git a/DL_demo/ch08/deep_convnet.py b/DL_demo/ch08/deep_convnet.py new file mode 100755 index 0000000..d974c7b --- /dev/null +++ b/DL_demo/ch08/deep_convnet.py @@ -0,0 +1,136 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import pickle +import numpy as np +from collections import OrderedDict +from common.layers import * + + +class DeepConvNet: + """识别率为99%以上的高精度的ConvNet + + 网络结构如下所示 + conv - relu - conv- relu - pool - + conv - relu - conv- relu - pool - + conv - relu - conv- relu - pool - + affine - relu - dropout - affine - dropout - softmax + """ + def __init__(self, input_dim=(1, 28, 28), + conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1}, + conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_6 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, + hidden_size=50, output_size=10): + # 初始化权重=========== + # 各层的神经元平均与前一层的几个神经元有连接(TODO:自动计算) + pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size]) + wight_init_scales = np.sqrt(2.0 / pre_node_nums) # 使用ReLU的情况下推荐的初始值 + + self.params = {} + pre_channel_num = input_dim[0] + for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]): + self.params['W' + str(idx+1)] = wight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size']) + self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num']) + pre_channel_num = conv_param['filter_num'] + self.params['W7'] = wight_init_scales[6] * np.random.randn(64*4*4, hidden_size) + self.params['b7'] = np.zeros(hidden_size) + self.params['W8'] = wight_init_scales[7] * np.random.randn(hidden_size, output_size) + self.params['b8'] = np.zeros(output_size) + + # 生成层=========== + self.layers = [] + self.layers.append(Convolution(self.params['W1'], self.params['b1'], + conv_param_1['stride'], conv_param_1['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W2'], self.params['b2'], + conv_param_2['stride'], conv_param_2['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Convolution(self.params['W3'], self.params['b3'], + conv_param_3['stride'], conv_param_3['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W4'], self.params['b4'], + conv_param_4['stride'], conv_param_4['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Convolution(self.params['W5'], self.params['b5'], + conv_param_5['stride'], conv_param_5['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W6'], self.params['b6'], + conv_param_6['stride'], conv_param_6['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Affine(self.params['W7'], self.params['b7'])) + self.layers.append(Relu()) + self.layers.append(Dropout(0.5)) + self.layers.append(Affine(self.params['W8'], self.params['b8'])) + self.layers.append(Dropout(0.5)) + + self.last_layer = SoftmaxWithLoss() + + def predict(self, x, train_flg=False): + for layer in self.layers: + if 
isinstance(layer, Dropout): + x = layer.forward(x, train_flg) + else: + x = layer.forward(x) + return x + + def loss(self, x, t): + y = self.predict(x, train_flg=True) + return self.last_layer.forward(y, t) + + def accuracy(self, x, t, batch_size=100): + if t.ndim != 1 : t = np.argmax(t, axis=1) + + acc = 0.0 + + for i in range(int(x.shape[0] / batch_size)): + tx = x[i*batch_size:(i+1)*batch_size] + tt = t[i*batch_size:(i+1)*batch_size] + y = self.predict(tx, train_flg=False) + y = np.argmax(y, axis=1) + acc += np.sum(y == tt) + + return acc / x.shape[0] + + def gradient(self, x, t): + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + tmp_layers = self.layers.copy() + tmp_layers.reverse() + for layer in tmp_layers: + dout = layer.backward(dout) + + # collect the gradients + grads = {} + for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): + grads['W' + str(i+1)] = self.layers[layer_idx].dW + grads['b' + str(i+1)] = self.layers[layer_idx].db + + return grads + + def save_params(self, file_name="params.pkl"): + params = {} + for key, val in self.params.items(): + params[key] = val + with open(file_name, 'wb') as f: + pickle.dump(params, f) + + def load_params(self, file_name="params.pkl"): + with open(file_name, 'rb') as f: + params = pickle.load(f) + for key, val in params.items(): + self.params[key] = val + + for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): + self.layers[layer_idx].W = self.params['W' + str(i+1)] + self.layers[layer_idx].b = self.params['b' + str(i+1)] diff --git a/DL_demo/ch08/deep_convnet_params.pkl b/DL_demo/ch08/deep_convnet_params.pkl new file mode 100755 index 0000000..7be9d9e Binary files /dev/null and b/DL_demo/ch08/deep_convnet_params.pkl differ diff --git a/DL_demo/ch08/half_float_network.py b/DL_demo/ch08/half_float_network.py new file mode 100755 index 0000000..f12c0ab --- /dev/null +++ b/DL_demo/ch08/half_float_network.py @@ -0,0 +1,28 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # setup for importing files from the parent directory +import numpy as np +import matplotlib.pyplot as plt +from deep_convnet import DeepConvNet +from dataset.mnist import load_mnist + + +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +network = DeepConvNet() +network.load_params("deep_convnet_params.pkl") + +sampled = 10000 # subsampled to speed things up +x_test = x_test[:sampled] +t_test = t_test[:sampled] + +print("calculate accuracy (float64) ... ") +print(network.accuracy(x_test, t_test)) + +# convert to float16 +x_test = x_test.astype(np.float16) +for param in network.params.values(): + param[...] = param.astype(np.float16) + +print("calculate accuracy (float16) ... ") +print(network.accuracy(x_test, t_test))
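half_float_network.py above shows that MNIST accuracy barely changes at half precision. One subtlety, shown in the illustrative snippet below (not part of the repository): `param[...] = param.astype(np.float16)` assigns back into the existing float64 arrays, so it rounds the stored values to float16 precision without changing their dtype, while `x_test.astype(np.float16)` really does produce float16 inputs.

```
# Illustrative sketch: cost of a float16 round-trip on float64 weights.
import numpy as np

w = np.random.randn(1000)                      # float64 weights
w16 = w.astype(np.float16).astype(np.float64)  # round to half precision and back
print(w16.dtype)                               # float64 -- dtype is unchanged
print(np.max(np.abs(w - w16)))                 # rounding error, roughly 1e-3 at most
```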
") +#sampled = 1000 +#x_test = x_test[:sampled] +#t_test = t_test[:sampled] + +classified_ids = [] + +acc = 0.0 +batch_size = 100 + +for i in range(int(x_test.shape[0] / batch_size)): + tx = x_test[i*batch_size:(i+1)*batch_size] + tt = t_test[i*batch_size:(i+1)*batch_size] + y = network.predict(tx, train_flg=False) + y = np.argmax(y, axis=1) + classified_ids.append(y) + acc += np.sum(y == tt) + +acc = acc / x_test.shape[0] +print("test accuracy:" + str(acc)) + +classified_ids = np.array(classified_ids) +classified_ids = classified_ids.flatten() + +max_view = 20 +current_view = 1 + +fig = plt.figure() +fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.2, wspace=0.2) + +mis_pairs = {} +for i, val in enumerate(classified_ids == t_test): + if not val: + ax = fig.add_subplot(4, 5, current_view, xticks=[], yticks=[]) + ax.imshow(x_test[i].reshape(28, 28), cmap=plt.cm.gray_r, interpolation='nearest') + mis_pairs[current_view] = (t_test[i], classified_ids[i]) + + current_view += 1 + if current_view > max_view: + break + +print("======= misclassified result =======") +print("{view index: (label, inference), ...}") +print(mis_pairs) + +plt.show() diff --git a/DL_demo/ch08/train_deepnet.py b/DL_demo/ch08/train_deepnet.py new file mode 100755 index 0000000..9cdf3fb --- /dev/null +++ b/DL_demo/ch08/train_deepnet.py @@ -0,0 +1,21 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from deep_convnet import DeepConvNet +from common.trainer import Trainer + +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +network = DeepConvNet() +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=20, mini_batch_size=100, + optimizer='Adam', optimizer_param={'lr':0.001}, + evaluate_sample_num_per_epoch=1000) +trainer.train() + +# 保存参数 +network.save_params("deep_convnet_params.pkl") +print("Saved Network Parameters!") diff --git a/DL_demo/common/__init__.py b/DL_demo/common/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/DL_demo/common/functions.py b/DL_demo/common/functions.py new file mode 100755 index 0000000..ec02dd0 --- /dev/null +++ b/DL_demo/common/functions.py @@ -0,0 +1,61 @@ +# coding: utf-8 +import numpy as np + + +def identity_function(x): + return x + + +def step_function(x): + return np.array(x > 0, dtype=np.int) + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +def sigmoid_grad(x): + return (1.0 - sigmoid(x)) * sigmoid(x) + + +def relu(x): + return np.maximum(0, x) + + +def relu_grad(x): + grad = np.zeros(x) + grad[x>=0] = 1 + return grad + + +def softmax(x): + if x.ndim == 2: + x = x.T + x = x - np.max(x, axis=0) + y = np.exp(x) / np.sum(np.exp(x), axis=0) + return y.T + + x = x - np.max(x) # 溢出对策 + return np.exp(x) / np.sum(np.exp(x)) + + +def mean_squared_error(y, t): + return 0.5 * np.sum((y-t)**2) + + +def cross_entropy_error(y, t): + if y.ndim == 1: + t = t.reshape(1, t.size) + y = y.reshape(1, y.size) + + # 监督数据是one-hot-vector的情况下,转换为正确解标签的索引 + if t.size == y.size: + t = t.argmax(axis=1) + + batch_size = y.shape[0] + return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size + + +def softmax_loss(X, t): + y = softmax(X) + return cross_entropy_error(y, t) diff --git a/DL_demo/common/gradient.py b/DL_demo/common/gradient.py new file mode 100755 index 0000000..31cb887 --- /dev/null +++ b/DL_demo/common/gradient.py @@ -0,0 +1,52 @@ +# coding: utf-8 +import numpy as np + +def 
_numerical_gradient_1d(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + for idx in range(x.size): + tmp_val = x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + + return grad + + +def numerical_gradient_2d(f, X): + if X.ndim == 1: + return _numerical_gradient_1d(f, X) + else: + grad = np.zeros_like(X) + + for idx, x in enumerate(X): + grad[idx] = _numerical_gradient_1d(f, x) + + return grad + + +def numerical_gradient(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) + while not it.finished: + idx = it.multi_index + tmp_val = x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + it.iternext() + + return grad \ No newline at end of file diff --git a/DL_demo/common/layers.py b/DL_demo/common/layers.py new file mode 100755 index 0000000..ae50d80 --- /dev/null +++ b/DL_demo/common/layers.py @@ -0,0 +1,284 @@ +# coding: utf-8 +import numpy as np +from common.functions import * +from common.util import im2col, col2im + + +class Relu: + def __init__(self): + self.mask = None + + def forward(self, x): + self.mask = (x <= 0) + out = x.copy() + out[self.mask] = 0 + + return out + + def backward(self, dout): + dout[self.mask] = 0 + dx = dout + + return dx + + +class Sigmoid: + def __init__(self): + self.out = None + + def forward(self, x): + out = sigmoid(x) + self.out = out + return out + + def backward(self, dout): + dx = dout * (1.0 - self.out) * self.out + + return dx + + +class Affine: + def __init__(self, W, b): + self.W =W + self.b = b + + self.x = None + self.original_x_shape = None + # 权重和偏置参数的导数 + self.dW = None + self.db = None + + def forward(self, x): + # 对应张量 + self.original_x_shape = x.shape + x = x.reshape(x.shape[0], -1) + self.x = x + + out = np.dot(self.x, self.W) + self.b + + return out + + def backward(self, dout): + dx = np.dot(dout, self.W.T) + self.dW = np.dot(self.x.T, dout) + self.db = np.sum(dout, axis=0) + + dx = dx.reshape(*self.original_x_shape) # 还原输入数据的形状(对应张量) + return dx + + +class SoftmaxWithLoss: + def __init__(self): + self.loss = None + self.y = None # softmax的输出 + self.t = None # 监督数据 + + def forward(self, x, t): + self.t = t + self.y = softmax(x) + self.loss = cross_entropy_error(self.y, self.t) + + return self.loss + + def backward(self, dout=1): + batch_size = self.t.shape[0] + if self.t.size == self.y.size: # 监督数据是one-hot-vector的情况 + dx = (self.y - self.t) / batch_size + else: + dx = self.y.copy() + dx[np.arange(batch_size), self.t] -= 1 + dx = dx / batch_size + + return dx + + +class Dropout: + """ + http://arxiv.org/abs/1207.0580 + """ + def __init__(self, dropout_ratio=0.5): + self.dropout_ratio = dropout_ratio + self.mask = None + + def forward(self, x, train_flg=True): + if train_flg: + self.mask = np.random.rand(*x.shape) > self.dropout_ratio + return x * self.mask + else: + return x * (1.0 - self.dropout_ratio) + + def backward(self, dout): + return dout * self.mask + + +class BatchNormalization: + """ + http://arxiv.org/abs/1502.03167 + """ + def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None): + self.gamma = gamma + self.beta = beta + self.momentum = momentum + self.input_shape = None # Conv层的情况下为4维,全连接层的情况下为2维 + + # 测试时使用的平均值和方差 + self.running_mean = running_mean + self.running_var = running_var + + # 
backward时使用的中间数据 + self.batch_size = None + self.xc = None + self.std = None + self.dgamma = None + self.dbeta = None + + def forward(self, x, train_flg=True): + self.input_shape = x.shape + if x.ndim != 2: + N, C, H, W = x.shape + x = x.reshape(N, -1) + + out = self.__forward(x, train_flg) + + return out.reshape(*self.input_shape) + + def __forward(self, x, train_flg): + if self.running_mean is None: + N, D = x.shape + self.running_mean = np.zeros(D) + self.running_var = np.zeros(D) + + if train_flg: + mu = x.mean(axis=0) + xc = x - mu + var = np.mean(xc**2, axis=0) + std = np.sqrt(var + 10e-7) + xn = xc / std + + self.batch_size = x.shape[0] + self.xc = xc + self.xn = xn + self.std = std + self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu + self.running_var = self.momentum * self.running_var + (1-self.momentum) * var + else: + xc = x - self.running_mean + xn = xc / ((np.sqrt(self.running_var + 10e-7))) + + out = self.gamma * xn + self.beta + return out + + def backward(self, dout): + if dout.ndim != 2: + N, C, H, W = dout.shape + dout = dout.reshape(N, -1) + + dx = self.__backward(dout) + + dx = dx.reshape(*self.input_shape) + return dx + + def __backward(self, dout): + dbeta = dout.sum(axis=0) + dgamma = np.sum(self.xn * dout, axis=0) + dxn = self.gamma * dout + dxc = dxn / self.std + dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0) + dvar = 0.5 * dstd / self.std + dxc += (2.0 / self.batch_size) * self.xc * dvar + dmu = np.sum(dxc, axis=0) + dx = dxc - dmu / self.batch_size + + self.dgamma = dgamma + self.dbeta = dbeta + + return dx + + +class Convolution: + def __init__(self, W, b, stride=1, pad=0): + self.W = W + self.b = b + self.stride = stride + self.pad = pad + + # 中间数据(backward时使用) + self.x = None + self.col = None + self.col_W = None + + # 权重和偏置参数的梯度 + self.dW = None + self.db = None + + def forward(self, x): + FN, C, FH, FW = self.W.shape + N, C, H, W = x.shape + out_h = 1 + int((H + 2*self.pad - FH) / self.stride) + out_w = 1 + int((W + 2*self.pad - FW) / self.stride) + + col = im2col(x, FH, FW, self.stride, self.pad) + col_W = self.W.reshape(FN, -1).T + + out = np.dot(col, col_W) + self.b + out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2) + + self.x = x + self.col = col + self.col_W = col_W + + return out + + def backward(self, dout): + FN, C, FH, FW = self.W.shape + dout = dout.transpose(0,2,3,1).reshape(-1, FN) + + self.db = np.sum(dout, axis=0) + self.dW = np.dot(self.col.T, dout) + self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW) + + dcol = np.dot(dout, self.col_W.T) + dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad) + + return dx + + +class Pooling: + def __init__(self, pool_h, pool_w, stride=1, pad=0): + self.pool_h = pool_h + self.pool_w = pool_w + self.stride = stride + self.pad = pad + + self.x = None + self.arg_max = None + + def forward(self, x): + N, C, H, W = x.shape + out_h = int(1 + (H - self.pool_h) / self.stride) + out_w = int(1 + (W - self.pool_w) / self.stride) + + col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad) + col = col.reshape(-1, self.pool_h*self.pool_w) + + arg_max = np.argmax(col, axis=1) + out = np.max(col, axis=1) + out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2) + + self.x = x + self.arg_max = arg_max + + return out + + def backward(self, dout): + dout = dout.transpose(0, 2, 3, 1) + + pool_size = self.pool_h * self.pool_w + dmax = np.zeros((dout.size, pool_size)) + dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten() + 
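# each row of dmax corresponds to one pooling window: the column chosen by arg_max in the forward pass receives the whole incoming gradient, every other position in the window stays zero
+        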
dmax = dmax.reshape(dout.shape + (pool_size,)) + + dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1) + dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad) + + return dx diff --git a/DL_demo/common/multi_layer_net.py b/DL_demo/common/multi_layer_net.py new file mode 100755 index 0000000..ff103b5 --- /dev/null +++ b/DL_demo/common/multi_layer_net.py @@ -0,0 +1,160 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from collections import OrderedDict +from common.layers import * +from common.gradient import numerical_gradient + + +class MultiLayerNet: + """全连接的多层神经网络 + + Parameters + ---------- + input_size : 输入大小(MNIST的情况下为784) + hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100]) + output_size : 输出大小(MNIST的情况下为10) + activation : 'relu' or 'sigmoid' + weight_init_std : 指定权重的标准差(e.g. 0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + weight_decay_lambda : Weight Decay(L2范数)的强度 + """ + def __init__(self, input_size, hidden_size_list, output_size, + activation='relu', weight_init_std='relu', weight_decay_lambda=0): + self.input_size = input_size + self.output_size = output_size + self.hidden_size_list = hidden_size_list + self.hidden_layer_num = len(hidden_size_list) + self.weight_decay_lambda = weight_decay_lambda + self.params = {} + + # 初始化权重 + self.__init_weight(weight_init_std) + + # 生成层 + activation_layer = {'sigmoid': Sigmoid, 'relu': Relu} + self.layers = OrderedDict() + for idx in range(1, self.hidden_layer_num+1): + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], + self.params['b' + str(idx)]) + self.layers['Activation_function' + str(idx)] = activation_layer[activation]() + + idx = self.hidden_layer_num + 1 + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], + self.params['b' + str(idx)]) + + self.last_layer = SoftmaxWithLoss() + + def __init_weight(self, weight_init_std): + """设定权重的初始值 + + Parameters + ---------- + weight_init_std : 指定权重的标准差(e.g. 
0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + """ + all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size] + for idx in range(1, len(all_size_list)): + scale = weight_init_std + if str(weight_init_std).lower() in ('relu', 'he'): + scale = np.sqrt(2.0 / all_size_list[idx - 1]) # 使用ReLU的情况下推荐的初始值 + elif str(weight_init_std).lower() in ('sigmoid', 'xavier'): + scale = np.sqrt(1.0 / all_size_list[idx - 1]) # 使用sigmoid的情况下推荐的初始值 + + self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx]) + self.params['b' + str(idx)] = np.zeros(all_size_list[idx]) + + def predict(self, x): + for layer in self.layers.values(): + x = layer.forward(x) + + return x + + def loss(self, x, t): + """求损失函数 + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 损失函数的值 + """ + y = self.predict(x) + + weight_decay = 0 + for idx in range(1, self.hidden_layer_num + 2): + W = self.params['W' + str(idx)] + weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W ** 2) + + return self.last_layer.forward(y, t) + weight_decay + + def accuracy(self, x, t): + y = self.predict(x) + y = np.argmax(y, axis=1) + if t.ndim != 1 : t = np.argmax(t, axis=1) + + accuracy = np.sum(y == t) / float(x.shape[0]) + return accuracy + + def numerical_gradient(self, x, t): + """求梯度(数值微分) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + loss_W = lambda W: self.loss(x, t) + + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)]) + grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)]) + + return grads + + def gradient(self, x, t): + """求梯度(误差反向传播法) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W + grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db + + return grads diff --git a/DL_demo/common/multi_layer_net_extend.py b/DL_demo/common/multi_layer_net_extend.py new file mode 100755 index 0000000..75f1c75 --- /dev/null +++ b/DL_demo/common/multi_layer_net_extend.py @@ -0,0 +1,163 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from collections import OrderedDict +from common.layers import * +from common.gradient import numerical_gradient + +class MultiLayerNetExtend: + """扩展版的全连接的多层神经网络 + + 具有Weiht Decay、Dropout、Batch Normalization的功能 + + Parameters + ---------- + input_size : 输入大小(MNIST的情况下为784) + hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100]) + output_size : 输出大小(MNIST的情况下为10) + activation : 'relu' or 'sigmoid' + weight_init_std : 指定权重的标准差(e.g. 
0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + weight_decay_lambda : Weight Decay(L2范数)的强度 + use_dropout: 是否使用Dropout + dropout_ration : Dropout的比例 + use_batchNorm: 是否使用Batch Normalization + """ + def __init__(self, input_size, hidden_size_list, output_size, + activation='relu', weight_init_std='relu', weight_decay_lambda=0, + use_dropout = False, dropout_ration = 0.5, use_batchnorm=False): + self.input_size = input_size + self.output_size = output_size + self.hidden_size_list = hidden_size_list + self.hidden_layer_num = len(hidden_size_list) + self.use_dropout = use_dropout + self.weight_decay_lambda = weight_decay_lambda + self.use_batchnorm = use_batchnorm + self.params = {} + + # 初始化权重 + self.__init_weight(weight_init_std) + + # 生成层 + activation_layer = {'sigmoid': Sigmoid, 'relu': Relu} + self.layers = OrderedDict() + for idx in range(1, self.hidden_layer_num+1): + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], + self.params['b' + str(idx)]) + if self.use_batchnorm: + self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx-1]) + self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx-1]) + self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)]) + + self.layers['Activation_function' + str(idx)] = activation_layer[activation]() + + if self.use_dropout: + self.layers['Dropout' + str(idx)] = Dropout(dropout_ration) + + idx = self.hidden_layer_num + 1 + self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)]) + + self.last_layer = SoftmaxWithLoss() + + def __init_weight(self, weight_init_std): + """设定权重的初始值 + + Parameters + ---------- + weight_init_std : 指定权重的标准差(e.g. 0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + """ + all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size] + for idx in range(1, len(all_size_list)): + scale = weight_init_std + if str(weight_init_std).lower() in ('relu', 'he'): + scale = np.sqrt(2.0 / all_size_list[idx - 1]) # 使用ReLU的情况下推荐的初始值 + elif str(weight_init_std).lower() in ('sigmoid', 'xavier'): + scale = np.sqrt(1.0 / all_size_list[idx - 1]) # 使用sigmoid的情况下推荐的初始值 + self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx]) + self.params['b' + str(idx)] = np.zeros(all_size_list[idx]) + + def predict(self, x, train_flg=False): + for key, layer in self.layers.items(): + if "Dropout" in key or "BatchNorm" in key: + x = layer.forward(x, train_flg) + else: + x = layer.forward(x) + + return x + + def loss(self, x, t, train_flg=False): + """求损失函数 + 参数x是输入数据,t是教师标签 + """ + y = self.predict(x, train_flg) + + weight_decay = 0 + for idx in range(1, self.hidden_layer_num + 2): + W = self.params['W' + str(idx)] + weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2) + + return self.last_layer.forward(y, t) + weight_decay + + def accuracy(self, X, T): + Y = self.predict(X, train_flg=False) + Y = np.argmax(Y, axis=1) + if T.ndim != 1 : T = np.argmax(T, axis=1) + + accuracy = np.sum(Y == T) / float(X.shape[0]) + return accuracy + + def numerical_gradient(self, X, T): + """求梯度(数值微分) + + Parameters + ---------- + X : 输入数据 + T : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + loss_W = lambda W: self.loss(X, T, train_flg=True) + + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = 
numerical_gradient(loss_W, self.params['W' + str(idx)]) + grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)]) + + if self.use_batchnorm and idx != self.hidden_layer_num+1: + grads['gamma' + str(idx)] = numerical_gradient(loss_W, self.params['gamma' + str(idx)]) + grads['beta' + str(idx)] = numerical_gradient(loss_W, self.params['beta' + str(idx)]) + + return grads + + def gradient(self, x, t): + # forward + self.loss(x, t, train_flg=True) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + for idx in range(1, self.hidden_layer_num+2): + grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)] + grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db + + if self.use_batchnorm and idx != self.hidden_layer_num+1: + grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma + grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta + + return grads \ No newline at end of file diff --git a/DL_demo/common/optimizer.py b/DL_demo/common/optimizer.py new file mode 100755 index 0000000..9c22bcd --- /dev/null +++ b/DL_demo/common/optimizer.py @@ -0,0 +1,130 @@ +# coding: utf-8 +import numpy as np + +class SGD: + + """随机梯度下降法(Stochastic Gradient Descent)""" + + def __init__(self, lr=0.01): + self.lr = lr + + def update(self, params, grads): + for key in params.keys(): + params[key] -= self.lr * grads[key] + + +class Momentum: + + """Momentum SGD""" + + def __init__(self, lr=0.01, momentum=0.9): + self.lr = lr + self.momentum = momentum + self.v = None + + def update(self, params, grads): + if self.v is None: + self.v = {} + for key, val in params.items(): + self.v[key] = np.zeros_like(val) + + for key in params.keys(): + self.v[key] = self.momentum*self.v[key] - self.lr*grads[key] + params[key] += self.v[key] + + +class Nesterov: + + """Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901)""" + + def __init__(self, lr=0.01, momentum=0.9): + self.lr = lr + self.momentum = momentum + self.v = None + + def update(self, params, grads): + if self.v is None: + self.v = {} + for key, val in params.items(): + self.v[key] = np.zeros_like(val) + + for key in params.keys(): + self.v[key] *= self.momentum + self.v[key] -= self.lr * grads[key] + params[key] += self.momentum * self.momentum * self.v[key] + params[key] -= (1 + self.momentum) * self.lr * grads[key] + + +class AdaGrad: + + """AdaGrad""" + + def __init__(self, lr=0.01): + self.lr = lr + self.h = None + + def update(self, params, grads): + if self.h is None: + self.h = {} + for key, val in params.items(): + self.h[key] = np.zeros_like(val) + + for key in params.keys(): + self.h[key] += grads[key] * grads[key] + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7) + + +class RMSprop: + + """RMSprop""" + + def __init__(self, lr=0.01, decay_rate = 0.99): + self.lr = lr + self.decay_rate = decay_rate + self.h = None + + def update(self, params, grads): + if self.h is None: + self.h = {} + for key, val in params.items(): + self.h[key] = np.zeros_like(val) + + for key in params.keys(): + self.h[key] *= self.decay_rate + self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key] + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7) + + +class Adam: + + """Adam (http://arxiv.org/abs/1412.6980v8)""" + + def __init__(self, lr=0.001, beta1=0.9, beta2=0.999): 
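+        # beta1 / beta2 are the decay rates of the first- and second-moment estimates m and v; iter counts update steps for the bias correction applied through lr_t in update()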
+        self.lr = lr + self.beta1 = beta1 + self.beta2 = beta2 + self.iter = 0 + self.m = None + self.v = None + + def update(self, params, grads): + if self.m is None: + self.m, self.v = {}, {} + for key, val in params.items(): + self.m[key] = np.zeros_like(val) + self.v[key] = np.zeros_like(val) + + self.iter += 1 + lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter) + + for key in params.keys(): + #self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key] + #self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2) + self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key]) + self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key]) + + params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7) + + #unbias_m += (1 - self.beta1) * (grads[key] - self.m[key]) # correct bias + #unbisa_b += (1 - self.beta2) * (grads[key]*grads[key] - self.v[key]) # correct bias + #params[key] += self.lr * unbias_m / (np.sqrt(unbisa_b) + 1e-7) diff --git a/DL_demo/common/trainer.py b/DL_demo/common/trainer.py new file mode 100755 index 0000000..1878105 --- /dev/null +++ b/DL_demo/common/trainer.py @@ -0,0 +1,78 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # setup for importing files from the parent directory +import numpy as np +from common.optimizer import * + +class Trainer: + """Class that trains a neural network + """ + def __init__(self, network, x_train, t_train, x_test, t_test, + epochs=20, mini_batch_size=100, + optimizer='SGD', optimizer_param={'lr':0.01}, + evaluate_sample_num_per_epoch=None, verbose=True): + self.network = network + self.verbose = verbose + self.x_train = x_train + self.t_train = t_train + self.x_test = x_test + self.t_test = t_test + self.epochs = epochs + self.batch_size = mini_batch_size + self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch + + # optimizer + optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov, + 'adagrad':AdaGrad, 'rmsprop':RMSprop, 'adam':Adam} + self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param) + + self.train_size = x_train.shape[0] + self.iter_per_epoch = max(self.train_size / mini_batch_size, 1) + self.max_iter = int(epochs * self.iter_per_epoch) + self.current_iter = 0 + self.current_epoch = 0 + + self.train_loss_list = [] + self.train_acc_list = [] + self.test_acc_list = [] + + def train_step(self): + batch_mask = np.random.choice(self.train_size, self.batch_size) + x_batch = self.x_train[batch_mask] + t_batch = self.t_train[batch_mask] + + grads = self.network.gradient(x_batch, t_batch) + self.optimizer.update(self.network.params, grads) + + loss = self.network.loss(x_batch, t_batch) + self.train_loss_list.append(loss) + if self.verbose: print("train loss:" + str(loss)) + + if self.current_iter % self.iter_per_epoch == 0: + self.current_epoch += 1 + + x_train_sample, t_train_sample = self.x_train, self.t_train + x_test_sample, t_test_sample = self.x_test, self.t_test + if not self.evaluate_sample_num_per_epoch is None: + t = self.evaluate_sample_num_per_epoch + x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t] + x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t] + + train_acc = self.network.accuracy(x_train_sample, t_train_sample) + test_acc = self.network.accuracy(x_test_sample, t_test_sample) + self.train_acc_list.append(train_acc) + self.test_acc_list.append(test_acc) + + if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===") + self.current_iter += 1 + 
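+    # train() below simply repeats train_step() max_iter times, then reports accuracy on the full test set
+    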
def train(self): + for i in range(self.max_iter): + self.train_step() + + test_acc = self.network.accuracy(self.x_test, self.t_test) + + if self.verbose: + print("=============== Final Test Accuracy ===============") + print("test acc:" + str(test_acc)) + diff --git a/DL_demo/common/util.py b/DL_demo/common/util.py new file mode 100755 index 0000000..9e0f0b3 --- /dev/null +++ b/DL_demo/common/util.py @@ -0,0 +1,99 @@ +# coding: utf-8 +import numpy as np + + +def smooth_curve(x): + """用于使损失函数的图形变圆滑 + + 参考:http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html + """ + window_len = 11 + s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]] + w = np.kaiser(window_len, 2) + y = np.convolve(w/w.sum(), s, mode='valid') + return y[5:len(y)-5] + + +def shuffle_dataset(x, t): + """打乱数据集 + + Parameters + ---------- + x : 训练数据 + t : 监督数据 + + Returns + ------- + x, t : 打乱的训练数据和监督数据 + """ + permutation = np.random.permutation(x.shape[0]) + x = x[permutation,:] if x.ndim == 2 else x[permutation,:,:,:] + t = t[permutation] + + return x, t + +def conv_output_size(input_size, filter_size, stride=1, pad=0): + return (input_size + 2*pad - filter_size) / stride + 1 + + +def im2col(input_data, filter_h, filter_w, stride=1, pad=0): + """ + + Parameters + ---------- + input_data : 由(数据量, 通道, 高, 长)的4维数组构成的输入数据 + filter_h : 滤波器的高 + filter_w : 滤波器的长 + stride : 步幅 + pad : 填充 + + Returns + ------- + col : 2维数组 + """ + N, C, H, W = input_data.shape + out_h = (H + 2*pad - filter_h)//stride + 1 + out_w = (W + 2*pad - filter_w)//stride + 1 + + img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant') + col = np.zeros((N, C, filter_h, filter_w, out_h, out_w)) + + for y in range(filter_h): + y_max = y + stride*out_h + for x in range(filter_w): + x_max = x + stride*out_w + col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride] + + col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1) + return col + + +def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0): + """ + + Parameters + ---------- + col : + input_shape : 输入数据的形状(例:(10, 1, 28, 28)) + filter_h : + filter_w + stride + pad + + Returns + ------- + + """ + N, C, H, W = input_shape + out_h = (H + 2*pad - filter_h)//stride + 1 + out_w = (W + 2*pad - filter_w)//stride + 1 + col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2) + + img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1)) + for y in range(filter_h): + y_max = y + stride*out_h + for x in range(filter_w): + x_max = x + stride*out_w + img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :] + + return img[:, :, pad:H + pad, pad:W + pad] \ No newline at end of file diff --git a/DL_demo/dataset/__init__.py b/DL_demo/dataset/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/DL_demo/dataset/lena.png b/DL_demo/dataset/lena.png new file mode 100755 index 0000000..768f4a5 Binary files /dev/null and b/DL_demo/dataset/lena.png differ diff --git a/DL_demo/dataset/lena_gray.png b/DL_demo/dataset/lena_gray.png new file mode 100755 index 0000000..fe2fd39 Binary files /dev/null and b/DL_demo/dataset/lena_gray.png differ diff --git a/DL_demo/dataset/mnist.py b/DL_demo/dataset/mnist.py new file mode 100755 index 0000000..0d53709 --- /dev/null +++ b/DL_demo/dataset/mnist.py @@ -0,0 +1,128 @@ +# coding: utf-8 +try: + import urllib.request +except ImportError: + raise ImportError('You should use Python 3.x') +import os.path +import gzip +import pickle +import os +import numpy as np + + 
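+# download source and the four gzip archives that make up MNIST (train/test images and labels)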
+url_base = 'http://yann.lecun.com/exdb/mnist/' +key_file = { + 'train_img':'train-images-idx3-ubyte.gz', + 'train_label':'train-labels-idx1-ubyte.gz', + 'test_img':'t10k-images-idx3-ubyte.gz', + 'test_label':'t10k-labels-idx1-ubyte.gz' +} + +dataset_dir = os.path.dirname(os.path.abspath(__file__)) +save_file = dataset_dir + "/mnist.pkl" + +train_num = 60000 +test_num = 10000 +img_dim = (1, 28, 28) +img_size = 784 + + +def _download(file_name): + file_path = dataset_dir + "/" + file_name + + if os.path.exists(file_path): + return + + print("Downloading " + file_name + " ... ") + urllib.request.urlretrieve(url_base + file_name, file_path) + print("Done") + +def download_mnist(): + for v in key_file.values(): + _download(v) + +def _load_label(file_name): + file_path = dataset_dir + "/" + file_name + + print("Converting " + file_name + " to NumPy Array ...") + with gzip.open(file_path, 'rb') as f: + labels = np.frombuffer(f.read(), np.uint8, offset=8) + print("Done") + + return labels + +def _load_img(file_name): + file_path = dataset_dir + "/" + file_name + + print("Converting " + file_name + " to NumPy Array ...") + with gzip.open(file_path, 'rb') as f: + data = np.frombuffer(f.read(), np.uint8, offset=16) + data = data.reshape(-1, img_size) + print("Done") + + return data + +def _convert_numpy(): + dataset = {} + dataset['train_img'] = _load_img(key_file['train_img']) + dataset['train_label'] = _load_label(key_file['train_label']) + dataset['test_img'] = _load_img(key_file['test_img']) + dataset['test_label'] = _load_label(key_file['test_label']) + + return dataset + +def init_mnist(): + download_mnist() + dataset = _convert_numpy() + print("Creating pickle file ...") + with open(save_file, 'wb') as f: + pickle.dump(dataset, f, -1) + print("Done!") + +def _change_one_hot_label(X): + T = np.zeros((X.size, 10)) + for idx, row in enumerate(T): + row[X[idx]] = 1 + + return T + + +def load_mnist(normalize=True, flatten=True, one_hot_label=False): + """读入MNIST数据集 + + Parameters + ---------- + normalize : 将图像的像素值正规化为0.0~1.0 + one_hot_label : + one_hot_label为True的情况下,标签作为one-hot数组返回 + one-hot数组是指[0,0,1,0,0,0,0,0,0,0]这样的数组 + flatten : 是否将图像展开为一维数组 + + Returns + ------- + (训练图像, 训练标签), (测试图像, 测试标签) + """ + if not os.path.exists(save_file): + init_mnist() + + with open(save_file, 'rb') as f: + dataset = pickle.load(f) + + if normalize: + for key in ('train_img', 'test_img'): + dataset[key] = dataset[key].astype(np.float32) + dataset[key] /= 255.0 + + if one_hot_label: + dataset['train_label'] = _change_one_hot_label(dataset['train_label']) + dataset['test_label'] = _change_one_hot_label(dataset['test_label']) + + if not flatten: + for key in ('train_img', 'test_img'): + dataset[key] = dataset[key].reshape(-1, 1, 28, 28) + + return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label']) + + +if __name__ == '__main__': + init_mnist() diff --git a/DL_step_by_step/__init__.py b/DL_step_by_step/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/DL_step_by_step/ch3/MNIST.py b/DL_step_by_step/ch3/MNIST.py new file mode 100644 index 0000000..57d50a7 --- /dev/null +++ b/DL_step_by_step/ch3/MNIST.py @@ -0,0 +1,91 @@ +import sys, os +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist +from DL_step_by_step.ch3.sigmoid_func import sigmoid +from DL_step_by_step.ch3.softmax_func import softmax + +import numpy as np +from PIL import Image +import pickle + + + + +def img_show(img): + pil_img = 
Image.fromarray(np.uint8(img)) + pil_img.show() + + + +# img = x_train[0] +# label = t_train[0] +# print(label) +# print(img.shape) +# img = img.reshape(28, 28) +# print(img.shape) +# img_show(img) + + +def get_data(): + (x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=True, one_hot_label=False) + print(x_train[0]) + print(x_train.shape) + print(t_train[0]) + print(t_train.shape) + print(x_test.shape) + print(t_test.shape) + return x_test, t_test + +def init_network(): + with open("sample_weight.pkl", 'rb') as f: + network = pickle.load(f) + + return network + +def predict(network, x): + W1, W2, W3 = network['W1'], network['W2'], network['W3'] + b1, b2, b3 = network['b1'], network['b2'], network['b3'] + + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + + a2 = np.dot(z1, W2) + b2 + z2 = sigmoid(a2) + + a3 = np.dot(z2, W3) + b3 + y = softmax(a3) + + return y + + +x, t = get_data() +network = init_network() +print(type(network)) + +batch_size = 100 + +for key in network: + print(key) + print(network[key].shape) + +accuracy_cnt = 0 +# for i in range(len(x)): +# y = predict(network, x[i]) +# # print(y) +# p = np.argmax(y) +# # print(p) +# if p == t[i]: +# accuracy_cnt += 1 + + +for i in range(0, len(x), batch_size): + x_batch = x [i: i + batch_size] + # print(x_batch.shape) + y_batch = predict(network, x_batch) + # print(y_batch.shape) + p = np.argmax(y_batch, axis=1) + # print(p.shape) + # input() + accuracy_cnt += np.sum(p == t[i:i+batch_size]) + +print("Accuracy : " +str(float(accuracy_cnt) /len(x))) \ No newline at end of file diff --git a/DL_step_by_step/ch3/ReLU_func.py b/DL_step_by_step/ch3/ReLU_func.py new file mode 100644 index 0000000..c742523 --- /dev/null +++ b/DL_step_by_step/ch3/ReLU_func.py @@ -0,0 +1,11 @@ +import numpy as np +import matplotlib.pyplot as plt + +def relu(x): + return np.maximum(0, x) + +x = np.arange(-5.0 , 5.0, 0.1) +y = relu(x) + +plt.plot(x, y) +plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch3/__init__.py b/DL_step_by_step/ch3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/DL_step_by_step/ch3/axis_test.py b/DL_step_by_step/ch3/axis_test.py new file mode 100644 index 0000000..5fab883 --- /dev/null +++ b/DL_step_by_step/ch3/axis_test.py @@ -0,0 +1,7 @@ +import numpy as np + +x = np.array([[0.1, 0.8, 0.1], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3], [0.8, 0.1, 0.1]]) + +print(x) +y = np.argmax(x, axis=0) +print(y) \ No newline at end of file diff --git a/DL_step_by_step/ch3/matrix_test.py b/DL_step_by_step/ch3/matrix_test.py new file mode 100644 index 0000000..b388819 --- /dev/null +++ b/DL_step_by_step/ch3/matrix_test.py @@ -0,0 +1,23 @@ +import numpy as np +import matplotlib.pyplot as plt + +# A = np.array([1, 2, 3, 4]) +# print(A) +# print(np.ndim(A)) +# print(A.shape) +# print(A.shape[0]) +# +# B = np.array([[1, 2], [3, 4], [5, 6]]) +# print(B) +# print(np.ndim(B)) +# print(B.shape) + +A = np.array([[1, 2],[3, 4]]) +print(A) +print(A.shape) + +B = np.array([[5, 6],[7, 8]]) +print(B) +print(B.shape) +z = np.dot(A, B) +print(z) \ No newline at end of file diff --git a/DL_step_by_step/ch3/sample_weight.pkl b/DL_step_by_step/ch3/sample_weight.pkl new file mode 100755 index 0000000..0e92475 Binary files /dev/null and b/DL_step_by_step/ch3/sample_weight.pkl differ diff --git a/DL_step_by_step/ch3/sigmoid_func.py b/DL_step_by_step/ch3/sigmoid_func.py new file mode 100644 index 0000000..6b3350e --- /dev/null +++ b/DL_step_by_step/ch3/sigmoid_func.py @@ -0,0 +1,18 @@ +import numpy as np +import matplotlib.pyplot 
as plt + + +def sigmoid(x): + return 1/(1 + np.exp(-x)) + + +if __name__ == '__main__': + print("") + x = np.arange(-10.0, 10.0, 0.1) + y = sigmoid(x) + y2 = x * 0 + 0.5 + + plt.plot(x, y) + plt.plot(x, y2) + plt.ylim(-0.1, 1.1) + plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch3/softmax_func.py b/DL_step_by_step/ch3/softmax_func.py new file mode 100644 index 0000000..6869e97 --- /dev/null +++ b/DL_step_by_step/ch3/softmax_func.py @@ -0,0 +1,41 @@ +import numpy as np + +a = np.array([0.3, 2.9, 4.0]) + +print(a) +exp_a = np.exp(a) +print(exp_a) + +sum_exp_a = np.sum(exp_a) +print(sum_exp_a) + +y = exp_a / sum_exp_a +print(y) + +def softmax(a): + c = np.max(a) + exp_a = np.exp(a - c) #溢出对策 + sum_exp_a = np.sum(exp_a) + y = exp_a / sum_exp_a + return y + + +print("a") +a = np.array([1010, 1000, 990]) +print(a) +# z1 = np.exp(a) / np.sum(np.exp(a)) +# print(z1) + +c = np.max(a) +print(c) +d = a - c +print(d) +z2 =np.exp(d) / np.sum(np.exp(d)) +print(z2) + +print("stage 2") + +a = np.array([0.3, 2.9, 4.0]) +y = softmax(a) +print(y) +print(np.sum(y)) \ No newline at end of file diff --git a/DL_step_by_step/ch3/step_func.py b/DL_step_by_step/ch3/step_func.py new file mode 100644 index 0000000..0a67aa7 --- /dev/null +++ b/DL_step_by_step/ch3/step_func.py @@ -0,0 +1,14 @@ +import numpy as np +import matplotlib.pyplot as plt + + +def step_function(x): + return np.array(x>0, dtype=np.int) + +x = np.arange(-5.0, 5.0, 0.1) +y = step_function(x) +print(x) +print(y) +plt.plot(x, y) +plt.ylim(-0.1, 1.1) +plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch3/threeLayers_net.py b/DL_step_by_step/ch3/threeLayers_net.py new file mode 100644 index 0000000..8538e14 --- /dev/null +++ b/DL_step_by_step/ch3/threeLayers_net.py @@ -0,0 +1,64 @@ +import numpy as np +import matplotlib.pyplot as plt + +from DL_step_by_step.ch3.sigmoid_func import sigmoid + +X = np.array([1.0, 0.5]) + +W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]) +B1 = np.array([0.1, 0.2 ,0.3]) + +print("X1 : ") +print(X) +print(X.shape) + +print("W1 : ") +print(W1) +print(W1.shape) + +print("B1 : ") +print(B1) +print(B1.shape) + +A1 = np.dot(X, W1) + B1 +print("A1 : ") +print(A1) +print(A1) + +Z1 = sigmoid(A1) +print("Z1 : ") +print(Z1) +print(Z1.shape) + +W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]) +B2 = np.array([0.1, 0.2]) + +print("W2 : ") +print(W2) +print(W2.shape) + +print("B2 : ") +print(B2) +print(B2.shape) + +A2 = np.dot(Z1, W2) + B2 +print("A2 : ") +print(A2) +print(A2.shape) + +Z2 = sigmoid(A2) +print("Z2 : ") +print(Z2) +print(Z2.shape) + + +def indentity_function(x): + return x + + +W3 = np.array([[0.1, 0.3], [0.2, 0.4]]) + +B3 = np.array([0.1, 0.2]) + +A3 = np.dot(Z2, W3) + B3 +Y = indentity_function(A3) diff --git a/DL_step_by_step/ch3/threeLayers_net_summary.py b/DL_step_by_step/ch3/threeLayers_net_summary.py new file mode 100644 index 0000000..cb63691 --- /dev/null +++ b/DL_step_by_step/ch3/threeLayers_net_summary.py @@ -0,0 +1,34 @@ +import numpy as np +from DL_step_by_step.ch3.sigmoid_func import sigmoid +from DL_step_by_step.ch3.threeLayers_net import indentity_function + +def init_network(): + network = {} + network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]) + network['b1'] = np.array([0.1, 0.2, 0.3]) + network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]) + network['b2'] = np.array([0.1, 0.2]) + network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]]) + network['b3'] = np.array([0.1, 0.2]) + + return network + +def forward(network, x): + W1, W2, W3 = network['W1'], 
network['W2'], network['W3'] + b1, b2, b3 = network['b1'], network['b2'], network['b3'] + + a1 = np.dot(x, W1) + b1 + z1 = sigmoid(a1) + + a2 = np.dot(z1, W2) + b2 + z2 = sigmoid(a2) + + a3 = np.dot(z2, W3) + b3 + y = indentity_function(a3) + + return y + +network = init_network() +x = np.array([1.0, 0.5]) +y = forward(network, x) +print(y) diff --git a/DL_step_by_step/ch4/Q1.py b/DL_step_by_step/ch4/Q1.py new file mode 100644 index 0000000..df86a3f --- /dev/null +++ b/DL_step_by_step/ch4/Q1.py @@ -0,0 +1,68 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/7 上午11:15 +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: Q1.py +# @Software: PyCharm + +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.common.common_func import numerical_gradient +import matplotlib.pylab as plt + +import numpy as np + + + +def gradient_descent(f, init_x, lr=0.01, step_num=100): + x = init_x + x_history = [] + + for i in range(step_num): + x_history.append(x.copy()) + grad = numerical_gradient(f, x) + x-= lr*grad + + return x, np.array(x_history) + + + + + +def function_2(x): + return x[0] ** 2 + x[1] ** 2 + +init_x = np.array([-3.0, 4.0]) + +# print(init_x.ndim) +# grad,history = gradient_descent(function_2, init_x = init_x, lr = 0.1, step_num=100) +# +# print(grad) +# print(history) + +init_x = np.array([-3.0, 4.0]) + +lr = 0.1 +step_num = 100 +x, x_history = gradient_descent(function_2, init_x,lr = lr, step_num=step_num) + +print(x) + + +plt.plot( [-5, 5], [0, 0], '--b') +plt.plot([0, 0], [-5,5], '--b') +plt.plot(x_history[:, 0], x_history[:, 1], 'o') + +plt.xlim(-3.5, 3.5) +plt.ylim(-4.5, 4.5) +plt.xlabel("X0") +plt.ylabel("X1") +plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch4/__init__.py b/DL_step_by_step/ch4/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/DL_step_by_step/ch4/gradient_func.py b/DL_step_by_step/ch4/gradient_func.py new file mode 100644 index 0000000..dfefc37 --- /dev/null +++ b/DL_step_by_step/ch4/gradient_func.py @@ -0,0 +1,102 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/5 下午6:01 +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. 
=== +# ====================================================== +# @Project : Keras_start +# @FileName: gradient_func.py +# @Software: PyCharm + +import numpy as np +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.common.common_func import numerical_gradient +import matplotlib.pylab as plt +from DL_step_by_step.common.common_func import function_2 + + + +# def numerical_gradient_on_batch(f, x): +# h = 1e-4 +# grad = np.zeros_like(x) +# +# for index in range(x.size): +# tmp_val = x[index] +# x[index] = tmp_val + h +# fxh1 = f(x) +# +# x[index] = tmp_val - h +# fxh2 = f(x) +# +# grad[index] = (fxh1 - fxh2) / (2 * h) +# x[index] = tmp_val +# +# return grad +# +# +# print(numerical_gradient_on_batch(function_2, np.array([3.0, 4.0]))) + + +# gradient test +# x = np.array([3.0, 4.0]) +# f= function_2 +# h = 1e-4 +# grad = np.zeros_like(x) +# +# for index in range(x.size): +# +# tmp_val = x[index] +# +# x[index] = tmp_val + h +# fxh1 = f(x) +# +# x[index] = tmp_val - h +# fxh2 = f(x) +# +# grad[index] = (fxh1 - fxh2) / (2 * h) +# x[index] = tmp_val + + +if __name__ == '__main__': + x0 = np.arange(-2, 2.5, 0.25) + x1 = np.arange(-2, 2.5, 0.25) + + + + + X, Y = np.meshgrid(x0, x1) + + print(x0.shape) + print(x1.shape) + + + X = X.flatten() + Y = Y.flatten() + # plt.scatter(X,Y) + # plt.show + + + + z = np.array([X, Y]) + + print(z.shape) + + grad = numerical_gradient(function_2, z) + + print(grad) + plt.figure() + plt.quiver(X, Y, -grad[0], -grad[1], angles="xy", color="#666666") # ,headwidth=10,scale=40,color="#444444") + plt.xlim([-2, 2]) + plt.ylim([-2, 2]) + plt.xlabel('x0') + plt.ylabel('x1') + plt.grid() + plt.legend() + plt.draw() + plt.show() \ No newline at end of file diff --git a/DL_step_by_step/ch4/loss_func.py b/DL_step_by_step/ch4/loss_func.py new file mode 100644 index 0000000..14e5429 --- /dev/null +++ b/DL_step_by_step/ch4/loss_func.py @@ -0,0 +1,60 @@ +import numpy as np +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist + +def mean_squared_error(y, t): + """ + func: 计算均方误差 + + + 因为带了平方,后面要用梯度下降法,要求导,这样求导多出的乘2就和二分之一抵消了,一个简化后面计算的技巧 + + :param y: + :param t: + :return: + """ + return 0.5 * np.sum((y-t)**2) + + +# def cross_entropy_error(y, t): +# """ +# func: 计算交叉熵误差 +# +# :param y: +# :param t: +# :return: +# """ +# delta = 1e-7 +# return -np.sum(t*np.log(y+delta)) + + +def cross_entroy_error(y, t): + if y.ndim == 1: + t = t.reshape(1, t.size) + y = y.reshape(1, y.size) + + batch_size = y.shape[0] + + return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size + + + + + +if __name__ == '__main__': + (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + + print(x_train.shape) + print(t_train.shape) + + train_size = x_train.shape[0] + + batch_size = 10 + + batch_mask = np.random.choice(train_size, batch_size) + + x_batch = x_train[batch_mask] + + t_batch = t_train[batch_mask] + diff --git a/DL_step_by_step/ch4/nimi_batch.py b/DL_step_by_step/ch4/nimi_batch.py new file mode 100644 index 0000000..90de997 --- /dev/null +++ b/DL_step_by_step/ch4/nimi_batch.py @@ -0,0 +1,45 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/11 下午6:09 +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. 
=== +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: nimi_batch.py +# @Software: PyCharm + +import numpy as np +from DL_step_by_step.dataset.mnist import load_mnist +from DL_step_by_step.ch4.twoLayerNet import TwoLayerNet + +if __name__ == '__main__': + (x_train, t_train),(x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) + train_loss_list = [] + + iters_num = 1000 + train_size = x_train.shape[0] + batch_size = 100 + learning_rate = 0.1 + + network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) + + for i in range(iters_num): + batch_mask = np.random.choice(train_size,batch_size) + x_batch = x_train[batch_mask] + t_batch = t_train[batch_mask] + + # calculate the gradients + grad = network.numerical_gradient(x_batch, t_batch) + + for key in ('W1', 'b1', 'W2', 'b2'): + network.params[key] -= learning_rate * grad[key] + + loss = network.loss(x_batch, t_batch) + train_loss_list.append(loss) + + print(train_loss_list) \ No newline at end of file diff --git a/DL_step_by_step/ch4/numerical_diff.py b/DL_step_by_step/ch4/numerical_diff.py new file mode 100644 index 0000000..81cd5f3 --- /dev/null +++ b/DL_step_by_step/ch4/numerical_diff.py @@ -0,0 +1,44 @@ +import numpy as np +import matplotlib.pylab as plt +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist + + +def numerical_diff(f, x): + h = 1e-4 + return (f(x+h) - f(x-h)) / (2 * h) + + +def function_1(x): + return 0.01 * x ** 2 + 0.1 * x + + +def function_2(x): + return x[0] ** 2 + x[1] ** 2 + + +def tangent_line(f, x): + d = numerical_diff(f, x) + print(d) + y = f(x) - d*x + return lambda t: d*t + y + + +if __name__ == '__main__': + x = np.arange(0.0, 20.0, 0.1) + y = function_1(x) + f2 = tangent_line(function_1, 5) + y2 = f2(x) + + # plt.xlabel("x") + # plt.ylabel("f(x)") + plt.plot(x, y) + plt.plot(x, y2) + plt.show() + + print(numerical_diff(function_1, 5)) + print(numerical_diff(function_1, 10)) \ No newline at end of file
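numerical_diff.py uses the centered difference (f(x+h) - f(x-h)) / 2h with h = 1e-4. A quick sanity check (illustrative, not part of the repository) against the analytic derivative of function_1, which is f'(x) = 0.02x + 0.1:

```
# Illustrative check: centered difference vs. the exact derivative.
def numerical_diff(f, x):
    h = 1e-4
    return (f(x + h) - f(x - h)) / (2 * h)

f = lambda x: 0.01 * x ** 2 + 0.1 * x   # function_1 from numerical_diff.py
for x in (5.0, 10.0):
    # numerical vs. exact: agrees to about 1e-9 (roughly 0.2 and 0.3)
    print(numerical_diff(f, x), 0.02 * x + 0.1)
```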
diff --git a/DL_step_by_step/ch4/numerical_diff2.py b/DL_step_by_step/ch4/numerical_diff2.py new file mode 100644 index 0000000..3e86364 --- /dev/null +++ b/DL_step_by_step/ch4/numerical_diff2.py @@ -0,0 +1,55 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/5 11:38 AM +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: numerical_diff2.py +# @Software: PyCharm + +import numpy as np +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.dataset.mnist import load_mnist +import matplotlib.pylab as plt +from mpl_toolkits.mplot3d import Axes3D # registers the '3d' projection on older matplotlib +from DL_step_by_step.ch4.numerical_diff import numerical_diff + + + +def function_2(x): + return x[0] ** 2 + x[1] ** 2 + +def function_3(X,Y): + return X ** 2 + Y ** 2 + +x0 = np.arange(-20.0, 20.0, 0.25) +x1 = np.arange(-20.0, 20.0, 0.25) + +X,Y = np.meshgrid(x0,x1) + +R = np.sqrt(X**2 + Y**2) + +Z = function_3(X,Y) + +fig = plt.figure() +ax = fig.add_subplot(111, projection='3d') + + +# Plot a basic wireframe. +ax.plot_wireframe(X, Y, Z, rstride=10, cstride=10) + +plt.show() + + +def function_tmp1(x0): + return x0*x0 + 4.0 ** 2.0 + + +print(numerical_diff(function_tmp1, 3.0)) + diff --git a/DL_step_by_step/ch4/simpleNet.py b/DL_step_by_step/ch4/simpleNet.py new file mode 100644 index 0000000..255480b --- /dev/null +++ b/DL_step_by_step/ch4/simpleNet.py @@ -0,0 +1,60 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/7 5:15 PM +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: simpleNet.py +# @Software: PyCharm + +import sys +sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start']) +from DL_step_by_step.common.common_func import * +import matplotlib.pylab as plt + + + +import numpy as np + +class simpleNet: + def __init__(self): + self.W = np.random.rand(2, 3) + + def predict(self, x): + return np.dot(x, self.W) + + def loss(self, x, t): + z = self.predict(x) + y = softmax(z) + loss = cross_entroy_error(y, t) + + return loss + + + +if __name__ == '__main__': + net = simpleNet() + print(net.W) + + x = np.array([0.6, 0.9]) + print(x.shape) + p = net.predict(x) + print(p) + + print(np.argmax(p)) + + t = np.array([0, 0, 1]) + + print(net.loss(x, t)) + + f = lambda w: net.loss(x, t) + + dW = numerical_gradient(f, net.W) + + print(dW) \ No newline at end of file diff --git a/DL_step_by_step/ch4/test.py b/DL_step_by_step/ch4/test.py new file mode 100644 index 0000000..d3c4d11 --- /dev/null +++ b/DL_step_by_step/ch4/test.py @@ -0,0 +1,24 @@ +# +#!/usr/bin/python3.6.2 +# -*- coding: utf-8 -*- +# @Time    : 2018/12/11 5:53 PM +# @Author  : Wenson +# @Desc : ============================================== +# Life is Short I Use Python!!!                      === +# If this runs wrong,don't ask me,I don't know why;  === +# If this runs right,thank god,and I don't know why. === +# Maybe the answer,my friend,is blowing in the wind. === +# ====================================================== +# @Project : Keras_start +# @FileName: test.py +# @Software: PyCharm + +import numpy as np +z = np.arange(10) + + +t = [2,7,0,9,4] +y = np.arange(50) +y = y.reshape(5, 10) + +a = y[np.arange(5), t] \ No newline at end of file
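test.py above is probing NumPy integer-array indexing: y[np.arange(5), t] picks one element per row, which is exactly how the batched cross-entropy in loss_func.py gathers each sample's predicted probability for its correct class. A small illustrative run (the values follow directly from arange):

```
# Integer-array indexing: one element per row, column given by t.
import numpy as np

y = np.arange(50).reshape(5, 10)   # 5 "samples", 10 "classes"
t = [2, 7, 0, 9, 4]                # correct class index per sample
print(y[np.arange(5), t])          # -> [ 2 17 20 39 44 ]
```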
diff --git a/DL_step_by_step/ch4/twoLayerNet.py b/DL_step_by_step/ch4/twoLayerNet.py
new file mode 100644
index 0000000..f64f932
--- /dev/null
+++ b/DL_step_by_step/ch4/twoLayerNet.py
@@ -0,0 +1,109 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/7 5:37 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: twoLayerNet.py
+# @Software: PyCharm
+
+import sys
+sys.path.extend(['/Users/songwenxuan/PycharmProjects/Keras_start'])
+import numpy as np
+from DL_step_by_step.common.common_func import *
+
+
+class TwoLayerNet:
+    def __init__(self, input_size, hidden_size, output_size,
+                 weight_init_std=0.01):
+        self.params = {}
+        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
+        self.params['b1'] = np.zeros(hidden_size)
+        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
+        self.params['b2'] = np.zeros(output_size)
+
+    def predict(self, x):
+        W1, W2 = self.params['W1'], self.params['W2']
+        b1, b2 = self.params['b1'], self.params['b2']
+
+        a1 = np.dot(x, W1) + b1
+        z1 = sigmoid(a1)
+        a2 = np.dot(z1, W2) + b2
+        y = softmax(a2)
+
+        return y
+
+    def loss(self, x, t):
+        """
+        :param x: input data
+        :param t: supervised (label) data
+        :return: loss value
+        """
+        y = self.predict(x)
+        return cross_entropy_error(y, t)
+
+    def accuracy(self, x, t):
+        """
+        :param x: input data
+        :param t: supervised (label) data, one-hot
+        :return: classification accuracy
+        """
+        y = self.predict(x)
+        y = np.argmax(y, axis=1)
+        t = np.argmax(t, axis=1)
+
+        accuracy = np.sum(y == t) / float(x.shape[0])
+
+        return accuracy
+
+    def numerical_gradient(self, x, t):
+        # W is unused in the lambda; the numerical_gradient function below
+        # perturbs each parameter array in place while re-evaluating the loss
+        loss_W = lambda W: self.loss(x, t)
+
+        grads = {}
+        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
+        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
+        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
+        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
+
+        return grads
+
+    def print_params(self):
+        for key in ('W1', 'b1', 'W2', 'b2'):
+            print(key + " : ")
+            print(self.params[key].shape)
+            print(self.params[key])
+
+
+if __name__ == '__main__':
+    net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
+    net.print_params()
+
+    x = np.random.rand(100, 784)
+    t = np.random.rand(100, 10)
+
+    grads = net.numerical_gradient(x, t)
+    print(grads['W1'])
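+
+    # Illustrative single SGD step using the gradients above (a sketch added
+    # here, not in the original file; the learning rate is an arbitrary choice):
+    learning_rate = 0.1
+    for key in ('W1', 'b1', 'W2', 'b2'):
+        net.params[key] -= learning_rate * grads[key]
+    print(net.loss(x, t))  # loss after one update step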
diff --git a/DL_step_by_step/ch5/TwoLayerNet.py b/DL_step_by_step/ch5/TwoLayerNet.py
new file mode 100644
index 0000000..74dd044
--- /dev/null
+++ b/DL_step_by_step/ch5/TwoLayerNet.py
@@ -0,0 +1,32 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/21 11:34 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : DeepLearningDemo
+# @FileName: TwoLayerNet.py
+# @Software: PyCharm
+
+import sys, os
+sys.path.append(os.pardir)
+import numpy as np
+from DL_step_by_step.ch5.affine_demo import Affine
+from collections import OrderedDict
+
+
+class TwoLayerNet:
+    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
+        self.params = {}
+        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
+        self.params['b1'] = np.zeros(hidden_size)
+        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
+        self.params['b2'] = np.zeros(output_size)
+
+        # Early stub: only the first Affine layer is wired up so far; Affine
+        # needs its weights and bias at construction time
+        self.layers = OrderedDict()
+        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
diff --git a/DL_step_by_step/ch5/__init__.py b/DL_step_by_step/ch5/__init__.py
new file mode 100644
index 0000000..4ad9953
--- /dev/null
+++ b/DL_step_by_step/ch5/__init__.py
@@ -0,0 +1,14 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/12 6:03 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: __init__.py
+# @Software: PyCharm
diff --git a/DL_step_by_step/ch5/affine_demo.py b/DL_step_by_step/ch5/affine_demo.py
new file mode 100644
index 0000000..aea4668
--- /dev/null
+++ b/DL_step_by_step/ch5/affine_demo.py
@@ -0,0 +1,21 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/17 6:12 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: affine_demo.py
+# @Software: PyCharm
+
+import numpy as np
+
+class Affine:
+    def __init__(self, W, b):
+        self.W = W
+        self.b = b
+        self.x = None
+        self.dW = None
+        self.db = None
+
+    def forward(self, x):
+        # y = xW + b; cache x for the backward pass
+        self.x = x
+        return np.dot(x, self.W) + self.b
+
+    def backward(self, dout):
+        # Gradients of the affine transform; the bias gradient sums over the batch axis
+        dx = np.dot(dout, self.W.T)
+        self.dW = np.dot(self.x.T, dout)
+        self.db = np.sum(dout, axis=0)
+        return dx
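+
+# Minimal usage sketch (added for illustration; shapes are arbitrary):
+if __name__ == '__main__':
+    layer = Affine(np.random.randn(2, 3), np.random.randn(3))
+    x = np.random.randn(4, 2)              # batch of 4 two-dimensional inputs
+    out = layer.forward(x)                  # shape (4, 3)
+    dx = layer.backward(np.ones_like(out))  # upstream gradient of ones
+    print(out.shape, dx.shape, layer.dW.shape, layer.db.shape)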
diff --git a/DL_step_by_step/ch5/sigmoid_layer.py b/DL_step_by_step/ch5/sigmoid_layer.py
new file mode 100644
index 0000000..7acde0b
--- /dev/null
+++ b/DL_step_by_step/ch5/sigmoid_layer.py
@@ -0,0 +1,49 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/13 4:00 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: sigmoid_layer.py
+# @Software: PyCharm
+
+import numpy as np
+
+
+class Sigmoid:
+    def __init__(self):
+        self.out = None
+
+    def forward(self, x):
+        out = 1 / (1 + np.exp(-x))
+        self.out = out
+
+        return out
+
+    def backward(self, dout):
+        # The sigmoid derivative expressed through its own output: y(1 - y)
+        dx = dout * (1.0 - self.out) * self.out
+
+        return dx
+
+if __name__ == '__main__':
+    # Side experiment (unrelated to Sigmoid): how the Affine layer's bias is
+    # broadcast in forward and summed over the batch axis in backward
+    X_dot_W = np.array([[0, 0, 0], [10, 10, 10]])
+    B = np.array([1, 2, 3])
+
+    print(X_dot_W)
+    print(X_dot_W + B)
+
+    dY = np.array([[1, 2, 3], [4, 5, 6]])
+    print(dY)
+
+    dB = np.sum(dY, axis=0)
+    print(dB)
diff --git a/DL_step_by_step/ch5/simpleLayer.py b/DL_step_by_step/ch5/simpleLayer.py
new file mode 100644
index 0000000..81104d9
--- /dev/null
+++ b/DL_step_by_step/ch5/simpleLayer.py
@@ -0,0 +1,68 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/12 6:04 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: simpleLayer.py
+# @Software: PyCharm
+
+import numpy as np
+
+class MulLayer:
+    def __init__(self):
+        self.x = None
+        self.y = None
+
+    def forward(self, x, y):
+        self.x = x
+        self.y = y
+        out = x * y
+
+        return out
+
+    def backward(self, dout):
+        # Multiplication swaps the inputs in the backward pass
+        dx = dout * self.y
+        dy = dout * self.x
+
+        return dx, dy
+
+class AddLayer:
+    def __init__(self):
+        pass
+
+    def forward(self, x, y):
+        out = x + y
+        return out
+
+    def backward(self, dout):
+        # Addition passes the upstream gradient through unchanged
+        dx = dout * 1
+        dy = dout * 1
+        return dx, dy
+
+if __name__ == '__main__':
+    apple = 100
+    apple_num = 2
+
+    tax = 1.1
+
+    mul_apple_layer = MulLayer()
+    mul_tax_layer = MulLayer()
+
+    apple_price = mul_apple_layer.forward(apple, apple_num)
+    price = mul_tax_layer.forward(apple_price, tax)
+
+    print(price)
+    print(mul_apple_layer.x)
+
+    dprice = 1
+    dapple_price, dtax = mul_tax_layer.backward(dprice)
+    dapple, dapple_num = mul_apple_layer.backward(dapple_price)
+
+    print(dapple, dapple_num, dtax)
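+
+    # Expected output (added for reference, up to float rounding):
+    #   220.00000000000003           # price = 100 * 2 * 1.1
+    #   100                          # x cached by mul_apple_layer
+    #   2.2 110.00000000000001 200   # dapple, dapple_num, dtax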
diff --git a/DL_step_by_step/ch5/simpleLayer2.py b/DL_step_by_step/ch5/simpleLayer2.py
new file mode 100644
index 0000000..3ef78ec
--- /dev/null
+++ b/DL_step_by_step/ch5/simpleLayer2.py
@@ -0,0 +1,80 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/12 6:04 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: simpleLayer2.py
+# @Software: PyCharm
+
+import numpy as np
+
+class MulLayer:
+    def __init__(self):
+        self.x = None
+        self.y = None
+
+    def forward(self, x, y):
+        self.x = x
+        self.y = y
+        out = x * y
+
+        return out
+
+    def backward(self, dout):
+        dx = dout * self.y
+        dy = dout * self.x
+
+        return dx, dy
+
+class AddLayer:
+    def __init__(self):
+        pass
+
+    def forward(self, x, y):
+        out = x + y
+        return out
+
+    def backward(self, dout):
+        dx = dout * 1
+        dy = dout * 1
+        return dx, dy
+
+if __name__ == '__main__':
+    apple = 100
+    apple_num = 2
+    orange = 150
+    orange_num = 3
+    tax = 1.1
+
+    # layer
+    mul_apple_layer = MulLayer()
+    mul_orange_layer = MulLayer()
+    add_apple_orange_layer = AddLayer()
+    mul_tax_layer = MulLayer()
+
+    # forward
+    apple_price = mul_apple_layer.forward(apple, apple_num)
+    orange_price = mul_orange_layer.forward(orange, orange_num)
+    all_price = add_apple_orange_layer.forward(apple_price, orange_price)
+    price = mul_tax_layer.forward(all_price, tax)
+
+    # backward, unwinding the layers in reverse order
+    dprice = 1
+    dall_price, dtax = mul_tax_layer.backward(dprice)
+    dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
+    dorange, dorange_num = mul_orange_layer.backward(dorange_price)
+    dapple, dapple_num = mul_apple_layer.backward(dapple_price)
+
+    print(price)
+    print(dapple_num, dapple, dorange, dorange_num, dtax)
diff --git a/DL_step_by_step/ch5/simpleLayer3.py b/DL_step_by_step/ch5/simpleLayer3.py
new file mode 100644
index 0000000..2535c23
--- /dev/null
+++ b/DL_step_by_step/ch5/simpleLayer3.py
@@ -0,0 +1,50 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/13 10:36 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: simpleLayer3.py
+# @Software: PyCharm
+
+import numpy as np
+
+class Relu:
+    def __init__(self):
+        self.mask = None
+
+    def forward(self, x):
+        # Remember which entries were non-positive and zero them out
+        self.mask = (x <= 0)
+        out = x.copy()
+        out[self.mask] = 0
+
+        return out
+
+    def backward(self, dout):
+        # Gradient flows only where the forward input was positive
+        dout[self.mask] = 0
+        dx = dout
+
+        return dx
+
+
+if __name__ == '__main__':
+    x = np.array([[1.0, -0.5], [-2.0, 3.0]])
+    print(x)
+
+    mask = (x <= 0)
+    print(mask)
+
+    relu_layer = Relu()
+    signal_out = relu_layer.forward(x)
+    print(signal_out)
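+
+    # Backward sketch (added for illustration): with an upstream gradient of
+    # ones, entries masked in forward stay zero.
+    dout = np.ones_like(x)
+    print(relu_layer.backward(dout))  # [[1. 0.] [0. 1.]]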
diff --git a/DL_step_by_step/ch5/softmaxWithLoss.py b/DL_step_by_step/ch5/softmaxWithLoss.py
new file mode 100644
index 0000000..a513939
--- /dev/null
+++ b/DL_step_by_step/ch5/softmaxWithLoss.py
@@ -0,0 +1,37 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/20 4:29 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : DeepLearningDemo
+# @FileName: softmaxWithLoss.py
+# @Software: PyCharm
+
+import numpy as np
+from DL_step_by_step.common.common_func import *
+
+
+class SoftmaxWithLoss:
+    def __init__(self):
+        self.loss = None
+        self.y = None
+        self.t = None
+
+    def forward(self, x, t):
+        self.t = t
+        self.y = softmax(x)
+        self.loss = cross_entropy_error(self.y, self.t)
+
+        return self.loss
+
+    def backward(self, dout=1):
+        # Softmax combined with cross-entropy gives the clean gradient (y - t)
+        batch_size = self.t.shape[0]
+        dx = (self.y - self.t) / batch_size
+
+        return dx
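+
+# Minimal usage sketch (added for illustration; scores and label are arbitrary):
+if __name__ == '__main__':
+    layer = SoftmaxWithLoss()
+    x = np.array([[0.3, 2.9, 4.0]])   # raw scores for one sample
+    t = np.array([[0, 0, 1]])         # one-hot label
+    print(layer.forward(x, t))        # cross-entropy loss
+    print(layer.backward())           # (y - t) / batch_size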
diff --git a/DL_step_by_step/ch5/test.py b/DL_step_by_step/ch5/test.py
new file mode 100644
index 0000000..0f55370
--- /dev/null
+++ b/DL_step_by_step/ch5/test.py
@@ -0,0 +1,20 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/13 10:58 AM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: test.py
+# @Software: PyCharm
+
+# Rebinding x does not touch y: both names pointed at the same int object,
+# and x = 2 simply points x at a new one
+y = 1
+x = y
+x = 2
+print(y)  # still 1
diff --git a/DL_step_by_step/common/__init__.py b/DL_step_by_step/common/__init__.py
new file mode 100644
index 0000000..c6c5f6f
--- /dev/null
+++ b/DL_step_by_step/common/__init__.py
@@ -0,0 +1,14 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/5 6:11 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: __init__.py
+# @Software: PyCharm
diff --git a/DL_step_by_step/common/common_func.py b/DL_step_by_step/common/common_func.py
new file mode 100644
index 0000000..7e86ba5
--- /dev/null
+++ b/DL_step_by_step/common/common_func.py
@@ -0,0 +1,120 @@
+#
+#!/usr/bin/python3.6.2
+# -*- coding: utf-8 -*-
+# @Time    : 2018/12/5 6:11 PM
+# @Author  : Wenson
+# @Desc : ==============================================
+# Life is Short I Use Python!!!                      ===
+# If this runs wrong,don't ask me,I don't know why;  ===
+# If this runs right,thank god,and I don't know why. ===
+# Maybe the answer,my friend,is blowing in the wind. ===
+# ======================================================
+# @Project : Keras_start
+# @FileName: common_func.py
+# @Software: PyCharm
+
+import numpy as np
+
+
+def function_2(x):
+    if x.ndim == 1:
+        return np.sum(x**2)
+    else:
+        return np.sum(x**2, axis=1)
+
+
+def _numerical_gradient_no_batch(f, x):
+    h = 1e-4  # 0.0001
+    grad = np.zeros_like(x)
+
+    for idx in range(x.size):
+        tmp_val = x[idx]
+        x[idx] = float(tmp_val) + h
+        fxh1 = f(x)  # f(x+h)
+
+        x[idx] = tmp_val - h
+        fxh2 = f(x)  # f(x-h)
+        grad[idx] = (fxh1 - fxh2) / (2 * h)
+
+        x[idx] = tmp_val  # restore the original value
+
+    return grad
+
+
+def numerical_gradient(f, X):
+    if X.ndim == 1:
+        return _numerical_gradient_no_batch(f, X)
+    else:
+        grad = np.zeros_like(X)
+
+        for idx, x in enumerate(X):
+            grad[idx] = _numerical_gradient_no_batch(f, x)
+
+        return grad
+
+
+def gradient_descent(f, init_x, lr=0.01, step_num=100):
+    x = init_x
+    for i in range(step_num):
+        grad = numerical_gradient(f, x)
+        x -= lr * grad
+
+    return x
+
+
+def softmax(a):
+    # Handle batched input row by row; subtracting the per-row max guards
+    # against overflow in np.exp
+    if a.ndim == 2:
+        a = a - np.max(a, axis=1, keepdims=True)
+        exp_a = np.exp(a)
+        return exp_a / np.sum(exp_a, axis=1, keepdims=True)
+    c = np.max(a)
+    exp_a = np.exp(a - c)  # guard against overflow
+    sum_exp_a = np.sum(exp_a)
+    y = exp_a / sum_exp_a
+    return y
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def cross_entropy_error(y, t):
+    if y.ndim == 1:
+        t = t.reshape(1, t.size)
+        y = y.reshape(1, y.size)
+
+    # When the labels are one-hot vectors, convert them to class indices
+    if t.size == y.size:
+        t = t.argmax(axis=1)
+
+    batch_size = y.shape[0]
+    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
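+
+# Quick sanity check (sketch added for illustration): gradient descent on
+# f(x) = x0^2 + x1^2 should drive x toward the origin.
+if __name__ == '__main__':
+    init_x = np.array([-3.0, 4.0])
+    print(gradient_descent(function_2, init_x, lr=0.1, step_num=100))
+    # -> approximately [-6.1e-10  8.1e-10]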
diff --git a/DL_step_by_step/dataset/__init__.py b/DL_step_by_step/dataset/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/DL_step_by_step/dataset/lena.png b/DL_step_by_step/dataset/lena.png
new file mode 100755
index 0000000..768f4a5
Binary files /dev/null and b/DL_step_by_step/dataset/lena.png differ
diff --git a/DL_step_by_step/dataset/lena_gray.png b/DL_step_by_step/dataset/lena_gray.png
new file mode 100755
index 0000000..fe2fd39
Binary files /dev/null and b/DL_step_by_step/dataset/lena_gray.png differ
diff --git a/DL_step_by_step/dataset/mnist.pkl b/DL_step_by_step/dataset/mnist.pkl
new file mode 100644
index 0000000..8d88cf1
Binary files /dev/null and b/DL_step_by_step/dataset/mnist.pkl differ
diff --git a/DL_step_by_step/dataset/mnist.py b/DL_step_by_step/dataset/mnist.py
new file mode 100755
index 0000000..0d53709
--- /dev/null
+++ b/DL_step_by_step/dataset/mnist.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+try:
+    import urllib.request
+except ImportError:
+    raise ImportError('You should use Python 3.x')
+import os.path
+import gzip
+import pickle
+import os
+import numpy as np
+
+
+url_base = 'http://yann.lecun.com/exdb/mnist/'
+key_file = {
+    'train_img':'train-images-idx3-ubyte.gz',
+    'train_label':'train-labels-idx1-ubyte.gz',
+    'test_img':'t10k-images-idx3-ubyte.gz',
+    'test_label':'t10k-labels-idx1-ubyte.gz'
+}
+
+dataset_dir = os.path.dirname(os.path.abspath(__file__))
+save_file = dataset_dir + "/mnist.pkl"
+
+train_num = 60000
+test_num = 10000
+img_dim = (1, 28, 28)
+img_size = 784
+
+
+def _download(file_name):
+    file_path = dataset_dir + "/" + file_name
+
+    if os.path.exists(file_path):
+        return
+
+    print("Downloading " + file_name + " ... ")
+    urllib.request.urlretrieve(url_base + file_name, file_path)
+    print("Done")
+
+def download_mnist():
+    for v in key_file.values():
+        _download(v)
+
+def _load_label(file_name):
+    file_path = dataset_dir + "/" + file_name
+
+    print("Converting " + file_name + " to NumPy Array ...")
+    with gzip.open(file_path, 'rb') as f:
+        labels = np.frombuffer(f.read(), np.uint8, offset=8)
+    print("Done")
+
+    return labels
+
+def _load_img(file_name):
+    file_path = dataset_dir + "/" + file_name
+
+    print("Converting " + file_name + " to NumPy Array ...")
+    with gzip.open(file_path, 'rb') as f:
+        data = np.frombuffer(f.read(), np.uint8, offset=16)
+    data = data.reshape(-1, img_size)
+    print("Done")
+
+    return data
+
+def _convert_numpy():
+    dataset = {}
+    dataset['train_img'] = _load_img(key_file['train_img'])
+    dataset['train_label'] = _load_label(key_file['train_label'])
+    dataset['test_img'] = _load_img(key_file['test_img'])
+    dataset['test_label'] = _load_label(key_file['test_label'])
+
+    return dataset
+
+def init_mnist():
+    download_mnist()
+    dataset = _convert_numpy()
+    print("Creating pickle file ...")
+    with open(save_file, 'wb') as f:
+        pickle.dump(dataset, f, -1)
+    print("Done!")
+
+def _change_one_hot_label(X):
+    T = np.zeros((X.size, 10))
+    for idx, row in enumerate(T):
+        row[X[idx]] = 1
+
+    return T
+
+
+def load_mnist(normalize=True, flatten=True, one_hot_label=False):
+    """Load the MNIST dataset.
+
+    Parameters
+    ----------
+    normalize : normalize pixel values to the 0.0~1.0 range
+    one_hot_label :
+        if True, return each label as a one-hot array,
+        e.g. [0,0,1,0,0,0,0,0,0,0]
+    flatten : whether to flatten each image into a 1-D array
+
+    Returns
+    -------
+    (training images, training labels), (test images, test labels)
+    """
+    if not os.path.exists(save_file):
+        init_mnist()
+
+    with open(save_file, 'rb') as f:
+        dataset = pickle.load(f)
+
+    if normalize:
+        for key in ('train_img', 'test_img'):
+            dataset[key] = dataset[key].astype(np.float32)
+            dataset[key] /= 255.0
+
+    if one_hot_label:
+        dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
+        dataset['test_label'] = _change_one_hot_label(dataset['test_label'])
+
+    if not flatten:
+        for key in ('train_img', 'test_img'):
+            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)
+
+    return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])
+
+
+if __name__ == '__main__':
+    init_mnist()
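+
+    # Example usage (sketch added for illustration; assumes the dataset files
+    # are present or can be downloaded): load the flattened, normalized split
+    # and check the shapes.
+    (x_train, t_train), (x_test, t_test) = load_mnist()
+    print(x_train.shape, t_train.shape)  # (60000, 784) (60000,)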
diff --git a/DL_step_by_step/dataset/t10k-images-idx3-ubyte.gz b/DL_step_by_step/dataset/t10k-images-idx3-ubyte.gz
new file mode 100644
index 0000000..5ace8ea
Binary files /dev/null and b/DL_step_by_step/dataset/t10k-images-idx3-ubyte.gz differ
diff --git a/DL_step_by_step/dataset/t10k-labels-idx1-ubyte.gz b/DL_step_by_step/dataset/t10k-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..a7e1415
Binary files /dev/null and b/DL_step_by_step/dataset/t10k-labels-idx1-ubyte.gz differ
diff --git a/DL_step_by_step/dataset/train-images-idx3-ubyte.gz b/DL_step_by_step/dataset/train-images-idx3-ubyte.gz
new file mode 100644
index 0000000..b50e4b6
Binary files /dev/null and b/DL_step_by_step/dataset/train-images-idx3-ubyte.gz differ
diff --git a/DL_step_by_step/dataset/train-labels-idx1-ubyte.gz b/DL_step_by_step/dataset/train-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..707a576
Binary files /dev/null and b/DL_step_by_step/dataset/train-labels-idx1-ubyte.gz differ
diff --git a/DL_step_by_step/test/__init__.py b/DL_step_by_step/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/DL_step_by_step/test/test_class.py b/DL_step_by_step/test/test_class.py
new file mode 100644
index 0000000..4d31289
--- /dev/null
+++ b/DL_step_by_step/test/test_class.py
@@ -0,0 +1,17 @@
+class Man:
+    def __init__(self, name):
+        self.name = name
+        print("Initialized!")
+
+    def hello(self):
+        print("Hello " + self.name + "!")
+
+    def goodbye(self):
+        print("Good-bye " + self.name + "!")
+
+
+if __name__ == '__main__':
+    m = Man("David")
+    m.hello()
+    m.goodbye()
diff --git a/DL_step_by_step/test/test_matplotlib.py b/DL_step_by_step/test/test_matplotlib.py
new file mode 100644
index 0000000..cb96c1a
--- /dev/null
+++ b/DL_step_by_step/test/test_matplotlib.py
@@ -0,0 +1,20 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+x = np.arange(0, 6, 0.1)
+y1 = np.sin(x)
+y2 = np.cos(x)
+
+# Earlier experiment, kept for reference:
+# plt.plot(x, y1, label="sin")
+# plt.plot(x, y2, linestyle="--", label="cos")
+# plt.xlabel("x")
+# plt.ylabel("y")
+# plt.title('sin & cos')
+# plt.legend()
+# plt.show()
+
+from matplotlib.image import imread
+img = imread('../image/miao.png')
+plt.imshow(img)
+plt.show()
diff --git a/DL_step_by_step/test/test_numpy.py b/DL_step_by_step/test/test_numpy.py
new file mode 100644
index 0000000..bc66c29
--- /dev/null
+++ b/DL_step_by_step/test/test_numpy.py
@@ -0,0 +1,30 @@
+import numpy as np
+
+x = np.array([1.0, 2.0, 3.0])
+y = np.array([2.0, 4.0, 6.0])
+
+A = np.array([[1, 2], [3, 4]])
+print(A)
+print(A.shape)
+
+B = np.array([10, 20])
+print(A * B)  # B is broadcast across the rows of A
+
+X = np.array([[51, 55], [14, 19], [0, 4]])
+print(X)
+
+print(X[0])
+print(X[0][1])
+
+for row in X:
+    print(row)
+
+X = X.flatten()
+print(X[np.array([0, 2, 4])])
+
+print(X > 15)
+print(X[X > 15])
diff --git a/DL_step_by_step/test/test_perceptron.py b/DL_step_by_step/test/test_perceptron.py
new file mode 100644
index 0000000..055300f
--- /dev/null
+++ b/DL_step_by_step/test/test_perceptron.py
@@ -0,0 +1,47 @@
+import numpy as np
+x = np.array([0, 1])
+w = np.array([0.5, 0.5])
+b = -0.7
+print(w*x)
+print(np.sum(w*x))
+print(np.sum(w*x) + b)
+
+def AND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.7
+    tmp = np.sum(w*x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+def NAND(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([-0.5, -0.5])
+    b = 0.7
+    tmp = np.sum(w * x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+def OR(x1, x2):
+    x = np.array([x1, x2])
+    w = np.array([0.5, 0.5])
+    b = -0.2  # must be negative, otherwise OR(0, 0) would fire
+    tmp = np.sum(w * x) + b
+    if tmp <= 0:
+        return 0
+    else:
+        return 1
+
+def XOR(x1, x2):
+    # XOR is not linearly separable; stack NAND, OR and AND to realize it
+    s1 = NAND(x1, x2)
+    s2 = OR(x1, x2)
+    y = AND(s1, s2)
+    return y
+
+print(XOR(0, 1))
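+
+# Full truth table for XOR (sketch added for verification, in the same style
+# as the gate scripts in ch02):
+for xs in [(0, 0), (1, 0), (0, 1), (1, 1)]:
+    print(str(xs) + " -> " + str(XOR(xs[0], xs[1])))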