diff --git a/Season1_Tensorflow1.1_Python3.5/1-3/NVIDIA Autonomous Car.mp4 b/Season1_Tensorflow1.1_Python3.5/1-3/NVIDIA Autonomous Car.mp4 new file mode 100644 index 0000000..4bdf39e Binary files /dev/null and b/Season1_Tensorflow1.1_Python3.5/1-3/NVIDIA Autonomous Car.mp4 differ diff --git a/Season1_Tensorflow1.1_Python3.5/1-3/README.md b/Season1_Tensorflow1.1_Python3.5/1-3/README.md new file mode 100644 index 0000000..0c34fda --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/1-3/README.md @@ -0,0 +1,4 @@ +# 1 - 3 小节 +主要讲解 TensorFlow 的最最最基本使用。 + +接触过TensorFlow,或者阅读英文文档没有问题的同学可以直接开始 4 - 6 小节 diff --git a/Season1_Tensorflow1.1_Python3.5/1-3/run.py b/Season1_Tensorflow1.1_Python3.5/1-3/run.py new file mode 100644 index 0000000..00e5e20 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/1-3/run.py @@ -0,0 +1,137 @@ +# encoding: utf-8 +# 为了 Python3 的兼容,如果你用的 Python2.7 +from __future__ import print_function, division +import tensorflow as tf + +print('Loaded TF version', tf.__version__, '\n\n') + +# Tensor 在数学中是“张量” +# 标量,矢量/向量,张量 + +# 简单地理解 +# 标量表示值 +# 矢量表示位置(空间中的一个点) +# 张量表示整个空间 + +# 一维数组是矢量 +# 多维数组是张量, 矩阵也是张量 + + +# 4个重要的类型 +# @Variable 计算图谱中的变量 +# @Tensor 一个多维矩阵,带有很多方法 +# @Graph 一个计算图谱 +# @Session 用来运行一个计算图谱 + + +# 三个重要的函数 + +# Variable 变量 +""" +tf.Variable.__init__( + initial_value=None, @Tensor + trainable=True, + collections=None, + validate_shape=True, + caching_device=None, + name=None, + variable_def=None, + dtype=None) +""" + + +# 注意:Variable是一个Class,Tensor也是一个Class + +# Constant 常数 +# tf.constant(value, dtype=None, shape=None, name='Const') +# return: a constant @Tensor + +# Placeholder 暂时变量? +# tf.placeholder(dtype, shape=None, name=None) +# return: 一个还尚未存在的 @Tensor + +# 让我们用计算图谱来实现一些简单的函数 +# + - * / 四则运算 +def basic_operation(): + v1 = tf.Variable(10) + v2 = tf.Variable(5) + addv = v1 + v2 + print(addv) + print(type(addv)) + print(type(v1)) + + c1 = tf.constant(10) + c2 = tf.constant(5) + addc = c1 + c2 + print(addc) + print(type(addc)) + print(type(c1)) + + # 用来运行计算图谱的对象/实例? + # session is a runtime + sess = tf.Session() + + # Variable -> 初始化 -> 有值的Tensor + tf.initialize_all_variables().run(session=sess) + + print('变量是需要初始化的') + print('加法(v1, v2) = ', addv.eval(session=sess)) + print('加法(v1, v2) = ', sess.run(addv)) + print('加法(c1, c2) = ', addc.eval(session=sess)) + print('\n\n') + # 这种定义操作,再执行操作的模式被称之为“符号式编程” Symbolic Programming + + # tf.Graph.__init__() + # Creates a new, empty Graph. 
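+    # Note (added comment): tf.initialize_all_variables(), used above and in the graph
+    # examples below, is the legacy initializer; in TF 1.x it is deprecated in favour of
+    # tf.global_variables_initializer(). A roughly equivalent modern form would be:
+    #   init = tf.global_variables_initializer()
+    #   sess.run(init)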
+ graph = tf.Graph() + with graph.as_default(): + value1 = tf.constant([1, 2]) + value2 = tf.Variable([3, 4]) + mul = value1 / value2 + + with tf.Session(graph=graph) as mySess: + tf.initialize_all_variables().run() + print('一一对应的除法(value1, value2) = ', mySess.run(mul)) + print('一一对应的除法(value1, value2) = ', mul.eval()) + + # tensor.eval(session=sess) + # sess.run(tensor) + + # 省内存?placeholder才是王道 + # def use_placeholder(): + graph = tf.Graph() + with graph.as_default(): + value1 = tf.placeholder(dtype=tf.float64) + value2 = tf.Variable([3, 4], dtype=tf.float64) + mul = value1 * value2 + + with tf.Session(graph=graph) as mySess: + tf.initialize_all_variables().run() + # 我们想象一下这个数据是从远程加载进来的 + # 文件,网络 + # 假装是 10 GB + value = load_from_remote() + for partialValue in load_partial(value, 2): + runResult = mySess.run(mul, feed_dict={value1: partialValue}) + # evalResult = mul.eval(feed_dict={value1: partialValue}) + print('乘法(value1, value2) = ', runResult) + # cross validation + + +def load_from_remote(): + return [-x for x in range(1000)] + + +# 自定义的 Iterator +# yield, generator function +def load_partial(value, step): + index = 0 + while index < len(value): + yield value[index:index + step] + index += step + return + + +if __name__ == '__main__': + basic_operation() +# use_placeholder() diff --git "a/Season1_Tensorflow1.1_Python3.5/1-3/\346\214\207\345\215\227.pptx" "b/Season1_Tensorflow1.1_Python3.5/1-3/\346\214\207\345\215\227.pptx" new file mode 100644 index 0000000..49b53fe Binary files /dev/null and "b/Season1_Tensorflow1.1_Python3.5/1-3/\346\214\207\345\215\227.pptx" differ diff --git a/Season1_Tensorflow1.1_Python3.5/10-11/.gitignore b/Season1_Tensorflow1.1_Python3.5/10-11/.gitignore new file mode 100644 index 0000000..f950240 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/10-11/.gitignore @@ -0,0 +1,2 @@ +/board/ +/fc1_weights/ diff --git a/Season1_Tensorflow1.1_Python3.5/10-11/README.md b/Season1_Tensorflow1.1_Python3.5/10-11/README.md new file mode 100644 index 0000000..162f99f --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/10-11/README.md @@ -0,0 +1,2 @@ +# TensorBoard 计算图谱可视化 +TensorBoard 是TF自带的强大可视化工具 diff --git a/Season1_Tensorflow1.1_Python3.5/10-11/dp.py b/Season1_Tensorflow1.1_Python3.5/10-11/dp.py new file mode 100644 index 0000000..81e0202 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/10-11/dp.py @@ -0,0 +1,202 @@ +# 为了 Python2 玩家们 +from __future__ import print_function, division + +# 第三方 +import tensorflow as tf +from sklearn.metrics import confusion_matrix +import numpy as np + +# 我们自己 +import load + +train_samples, train_labels = load._train_samples, load._train_labels +test_samples, test_labels = load._test_samples, load._test_labels + +print('Training set', train_samples.shape, train_labels.shape) +print(' Test set', test_samples.shape, test_labels.shape) + +image_size = load.image_size +num_labels = load.num_labels +num_channels = load.num_channels + + +def get_chunk(samples, labels, chunkSize): + """ + Iterator/Generator: get a batch of data + 这个函数是一个迭代器/生成器,用于每一次只得到 chunkSize 这么多的数据 + 用于 for loop, just like range() function + """ + if len(samples) != len(labels): + raise Exception('Length of samples and labels must equal') + stepStart = 0 # initial step + i = 0 + while stepStart < len(samples): + stepEnd = stepStart + chunkSize + if stepEnd < len(samples): + yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd] + i += 1 + stepStart = stepEnd + + +class Network(): + def __init__(self, num_hidden, batch_size): + """ + @num_hidden: 隐藏层的节点数量 + 
@batch_size:因为我们要节省内存,所以分批处理数据。每一批的数据量。 + """ + self.batch_size = batch_size + self.test_batch_size = 500 + + # Hyper Parameters + self.num_hidden = num_hidden + + # Graph Related + self.graph = tf.Graph() + self.tf_train_samples = None + self.tf_train_labels = None + self.tf_test_samples = None + self.tf_test_labels = None + self.tf_test_prediction = None + + # 统计 + self.merged = None + + # 初始化 + self.define_graph() + self.session = tf.Session(graph=self.graph) + self.writer = tf.summary.FileWriter('./board', self.graph) + + def define_graph(self): + """ + 定义我的的计算图谱 + """ + with self.graph.as_default(): + # 这里只是定义图谱中的各种变量 + with tf.name_scope('inputs'): + self.tf_train_samples = tf.placeholder( + tf.float32, shape=(self.batch_size, image_size, image_size, num_channels), name='tf_train_samples' + ) + self.tf_train_labels = tf.placeholder( + tf.float32, shape=(self.batch_size, num_labels), name='tf_train_labels' + ) + self.tf_test_samples = tf.placeholder( + tf.float32, shape=(self.test_batch_size, image_size, image_size, num_channels), + name='tf_test_samples' + ) + + # fully connected layer 1, fully connected + with tf.name_scope('fc1'): + fc1_weights = tf.Variable( + tf.truncated_normal([image_size * image_size, self.num_hidden], stddev=0.1), name='fc1_weights' + ) + fc1_biases = tf.Variable(tf.constant(0.1, shape=[self.num_hidden]), name='fc1_biases') + tf.summary.histogram('fc1_weights', fc1_weights) + tf.summary.histogram('fc1_biases', fc1_biases) + + # fully connected layer 2 --> output layer + with tf.name_scope('fc2'): + fc2_weights = tf.Variable( + tf.truncated_normal([self.num_hidden, num_labels], stddev=0.1), name='fc2_weights' + ) + fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]), name='fc2_biases') + tf.summary.histogram('fc2_weights', fc2_weights) + tf.summary.histogram('fc2_biases', fc2_biases) + + # 想在来定义图谱的运算 + def model(data): + # fully connected layer 1 + shape = data.get_shape().as_list() + reshape = tf.reshape(data, [shape[0], shape[1] * shape[2] * shape[3]]) + + with tf.name_scope('fc1_model'): + fc1_model = tf.matmul(reshape, fc1_weights) + fc1_biases + hidden = tf.nn.relu(fc1_model) + + # fully connected layer 2 + with tf.name_scope('fc2_model'): + return tf.matmul(hidden, fc2_weights) + fc2_biases + + # Training computation. + logits = model(self.tf_train_samples) + with tf.name_scope('loss'): + self.loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.tf_train_labels) + ) + tf.summary.scalar('Loss', self.loss) + + # Optimizer. + with tf.name_scope('optimizer'): + self.optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(self.loss) + + # Predictions for the training, validation, and test data. 
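+            # Note (added comment): tf.nn.softmax_cross_entropy_with_logits above expects
+            # raw, unscaled logits and applies softmax internally, so the model() output is
+            # passed to it directly; the tf.nn.softmax ops below only turn logits into
+            # probabilities for accuracy / confusion-matrix evaluation, not for the loss.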
+ with tf.name_scope('predictions'): + self.train_prediction = tf.nn.softmax(logits, name='train_prediction') + self.test_prediction = tf.nn.softmax(model(self.tf_test_samples), name='test_prediction') + + self.merged = tf.summary.merge_all() + + def run(self): + """ + 用到Session + """ + + # private function + def print_confusion_matrix(confusionMatrix): + print('Confusion Matrix:') + for i, line in enumerate(confusionMatrix): + print(line, line[i] / np.sum(line)) + a = 0 + for i, column in enumerate(np.transpose(confusionMatrix, (1, 0))): + a += (column[i] / np.sum(column)) * (np.sum(column) / 26000) + print(column[i] / np.sum(column), ) + print('\n', np.sum(confusionMatrix), a) + + with self.session as session: + tf.initialize_all_variables().run() + + # 训练 + print('Start Training') + # batch 1000 + for i, samples, labels in get_chunk(train_samples, train_labels, chunkSize=self.batch_size): + _, l, predictions, summary = session.run( + [self.optimizer, self.loss, self.train_prediction, self.merged], + feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels} + ) + self.writer.add_summary(summary, i) + # labels is True Labels + accuracy, _ = self.accuracy(predictions, labels) + if i % 50 == 0: + print('Minibatch loss at step %d: %f' % (i, l)) + print('Minibatch accuracy: %.1f%%' % accuracy) + # + + # 测试 + accuracies = [] + confusionMatrices = [] + for i, samples, labels in get_chunk(test_samples, test_labels, chunkSize=self.test_batch_size): + result = self.test_prediction.eval(feed_dict={self.tf_test_samples: samples}) + accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True) + accuracies.append(accuracy) + confusionMatrices.append(cm) + print('Test Accuracy: %.1f%%' % accuracy) + print(' Average Accuracy:', np.average(accuracies)) + print('Standard Deviation:', np.std(accuracies)) + print_confusion_matrix(np.add.reduce(confusionMatrices)) + # + + def accuracy(self, predictions, labels, need_confusion_matrix=False): + """ + 计算预测的正确率与召回率 + @return: accuracy and confusionMatrix as a tuple + """ + _predictions = np.argmax(predictions, 1) + _labels = np.argmax(labels, 1) + cm = confusion_matrix(_labels, _predictions) if need_confusion_matrix else None + # == is overloaded for numpy array + accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0]) + return accuracy, cm + + +if __name__ == '__main__': + net = Network(num_hidden=128, batch_size=100) + net.run() diff --git a/Season1_Tensorflow1.1_Python3.5/10-11/load.py b/Season1_Tensorflow1.1_Python3.5/10-11/load.py new file mode 100644 index 0000000..aab6f79 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/10-11/load.py @@ -0,0 +1,120 @@ +# encoding:utf-8 +# Python2 兼容 +from __future__ import print_function, division +from scipy.io import loadmat as load +import matplotlib.pyplot as plt +import numpy as np + + +def reformat(samples, labels): + # 改变原始数据的形状 + # 0 1 2 3 3 0 1 2 + # (图片高,图片宽,通道数,图片数) -> (图片数,图片高,图片宽,通道数) + new = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32) + + # labels 变成 one-hot encoding, [2] -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] + # digit 0 , represented as 10 + # labels 变成 one-hot encoding, [10] -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] + labels = np.array([x[0] for x in labels]) # slow code, whatever + one_hot_labels = [] + for num in labels: + one_hot = [0.0] * 10 + if num == 10: + one_hot[0] = 1.0 + else: + one_hot[num] = 1.0 + one_hot_labels.append(one_hot) + labels = np.array(one_hot_labels).astype(np.float32) + return new, labels + + +def normalize(samples): + """ + 并且灰度化: 
从三色通道 -> 单色通道 省内存 + 加快训练速度 + (R + G + B) / 3 + 将图片从 0 ~ 255 线性映射到 -1.0 ~ +1.0 + @samples: numpy array + """ + a = np.add.reduce(samples, keepdims=True, axis=3) # shape (图片数,图片高,图片宽,通道数) + a = a / 3.0 + return a / 128.0 - 1.0 + + +def distribution(labels, name): + # 查看一下每个label的分布,再画个统计图 + # keys: + # 0 + # 1 + # 2 + # ... + # 9 + count = {} + for label in labels: + key = 0 if label[0] == 10 else label[0] + if key in count: + count[key] += 1 + else: + count[key] = 1 + x = [] + y = [] + for k, v in count.items(): + # print(k, v) + x.append(k) + y.append(v) + + y_pos = np.arange(len(x)) + plt.bar(y_pos, y, align='center', alpha=0.5) + plt.xticks(y_pos, x) + plt.ylabel('Count') + plt.title(name + ' Label Distribution') + plt.show() + + +def inspect(dataset, labels, i): + # 显示图片看看 + if dataset.shape[3] == 1: + shape = dataset.shape + dataset = dataset.reshape(shape[0], shape[1], shape[2]) + print(labels[i]) + plt.imshow(dataset[i]) + plt.show() + + +train = load('../data/train_32x32.mat') +test = load('../data/test_32x32.mat') +# extra = load('../data/extra_32x32.mat') + +# print('Train Samples Shape:', train['X'].shape) +# print('Train Labels Shape:', train['y'].shape) + +# print('Train Samples Shape:', test['X'].shape) +# print('Train Labels Shape:', test['y'].shape) + +# print('Train Samples Shape:', extra['X'].shape) +# print('Train Labels Shape:', extra['y'].shape) + +train_samples = train['X'] +train_labels = train['y'] +test_samples = test['X'] +test_labels = test['y'] +# extra_samples = extra['X'] +# extra_labels = extra['y'] + +n_train_samples, _train_labels = reformat(train_samples, train_labels) +n_test_samples, _test_labels = reformat(test_samples, test_labels) + +_train_samples = normalize(n_train_samples) +_test_samples = normalize(n_test_samples) + +num_labels = 10 +image_size = 32 +num_channels = 1 + +if __name__ == '__main__': + # 探索数据 + pass +# inspect(_train_samples, _train_labels, 1234) +# _train_samples = normalize(_train_samples) +# inspect(_train_samples, _train_labels, 1234) +# distribution(train_labels, 'Train Labels') +# distribution(test_labels, 'Test Labels') diff --git a/Season1_Tensorflow1.1_Python3.5/12-15/.gitignore b/Season1_Tensorflow1.1_Python3.5/12-15/.gitignore new file mode 100644 index 0000000..5c40f79 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/12-15/.gitignore @@ -0,0 +1 @@ +/board/ diff --git a/Season1_Tensorflow1.1_Python3.5/12-15/README.md b/Season1_Tensorflow1.1_Python3.5/12-15/README.md new file mode 100644 index 0000000..1ec5a94 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/12-15/README.md @@ -0,0 +1,78 @@ +<<<<<<< HEAD +关于卷积神经网络的理论知识,请一定阅读 [cs231n 的课件](http://cs231n.github.io/convolutional-networks/)。 +虽然是英文的,但是内容浅显易读,又不失细节与深度,是理解卷积神经网络的绝佳资料。 + +[Theano 的教程也有很详细的介绍 + 很爽的动画效果](http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html)。虽然这里是TF的教程,但是知识是互通的。 + +本系列是“编程向”,所以一切理论都是点到为止。然后外链更优质的理论资源。这样大家的学习效率才高。 + +### 第 13 期涉及到的新概念 +#### Max Pooling +Pooling 是图片的缩小(Downscaling)。这个操作是损失精度的。假如说 Pooling 的 scale 是 2。那么也就是将图片长宽各缩小至 1/2。也就是每 4 个像素点只取一个。 +那么,Max Pooling 则是取最大值的那一个像素。而 Average Pooling 就是取 4 个像素点的平均值。据研究表明 Max Pooling 通常效果更好,所以在代码实例中被使用。 + +#### Relu Layer 的含义 +Relu 是激活函数,定义为: relu(x) = max(x, 0) +或者可以写成 +relu(x) = x if x > 0 else 0 +所以,relu 就是一个线性的阀值函数而已 + +#### 请参考 +[cs231n 关于 Convolutional Layer 架构的解释](http://cs231n.github.io/convolutional-networks/) +[维基百科关于 Relu 的解释](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) +======= +关于卷积神经网络的理论知识,请一定阅读 [cs231n 
的课件](http://cs231n.github.io/convolutional-networks/)。 +虽然是英文的,但是内容浅显易读,又不失细节与深度,是理解卷积神经网络的绝佳资料。 + +[Theano 的教程也有很详细的介绍 + 很爽的动画效果](http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html)。虽然这里是TF的教程,但是知识是互通的。 + +本系列是“编程向”,所以一切理论都是点到为止。然后外链更优质的理论资源。这样大家的学习效率才高。 + +### 第 13 期涉及到的新概念 +#### Max Pooling +Pooling 是图片的缩小(Downscaling)。这个操作是损失精度的。假如说 Pooling 的 scale 是 2。那么也就是将图片长宽各缩小至 1/2。也就是每 4 个像素点只取一个。 +那么,Max Pooling 则是取最大值的那一个像素。而 Average Pooling 就是取 4 个像素点的平均值。据研究表明 Max Pooling 通常效果更好,所以在代码实例中被使用。 + +#### Relu Layer 的含义 +Relu 是激活函数,定义为: relu(x) = max(x, 0) +或者可以写成 +relu(x) = x if x > 0 else 0 +所以,relu 就是一个线性的阀值函数而已 + +#### 请参考 +[cs231n 关于 Convolutional Layer 架构的解释](http://cs231n.github.io/convolutional-networks/) +[维基百科关于 Relu 的解释](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) + +## API +```Python +from dp_refined_api import Network + +# 首先,通过某种方式得到你的数据 +# First,get your data somehow +train_samples, train_labels, test_samples, test_labels = get_your_data() + +net = Network(train_batch_size=64, test_batch_size=500, pooling_scale=2) +net.define_inputs( + train_samples_shape=(64, image_size, image_size, num_channels), + train_labels_shape=(64, num_labels), + test_samples_shape=(500, image_size, image_size, num_channels) +) + +net.add_conv(patch_size=3, in_depth=num_channels, out_depth=16, activation='relu', pooling=False, name='conv1') +net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=True, name='conv2') + +# 2 = 1次 pooling, 每一次缩小为 1/2 +image_size = 32 +net.add_fc(in_num_nodes=(image_size // 2) * (image_size // 2) * 16, out_num_nodes=16, activation='relu', name='fc1') +net.add_fc(in_num_nodes=16, out_num_nodes=10, activation=None, name='fc2') + +# 在添加了所有层之后,定义模型 +# After adding all layers, define the model +net.define_model() + +# 运行网络 +# Run the network +# data_iterator 是一个自定义的 Generator 函数, 用来给网络喂数据 +net.run(data_iterator, train_samples, train_labels, test_samples, test_labels) +``` +>>>>>>> cc8b03fe76bc6a6eedea52e7d3acd66edce665f7 diff --git a/Season1_Tensorflow1.1_Python3.5/12-15/dp.py b/Season1_Tensorflow1.1_Python3.5/12-15/dp.py new file mode 100644 index 0000000..fc6ceba --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/12-15/dp.py @@ -0,0 +1,299 @@ +# 为了 Python2 玩家们 +from __future__ import print_function, division + +# 第三方 +import tensorflow as tf +from sklearn.metrics import confusion_matrix +import numpy as np + +# 我们自己 +import load + +train_samples, train_labels = load._train_samples, load._train_labels +test_samples, test_labels = load._test_samples, load._test_labels + +print('Training set', train_samples.shape, train_labels.shape) +print(' Test set', test_samples.shape, test_labels.shape) + +image_size = load.image_size +num_labels = load.num_labels +num_channels = load.num_channels + + +def get_chunk(samples, labels, chunkSize): + """ + Iterator/Generator: get a batch of data + 这个函数是一个迭代器/生成器,用于每一次只得到 chunkSize 这么多的数据 + 用于 for loop, just like range() function + """ + if len(samples) != len(labels): + raise Exception('Length of samples and labels must equal') + stepStart = 0 # initial step + i = 0 + while stepStart < len(samples): + stepEnd = stepStart + chunkSize + if stepEnd < len(samples): + yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd] + i += 1 + stepStart = stepEnd + + +class Network(): + def __init__(self, num_hidden, batch_size, conv_depth, patch_size, pooling_scale): + """ + @num_hidden: 隐藏层的节点数量 + @batch_size:因为我们要节省内存,所以分批处理数据。每一批的数据量。 + """ + self.batch_size = batch_size + 
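+        # Note (added comment): with pooling_scale = 2 and two max-pooling layers (after
+        # conv2 and conv4), a 32x32 input shrinks to 16x16 and then 8x8; that is why fc1
+        # below is sized with (image_size // down_scale)**2 * last_conv_depth input nodes,
+        # where down_scale = pooling_scale ** 2.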
self.test_batch_size = 500 + + # Hyper Parameters + self.num_hidden = num_hidden + self.patch_size = patch_size # 滑窗的大小 + self.conv1_depth = conv_depth + self.conv2_depth = conv_depth + self.conv3_depth = conv_depth + self.conv4_depth = conv_depth + self.last_conv_depth = self.conv4_depth + self.pooling_scale = pooling_scale + self.pooling_stride = self.pooling_scale # Max Pooling Stride + + # Graph Related + self.graph = tf.Graph() + self.tf_train_samples = None + self.tf_train_labels = None + self.tf_test_samples = None + self.tf_test_labels = None + self.tf_test_prediction = None + + # 统计 + self.merged = None + self.train_summaries = [] + self.test_summaries = [] + + # 初始化 + self.define_graph() + self.session = tf.Session(graph=self.graph) + self.writer = tf.summary.FileWriter('./board', self.graph) + + def define_graph(self): + """ + 定义我的的计算图谱 + """ + with self.graph.as_default(): + # 这里只是定义图谱中的各种变量 + with tf.name_scope('inputs'): + self.tf_train_samples = tf.placeholder( + tf.float32, shape=(self.batch_size, image_size, image_size, num_channels), name='tf_train_samples' + ) + self.tf_train_labels = tf.placeholder( + tf.float32, shape=(self.batch_size, num_labels), name='tf_train_labels' + ) + self.tf_test_samples = tf.placeholder( + tf.float32, shape=(self.test_batch_size, image_size, image_size, num_channels), name='tf_test_samples' + ) + + with tf.name_scope('conv1'): + conv1_weights = tf.Variable( + tf.truncated_normal([self.patch_size, self.patch_size, num_channels, self.conv1_depth], stddev=0.1)) + conv1_biases = tf.Variable(tf.zeros([self.conv1_depth])) + + with tf.name_scope('conv2'): + conv2_weights = tf.Variable( + tf.truncated_normal([self.patch_size, self.patch_size, self.conv1_depth, self.conv2_depth], + stddev=0.1)) + conv2_biases = tf.Variable(tf.constant(0.1, shape=[self.conv2_depth])) + + with tf.name_scope('conv3'): + conv3_weights = tf.Variable( + tf.truncated_normal([self.patch_size, self.patch_size, self.conv2_depth, self.conv3_depth], + stddev=0.1)) + conv3_biases = tf.Variable(tf.constant(0.1, shape=[self.conv3_depth])) + + with tf.name_scope('conv4'): + conv4_weights = tf.Variable( + tf.truncated_normal([self.patch_size, self.patch_size, self.conv3_depth, self.conv4_depth], + stddev=0.1)) + conv4_biases = tf.Variable(tf.constant(0.1, shape=[self.conv4_depth])) + + # fully connected layer 1, fully connected + with tf.name_scope('fc1'): + down_scale = self.pooling_scale ** 2 # because we do 2 times pooling of stride 2 + fc1_weights = tf.Variable( + tf.truncated_normal( + [(image_size // down_scale) * (image_size // down_scale) * self.last_conv_depth, + self.num_hidden], stddev=0.1)) + fc1_biases = tf.Variable(tf.constant(0.1, shape=[self.num_hidden])) + + self.train_summaries.append(tf.summary.histogram('fc1_weights', fc1_weights)) + self.train_summaries.append(tf.summary.histogram('fc1_biases', fc1_biases)) + + # fully connected layer 2 --> output layer + with tf.name_scope('fc2'): + fc2_weights = tf.Variable(tf.truncated_normal([self.num_hidden, num_labels], stddev=0.1), + name='fc2_weights') + fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]), name='fc2_biases') + self.train_summaries.append(tf.summary.histogram('fc2_weights', fc2_weights)) + self.train_summaries.append(tf.summary.histogram('fc2_biases', fc2_biases)) + + # 想在来定义图谱的运算 + def model(data, train=True): + """ + @data: original inputs + @return: logits + """ + with tf.name_scope('conv1_model'): + with tf.name_scope('convolution'): + conv1 = tf.nn.conv2d(data, filter=conv1_weights, 
strides=[1, 1, 1, 1], padding='SAME') + addition = conv1 + conv1_biases + hidden = tf.nn.relu(addition) + + if not train: + # transpose the output of an activation to image + # conv1_activation_relu shape: (8, 32, 32, 64) + # 64 filter maps from this convolution, that's 64 grayscale images + # image size is 32x32 + # 8 is the batch_size, which means 8 times of convolution was performed + # just use the last one (index 7) as record + + filter_map = hidden[-1] + filter_map = tf.transpose(filter_map, perm=[2, 0, 1]) + filter_map = tf.reshape(filter_map, (self.conv1_depth, 32, 32, 1)) + self.test_summaries.append( + tf.summary.image('conv1_relu', tensor=filter_map, max_outputs=self.conv1_depth)) + + with tf.name_scope('conv2_model'): + with tf.name_scope('convolution'): + conv2 = tf.nn.conv2d(hidden, filter=conv2_weights, strides=[1, 1, 1, 1], padding='SAME') + addition = conv2 + conv2_biases + hidden = tf.nn.relu(addition) + hidden = tf.nn.max_pool( + hidden, + ksize=[1, self.pooling_scale, self.pooling_scale, 1], + strides=[1, self.pooling_stride, self.pooling_stride, 1], + padding='SAME') + + with tf.name_scope('conv3_model'): + with tf.name_scope('convolution'): + conv3 = tf.nn.conv2d(hidden, filter=conv3_weights, strides=[1, 1, 1, 1], padding='SAME') + addition = conv3 + conv3_biases + hidden = tf.nn.relu(addition) + + with tf.name_scope('conv4_model'): + with tf.name_scope('convolution'): + conv4 = tf.nn.conv2d(hidden, filter=conv4_weights, strides=[1, 1, 1, 1], padding='SAME') + addition = conv4 + conv4_biases + hidden = tf.nn.relu(addition) + # if not train: + # filter_map = hidden[-1] + # filter_map = tf.transpose(filter_map, perm=[2, 0, 1]) + # filter_map = tf.reshape(filter_map, (self.conv4_depth, 16, 16, 1)) + # tf.image_summary('conv4_relu', tensor=filter_map, max_images=self.conv4_depth) + hidden = tf.nn.max_pool( + hidden, + ksize=[1, self.pooling_scale, self.pooling_scale, 1], + strides=[1, self.pooling_stride, self.pooling_stride, 1], + padding='SAME') + + # fully connected layer 1 + shape = hidden.get_shape().as_list() + reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]]) + + with tf.name_scope('fc1_model'): + fc1_model = tf.matmul(reshape, fc1_weights) + fc1_biases + hidden = tf.nn.relu(fc1_model) + + # fully connected layer 2 + with tf.name_scope('fc2_model'): + return tf.matmul(hidden, fc2_weights) + fc2_biases + + # Training computation. + logits = model(self.tf_train_samples) + with tf.name_scope('loss'): + self.loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.tf_train_labels)) + self.train_summaries.append(tf.summary.scalar('Loss', self.loss)) + + # Optimizer. + with tf.name_scope('optimizer'): + self.optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(self.loss) + + # Predictions for the training, validation, and test data. 
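+            # Note (added comment): both merged summaries a few lines below are built with
+            # tf.summary.merge_all(), which folds the train and test (image) summaries into
+            # a single op; the refined version in dp_refined_api.py merges
+            # self.train_summaries and self.test_summaries separately for this reason.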
+ with tf.name_scope('train'): + self.train_prediction = tf.nn.softmax(logits, name='train_prediction') + with tf.name_scope('test'): + self.test_prediction = tf.nn.softmax(model(self.tf_test_samples, train=False), name='test_prediction') + + self.merged_train_summary = tf.summary.merge_all() + self.merged_test_summary = tf.summary.merge_all() + + def run(self): + """ + 用到Session + """ + + # private function + def print_confusion_matrix(confusionMatrix): + print('Confusion Matrix:') + for i, line in enumerate(confusionMatrix): + print(line, line[i] / np.sum(line)) + a = 0 + for i, column in enumerate(np.transpose(confusionMatrix, (1, 0))): + a += (column[i] / np.sum(column)) * (np.sum(column) / 26000) + print(column[i] / np.sum(column), ) + print('\n', np.sum(confusionMatrix), a) + + with self.session as session: + tf.initialize_all_variables().run() + + ### 训练 + print('Start Training') + # batch 1000 + for i, samples, labels in get_chunk(train_samples, train_labels, chunkSize=self.batch_size): + _, l, predictions, summary = session.run( + [self.optimizer, self.loss, self.train_prediction, self.merged_train_summary], + feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels} + ) + self.writer.add_summary(summary, i) + # labels is True Labels + accuracy, _ = self.accuracy(predictions, labels) + if i % 50 == 0: + print('Minibatch loss at step %d: %f' % (i, l)) + print('Minibatch accuracy: %.1f%%' % accuracy) + ### + + ### 测试 + accuracies = [] + confusionMatrices = [] + for i, samples, labels in get_chunk(test_samples, test_labels, chunkSize=self.test_batch_size): + result, summary = session.run( + [self.test_prediction, self.merged_test_summary], + feed_dict={self.tf_test_samples: samples} + ) + # result = self.test_prediction.eval() + self.writer.add_summary(summary, i) + accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True) + accuracies.append(accuracy) + confusionMatrices.append(cm) + print('Test Accuracy: %.1f%%' % accuracy) + print(' Average Accuracy:', np.average(accuracies)) + print('Standard Deviation:', np.std(accuracies)) + print_confusion_matrix(np.add.reduce(confusionMatrices)) + ### + + def accuracy(self, predictions, labels, need_confusion_matrix=False): + """ + 计算预测的正确率与召回率 + @return: accuracy and confusionMatrix as a tuple + """ + _predictions = np.argmax(predictions, 1) + _labels = np.argmax(labels, 1) + cm = confusion_matrix(_labels, _predictions) if need_confusion_matrix else None + # == is overloaded for numpy array + accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0]) + return accuracy, cm + + +if __name__ == '__main__': + net = Network(num_hidden=16, batch_size=64, patch_size=3, conv_depth=16, pooling_scale=2) + net.run() diff --git a/Season1_Tensorflow1.1_Python3.5/12-15/dp_refined_api.py b/Season1_Tensorflow1.1_Python3.5/12-15/dp_refined_api.py new file mode 100644 index 0000000..75365d6 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/12-15/dp_refined_api.py @@ -0,0 +1,235 @@ +# 新的 refined api 不支持 Python2 +import tensorflow as tf +from sklearn.metrics import confusion_matrix +import numpy as np + + +class Network(): + def __init__(self, train_batch_size, test_batch_size, pooling_scale): + """ + @num_hidden: 隐藏层的节点数量 + @batch_size:因为我们要节省内存,所以分批处理数据。每一批的数据量。 + """ + self.train_batch_size = train_batch_size + self.test_batch_size = test_batch_size + + # Hyper Parameters + self.conv_config = [] # list of dict + self.fc_config = [] # list of dict + self.conv_weights = [] + self.conv_biases = [] + 
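+        # Note (added comment): each layer is recorded twice on purpose: conv_config /
+        # fc_config keep the layer hyper-parameters that model() reads in define_model(),
+        # while conv_weights / conv_biases / fc_weights / fc_biases keep the tf.Variable
+        # objects created by add_conv() and add_fc().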
self.fc_weights = [] + self.fc_biases = [] + self.pooling_scale = pooling_scale + self.pooling_stride = pooling_scale + + # Graph Related + self.tf_train_samples = None + self.tf_train_labels = None + self.tf_test_samples = None + self.tf_test_labels = None + + # 统计 + self.merged = None + self.train_summaries = [] + self.test_summaries = [] + + def add_conv(self, *, patch_size, in_depth, out_depth, activation='relu', pooling=False, name): + """ + This function does not define operations in the graph, but only store config in self.conv_layer_config + """ + self.conv_config.append({ + 'patch_size': patch_size, + 'in_depth': in_depth, + 'out_depth': out_depth, + 'activation': activation, + 'pooling': pooling, + 'name': name + }) + with tf.name_scope(name): + weights = tf.Variable( + tf.truncated_normal([patch_size, patch_size, in_depth, out_depth], stddev=0.1), name=name + '_weights') + biases = tf.Variable(tf.constant(0.1, shape=[out_depth]), name=name + '_biases') + self.conv_weights.append(weights) + self.conv_biases.append(biases) + + def add_fc(self, *, in_num_nodes, out_num_nodes, activation='relu', name): + """ + add fc layer config to slef.fc_layer_config + """ + self.fc_config.append({ + 'in_num_nodes': in_num_nodes, + 'out_num_nodes': out_num_nodes, + 'activation': activation, + 'name': name + }) + with tf.name_scope(name): + weights = tf.Variable(tf.truncated_normal([in_num_nodes, out_num_nodes], stddev=0.1)) + biases = tf.Variable(tf.constant(0.1, shape=[out_num_nodes])) + self.fc_weights.append(weights) + self.fc_biases.append(biases) + self.train_summaries.append(tf.summary.histogram(str(len(self.fc_weights)) + '_weights', weights)) + self.train_summaries.append(tf.summary.histogram(str(len(self.fc_biases)) + '_biases', biases)) + + # should make the definition as an exposed API, instead of implemented in the function + def define_inputs(self, *, train_samples_shape, train_labels_shape, test_samples_shape): + # 这里只是定义图谱中的各种变量 + with tf.name_scope('inputs'): + self.tf_train_samples = tf.placeholder(tf.float32, shape=train_samples_shape, name='tf_train_samples') + self.tf_train_labels = tf.placeholder(tf.float32, shape=train_labels_shape, name='tf_train_labels') + self.tf_test_samples = tf.placeholder(tf.float32, shape=test_samples_shape, name='tf_test_samples') + + def define_model(self): + """ + 定义我的的计算图谱 + """ + + def model(data_flow, train=True): + """ + @data: original inputs + @return: logits + """ + # Define Convolutional Layers + for i, (weights, biases, config) in enumerate(zip(self.conv_weights, self.conv_biases, self.conv_config)): + with tf.name_scope(config['name'] + '_model'): + with tf.name_scope('convolution'): + # default 1,1,1,1 stride and SAME padding + data_flow = tf.nn.conv2d(data_flow, filter=weights, strides=[1, 1, 1, 1], padding='SAME') + data_flow = data_flow + biases + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1), name=config['name'] + '_conv') + if config['activation'] == 'relu': + data_flow = tf.nn.relu(data_flow) + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1), name=config['name'] + '_relu') + else: + raise Exception('Activation Func can only be Relu right now. 
You passed', config['activation']) + if config['pooling']: + data_flow = tf.nn.max_pool( + data_flow, + ksize=[1, self.pooling_scale, self.pooling_scale, 1], + strides=[1, self.pooling_stride, self.pooling_stride, 1], + padding='SAME') + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1) // 2, + name=config['name'] + '_pooling') + + # Define Fully Connected Layers + for i, (weights, biases, config) in enumerate(zip(self.fc_weights, self.fc_biases, self.fc_config)): + if i == 0: + shape = data_flow.get_shape().as_list() + data_flow = tf.reshape(data_flow, [shape[0], shape[1] * shape[2] * shape[3]]) + with tf.name_scope(config['name'] + 'model'): + data_flow = tf.matmul(data_flow, weights) + biases + if config['activation'] == 'relu': + data_flow = tf.nn.relu(data_flow) + elif config['activation'] is None: + pass + else: + raise Exception('Activation Func can only be Relu or None right now. You passed', + config['activation']) + return data_flow + + # Training computation. + logits = model(self.tf_train_samples) + with tf.name_scope('loss'): + self.loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.tf_train_labels)) + self.train_summaries.append(tf.summary.scalar('Loss', self.loss)) + + # Optimizer. + with tf.name_scope('optimizer'): + self.optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(self.loss) + + # Predictions for the training, validation, and test data. + with tf.name_scope('train'): + self.train_prediction = tf.nn.softmax(logits, name='train_prediction') + with tf.name_scope('test'): + self.test_prediction = tf.nn.softmax(model(self.tf_test_samples, train=False), name='test_prediction') + + # 注意这里不能随便替换为merge_all + self.merged_train_summary = tf.summary.merge(self.train_summaries) + self.merged_test_summary = tf.summary.merge(self.test_summaries) + + def run(self, data_iterator, train_samples, train_labels, test_samples, test_labels): + """ + 用到Session + :data_iterator: a function that yields chuck of data + """ + + # private function + def print_confusion_matrix(confusionMatrix): + print('Confusion Matrix:') + for i, line in enumerate(confusionMatrix): + print(line, line[i] / np.sum(line)) + a = 0 + for i, column in enumerate(np.transpose(confusionMatrix, (1, 0))): + a += (column[i] / np.sum(column)) * (np.sum(column) / 26000) + print(column[i] / np.sum(column), ) + print('\n', np.sum(confusionMatrix), a) + + self.writer = tf.summary.FileWriter('./board', tf.get_default_graph()) + + with tf.Session(graph=tf.get_default_graph()) as session: + tf.initialize_all_variables().run() + + # 训练 + print('Start Training') + # batch 1000 + for i, samples, labels in data_iterator(train_samples, train_labels, self.train_batch_size): + _, l, predictions, summary = session.run( + [self.optimizer, self.loss, self.train_prediction, self.merged_train_summary], + feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels} + ) + self.writer.add_summary(summary, i) + # labels is True Labels + accuracy, _ = self.accuracy(predictions, labels) + if i % 50 == 0: + print('Minibatch loss at step %d: %f' % (i, l)) + print('Minibatch accuracy: %.1f%%' % accuracy) + # + + # # 测试 + accuracies = [] + confusionMatrices = [] + for i, samples, labels in data_iterator(test_samples, test_labels, self.test_batch_size): + print('samples shape', samples.shape) + result, summary = session.run( + [self.test_prediction, self.merged_test_summary], + feed_dict={self.tf_test_samples: samples} + ) 
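+                # Note (added comment): the commented-out eval() call below is the
+                # single-tensor equivalent of session.run(); inside this "with tf.Session"
+                # block, tensor.eval(feed_dict=...) runs the same graph but returns only
+                # that tensor, so the test summary would not be collected.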
+ # result = self.test_prediction.eval(feed_dict={self.tf_test_samples: samples}) + self.writer.add_summary(summary, i) + accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True) + accuracies.append(accuracy) + confusionMatrices.append(cm) + print('Test Accuracy: %.1f%%' % accuracy) + print(' Average Accuracy:', np.average(accuracies)) + print('Standard Deviation:', np.std(accuracies)) + print_confusion_matrix(np.add.reduce(confusionMatrices)) + # + + def accuracy(self, predictions, labels, need_confusion_matrix=False): + """ + 计算预测的正确率与召回率 + @return: accuracy and confusionMatrix as a tuple + """ + _predictions = np.argmax(predictions, 1) + _labels = np.argmax(labels, 1) + cm = confusion_matrix(_labels, _predictions) if need_confusion_matrix else None + # == is overloaded for numpy array + accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0]) + return accuracy, cm + + def visualize_filter_map(self, tensor, *, how_many, display_size, name): + print(tensor.get_shape) + filter_map = tensor[-1] + print(filter_map.get_shape()) + filter_map = tf.transpose(filter_map, perm=[2, 0, 1]) + print(filter_map.get_shape()) + filter_map = tf.reshape(filter_map, (how_many, display_size, display_size, 1)) + print(how_many) + self.test_summaries.append(tf.summary.image(name, tensor=filter_map, max_outputs=how_many)) diff --git a/Season1_Tensorflow1.1_Python3.5/12-15/load.py b/Season1_Tensorflow1.1_Python3.5/12-15/load.py new file mode 100644 index 0000000..d3f6494 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/12-15/load.py @@ -0,0 +1,120 @@ +# encoding:utf-8 +# Python2 兼容 +from __future__ import print_function, division +from scipy.io import loadmat as load +import matplotlib.pyplot as plt +import numpy as np + + +def reformat(samples, labels): + # 改变原始数据的形状 + # 0 1 2 3 3 0 1 2 + # (图片高,图片宽,通道数,图片数) -> (图片数,图片高,图片宽,通道数) + new = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32) + + # labels 变成 one-hot encoding, [2] -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] + # digit 0 , represented as 10 + # labels 变成 one-hot encoding, [10] -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] + labels = np.array([x[0] for x in labels]) # slow code, whatever + one_hot_labels = [] + for num in labels: + one_hot = [0.0] * 10 + if num == 10: + one_hot[0] = 1.0 + else: + one_hot[num] = 1.0 + one_hot_labels.append(one_hot) + labels = np.array(one_hot_labels).astype(np.float32) + return new, labels + + +def normalize(samples): + """ + 并且灰度化: 从三色通道 -> 单色通道 省内存 + 加快训练速度 + (R + G + B) / 3 + 将图片从 0 ~ 255 线性映射到 -1.0 ~ +1.0 + @samples: numpy array + """ + a = np.add.reduce(samples, keepdims=True, axis=3) # shape (图片数,图片高,图片宽,通道数) + a = a / 3.0 + return a / 128.0 - 1.0 + + +def distribution(labels, name): + # 查看一下每个label的分布,再画个统计图 + # keys: + # 0 + # 1 + # 2 + # ... 
+ # 9 + count = {} + for label in labels: + key = 0 if label[0] == 10 else label[0] + if key in count: + count[key] += 1 + else: + count[key] = 1 + x = [] + y = [] + for k, v in count.items(): + # print(k, v) + x.append(k) + y.append(v) + + y_pos = np.arange(len(x)) + plt.bar(y_pos, y, align='center', alpha=0.5) + plt.xticks(y_pos, x) + plt.ylabel('Count') + plt.title(name + ' Label Distribution') + plt.show() + + +def inspect(dataset, labels, i): + # 显示图片看看 + if dataset.shape[3] == 1: + shape = dataset.shape + dataset = dataset.reshape(shape[0], shape[1], shape[2]) + print(labels[i]) + plt.imshow(dataset[i]) + plt.show() + + +train = load('../data/train_32x32.mat') +test = load('../data/test_32x32.mat') +# extra = load('../data/extra_32x32.mat') + +# print('Train Samples Shape:', train['X'].shape) +# print('Train Labels Shape:', train['y'].shape) + +# print('Train Samples Shape:', test['X'].shape) +# print('Train Labels Shape:', test['y'].shape) + +# print('Train Samples Shape:', extra['X'].shape) +# print('Train Labels Shape:', extra['y'].shape) + +train_samples = train['X'] +train_labels = train['y'] +test_samples = test['X'] +test_labels = test['y'] +# extra_samples = extra['X'] +# extra_labels = extra['y'] + +n_train_samples, _train_labels = reformat(train_samples, train_labels) +n_test_samples, _test_labels = reformat(test_samples, test_labels) + +_train_samples = normalize(n_train_samples) +_test_samples = normalize(n_test_samples) + +num_labels = 10 +image_size = 32 +num_channels = 1 + +if __name__ == '__main__': + # 探索数据 + pass + inspect(_train_samples, _train_labels, 1234) +# _train_samples = normalize(_train_samples) +# inspect(_train_samples, _train_labels, 1234) +# distribution(train_labels, 'Train Labels') +# distribution(test_labels, 'Test Labels') diff --git a/Season1_Tensorflow1.1_Python3.5/12-15/main.py b/Season1_Tensorflow1.1_Python3.5/12-15/main.py new file mode 100644 index 0000000..d9cbe96 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/12-15/main.py @@ -0,0 +1,55 @@ +if __name__ == '__main__': + import load + from dp_refined_api import Network + + train_samples, train_labels = load._train_samples, load._train_labels + test_samples, test_labels = load._test_samples, load._test_labels + + print('Training set', train_samples.shape, train_labels.shape) + print(' Test set', test_samples.shape, test_labels.shape) + + image_size = load.image_size + num_labels = load.num_labels + num_channels = load.num_channels + + + def get_chunk(samples, labels, chunk_size): + """ + Iterator/Generator: get a batch of data + 这个函数是一个迭代器/生成器,用于每一次只得到 chunk_size 这么多的数据 + 用于 for loop, just like range() function + """ + if len(samples) != len(labels): + raise Exception('Length of samples and labels must equal') + stepStart = 0 # initial step + i = 0 + while stepStart < len(samples): + stepEnd = stepStart + chunk_size + if stepEnd < len(samples): + yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd] + i += 1 + stepStart = stepEnd + + + net = Network(train_batch_size=64, test_batch_size=500, pooling_scale=2) + net.define_inputs( + train_samples_shape=(64, image_size, image_size, num_channels), + train_labels_shape=(64, num_labels), + test_samples_shape=(500, image_size, image_size, num_channels) + ) + # + net.add_conv(patch_size=3, in_depth=num_channels, out_depth=16, activation='relu', pooling=False, name='conv1') + net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=True, name='conv2') + net.add_conv(patch_size=3, in_depth=16, out_depth=16, 
activation='relu', pooling=False, name='conv3') + net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=True, name='conv4') + + # 4 = 两次 pooling, 每一次缩小为 1/2 + # 16 = conv4 out_depth + net.add_fc(in_num_nodes=(image_size // 4) * (image_size // 4) * 16, out_num_nodes=16, activation='relu', name='fc1') + net.add_fc(in_num_nodes=16, out_num_nodes=10, activation=None, name='fc2') + + net.define_model() + net.run(get_chunk, train_samples, train_labels, test_samples, test_labels) + +else: + raise Exception('main.py: Should Not Be Imported!!! Must Run by "python main.py"') diff --git "a/Season1_Tensorflow1.1_Python3.5/12-15/\345\244\215\344\271\240\344\270\200\344\270\213\345\215\267\347\247\257\347\245\236\347\273\217\347\275\221\347\273\234.pptx" "b/Season1_Tensorflow1.1_Python3.5/12-15/\345\244\215\344\271\240\344\270\200\344\270\213\345\215\267\347\247\257\347\245\236\347\273\217\347\275\221\347\273\234.pptx" new file mode 100644 index 0000000..153b3b4 Binary files /dev/null and "b/Season1_Tensorflow1.1_Python3.5/12-15/\345\244\215\344\271\240\344\270\200\344\270\213\345\215\267\347\247\257\347\245\236\347\273\217\347\275\221\347\273\234.pptx" differ diff --git a/Season1_Tensorflow1.1_Python3.5/16-19/.gitignore b/Season1_Tensorflow1.1_Python3.5/16-19/.gitignore new file mode 100644 index 0000000..5c40f79 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/16-19/.gitignore @@ -0,0 +1 @@ +/board/ diff --git a/Season1_Tensorflow1.1_Python3.5/16-19/README.md b/Season1_Tensorflow1.1_Python3.5/16-19/README.md new file mode 100644 index 0000000..dec7c02 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/16-19/README.md @@ -0,0 +1,15 @@ +## 这一次给大家讲一讲一些常用的优化的方法。 +### Regularization +L1 与 L2 Loss Function 的一些问题。给FC用的。但是我实际上不太懂。[Quora答案](https://www.quora.com/What-is-the-difference-between-L1-and-L2-regularization)和[一篇博客](http://www.chioka.in/differences-between-l1-and-l2-as-loss-function-and-regularization/)挺有帮助的。 + +### Dropout +随机扔掉传向末尾FC层的信号,使得末尾FC层不能完全相信所得输入。这个方法神奇地提高了正确率。可以将其理解为一种 Week Learner Ensemble 的方法。 + +想深究的同学[插这里](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) + +### Update Function / Optimization Function +1. 普通的 Gradient Descent +2. Momentum Update +3. 
Adam Update + +还有其他的 Update。但是这三个足以把一些原则性的问题讲清楚,所以教程就选择了这三个。 diff --git a/Season1_Tensorflow1.1_Python3.5/16-19/dp.py b/Season1_Tensorflow1.1_Python3.5/16-19/dp.py new file mode 100644 index 0000000..c175494 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/16-19/dp.py @@ -0,0 +1,273 @@ +# 新的 refined api 不支持 Python2 +import tensorflow as tf +from sklearn.metrics import confusion_matrix +import numpy as np + + +class Network(): + def __init__(self, train_batch_size, test_batch_size, pooling_scale, + optimize_method='adam'): + """ + @num_hidden: 隐藏层的节点数量 + @batch_size:因为我们要节省内存,所以分批处理数据。每一批的数据量。 + """ + self.optimize_method = optimize_method + + self.train_batch_size = train_batch_size + self.test_batch_size = test_batch_size + + # Hyper Parameters + self.conv_config = [] # list of dict + self.fc_config = [] # list of dict + self.conv_weights = [] + self.conv_biases = [] + self.fc_weights = [] + self.fc_biases = [] + self.pooling_scale = pooling_scale + self.pooling_stride = pooling_scale + + # Graph Related + self.tf_train_samples = None + self.tf_train_labels = None + self.tf_test_samples = None + self.tf_test_labels = None + + # 统计 + self.merged = None + self.train_summaries = [] + self.test_summaries = [] + + def add_conv(self, *, patch_size, in_depth, out_depth, activation='relu', pooling=False, name): + """ + This function does not define operations in the graph, but only store config in self.conv_layer_config + """ + self.conv_config.append({ + 'patch_size': patch_size, + 'in_depth': in_depth, + 'out_depth': out_depth, + 'activation': activation, + 'pooling': pooling, + 'name': name + }) + with tf.name_scope(name): + weights = tf.Variable( + tf.truncated_normal([patch_size, patch_size, in_depth, out_depth], stddev=0.1), name=name + '_weights') + biases = tf.Variable(tf.constant(0.1, shape=[out_depth]), name=name + '_biases') + self.conv_weights.append(weights) + self.conv_biases.append(biases) + + def add_fc(self, *, in_num_nodes, out_num_nodes, activation='relu', name): + """ + add fc layer config to slef.fc_layer_config + """ + self.fc_config.append({ + 'in_num_nodes': in_num_nodes, + 'out_num_nodes': out_num_nodes, + 'activation': activation, + 'name': name + }) + with tf.name_scope(name): + weights = tf.Variable(tf.truncated_normal([in_num_nodes, out_num_nodes], stddev=0.1)) + biases = tf.Variable(tf.constant(0.1, shape=[out_num_nodes])) + self.fc_weights.append(weights) + self.fc_biases.append(biases) + self.train_summaries.append(tf.summary.histogram(str(len(self.fc_weights)) + '_weights', weights)) + self.train_summaries.append(tf.summary.histogram(str(len(self.fc_biases)) + '_biases', biases)) + + def apply_regularization(self, _lambda): + # L2 regularization for the fully connected parameters + regularization = 0.0 + for weights, biases in zip(self.fc_weights, self.fc_biases): + regularization += tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases) + # 1e5 + return _lambda * regularization + + # should make the definition as an exposed API, instead of implemented in the function + def define_inputs(self, *, train_samples_shape, train_labels_shape, test_samples_shape): + # 这里只是定义图谱中的各种变量 + with tf.name_scope('inputs'): + self.tf_train_samples = tf.placeholder(tf.float32, shape=train_samples_shape, name='tf_train_samples') + self.tf_train_labels = tf.placeholder(tf.float32, shape=train_labels_shape, name='tf_train_labels') + self.tf_test_samples = tf.placeholder(tf.float32, shape=test_samples_shape, name='tf_test_samples') + + def define_model(self): + """ + 定义我的的计算图谱 + """ + + def 
model(data_flow, train=True): + """ + @data: original inputs + @return: logits + """ + # Define Convolutional Layers + for i, (weights, biases, config) in enumerate(zip(self.conv_weights, self.conv_biases, self.conv_config)): + with tf.name_scope(config['name'] + '_model'): + with tf.name_scope('convolution'): + # default 1,1,1,1 stride and SAME padding + data_flow = tf.nn.conv2d(data_flow, filter=weights, strides=[1, 1, 1, 1], padding='SAME') + data_flow = data_flow + biases + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1), name=config['name'] + '_conv') + if config['activation'] == 'relu': + data_flow = tf.nn.relu(data_flow) + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1), name=config['name'] + '_relu') + else: + raise Exception('Activation Func can only be Relu right now. You passed', config['activation']) + if config['pooling']: + data_flow = tf.nn.max_pool( + data_flow, + ksize=[1, self.pooling_scale, self.pooling_scale, 1], + strides=[1, self.pooling_stride, self.pooling_stride, 1], + padding='SAME') + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1) // 2, + name=config['name'] + '_pooling') + + # Define Fully Connected Layers + for i, (weights, biases, config) in enumerate(zip(self.fc_weights, self.fc_biases, self.fc_config)): + if i == 0: + shape = data_flow.get_shape().as_list() + data_flow = tf.reshape(data_flow, [shape[0], shape[1] * shape[2] * shape[3]]) + with tf.name_scope(config['name'] + 'model'): + + # Dropout + if train and i == len(self.fc_weights) - 1: + data_flow = tf.nn.dropout(data_flow, 0.5, seed=4926) + # + + data_flow = tf.matmul(data_flow, weights) + biases + if config['activation'] == 'relu': + data_flow = tf.nn.relu(data_flow) + elif config['activation'] is None: + pass + else: + raise Exception('Activation Func can only be Relu or None right now. You passed', + config['activation']) + return data_flow + + # Training computation. + logits = model(self.tf_train_samples) + with tf.name_scope('loss'): + self.loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.tf_train_labels)) + self.loss += self.apply_regularization(_lambda=5e-4) + self.train_summaries.append(tf.summary.scalar('Loss', self.loss)) + + # learning rate decay + global_step = tf.Variable(0) + lr = 0.001 + dr = 0.99 + learning_rate = tf.train.exponential_decay( + learning_rate=lr, + global_step=global_step * self.train_batch_size, + decay_steps=100, + decay_rate=dr, + staircase=True + ) + + # Optimizer. + with tf.name_scope('optimizer'): + if self.optimize_method == 'gradient': + self.optimizer = tf.train \ + .GradientDescentOptimizer(learning_rate) \ + .minimize(self.loss) + elif self.optimize_method == 'momentum': + self.optimizer = tf.train \ + .MomentumOptimizer(learning_rate, 0.5) \ + .minimize(self.loss) + elif (self.optimize_method == 'adam'): + self.optimizer = tf.train \ + .AdamOptimizer(learning_rate) \ + .minimize(self.loss) + + # Predictions for the training, validation, and test data. 
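+        # Note (added comment): as written, global_step above is never passed to
+        # minimize(), so it stays at 0 and the exponential decay never takes effect (the
+        # learning rate stays at its base value); passing
+        # minimize(self.loss, global_step=global_step) would let the schedule advance.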
+ with tf.name_scope('train'): + self.train_prediction = tf.nn.softmax(logits, name='train_prediction') + with tf.name_scope('test'): + self.test_prediction = tf.nn.softmax(model(self.tf_test_samples, train=False), name='test_prediction') + + self.merged_train_summary = tf.summary.merge(self.train_summaries) + self.merged_test_summary = tf.summary.merge(self.test_summaries) + + def run(self, data_iterator, train_samples, train_labels, test_samples, test_labels): + """ + 用到Session + :data_iterator: a function that yields chuck of data + """ + + # private function + def print_confusion_matrix(confusionMatrix): + print('Confusion Matrix:') + for i, line in enumerate(confusionMatrix): + print(line, line[i] / np.sum(line)) + a = 0 + for i, column in enumerate(np.transpose(confusionMatrix, (1, 0))): + a += (column[i] / np.sum(column)) * (np.sum(column) / 26000) + print(column[i] / np.sum(column), ) + print('\n', np.sum(confusionMatrix), a) + + self.writer = tf.summary.FileWriter('./board', tf.get_default_graph()) + + with tf.Session(graph=tf.get_default_graph()) as session: + tf.initialize_all_variables().run() + + # 训练 + print('Start Training') + # batch 1000 + for i, samples, labels in data_iterator(train_samples, train_labels, chunk_size=self.train_batch_size): + _, l, predictions, summary = session.run( + [self.optimizer, self.loss, self.train_prediction, self.merged_train_summary], + feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels} + ) + self.writer.add_summary(summary, i) + # labels is True Labels + accuracy, _ = self.accuracy(predictions, labels) + if i % 50 == 0: + print('Minibatch loss at step %d: %f' % (i, l)) + print('Minibatch accuracy: %.1f%%' % accuracy) + # + + # 测试 + accuracies = [] + confusionMatrices = [] + for i, samples, labels in data_iterator(test_samples, test_labels, self.test_batch_size): + result, summary = session.run( + [self.test_prediction, self.merged_test_summary], + feed_dict={self.tf_test_samples: samples} + ) + self.writer.add_summary(summary, i) + accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True) + accuracies.append(accuracy) + confusionMatrices.append(cm) + print('Test Accuracy: %.1f%%' % accuracy) + print(' Average Accuracy:', np.average(accuracies)) + print('Standard Deviation:', np.std(accuracies)) + print_confusion_matrix(np.add.reduce(confusionMatrices)) + # + + def accuracy(self, predictions, labels, need_confusion_matrix=False): + """ + 计算预测的正确率与召回率 + @return: accuracy and confusionMatrix as a tuple + """ + _predictions = np.argmax(predictions, 1) + _labels = np.argmax(labels, 1) + cm = confusion_matrix(_labels, _predictions) if need_confusion_matrix else None + # == is overloaded for numpy array + accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0]) + return accuracy, cm + + def visualize_filter_map(self, tensor, *, how_many, display_size, name): + print(tensor.get_shape) + filter_map = tensor[-1] + print(filter_map.get_shape()) + filter_map = tf.transpose(filter_map, perm=[2, 0, 1]) + print(filter_map.get_shape()) + filter_map = tf.reshape(filter_map, (how_many, display_size, display_size, 1)) + print(how_many) + self.test_summaries.append(tf.summary.image(name, tensor=filter_map, max_outputs=how_many)) diff --git a/Season1_Tensorflow1.1_Python3.5/16-19/load.py b/Season1_Tensorflow1.1_Python3.5/16-19/load.py new file mode 100644 index 0000000..d3f6494 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/16-19/load.py @@ -0,0 +1,120 @@ +# encoding:utf-8 +# Python2 兼容 +from __future__ 
import print_function, division +from scipy.io import loadmat as load +import matplotlib.pyplot as plt +import numpy as np + + +def reformat(samples, labels): + # 改变原始数据的形状 + # 0 1 2 3 3 0 1 2 + # (图片高,图片宽,通道数,图片数) -> (图片数,图片高,图片宽,通道数) + new = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32) + + # labels 变成 one-hot encoding, [2] -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] + # digit 0 , represented as 10 + # labels 变成 one-hot encoding, [10] -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] + labels = np.array([x[0] for x in labels]) # slow code, whatever + one_hot_labels = [] + for num in labels: + one_hot = [0.0] * 10 + if num == 10: + one_hot[0] = 1.0 + else: + one_hot[num] = 1.0 + one_hot_labels.append(one_hot) + labels = np.array(one_hot_labels).astype(np.float32) + return new, labels + + +def normalize(samples): + """ + 并且灰度化: 从三色通道 -> 单色通道 省内存 + 加快训练速度 + (R + G + B) / 3 + 将图片从 0 ~ 255 线性映射到 -1.0 ~ +1.0 + @samples: numpy array + """ + a = np.add.reduce(samples, keepdims=True, axis=3) # shape (图片数,图片高,图片宽,通道数) + a = a / 3.0 + return a / 128.0 - 1.0 + + +def distribution(labels, name): + # 查看一下每个label的分布,再画个统计图 + # keys: + # 0 + # 1 + # 2 + # ... + # 9 + count = {} + for label in labels: + key = 0 if label[0] == 10 else label[0] + if key in count: + count[key] += 1 + else: + count[key] = 1 + x = [] + y = [] + for k, v in count.items(): + # print(k, v) + x.append(k) + y.append(v) + + y_pos = np.arange(len(x)) + plt.bar(y_pos, y, align='center', alpha=0.5) + plt.xticks(y_pos, x) + plt.ylabel('Count') + plt.title(name + ' Label Distribution') + plt.show() + + +def inspect(dataset, labels, i): + # 显示图片看看 + if dataset.shape[3] == 1: + shape = dataset.shape + dataset = dataset.reshape(shape[0], shape[1], shape[2]) + print(labels[i]) + plt.imshow(dataset[i]) + plt.show() + + +train = load('../data/train_32x32.mat') +test = load('../data/test_32x32.mat') +# extra = load('../data/extra_32x32.mat') + +# print('Train Samples Shape:', train['X'].shape) +# print('Train Labels Shape:', train['y'].shape) + +# print('Train Samples Shape:', test['X'].shape) +# print('Train Labels Shape:', test['y'].shape) + +# print('Train Samples Shape:', extra['X'].shape) +# print('Train Labels Shape:', extra['y'].shape) + +train_samples = train['X'] +train_labels = train['y'] +test_samples = test['X'] +test_labels = test['y'] +# extra_samples = extra['X'] +# extra_labels = extra['y'] + +n_train_samples, _train_labels = reformat(train_samples, train_labels) +n_test_samples, _test_labels = reformat(test_samples, test_labels) + +_train_samples = normalize(n_train_samples) +_test_samples = normalize(n_test_samples) + +num_labels = 10 +image_size = 32 +num_channels = 1 + +if __name__ == '__main__': + # 探索数据 + pass + inspect(_train_samples, _train_labels, 1234) +# _train_samples = normalize(_train_samples) +# inspect(_train_samples, _train_labels, 1234) +# distribution(train_labels, 'Train Labels') +# distribution(test_labels, 'Test Labels') diff --git a/Season1_Tensorflow1.1_Python3.5/16-19/main.py b/Season1_Tensorflow1.1_Python3.5/16-19/main.py new file mode 100644 index 0000000..6c1edb5 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/16-19/main.py @@ -0,0 +1,55 @@ +if __name__ == '__main__': + import load + from dp import Network + + train_samples, train_labels = load._train_samples, load._train_labels + test_samples, test_labels = load._test_samples, load._test_labels + + print('Training set', train_samples.shape, train_labels.shape) + print(' Test set', test_samples.shape, test_labels.shape) + + image_size = load.image_size + 
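+    # Note (added comment): get_chunk below only yields while step_end < len(samples), so a
+    # final chunk smaller than chunk_size (and even an exact-fit last chunk, because the
+    # comparison is strict) is silently dropped, which also avoids feeding a partial batch
+    # into the fixed-shape placeholders.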
num_labels = load.num_labels + num_channels = load.num_channels + + + def get_chunk(samples, labels, chunk_size): + """ + Iterator/Generator: get a batch of data + 这个函数是一个迭代器/生成器,用于每一次只得到 chunk_size 这么多的数据 + 用于 for loop, just like range() function + """ + if len(samples) != len(labels): + raise Exception('Length of samples and labels must equal') + step_start = 0 # initial step + i = 0 + while step_start < len(samples): + step_end = step_start + chunk_size + if step_end < len(samples): + yield i, samples[step_start:step_end], labels[step_start:step_end] + i += 1 + step_start = step_end + + print('num:', image_size, num_channels, num_channels) + net = Network(train_batch_size=64, test_batch_size=500, pooling_scale=2) + net.define_inputs( + train_samples_shape=(64, image_size, image_size, num_channels), + train_labels_shape=(64, num_labels), + test_samples_shape=(500, image_size, image_size, num_channels) + ) + # + net.add_conv(patch_size=3, in_depth=num_channels, out_depth=16, activation='relu', pooling=False, name='conv1') + net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=True, name='conv2') + net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=False, name='conv3') + net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=True, name='conv4') + + # 4 = 两次 pooling, 每一次缩小为 1/2 + # 16 = conv4 out_depth + net.add_fc(in_num_nodes=(image_size // 4) * (image_size // 4) * 16, out_num_nodes=16, activation='relu', name='fc1') + net.add_fc(in_num_nodes=16, out_num_nodes=10, activation=None, name='fc2') + + net.define_model() + net.run(get_chunk, train_samples, train_labels, test_samples, test_labels) + +else: + raise Exception('main.py: Should Not Be Imported!!! Must Run by "python main.py"') diff --git a/Season1_Tensorflow1.1_Python3.5/20/.gitignore b/Season1_Tensorflow1.1_Python3.5/20/.gitignore new file mode 100644 index 0000000..112f5ec --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/20/.gitignore @@ -0,0 +1,2 @@ +/board/ +/model/ diff --git a/Season1_Tensorflow1.1_Python3.5/20/dp.py b/Season1_Tensorflow1.1_Python3.5/20/dp.py new file mode 100644 index 0000000..2640256 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/20/dp.py @@ -0,0 +1,348 @@ +# 新的 refined api 不支持 Python2 +import tensorflow as tf +from sklearn.metrics import confusion_matrix +import numpy as np + + +class Network(): + def __init__(self, train_batch_size, test_batch_size, pooling_scale, + dropout_rate, base_learning_rate, decay_rate, + optimizeMethod='adam', save_path='model/default.ckpt'): + """ + @num_hidden: 隐藏层的节点数量 + @batch_size:因为我们要节省内存,所以分批处理数据。每一批的数据量。 + """ + self.optimizeMethod = optimizeMethod + self.dropout_rate = dropout_rate + self.base_learning_rate = base_learning_rate + self.decay_rate = decay_rate + + self.train_batch_size = train_batch_size + self.test_batch_size = test_batch_size + + # Hyper Parameters + self.conv_config = [] # list of dict + self.fc_config = [] # list of dict + self.conv_weights = [] + self.conv_biases = [] + self.fc_weights = [] + self.fc_biases = [] + self.pooling_scale = pooling_scale + self.pooling_stride = pooling_scale + + # Graph Related + self.tf_train_samples = None + self.tf_train_labels = None + self.tf_test_samples = None + self.tf_test_labels = None + + # 统计 + self.writer = None + self.merged = None + self.train_summaries = [] + self.test_summaries = [] + + # save 保存训练的模型 + self.saver = None + self.save_path = save_path + + def add_conv(self, *, patch_size, in_depth, out_depth, 
activation='relu', pooling=False, name): + """ + This function does not define operations in the graph, but only store config in self.conv_layer_config + """ + self.conv_config.append({ + 'patch_size': patch_size, + 'in_depth': in_depth, + 'out_depth': out_depth, + 'activation': activation, + 'pooling': pooling, + 'name': name + }) + with tf.name_scope(name): + weights = tf.Variable( + tf.truncated_normal([patch_size, patch_size, in_depth, out_depth], stddev=0.1), name=name + '_weights') + biases = tf.Variable(tf.constant(0.1, shape=[out_depth]), name=name + '_biases') + self.conv_weights.append(weights) + self.conv_biases.append(biases) + + def add_fc(self, *, in_num_nodes, out_num_nodes, activation='relu', name): + """ + add fc layer config to slef.fc_layer_config + """ + self.fc_config.append({ + 'in_num_nodes': in_num_nodes, + 'out_num_nodes': out_num_nodes, + 'activation': activation, + 'name': name + }) + with tf.name_scope(name): + weights = tf.Variable(tf.truncated_normal([in_num_nodes, out_num_nodes], stddev=0.1)) + biases = tf.Variable(tf.constant(0.1, shape=[out_num_nodes])) + self.fc_weights.append(weights) + self.fc_biases.append(biases) + self.train_summaries.append(tf.summary.histogram(str(len(self.fc_weights)) + '_weights', weights)) + self.train_summaries.append(tf.summary.histogram(str(len(self.fc_biases)) + '_biases', biases)) + + def apply_regularization(self, _lambda): + # L2 regularization for the fully connected parameters + regularization = 0.0 + for weights, biases in zip(self.fc_weights, self.fc_biases): + regularization += tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases) + # 1e5 + return _lambda * regularization + + # should make the definition as an exposed API, instead of implemented in the function + def define_inputs(self, *, train_samples_shape, train_labels_shape, test_samples_shape): + # 这里只是定义图谱中的各种变量 + with tf.name_scope('inputs'): + self.tf_train_samples = tf.placeholder(tf.float32, shape=train_samples_shape, name='tf_train_samples') + self.tf_train_labels = tf.placeholder(tf.float32, shape=train_labels_shape, name='tf_train_labels') + self.tf_test_samples = tf.placeholder(tf.float32, shape=test_samples_shape, name='tf_test_samples') + + def define_model(self): + """ + 定义我的的计算图谱 + """ + + def model(data_flow, train=True): + """ + @data: original inputs + @return: logits + """ + # Define Convolutional Layers + for i, (weights, biases, config) in enumerate(zip(self.conv_weights, self.conv_biases, self.conv_config)): + with tf.name_scope(config['name'] + '_model'): + with tf.name_scope('convolution'): + # default 1,1,1,1 stride and SAME padding + data_flow = tf.nn.conv2d(data_flow, filter=weights, strides=[1, 1, 1, 1], padding='SAME') + data_flow = data_flow + biases + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1), name=config['name'] + '_conv') + if config['activation'] == 'relu': + data_flow = tf.nn.relu(data_flow) + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1), name=config['name'] + '_relu') + else: + raise Exception('Activation Func can only be Relu right now. 
You passed', config['activation']) + if config['pooling']: + data_flow = tf.nn.max_pool( + data_flow, + ksize=[1, self.pooling_scale, self.pooling_scale, 1], + strides=[1, self.pooling_stride, self.pooling_stride, 1], + padding='SAME') + if not train: + self.visualize_filter_map(data_flow, how_many=config['out_depth'], + display_size=32 // (i // 2 + 1) // 2, + name=config['name'] + '_pooling') + + # Define Fully Connected Layers + for i, (weights, biases, config) in enumerate(zip(self.fc_weights, self.fc_biases, self.fc_config)): + if i == 0: + shape = data_flow.get_shape().as_list() + data_flow = tf.reshape(data_flow, [shape[0], shape[1] * shape[2] * shape[3]]) + with tf.name_scope(config['name'] + 'model'): + + ### Dropout + if train and i == len(self.fc_weights) - 1: + data_flow = tf.nn.dropout(data_flow, self.dropout_rate, seed=4926) + ### + + data_flow = tf.matmul(data_flow, weights) + biases + if config['activation'] == 'relu': + data_flow = tf.nn.relu(data_flow) + elif config['activation'] is None: + pass + else: + raise Exception('Activation Func can only be Relu or None right now. You passed', + config['activation']) + return data_flow + + # Training computation. + logits = model(self.tf_train_samples) + with tf.name_scope('loss'): + self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.tf_train_labels)) + self.loss += self.apply_regularization(_lambda=5e-4) + self.train_summaries.append(tf.summary.scalar('Loss', self.loss)) + + # learning rate decay + global_step = tf.Variable(0) + learning_rate = tf.train.exponential_decay( + learning_rate=self.base_learning_rate, + global_step=global_step * self.train_batch_size, + decay_steps=100, + decay_rate=self.decay_rate, + staircase=True + ) + + # Optimizer. + with tf.name_scope('optimizer'): + if (self.optimizeMethod == 'gradient'): + self.optimizer = tf.train \ + .GradientDescentOptimizer(learning_rate) \ + .minimize(self.loss) + elif (self.optimizeMethod == 'momentum'): + self.optimizer = tf.train \ + .MomentumOptimizer(learning_rate, 0.5) \ + .minimize(self.loss) + elif (self.optimizeMethod == 'adam'): + self.optimizer = tf.train \ + .AdamOptimizer(learning_rate) \ + .minimize(self.loss) + + # Predictions for the training, validation, and test data. 
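+        # Editor's note (not in the original code): with staircase=True the decay above computes
+        #   learning_rate = base_learning_rate * decay_rate ** floor((global_step * train_batch_size) / decay_steps)
+        # However, as written the global_step variable is never passed to minimize() and is never
+        # incremented, so it appears to stay at 0 and the effective learning rate remains at
+        # base_learning_rate; passing global_step=global_step to minimize() would enable the decay.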
+ with tf.name_scope('train'): + self.train_prediction = tf.nn.softmax(logits, name='train_prediction') + tf.add_to_collection("prediction", self.train_prediction) + with tf.name_scope('test'): + self.test_prediction = tf.nn.softmax(model(self.tf_test_samples, train=False), name='test_prediction') + tf.add_to_collection("prediction", self.test_prediction) + + single_shape = (1, 32, 32, 1) + single_input = tf.placeholder(tf.float32, shape=single_shape, name='single_input') + self.single_prediction = tf.nn.softmax(model(single_input, train=False), name='single_prediction') + tf.add_to_collection("prediction", self.single_prediction) + + self.merged_train_summary = tf.summary.merge(self.train_summaries) + self.merged_test_summary = tf.summary.merge(self.test_summaries) + + # 放在定义Graph之后,保存这张计算图 + self.saver = tf.train.Saver(tf.all_variables()) + + def run(self, train_samples, train_labels, test_samples, test_labels, *, train_data_iterator, iteration_steps, + test_data_iterator): + """ + 用到Session + :data_iterator: a function that yields chuck of data + """ + self.writer = tf.summary.FileWriter('./board', tf.get_default_graph()) + + with tf.Session(graph=tf.get_default_graph()) as session: + tf.initialize_all_variables().run() + + ### 训练 + print('Start Training') + # batch 1000 + for i, samples, labels in train_data_iterator(train_samples, train_labels, iteration_steps=iteration_steps, + chunkSize=self.train_batch_size): + _, l, predictions, summary = session.run( + [self.optimizer, self.loss, self.train_prediction, self.merged_train_summary], + feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels} + ) + self.writer.add_summary(summary, i) + # labels is True Labels + accuracy, _ = self.accuracy(predictions, labels) + if i % 50 == 0: + print('Minibatch loss at step %d: %f' % (i, l)) + print('Minibatch accuracy: %.1f%%' % accuracy) + ### + + ### 测试 + accuracies = [] + confusionMatrices = [] + for i, samples, labels in test_data_iterator(test_samples, test_labels, chunkSize=self.test_batch_size): + result, summary = session.run( + [self.test_prediction, self.merged_test_summary], + feed_dict={self.tf_test_samples: samples} + ) + self.writer.add_summary(summary, i) + accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True) + accuracies.append(accuracy) + confusionMatrices.append(cm) + print('Test Accuracy: %.1f%%' % accuracy) + print(' Average Accuracy:', np.average(accuracies)) + print('Standard Deviation:', np.std(accuracies)) + self.print_confusion_matrix(np.add.reduce(confusionMatrices)) + ### + + def train(self, train_samples, train_labels, *, data_iterator, iteration_steps): + self.writer = tf.summary.FileWriter('./board', tf.get_default_graph()) + with tf.Session(graph=tf.get_default_graph()) as session: + tf.initialize_all_variables().run() + + ### 训练 + print('Start Training') + # batch 1000 + for i, samples, labels in data_iterator(train_samples, train_labels, iteration_steps=iteration_steps, + chunkSize=self.train_batch_size): + _, l, predictions, summary = session.run( + [self.optimizer, self.loss, self.train_prediction, self.merged_train_summary], + feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels} + ) + self.writer.add_summary(summary, i) + # labels is True Labels + accuracy, _ = self.accuracy(predictions, labels) + if i % 50 == 0: + print('Minibatch loss at step %d: %f' % (i, l)) + print('Minibatch accuracy: %.1f%%' % accuracy) + ### + + # 检查要存放的路径值否存在。这里假定只有一层路径。 + import os + if os.path.isdir(self.save_path.split('/')[0]): + 
save_path = self.saver.save(session, self.save_path) + print("Model saved in file: %s" % save_path) + else: + os.makedirs(self.save_path.split('/')[0]) + save_path = self.saver.save(session, self.save_path) + print("Model saved in file: %s" % save_path) + + def test(self, test_samples, test_labels, *, data_iterator): + if self.saver is None: + self.define_model() + if self.writer is None: + self.writer = tf.summary.FileWriter('./board', tf.get_default_graph()) + + print('Before session') + with tf.Session(graph=tf.get_default_graph()) as session: + self.saver.restore(session, self.save_path) + ### 测试 + accuracies = [] + confusionMatrices = [] + for i, samples, labels in data_iterator(test_samples, test_labels, chunkSize=self.test_batch_size): + result = session.run( + self.test_prediction, + feed_dict={self.tf_test_samples: samples} + ) + # self.writer.add_summary(summary, i) + accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True) + accuracies.append(accuracy) + confusionMatrices.append(cm) + print('Test Accuracy: %.1f%%' % accuracy) + print(' Average Accuracy:', np.average(accuracies)) + print('Standard Deviation:', np.std(accuracies)) + self.print_confusion_matrix(np.add.reduce(confusionMatrices)) + ### + + def accuracy(self, predictions, labels, need_confusion_matrix=False): + """ + 计算预测的正确率与召回率 + @return: accuracy and confusionMatrix as a tuple + """ + _predictions = np.argmax(predictions, 1) + _labels = np.argmax(labels, 1) + cm = confusion_matrix(_labels, _predictions) if need_confusion_matrix else None + # == is overloaded for numpy array + accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0]) + return accuracy, cm + + def visualize_filter_map(self, tensor, *, how_many, display_size, name): + # print(tensor.get_shape) + filter_map = tensor[-1] + # print(filter_map.get_shape()) + filter_map = tf.transpose(filter_map, perm=[2, 0, 1]) + # print(filter_map.get_shape()) + filter_map = tf.reshape(filter_map, (how_many, display_size, display_size, 1)) + # print(how_many) + self.test_summaries.append(tf.summary.image(name, tensor=filter_map, max_outputs=how_many)) + + def print_confusion_matrix(self, confusionMatrix): + print('Confusion Matrix:') + for i, line in enumerate(confusionMatrix): + print(line, line[i] / np.sum(line)) + a = 0 + for i, column in enumerate(np.transpose(confusionMatrix, (1, 0))): + a += (column[i] / np.sum(column)) * (np.sum(column) / 26000) + print(column[i] / np.sum(column), ) + print('\n', np.sum(confusionMatrix), a) diff --git a/Season1_Tensorflow1.1_Python3.5/20/load.py b/Season1_Tensorflow1.1_Python3.5/20/load.py new file mode 100644 index 0000000..d3f6494 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/20/load.py @@ -0,0 +1,120 @@ +# encoding:utf-8 +# Python2 兼容 +from __future__ import print_function, division +from scipy.io import loadmat as load +import matplotlib.pyplot as plt +import numpy as np + + +def reformat(samples, labels): + # 改变原始数据的形状 + # 0 1 2 3 3 0 1 2 + # (图片高,图片宽,通道数,图片数) -> (图片数,图片高,图片宽,通道数) + new = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32) + + # labels 变成 one-hot encoding, [2] -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] + # digit 0 , represented as 10 + # labels 变成 one-hot encoding, [10] -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] + labels = np.array([x[0] for x in labels]) # slow code, whatever + one_hot_labels = [] + for num in labels: + one_hot = [0.0] * 10 + if num == 10: + one_hot[0] = 1.0 + else: + one_hot[num] = 1.0 + one_hot_labels.append(one_hot) + labels = 
np.array(one_hot_labels).astype(np.float32) + return new, labels + + +def normalize(samples): + """ + 并且灰度化: 从三色通道 -> 单色通道 省内存 + 加快训练速度 + (R + G + B) / 3 + 将图片从 0 ~ 255 线性映射到 -1.0 ~ +1.0 + @samples: numpy array + """ + a = np.add.reduce(samples, keepdims=True, axis=3) # shape (图片数,图片高,图片宽,通道数) + a = a / 3.0 + return a / 128.0 - 1.0 + + +def distribution(labels, name): + # 查看一下每个label的分布,再画个统计图 + # keys: + # 0 + # 1 + # 2 + # ... + # 9 + count = {} + for label in labels: + key = 0 if label[0] == 10 else label[0] + if key in count: + count[key] += 1 + else: + count[key] = 1 + x = [] + y = [] + for k, v in count.items(): + # print(k, v) + x.append(k) + y.append(v) + + y_pos = np.arange(len(x)) + plt.bar(y_pos, y, align='center', alpha=0.5) + plt.xticks(y_pos, x) + plt.ylabel('Count') + plt.title(name + ' Label Distribution') + plt.show() + + +def inspect(dataset, labels, i): + # 显示图片看看 + if dataset.shape[3] == 1: + shape = dataset.shape + dataset = dataset.reshape(shape[0], shape[1], shape[2]) + print(labels[i]) + plt.imshow(dataset[i]) + plt.show() + + +train = load('../data/train_32x32.mat') +test = load('../data/test_32x32.mat') +# extra = load('../data/extra_32x32.mat') + +# print('Train Samples Shape:', train['X'].shape) +# print('Train Labels Shape:', train['y'].shape) + +# print('Train Samples Shape:', test['X'].shape) +# print('Train Labels Shape:', test['y'].shape) + +# print('Train Samples Shape:', extra['X'].shape) +# print('Train Labels Shape:', extra['y'].shape) + +train_samples = train['X'] +train_labels = train['y'] +test_samples = test['X'] +test_labels = test['y'] +# extra_samples = extra['X'] +# extra_labels = extra['y'] + +n_train_samples, _train_labels = reformat(train_samples, train_labels) +n_test_samples, _test_labels = reformat(test_samples, test_labels) + +_train_samples = normalize(n_train_samples) +_test_samples = normalize(n_test_samples) + +num_labels = 10 +image_size = 32 +num_channels = 1 + +if __name__ == '__main__': + # 探索数据 + pass + inspect(_train_samples, _train_labels, 1234) +# _train_samples = normalize(_train_samples) +# inspect(_train_samples, _train_labels, 1234) +# distribution(train_labels, 'Train Labels') +# distribution(test_labels, 'Test Labels') diff --git a/Season1_Tensorflow1.1_Python3.5/20/main.py b/Season1_Tensorflow1.1_Python3.5/20/main.py new file mode 100644 index 0000000..ff1a64d --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/20/main.py @@ -0,0 +1,78 @@ +if __name__ == '__main__': + import load + from dp import Network + + train_samples, train_labels = load._train_samples, load._train_labels + test_samples, test_labels = load._test_samples, load._test_labels + + print('Training set', train_samples.shape, train_labels.shape) + print(' Test set', test_samples.shape, test_labels.shape) + + image_size = load.image_size + num_labels = load.num_labels + num_channels = load.num_channels + + + def train_data_iterator(samples, labels, iteration_steps, chunkSize): + """ + Iterator/Generator: get a batch of data + 这个函数是一个迭代器/生成器,用于每一次只得到 chunkSize 这么多的数据 + 用于 for loop, just like range() function + """ + if len(samples) != len(labels): + raise Exception('Length of samples and labels must equal') + stepStart = 0 # initial step + i = 0 + while i < iteration_steps: + stepStart = (i * chunkSize) % (labels.shape[0] - chunkSize) + yield i, samples[stepStart:stepStart + chunkSize], labels[stepStart:stepStart + chunkSize] + i += 1 + + + def test_data_iterator(samples, labels, chunkSize): + """ + Iterator/Generator: get a batch of data + 
这个函数是一个迭代器/生成器,用于每一次只得到 chunkSize 这么多的数据 + 用于 for loop, just like range() function + """ + if len(samples) != len(labels): + raise Exception('Length of samples and labels must equal') + stepStart = 0 # initial step + i = 0 + while stepStart < len(samples): + stepEnd = stepStart + chunkSize + if stepEnd < len(samples): + yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd] + i += 1 + stepStart = stepEnd + + + net = Network( + train_batch_size=64, test_batch_size=500, pooling_scale=2, + dropout_rate=0.9, + base_learning_rate=0.001, decay_rate=0.99) + net.define_inputs( + train_samples_shape=(64, image_size, image_size, num_channels), + train_labels_shape=(64, num_labels), + test_samples_shape=(500, image_size, image_size, num_channels), + ) + # + net.add_conv(patch_size=3, in_depth=num_channels, out_depth=32, activation='relu', pooling=False, name='conv1') + net.add_conv(patch_size=3, in_depth=32, out_depth=32, activation='relu', pooling=True, name='conv2') + net.add_conv(patch_size=3, in_depth=32, out_depth=32, activation='relu', pooling=False, name='conv3') + net.add_conv(patch_size=3, in_depth=32, out_depth=32, activation='relu', pooling=True, name='conv4') + + # 4 = 两次 pooling, 每一次缩小为 1/2 + # 32 = conv4 out_depth + net.add_fc(in_num_nodes=(image_size // 4) * (image_size // 4) * 32, out_num_nodes=128, activation='relu', + name='fc1') + net.add_fc(in_num_nodes=128, out_num_nodes=10, activation=None, name='fc2') + + net.define_model() + # net.run(train_samples, train_labels, test_samples, test_labels, train_data_iterator=train_data_iterator, + # iteration_steps=3000, test_data_iterator=test_data_iterator) + net.train(train_samples, train_labels, data_iterator=train_data_iterator, iteration_steps=2000) + net.test(test_samples, test_labels, data_iterator=test_data_iterator) + +else: + raise Exception('main.py: Should Not Be Imported!!! 
Must Run by "python main.py"') diff --git "a/Season1_Tensorflow1.1_Python3.5/20/\346\200\273\347\273\223.pptx" "b/Season1_Tensorflow1.1_Python3.5/20/\346\200\273\347\273\223.pptx" new file mode 100644 index 0000000..75d8c2c Binary files /dev/null and "b/Season1_Tensorflow1.1_Python3.5/20/\346\200\273\347\273\223.pptx" differ diff --git a/Season1_Tensorflow1.1_Python3.5/4-6/load.py b/Season1_Tensorflow1.1_Python3.5/4-6/load.py new file mode 100644 index 0000000..d129886 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/4-6/load.py @@ -0,0 +1,122 @@ +# encoding:utf-8 +# Python2 兼容 +from __future__ import print_function, division +from scipy.io import loadmat as load +import matplotlib.pyplot as plt +import numpy as np + + +def reformat(samples, labels): + # 改变原始数据的形状 + # 0 1 2 3 3 0 1 2 + # (图片高,图片宽,通道数,图片数) -> (图片数,图片高,图片宽,通道数) + new = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32) + + # labels 变成 one-hot encoding, [2] -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] + # digit 0 , represented as 10 + # labels 变成 one-hot encoding, [10] -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] + labels = np.array([x[0] for x in labels]) # slow code, whatever + one_hot_labels = [] + for num in labels: + one_hot = [0.0] * 10 + if num == 10: + one_hot[0] = 1.0 + else: + one_hot[num] = 1.0 + one_hot_labels.append(one_hot) + labels = np.array(one_hot_labels).astype(np.float32) + return new, labels + + +def normalize(samples): + """ + 并且灰度化: 从三色通道 -> 单色通道 省内存 + 加快训练速度 + (R + G + B) / 3 + 将图片从 0 ~ 255 线性映射到 -1.0 ~ +1.0 + @samples: numpy array + """ + a = np.add.reduce(samples, keepdims=True, axis=3) # shape (图片数,图片高,图片宽,通道数) + a = a / 3.0 + return a / 128.0 - 1.0 + + +def distribution(labels, name): + # 查看一下每个label的分布,再画个统计图 + # keys: + # 0 + # 1 + # 2 + # ... + # 9 + count = {} + for label in labels: + key = 0 if label[0] == 10 else label[0] + if key in count: + count[key] += 1 + else: + count[key] = 1 + x = [] + y = [] + for k, v in count.items(): + # print(k, v) + x.append(k) + y.append(v) + + y_pos = np.arange(len(x)) + plt.bar(y_pos, y, align='center', alpha=0.5) + plt.xticks(y_pos, x) + plt.ylabel('Count') + plt.title(name + ' Label Distribution') + plt.show() + + +def inspect(dataset, labels, i): + # 显示图片看看 + print(labels[i]) + ''' + if dataset.shape[3] == 1: + shape = dataset.shape + dataset = dataset.reshape(shape[0], shape[1], shape[2]) + plt.imshow(dataset[i]) + ''' # 可以改为以下 + plt.imshow(dataset[i].squeeze()) + plt.show() + + +train = load('../data/train_32x32.mat') +test = load('../data/test_32x32.mat') +# extra = load('../data/extra_32x32.mat') + +print('Train Samples Shape:', train['X'].shape) +print('Train Labels Shape:', train['y'].shape) + +print('Train Samples Shape:', test['X'].shape) +print('Train Labels Shape:', test['y'].shape) + +# print('Train Samples Shape:', extra['X'].shape) +# print('Train Labels Shape:', extra['y'].shape) + +train_samples = train['X'] +train_labels = train['y'] +# test_samples = test['X'] +# test_labels = test['y'] +# test_samples = extra['X'] +# test_labels = extra['y'] + +_train_samples, _train_labels = reformat(train_samples, train_labels) +# _test_samples, _test_labels = reformat(test_samples, test_labels) +# +# _train_dataset = normalize(n_train_dataset) +# _test_dataset = normalize(n_test_dataset) + +num_labels = 10 +image_size = 32 + +if __name__ == '__main__': + # 探索数据 + pass +# inspect(_train_samples, _train_labels, 1234) +# _train_samples = normalize(_train_samples) +# inspect(_train_samples, _train_labels, 1234) +# distribution(train_labels, 'Train Labels') +# 
distribution(test_labels, 'Test Labels') diff --git a/Season1_Tensorflow1.1_Python3.5/4-6/run.py b/Season1_Tensorflow1.1_Python3.5/4-6/run.py new file mode 100644 index 0000000..c4ee3af --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/4-6/run.py @@ -0,0 +1,15 @@ +#《TF Girls 修炼指南》第四期 + +# 正式开始机器学习 + +# 首先我们要确定一个目标: 图像识别 + +# 我这里就用Udacity Deep Learning的作业作为辅助了 + +# 1. 下载数据 http://ufldl.stanford.edu/housenumbers/ +# 2. 探索数据 +# 3. 处理数据 +# 4. 构建一个基本网络, 基本的概念+代码 , TensorFlow的世界 +# 5. 卷积ji +# 6. 来实验吧 +# 7. 微调与结果 diff --git a/Season1_Tensorflow1.1_Python3.5/7-9/README.md b/Season1_Tensorflow1.1_Python3.5/7-9/README.md new file mode 100644 index 0000000..26583b1 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/7-9/README.md @@ -0,0 +1,3 @@ +# 神经网络的学习资料 +大家一定要看斯坦福的 cs231n![这里是课程大纲](http://cs231n.github.io/) +其中 Module 1 讲到了神经网络以及很多相关知识。 diff --git a/Season1_Tensorflow1.1_Python3.5/7-9/dp.py b/Season1_Tensorflow1.1_Python3.5/7-9/dp.py new file mode 100644 index 0000000..7de814d --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/7-9/dp.py @@ -0,0 +1,174 @@ +from __future__ import print_function, division +import tensorflow as tf +from sklearn.metrics import confusion_matrix +import numpy as np +import load + +train_samples, train_labels = load._train_samples, load._train_labels +test_samples, test_labels = load._test_samples, load._test_labels + +print('Training set', train_samples.shape, train_labels.shape) +print(' Test set', test_samples.shape, test_labels.shape) + +image_size = load.image_size +num_labels = load.num_labels +num_channels = load.num_channels + + +def get_chunk(samples, labels, chunkSize): + """ + Iterator/Generator: get a batch of data + 这个函数是一个迭代器/生成器,用于每一次只得到 chunkSize 这么多的数据 + 用于 for loop, just like range() function + """ + if len(samples) != len(labels): + raise Exception('Length of samples and labels must equal') + stepStart = 0 # initial step + i = 0 + while stepStart < len(samples): + stepEnd = stepStart + chunkSize + if stepEnd < len(samples): + yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd] + i += 1 + stepStart = stepEnd + + +class Network(): + def __init__(self, num_hidden, batch_size): + """ + @num_hidden: 隐藏层的节点数量 + @batch_size:因为我们要节省内存,所以分批处理数据。每一批的数据量。 + """ + self.batch_size = batch_size + self.test_batch_size = 500 + + # Hyper Parameters + self.num_hidden = num_hidden + + # Graph Related + self.graph = tf.Graph() + self.tf_train_samples = None + self.tf_train_labels = None + self.tf_test_samples = None + self.tf_test_labels = None + self.tf_test_prediction = None + + def define_graph(self): + """ + 定义我的的计算图谱 + """ + with self.graph.as_default(): + # 这里只是定义图谱中的各种变量 + self.tf_train_samples = tf.placeholder( + tf.float32, shape=(self.batch_size, image_size, image_size, num_channels) + ) + self.tf_train_labels = tf.placeholder( + tf.float32, shape=(self.batch_size, num_labels) + ) + self.tf_test_samples = tf.placeholder( + tf.float32, shape=(self.test_batch_size, image_size, image_size, num_channels) + ) + + # fully connected layer 1, fully connected + fc1_weights = tf.Variable( + tf.truncated_normal([image_size * image_size, self.num_hidden], stddev=0.1) + ) + fc1_biases = tf.Variable(tf.constant(0.1, shape=[self.num_hidden])) + + # fully connected layer 2 --> output layer + fc2_weights = tf.Variable( + tf.truncated_normal([self.num_hidden, num_labels], stddev=0.1) + ) + fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels])) + + # 想在来定义图谱的运算 + def model(data): + # fully connected layer 1 + shape = data.get_shape().as_list() + 
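+                # Editor's note (not in the original code): this network has no conv layers,
+                # so the (batch, 32, 32, 1) images are flattened to (batch, 32*32*1 = 1024)
+                # below, which matches fc1_weights' shape
+                # [image_size * image_size, num_hidden] = [1024, 128].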
print(data.get_shape(), shape) + reshape = tf.reshape(data, [shape[0], shape[1] * shape[2] * shape[3]]) + print(reshape.get_shape(), fc1_weights.get_shape(), fc1_biases.get_shape()) + hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases) + + # fully connected layer 2 + return tf.matmul(hidden, fc2_weights) + fc2_biases + + # Training computation. + logits = model(self.tf_train_samples) + self.loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.tf_train_labels) + ) + + # Optimizer. + self.optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(self.loss) + + # Predictions for the training, validation, and test data. + self.train_prediction = tf.nn.softmax(logits) + self.test_prediction = tf.nn.softmax(model(self.tf_test_samples)) + + def run(self): + """ + 用到Session + """ + + # private function + def print_confusion_matrix(confusionMatrix): + print('Confusion Matrix:') + for i, line in enumerate(confusionMatrix): + print(line, line[i] / np.sum(line)) + a = 0 + for i, column in enumerate(np.transpose(confusionMatrix, (1, 0))): + a += (column[i] / np.sum(column)) * (np.sum(column) / 26000) + print(column[i] / np.sum(column), ) + print('\n', np.sum(confusionMatrix), a) + + self.session = tf.Session(graph=self.graph) + with self.session as session: + tf.initialize_all_variables().run() + + ### 训练 + print('Start Training') + # batch 1000 + for i, samples, labels in get_chunk(train_samples, train_labels, chunkSize=self.batch_size): + _, l, predictions = session.run( + [self.optimizer, self.loss, self.train_prediction], + feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels} + ) + # labels is True Labels + accuracy, _ = self.accuracy(predictions, labels) + if i % 50 == 0: + print('Minibatch loss at step %d: %f' % (i, l)) + print('Minibatch accuracy: %.1f%%' % accuracy) + ### + + ### 测试 + accuracies = [] + confusionMatrices = [] + for i, samples, labels in get_chunk(test_samples, test_labels, chunkSize=self.test_batch_size): + result = self.test_prediction.eval(feed_dict={self.tf_test_samples: samples}) + accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True) + accuracies.append(accuracy) + confusionMatrices.append(cm) + print('Test Accuracy: %.1f%%' % accuracy) + print(' Average Accuracy:', np.average(accuracies)) + print('Standard Deviation:', np.std(accuracies)) + print_confusion_matrix(np.add.reduce(confusionMatrices)) + ### + + def accuracy(self, predictions, labels, need_confusion_matrix=False): + """ + 计算预测的正确率与召回率 + @return: accuracy and confusionMatrix as a tuple + """ + _predictions = np.argmax(predictions, 1) + _labels = np.argmax(labels, 1) + cm = confusion_matrix(_labels, _predictions) if need_confusion_matrix else None + # == is overloaded for numpy array + accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0]) + return accuracy, cm + + +if __name__ == '__main__': + net = Network(num_hidden=128, batch_size=100) + net.define_graph() + net.run() diff --git a/Season1_Tensorflow1.1_Python3.5/7-9/load.py b/Season1_Tensorflow1.1_Python3.5/7-9/load.py new file mode 100644 index 0000000..aab6f79 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/7-9/load.py @@ -0,0 +1,120 @@ +# encoding:utf-8 +# Python2 兼容 +from __future__ import print_function, division +from scipy.io import loadmat as load +import matplotlib.pyplot as plt +import numpy as np + + +def reformat(samples, labels): + # 改变原始数据的形状 + # 0 1 2 3 3 0 1 2 + # (图片高,图片宽,通道数,图片数) -> (图片数,图片高,图片宽,通道数) + new = 
np.transpose(samples, (3, 0, 1, 2)).astype(np.float32) + + # labels 变成 one-hot encoding, [2] -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] + # digit 0 , represented as 10 + # labels 变成 one-hot encoding, [10] -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] + labels = np.array([x[0] for x in labels]) # slow code, whatever + one_hot_labels = [] + for num in labels: + one_hot = [0.0] * 10 + if num == 10: + one_hot[0] = 1.0 + else: + one_hot[num] = 1.0 + one_hot_labels.append(one_hot) + labels = np.array(one_hot_labels).astype(np.float32) + return new, labels + + +def normalize(samples): + """ + 并且灰度化: 从三色通道 -> 单色通道 省内存 + 加快训练速度 + (R + G + B) / 3 + 将图片从 0 ~ 255 线性映射到 -1.0 ~ +1.0 + @samples: numpy array + """ + a = np.add.reduce(samples, keepdims=True, axis=3) # shape (图片数,图片高,图片宽,通道数) + a = a / 3.0 + return a / 128.0 - 1.0 + + +def distribution(labels, name): + # 查看一下每个label的分布,再画个统计图 + # keys: + # 0 + # 1 + # 2 + # ... + # 9 + count = {} + for label in labels: + key = 0 if label[0] == 10 else label[0] + if key in count: + count[key] += 1 + else: + count[key] = 1 + x = [] + y = [] + for k, v in count.items(): + # print(k, v) + x.append(k) + y.append(v) + + y_pos = np.arange(len(x)) + plt.bar(y_pos, y, align='center', alpha=0.5) + plt.xticks(y_pos, x) + plt.ylabel('Count') + plt.title(name + ' Label Distribution') + plt.show() + + +def inspect(dataset, labels, i): + # 显示图片看看 + if dataset.shape[3] == 1: + shape = dataset.shape + dataset = dataset.reshape(shape[0], shape[1], shape[2]) + print(labels[i]) + plt.imshow(dataset[i]) + plt.show() + + +train = load('../data/train_32x32.mat') +test = load('../data/test_32x32.mat') +# extra = load('../data/extra_32x32.mat') + +# print('Train Samples Shape:', train['X'].shape) +# print('Train Labels Shape:', train['y'].shape) + +# print('Train Samples Shape:', test['X'].shape) +# print('Train Labels Shape:', test['y'].shape) + +# print('Train Samples Shape:', extra['X'].shape) +# print('Train Labels Shape:', extra['y'].shape) + +train_samples = train['X'] +train_labels = train['y'] +test_samples = test['X'] +test_labels = test['y'] +# extra_samples = extra['X'] +# extra_labels = extra['y'] + +n_train_samples, _train_labels = reformat(train_samples, train_labels) +n_test_samples, _test_labels = reformat(test_samples, test_labels) + +_train_samples = normalize(n_train_samples) +_test_samples = normalize(n_test_samples) + +num_labels = 10 +image_size = 32 +num_channels = 1 + +if __name__ == '__main__': + # 探索数据 + pass +# inspect(_train_samples, _train_labels, 1234) +# _train_samples = normalize(_train_samples) +# inspect(_train_samples, _train_labels, 1234) +# distribution(train_labels, 'Train Labels') +# distribution(test_labels, 'Test Labels') diff --git a/Season1_Tensorflow1.1_Python3.5/7-9/note.txt b/Season1_Tensorflow1.1_Python3.5/7-9/note.txt new file mode 100644 index 0000000..c878b50 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/7-9/note.txt @@ -0,0 +1,34 @@ +欢迎来到《TF Girls 修炼指南 7:解密神经网络 —— 数据流图谱之谜》 + +上一期我们将数据预处理完成了。 + +这一期正式进入深度学习!!! +深度学习!!! +深度学习!!! +深度学习!!! 
+ +这一期会讲一讲理论和基本概念。 + +因为我觉得直接上代码,没有基础的同学可能会觉得非常疑惑。 + +为了制作一个友好的系列教程,我不希望任何同学感到疑惑不解。 + +下一期,进入代码 + + +7分代码,三分理论 + + +欢迎来到《TF Girls 修炼指南 8:解密神经网络 —— 全连接神经网络》 + +我会把整个教程放在Github上。 + +我的Github: https://github.com/CreatCodeBuild + + + +欢迎来到《TF Girls 修炼指南 9:训练并测试神经网络》 + +我使用的 TensorFlow GPU 支持版。 + +我们有7万多张训练图片。不可能一次放进内存来训练 diff --git "a/Season1_Tensorflow1.1_Python3.5/7-9/\346\267\261\345\272\246\345\205\250\350\277\236\346\216\245\347\275\221\347\273\234.pptx" "b/Season1_Tensorflow1.1_Python3.5/7-9/\346\267\261\345\272\246\345\205\250\350\277\236\346\216\245\347\275\221\347\273\234.pptx" new file mode 100644 index 0000000..804c62f Binary files /dev/null and "b/Season1_Tensorflow1.1_Python3.5/7-9/\346\267\261\345\272\246\345\205\250\350\277\236\346\216\245\347\275\221\347\273\234.pptx" differ diff --git "a/Season1_Tensorflow1.1_Python3.5/7-9/\347\256\200\344\273\213.pptx" "b/Season1_Tensorflow1.1_Python3.5/7-9/\347\256\200\344\273\213.pptx" new file mode 100644 index 0000000..01665bf Binary files /dev/null and "b/Season1_Tensorflow1.1_Python3.5/7-9/\347\256\200\344\273\213.pptx" differ diff --git a/Season1_Tensorflow1.1_Python3.5/README.md b/Season1_Tensorflow1.1_Python3.5/README.md new file mode 100644 index 0000000..6145e94 --- /dev/null +++ b/Season1_Tensorflow1.1_Python3.5/README.md @@ -0,0 +1,4 @@ +# Thanks +感谢[@whyscience](https://github.com/whyscience)提供的更新 + +他 Fork 的版本在 https://github.com/whyscience/TensorFlow-and-DeepLearning-Tutorial_Eric/tree/master/Season1_Tensorflow1.1_Python3.5 diff --git a/Season2/1 Word2Vec/word2vec_tf.py b/Season2/1 Word2Vec/word2vec_tf.py index 0049ecb..f00e7a3 100644 --- a/Season2/1 Word2Vec/word2vec_tf.py +++ b/Season2/1 Word2Vec/word2vec_tf.py @@ -183,7 +183,12 @@ def generate_batch(batch_size, num_skips, skip_window): similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True) # Add variable initializer. - init = tf.global_variables_initializer() + tf_major_ver = int(tf.__version__.split(".")[0]) + tf_minor_ver = int(tf.__version__.split(".")[1]) + if(tf_major_ver==0 and tf_minor_ver<12): + init = tf.initialize_all_variables() + else: + init = tf.global_variables_initializer() # Step 5: Begin training. num_steps = 100001
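The final hunk above guards the variable-initializer call behind a TensorFlow version check, since tf.initialize_all_variables() was replaced by tf.global_variables_initializer() in TF 0.12. A minimal sketch of the same idea as a standalone helper, assuming only that tf.__version__ follows the usual "major.minor.patch" form of TF 0.x/1.x (names below are illustrative, not part of the patch):

import tensorflow as tf

def variable_init_op():
    """Return the variable-initializer op appropriate for the installed TF version."""
    major, minor = (int(x) for x in tf.__version__.split(".")[:2])
    if major == 0 and minor < 12:
        # Before TF 0.12 the op was called initialize_all_variables()
        return tf.initialize_all_variables()
    # From TF 0.12 onward it is global_variables_initializer()
    return tf.global_variables_initializer()

# Usage (sketch):
# with tf.Session() as sess:
#     sess.run(variable_init_op())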