From e3680e9975fcc17e1f08d8cd56b2aa75e686cfc0 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 13 Aug 2017 08:41:11 -0700
Subject: [PATCH 01/78] Send split tensors to make_parallel.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 8fd4ceb..28664c6 100644
--- a/README.md
+++ b/README.md
@@ -553,7 +553,7 @@ def make_parallel(fn, num_gpus, **kwargs):
     for i in range(num_gpus):
         with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
             with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
-                out_split.append(fn(**kwargs))
+                out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
 
     return tf.concat(out_split, axis=0)
 

From bcd8ec9475e73df659fa0560cedda8016d4d3d17 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 13 Aug 2017 12:04:07 -0700
Subject: [PATCH 02/78] Added make_parallel to cookbook.

---
 README.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/README.md b/README.md
index 28664c6..3c457c2 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@ Table of Contents
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
+    - [Make parallel](#make_parallel)
 
 ## Tensorflow Basics
 <a name="basics"></a>
@@ -925,3 +926,31 @@ def entropy(logits, dims=-1):
   nplogp = probs * (tf.reduce_logsumexp(logits, dims, keep_dims=True) - logits)
   return tf.reduce_sum(nplogp, dims)
 ```
+
+## Make parallel <a name="make_parallel"></a>
+
+```python
+def make_parallel(fn, num_gpus, **kwargs):
+  """Parallelize given model on multiple gpu devices.
+
+  Args:
+    fn: Arbitrary function that takes a set of input tensors and outputs a
+        single tensor. First dimension of inputs and output tensor are assumed
+        to be batch dimension.
+    num_gpus: Number of GPU devices.
+    **kwargs: Keyword arguments to be passed to the model.
+  Returns:
+    A tensor corresponding to the model output.
+  """
+  in_splits = {}
+  for k, v in kwargs.items():
+    in_splits[k] = tf.split(v, num_gpus)
+
+  out_split = []
+  for i in range(num_gpus):
+    with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
+      with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
+        out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
+
+  return tf.concat(out_split, axis=0)
+```

From f83574f8095322102c4842c9921f5368d6e5f1af Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 13 Aug 2017 16:28:44 -0700
Subject: [PATCH 03/78] Debugging Tensorflow code.

---
 README.md | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 TODO      |  13 +++++
 2 files changed, 157 insertions(+), 9 deletions(-)
 create mode 100644 TODO

diff --git a/README.md b/README.md
index 3c457c2..3c8d478 100644
--- a/README.md
+++ b/README.md
@@ -2,15 +2,16 @@
 
 Table of Contents
 =================
-1. [Tensorflow Basics](#basics)
-2. [Understanding static and dynamic shapes](#shapes)
-3. [Broadcasting the good and the ugly](#broadcast)
-4. [Understanding order of execution and control dependencies](#control_deps)
-5. [Control flow operations: conditionals and loops](#control_flow)
-6. [Prototyping kernels and advanced visualization with Python ops](#python_ops)
-7. [Multi-GPU processing with data parallelism](#multi_gpu)
-8. [Building a neural network training framework with learn API](#tf_learn)
-9. [Tensorflow Cookbook](#cookbook)
+1.  [Tensorflow Basics](#basics)
+2.  [Understanding static and dynamic shapes](#shapes)
+3.  [Broadcasting the good and the ugly](#broadcast)
+4.  [Understanding order of execution and control dependencies](#control_deps)
+5.  [Control flow operations: conditionals and loops](#control_flow)
+6.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
+7.  [Multi-GPU processing with data parallelism](#multi_gpu)
+8.  [Debugging Tensorflow models](#debug)
+9.  [Building a neural network training framework with learn API](#tf_learn)
+10. [Tensorflow Cookbook](#cookbook)
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
@@ -616,6 +617,140 @@ train_op = tf.train.AdamOptimizer(0.1).minimize(
 
 The only thing that we need to change to parallelize backpropagation of gradients is to set the colocate_gradients_with_ops flag to true. This ensures that gradient ops run on the same device as the original op.
 
+## Debugging Tensorflow models
+<a name="debug"></a>
+Symbolic nature of Tensorflow makes it relatively more difficult to debug Tensorflow code compared to regular python code. Here we introduce a number of tools included with Tensorflow that make debugging much easier.
+
+Probably the most common error one can make when using Tensorflow is passing Tensors of wrong shape to ops. Many Tensorflow ops can operate on tensors of different ranks and shapes. This can be convenient when using the API, but may lead to extra headache when things go wrong.
+
+For example, consider the tf.matmul op, it can multiply two matrices:
+```python
+a = tf.random_uniform([2, 3])
+b = tf.random_uniform([3, 4])
+c = tf.matmul(a, b)  # c is a tensor of shape [2, 4]
+```
+
+But the same function also does batch matrix multiplication:
+```python
+a = tf.random_uniform([10, 2, 3])
+b = tf.random_uniform([10, 3, 4])
+c = tf.matmul(a, b)  # c is a tensor of shape [10, 2, 4]
+```
+
+Another example that we talked about before in the [broadcasting](#broadcast) section is add operation which supports broadcasting:
+```python
+a = tf.constant([[1.], [2.]])
+b = tf.constant([1., 2.])
+c = a + b  # c is a tensor of shape [2, 2]
+```
+
+### Validating your tensors with tf.assert* ops
+
+One way to reduce the chance of unwanted behavior is to explicitly verify the rank or shape of intermediate tensors with tf.assert* ops.
+```python
+a = tf.constant([[1.], [2.]])
+b = tf.constant([1., 2.])
+check_a = tf.assert_rank(a, 1)  # This will raise an InvalidArgumentError exception
+check_b = tf.assert_rank(b, 1)
+with tf.control_dependencies([check_a, check_b]):
+    c = a + b  # c is a tensor of shape [2, 2]
+```
+Remember that assertion nodes like other operations are part of the graph and if not evaluated would get pruned during Session.run(). So make sure to make explicit dependencies to assertion ops, to force Tensorflow to execute them.
+
+You can also use assertions to validate the value of tensors at runtime:
+```python
+check_pos = tf.assert_positive(a)
+```
+See the official docs for a full list of assertion ops.
+
+### Logging tensor values with tf.Print
+
+Another useful built-in function is tf.Print which logs the given tensors to the standard error:
+
+```python
+input_copy = tf.Print(input, tensors_to_print_list)
+```
+Note that tf.Print returns a copy of its first argument as output. One way to force tf.Print to run is to pass its output to another op that gets executed. For example if we want to print value of tensors a and b before adding them we could do something like this:
+```python
+a = ...
+b = ...
+a = tf.Print(a, [a, b])
+c = a + b
+```
+
+Alternatively we could manually define a control dependency.
+
+### Check your gradients with tf.compute_gradient_error
+
+__Not__ all the operations in Tensorflow come with gradients, and it's possible to write a non-automatic differentiable graph in Tensorflow without knowing.
+
+Let's look at an example:
+```python
+import tensorflow as tf
+
+def non_differentiable_entropy(logits):
+    probs = tf.nn.softmax(logits)
+    return tf.nn.softmax_cross_entropy_with_logits(labels=probs, logits=logits)
+
+w = tf.get_variable('w', shape=[5])
+y = -non_differentiable_entropy(w)
+
+opt = tf.train.AdamOptimizer()
+train_op = opt.minimize(y)
+
+sess = tf.Session()
+sess.run(tf.global_variables_initializer())
+for i in range(10000):
+    sess.run(train_op)
+
+print(sess.run(tf.nn.softmax(w)))
+```
+We are using tf.nn.softmax_cross_entropy_with_logits to define entropy over a categorical distribution. We then use Adam optimizer to find the weights with maximum entropy. If you have taken a course on information theory, you would know that the uniform distribution has the maximum entropy. So you would expect the result to be [0.2, 0.2, 0.2, 0.2, 0.2]. But if you run this you may get unexpected results like this:
+```
+[ 0.34081486  0.24287023  0.23465775  0.08935683  0.09230034]
+```
+It turns out tf.nn.softmax_cross_entropy_with_logits has undefined gradients with respect to labels! But how may we spot this if we didn't know?
+
+Fortunately for us Tensorflow comes with a numerical differentiator that can be used to find symbolic gradient errors. Let's see how we can use it:
+
+```python
+with tf.Session():
+    diff = tf.test.compute_gradient_error(w, [5], y, [])
+    print(diff)
+```
+If you run this, you would see that the difference between the numerical and symbolic gradients is pretty high (0.06 - 0.1 in my tries).
+
+Now let's fix our function with a differentiable version of the entropy and check again:
+```python
+import tensorflow as tf
+import numpy as np
+
+def entropy(logits, dim=-1):
+    probs = tf.nn.softmax(logits, dim)
+    nplogp = probs * (tf.reduce_logsumexp(logits, dim, keep_dims=True) - logits)
+    return tf.reduce_sum(nplogp, dim)
+
+w = tf.get_variable('w', shape=[5])
+y = -entropy(w)
+# y = -non_differentiable_entropy(w)
+
+print(w.get_shape())
+print(y.get_shape())
+
+with tf.Session() as sess:
+    diff = tf.test.compute_gradient_error(w, [5], y, [])
+    print(diff)
+```
+The difference should be ~0.0001 which looks much better.
+
+Now if you run the optimizer again with the correct version you can see the final weights would be:
+```
+[ 0.2  0.2  0.2  0.2  0.2]
+```
+which is exactly what we wanted.
+
+Tensorflow summaries, and tfdbg (TensorFlow Debugger) are other tools that can be used for debugging. Please refer to the official docs to learn more.
+
 ## Building a neural network training framework with learn API
 <a name="tf_learn"></a>
 For simplicity, in most of the examples here we manually create sessions and we don't care about saving and loading checkpoints but this is not how we usually do things in practice. You most probably want to use the learn API to take care of session management and logging. We provide a simple but practical framework in the [code/framework](https://github.com/vahidk/EffectiveTensorflow/tree/master/code/framework) directory for training neural networks using Tensorflow. In this item we explain how this framework works.
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..c427ac6
--- /dev/null
+++ b/TODO
@@ -0,0 +1,13 @@
+RevNets in tensorflow
+
+Profiling and optimizing models
+  XLA
+
+Visualization
+
+Debugging:
+  summaries
+  tf.Debugger
+
+Distributed computing
+

From b17d16a0009e35a1f2e10eada2fb2b92e6fd5d78 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 13 Aug 2017 22:19:17 -0700
Subject: [PATCH 04/78] Fixed typos.

---
 .gitignore               |  2 +-
 README.md                |  8 ++++----
 TODO                     | 13 -------------
 code/framework/README.md |  2 +-
 4 files changed, 6 insertions(+), 19 deletions(-)
 delete mode 100644 TODO

diff --git a/.gitignore b/.gitignore
index c9c0de5..021d7a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@
 .vscode
 *.pyc
 code/framework/data
-code/framework/output
+code/framework/output
\ No newline at end of file
diff --git a/README.md b/README.md
index 3c8d478..4fce1c7 100644
--- a/README.md
+++ b/README.md
@@ -655,7 +655,7 @@ check_b = tf.assert_rank(b, 1)
 with tf.control_dependencies([check_a, check_b]):
     c = a + b  # c is a tensor of shape [2, 2]
 ```
-Remember that assertion nodes like other operations are part of the graph and if not evaluated would get pruned during Session.run(). So make sure to make explicit dependencies to assertion ops, to force Tensorflow to execute them.
+Remember that assertion nodes like other operations are part of the graph and if not evaluated would get pruned during Session.run(). So make sure to create explicit dependencies to assertion ops, to force Tensorflow to execute them.
 
 You can also use assertions to validate the value of tensors at runtime:
 ```python
@@ -665,12 +665,12 @@ See the official docs for a full list of assertion ops.
 
 ### Logging tensor values with tf.Print
 
-Another useful built-in function is tf.Print which logs the given tensors to the standard error:
+Another useful built-in function for debugging is tf.Print which logs the given tensors to the standard error:
 
 ```python
 input_copy = tf.Print(input, tensors_to_print_list)
 ```
-Note that tf.Print returns a copy of its first argument as output. One way to force tf.Print to run is to pass its output to another op that gets executed. For example if we want to print value of tensors a and b before adding them we could do something like this:
+Note that tf.Print returns a copy of its first argument as output. One way to force tf.Print to run is to pass its output to another op that gets executed. For example if we want to print the value of tensors a and b before adding them we could do something like this:
 ```python
 a = ...
 b = ...
@@ -682,7 +682,7 @@ Alternatively we could manually define a control dependency.
 
 ### Check your gradients with tf.compute_gradient_error
 
-__Not__ all the operations in Tensorflow come with gradients, and it's possible to write a non-automatic differentiable graph in Tensorflow without knowing.
+__Not__ all the operations in Tensorflow come with gradients, and it's easy to unintentionally build graphs for which Tensorflow can not compute the gradients.
 
 Let's look at an example:
 ```python
diff --git a/TODO b/TODO
deleted file mode 100644
index c427ac6..0000000
--- a/TODO
+++ /dev/null
@@ -1,13 +0,0 @@
-RevNets in tensorflow
-
-Profiling and optimizing models
-  XLA
-
-Visualization
-
-Debugging:
-  summaries
-  tf.Debugger
-
-Distributed computing
-
diff --git a/code/framework/README.md b/code/framework/README.md
index a0de0ef..e9e3162 100644
--- a/code/framework/README.md
+++ b/code/framework/README.md
@@ -25,7 +25,7 @@ To train an mnist classification model run:
 python -m main --model=convnet_classifier --dataset=mnist
 ```
 
-To visualize the training logs in Tensorboard run:
+To visualize the training logs on Tensorboard run:
 ```
 tensorboard --logdir=output
 ```

From 77965d8f0711faf1dda5d532f70db00bbfe33800 Mon Sep 17 00:00:00 2001
From: David Moodie <davidmoodie12@gmail.com>
Date: Mon, 14 Aug 2017 07:29:19 +0100
Subject: [PATCH 05/78] Fix typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4fce1c7..9208db9 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
 yhat = tf.squeeze(tf.matmul(f, w), 1)
 
 # The loss is defined to be the l2 distance between our estimate of y and its
-# true value. We also added a shrinkage term, tp ensure the resulting weights
+# true value. We also added a shrinkage term, to ensure the resulting weights
 # would be small.
 loss = tf.nn.l2_loss(yhat - y) + 0.1 * tf.nn.l2_loss(w)
 

From ba40bdc6bc7deb8ee5d40db1149a459dd136eaa3 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 00:44:23 -0700
Subject: [PATCH 06/78] Clarifications.

---
 README.md | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 4fce1c7..94d3f6e 100644
--- a/README.md
+++ b/README.md
@@ -45,8 +45,7 @@ z_val = sess.run(z)
 
 print(z_val)
 ```
-Unlike numpy that immediately performs the computation and copies the result to
-the output variable z, tensorflow only gives us a handle (of type Tensor) to a node in the graph that represents the result. If we try printing the value of z directly, we get something like this:
+Unlike numpy, which immediately performs the computation and produces the result, tensorflow only gives us a handle (of type Tensor) to a node in the graph that represents the result. If we try printing the value of z directly, we get something like this:
 ```
 Tensor("MatMul:0", shape=(10, 10), dtype=float32)
 ```
@@ -56,7 +55,7 @@ Since both the inputs have a fully defined shape, tensorflow is able to infer th
 __Tip__: When using Jupyter notebook make sure to call tf.reset_default_graph() at the beginning to clear the symbolic graph before defining new nodes.
 ***
 
-To understand how powerful symbolic computation can be let's have a look at another example. Assume that we have samples from a curve (say f(x) = 5x^2 + 3) and we want to estimate f(x) without knowing its parameters. We define a parametric function g(x, w) = w0 x^2 + w1 x + w2, which is a function of the input x and latent parameters w, our goal is then to find the latent parameters such that g(x, w) ≈ f(x). This can be done by minimizing the following loss function: L(w) = (f(x) - g(x, w))^2. Although there's a closed form solution for this simple problem, we opt to use a more general approach that can be applied to any arbitrary differentiable function, and that is using stochastic gradient descent. We simply compute the average gradient of L(w) with respect to w over a set of sample points and move in the opposite direction.
+To understand how powerful symbolic computation can be let's have a look at another example. Assume that we have samples from a curve (say f(x) = 5x^2 + 3) and we want to estimate f(x) based on these samples. We define a parametric function g(x, w) = w0 x^2 + w1 x + w2, which is a function of the input x and latent parameters w, our goal is then to find the latent parameters such that g(x, w) ≈ f(x). This can be done by minimizing the following loss function: L(w) = &sum; (f(x) - g(x, w))^2. Although there's a closed form solution for this simple problem, we opt to use a more general approach that can be applied to any arbitrary differentiable function, and that is using stochastic gradient descent. We simply compute the average gradient of L(w) with respect to w over a set of sample points and move in the opposite direction.
 
 Here's how it can be done in Tensorflow:
 
@@ -79,7 +78,7 @@ f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
 yhat = tf.squeeze(tf.matmul(f, w), 1)
 
 # The loss is defined to be the l2 distance between our estimate of y and its
-# true value. We also added a shrinkage term, tp ensure the resulting weights
+# true value. We also added a shrinkage term, to ensure the resulting weights
 # would be small.
 loss = tf.nn.l2_loss(yhat - y) + 0.1 * tf.nn.l2_loss(w)
 
@@ -130,16 +129,15 @@ dynamic_shape = tf.shape(a)
 
 The static shape of a tensor can be set with Tensor.set_shape() method:
 ```python
-a.set_shape([32, 128])
+a.set_shape([32, 128])  # static shape of a is [32, 128]
+a.set_shape([None, 128])  # first dimension of a is determined dynamically
 ```
-Use this function only if you know what you are doing, in practice it's safer to do dynamic reshaping with tf.reshape() op:
 
+You can reshape a given tensor dynamically using tf.reshape function:
 ```python
 a =  tf.reshape(a, [32, 128])
 ```
-
-If you feed 'a' with values that don't match the shape, you will get an InvalidArgumentError indicating that the
-number of values fed doesn't match the expected shape.
+Note that attempts to feed 'a' with values that don't match its shape will raise an InvalidArgumentError exception.
 
 It can be convenient to have a function that returns the static shape when available and dynamic shape when it's not. The following utility function does just that:
 ```python
@@ -408,7 +406,7 @@ c = c.stack()
 
 print(tf.Session().run(c))
 ```
-Tensorflow while loops and tensor arrays are essential tools for building complex recurrent neural networks. As an exercise try writing a [beam search using](https://en.wikipedia.org/wiki/Beam_search) tf.while_loops. Can you make it more efficient with tensor arrays?
+Tensorflow while loops and tensor arrays are essential tools for building complex recurrent neural networks. As an exercise try implementing [beam search](https://en.wikipedia.org/wiki/Beam_search) using tf.while_loops. Can you make it more efficient with tensor arrays?
 
 ## Prototyping kernels and advanced visualization with Python ops
 <a name="python_ops"></a>
@@ -569,7 +567,7 @@ You can replace the model with any function that takes a set of tensors as input
 
 Let's look at a slightly more practical example. We want to train a neural network on multiple GPUs. During training we not only need to compute the forward pass but also need to compute the backward pass (the gradients). But how can we parallelize the gradient computation? This turns out to be pretty easy.
 
-Recall from the first item that we wanted to fit a second degree curve to a set of samples. We reorganized the code a bit to have the bulk of the operations in the model function:
+Recall from the first item that we wanted to fit a second degree polynomial to a set of samples. We reorganized the code a bit to have the bulk of the operations in the model function:
 ```python
 import numpy as np
 import tensorflow as tf

From d7167e88057a4414607c6aa8a0b1a5a62013fe73 Mon Sep 17 00:00:00 2001
From: cocowalla <colin.anderson333@gmail.com>
Date: Mon, 14 Aug 2017 13:23:58 +0100
Subject: [PATCH 07/78] Nitpicking grammar changes in readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 94d3f6e..80829e1 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Table of Contents
 
 ## Tensorflow Basics
 <a name="basics"></a>
-The most striking difference between Tensorflow and other numerical computation libraries such as numpy is that operations in Tensorflow are symbolic. This is a powerful concept that allows Tensorflow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as numpy. But it also comes at the cost of making it harder to grasp. Our attempt here is demystify Tensorflow and provide some guidelines and best practices for more effective use of Tensorflow.
+The most striking difference between Tensorflow and other numerical computation libraries such as numpy is that operations in Tensorflow are symbolic. This is a powerful concept that allows Tensorflow to do all sorts of things (e.g. automatic differentiation) that are not possible with imperative libraries such as numpy - but it also comes at the cost of making it harder to grasp. We attempt here to demystify Tensorflow and provide some guidelines and best practices for more effective use of Tensorflow.
 
 Let's start with a simple example, we want to multiply two random matrices. First we look at an implementation done in numpy:
 ```python

From cb4d13ed7687c020c9d242e547cdba7d2bb1daae Mon Sep 17 00:00:00 2001
From: Piper Chester <piperchester@users.noreply.github.com>
Date: Mon, 14 Aug 2017 08:31:39 -0400
Subject: [PATCH 08/78] README: fix `Tensorflow` casing

---
 README.md | 84 +++++++++++++++++++++++++++----------------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index 94d3f6e..14a0151 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,25 @@
-# Effective Tensorflow
+# Effective TensorFlow
 
 Table of Contents
 =================
-1.  [Tensorflow Basics](#basics)
+1.  [TensorFlow Basics](#basics)
 2.  [Understanding static and dynamic shapes](#shapes)
 3.  [Broadcasting the good and the ugly](#broadcast)
 4.  [Understanding order of execution and control dependencies](#control_deps)
 5.  [Control flow operations: conditionals and loops](#control_flow)
 6.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
 7.  [Multi-GPU processing with data parallelism](#multi_gpu)
-8.  [Debugging Tensorflow models](#debug)
+8.  [Debugging TensorFlow models](#debug)
 9.  [Building a neural network training framework with learn API](#tf_learn)
-10. [Tensorflow Cookbook](#cookbook)
+10. [TensorFlow Cookbook](#cookbook)
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
     - [Make parallel](#make_parallel)
 
-## Tensorflow Basics
+## TensorFlow Basics
 <a name="basics"></a>
-The most striking difference between Tensorflow and other numerical computation libraries such as numpy is that operations in Tensorflow are symbolic. This is a powerful concept that allows Tensorflow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as numpy. But it also comes at the cost of making it harder to grasp. Our attempt here is demystify Tensorflow and provide some guidelines and best practices for more effective use of Tensorflow.
+The most striking difference between TensorFlow and other numerical computation libraries such as numpy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sorts of things (e.g. automatic differentiation) that are not possible with imperative libraries such as numpy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.
 
 Let's start with a simple example, we want to multiply two random matrices. First we look at an implementation done in numpy:
 ```python
@@ -32,7 +32,7 @@ z = np.dot(x, y)
 print(z)
 ```
 
-Now we perform the exact same computation this time in Tensorflow:
+Now we perform the exact same computation this time in TensorFlow:
 ```python
 import tensorflow as tf
 
@@ -57,13 +57,13 @@ __Tip__: When using Jupyter notebook make sure to call tf.reset_default_graph()
 
 To understand how powerful symbolic computation can be let's have a look at another example. Assume that we have samples from a curve (say f(x) = 5x^2 + 3) and we want to estimate f(x) based on these samples. We define a parametric function g(x, w) = w0 x^2 + w1 x + w2, which is a function of the input x and latent parameters w, our goal is then to find the latent parameters such that g(x, w) ≈ f(x). This can be done by minimizing the following loss function: L(w) = &sum; (f(x) - g(x, w))^2. Although there's a closed form solution for this simple problem, we opt to use a more general approach that can be applied to any arbitrary differentiable function, and that is using stochastic gradient descent. We simply compute the average gradient of L(w) with respect to w over a set of sample points and move in the opposite direction.
 
-Here's how it can be done in Tensorflow:
+Here's how it can be done in TensorFlow:
 
 ```python
 import numpy as np
 import tensorflow as tf
 
-# Placeholders are used to feed values from python to Tensorflow ops. We define
+# Placeholders are used to feed values from python to TensorFlow ops. We define
 # two placeholders, one for input feature x, and one for output y.
 x = tf.placeholder(tf.float32)
 y = tf.placeholder(tf.float32)
@@ -105,11 +105,11 @@ By running this piece of code you should see a result close to this:
 ```
 Which is a relatively close approximation to our parameters.
 
-This is just tip of the iceberg for what Tensorflow can do. Many problems such a optimizing large neural networks with millions of parameters can be implemented efficiently in Tensorflow in just a few lines of code. Tensorflow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
+This is just the tip of the iceberg for what TensorFlow can do. Many problems such as optimizing large neural networks with millions of parameters can be implemented efficiently in TensorFlow in just a few lines of code. TensorFlow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
 
 ## Understanding static and dynamic shapes
 <a name="shapes"></a>
-Tensors in Tensorflow have a static shape attribute which is determined during graph construction. The static shape may be underspecified. For example we might define a float32 tensor of shape [None, 128]:
+Tensors in TensorFlow have a static shape attribute which is determined during graph construction. The static shape may be underspecified. For example we might define a float32 tensor of shape [None, 128]:
 ```python
 import tensorflow as tf
 
@@ -184,7 +184,7 @@ b = tf.reshape(b, [0, [1, 2]])
 
 ## Broadcasting the good and the ugly
 <a name="broadcast"></a>
-Tensorflow supports broadcasting elementwise operations. Normally when you want to perform operations like addition and multiplication, you need to make sure that shapes of the operands match, e.g. you can’t add a tensor of shape [3, 2] to a tensor of shape [3, 4]. But there’s a special case and that’s when you have a singular dimension. Tensorflow implicitly tiles the tensor across its singular dimensions to match the shape of the other operand. So it’s valid to add a tensor of shape [3, 2] to a tensor of shape [3, 1]
+TensorFlow supports broadcasting elementwise operations. Normally when you want to perform operations like addition and multiplication, you need to make sure that shapes of the operands match, e.g. you can’t add a tensor of shape [3, 2] to a tensor of shape [3, 4]. But there’s a special case and that’s when you have a singular dimension. TensorFlow implicitly tiles the tensor across its singular dimensions to match the shape of the other operand. So it’s valid to add a tensor of shape [3, 2] to a tensor of shape [3, 1]
 
 ```python
 import tensorflow as tf
@@ -236,7 +236,7 @@ b = tf.constant([1., 2.])
 c = tf.reduce_sum(a + b)
 ```
 
-What do you think would the value of c would after evaluation? If you guessed 6, that’s wrong. It’s going to be 12. This is because when rank of two tensors don’t match, Tensorflow automatically expands the first dimension of the tensor with lower rank before the elementwise operation, so the result of addition would be [[2, 3], [3, 4]], and the reducing over all parameters would give us 12.
+What do you think the value of c would be after evaluation? If you guessed 6, that’s wrong. It’s going to be 12. This is because when the ranks of two tensors don’t match, TensorFlow automatically expands the first dimension of the tensor with lower rank before the elementwise operation, so the result of addition would be [[2, 3], [3, 4]], and reducing over all elements would give us 12.
 
 The way to avoid this problem is to be as explicit as possible. Had we specified which dimension we would want to reduce across, catching this bug would have been much easier:
 
@@ -250,7 +250,7 @@ Here the value of c would be [5, 7], and we immediately would guess based on the
 
 ## Understanding order of execution and control dependencies
 <a name="control_deps"></a>
-As we discussed in the first item, Tensorflow doesn't immediately run the operations that are defined but rather creates corresponding nodes in a graph that can be evaluated with Session.run() method. This also enables Tensorflow to do optimizations at run time to determine the optimal order of execution and possible trimming of unused nodes. If you only have tf.Tensors in your graph you don't need to worry about dependencies but you most probably have tf.Variables too, and tf.Variables make things much more difficult. My advice to is to only use Variables if Tensors don't do the job. This might not make a lot of sense to you now, so let's start with an example.
+As we discussed in the first item, TensorFlow doesn't immediately run the operations that are defined but rather creates corresponding nodes in a graph that can be evaluated with Session.run() method. This also enables TensorFlow to do optimizations at run time to determine the optimal order of execution and possible trimming of unused nodes. If you only have tf.Tensors in your graph you don't need to worry about dependencies but you most probably have tf.Variables too, and tf.Variables make things much more difficult. My advice is to only use Variables if Tensors don't do the job. This might not make a lot of sense to you now, so let's start with an example.
 
 ```python
 import tensorflow as tf
@@ -265,7 +265,7 @@ tf.Session().run(a)
 Evaluating "a" will return the value 3 as expected.  Note that here we are creating 3 tensors, two constant tensors and another tensor that stores the result of the addition. Note that you can't overwrite the value of a tensor. If you want to modify it you have to create a new tensor. As we did here.
 
 ***
-__TIP__: If you don't define a new graph, Tensorflow automatically creates a graph for you by default. You can use tf.get_default_graph() to get a handle to the graph. You can then inspect the graph, for example by printing all its tensors:
+__TIP__: If you don't define a new graph, TensorFlow automatically creates a graph for you by default. You can use tf.get_default_graph() to get a handle to the graph. You can then inspect the graph, for example by printing all its tensors:
 ```python
 print(tf.contrib.graph_editor.get_tensors(tf.get_default_graph()))
 ```
@@ -298,7 +298,7 @@ for i in range(10):
 ```
 Note that the tensor c here won't have a deterministic value. This value might be 3 or 7 depending on whether addition or assignment gets executed first.
 
-You should note that the order that you define ops in your code doesn't matter to Tensorflow runtime. The only thing that matters is the control dependencies. Control dependencies for tensors are straightforward. Every time you use a tensor in an operation that op will define an implicit dependency to that tensor. But things get complicated with variables because they can take many values.
+You should note that the order that you define ops in your code doesn't matter to TensorFlow runtime. The only thing that matters is the control dependencies. Control dependencies for tensors are straightforward. Every time you use a tensor in an operation that op will define an implicit dependency to that tensor. But things get complicated with variables because they can take many values.
 
 When dealing with variables, you may need to explicitly define dependencies using tf.control_dependencies() as follows:
 ```python
@@ -333,7 +333,7 @@ print(tf.Session().run(x))
 ```
 Since the predicate is True in this case, the output would be the result of the addition, which is 3.
 
-Most of the times when using Tensorflow you are using large tensors and want to perform operations in batch. A related conditional operation is tf.where, which like tf.cond takes a predicate, but selects the output based on the condition in batch.
+Most of the times when using TensorFlow you are using large tensors and want to perform operations in batch. A related conditional operation is tf.where, which like tf.cond takes a predicate, but selects the output based on the condition in batch.
 ```python
 a = tf.constant([1, 1])
 b = tf.constant([2, 2])
@@ -346,7 +346,7 @@ print(tf.Session().run(x))
 ```
 This will return [3, 2].
 
-Another widely used control flow operation is tf.while_loop. It allows building dynamic loops in Tensorflow that operate on sequences of variable length. Let's see how we can generate Fibonacci sequence with tf.while_loops:
+Another widely used control flow operation is tf.while_loop. It allows building dynamic loops in TensorFlow that operate on sequences of variable length. Let's see how we can generate Fibonacci sequence with tf.while_loops:
 ```python
 n = tf.constant(5)
 
@@ -376,7 +376,7 @@ i, a, b, c = tf.while_loop(cond, body, (2, 1, 1, tf.constant([1, 1])))
 
 print(tf.Session().run(c))
 ```
-Now if you try running this, Tensorflow will complain that the shape of the the fourth loop variable is changing. So you must make that explicit that it's intentional:
+Now if you try running this, TensorFlow will complain that the shape of the fourth loop variable is changing. So you must make it explicit that it's intentional:
 ```
 i, a, b, c = tf.while_loop(
     cond, body, (2, 1, 1, tf.constant([1, 1])),
@@ -385,7 +385,7 @@ i, a, b, c = tf.while_loop(
                       tf.TensorShape([]),
                       tf.TensorShape([None])))
 ```
-This is not only getting ugly, but is also somewhat inefficient. Note that we are building a lot of intermediary tensors that we don't use. Tensorflow has a better solution for this kind of growing arrays. Meet tf.TensorArray. Let's do the same thing this time with tensor arrays:
+This is not only getting ugly, but is also somewhat inefficient. Note that we are building a lot of intermediary tensors that we don't use. TensorFlow has a better solution for this kind of growing arrays. Meet tf.TensorArray. Let's do the same thing this time with tensor arrays:
 ```python
 n = tf.constant(5)
 
@@ -406,13 +406,13 @@ c = c.stack()
 
 print(tf.Session().run(c))
 ```
-Tensorflow while loops and tensor arrays are essential tools for building complex recurrent neural networks. As an exercise try implementing [beam search](https://en.wikipedia.org/wiki/Beam_search) using tf.while_loops. Can you make it more efficient with tensor arrays?
+TensorFlow while loops and tensor arrays are essential tools for building complex recurrent neural networks. As an exercise try implementing [beam search](https://en.wikipedia.org/wiki/Beam_search) using tf.while_loops. Can you make it more efficient with tensor arrays?
 
 ## Prototyping kernels and advanced visualization with Python ops
 <a name="python_ops"></a>
-Operation kernels in Tensorflow are entirely written in C++ for efficiency. But writing a Tensorflow kernel in C++ can be quite a pain. So, before spending hours implementing your kernel you may want to prototype something quickly, however inefficient. With tf.py_func() you can turn any piece of python code to a Tensorflow operation.
+Operation kernels in TensorFlow are entirely written in C++ for efficiency. But writing a TensorFlow kernel in C++ can be quite a pain. So, before spending hours implementing your kernel you may want to prototype something quickly, however inefficient. With tf.py_func() you can turn any piece of python code to a TensorFlow operation.
 
-For example this is how you can implement a simple ReLU nonlinearity kernel in Tensorflow as a python op:
+For example this is how you can implement a simple ReLU nonlinearity kernel in TensorFlow as a python op:
 ```python
 import numpy as np
 import tensorflow as tf
@@ -427,7 +427,7 @@ def relu(inputs):
     def _relu_grad(x):
         return np.float32(x > 0)
 
-    # An adapter that defines a gradient op compatible with Tensorflow
+    # An adapter that defines a gradient op compatible with TensorFlow
     def _relu_grad_op(op, grad):
         x = op.inputs[0]
         x_grad = grad * tf.py_func(_relu_grad, [x], tf.float32)
@@ -444,7 +444,7 @@ def relu(inputs):
     return output
 ```
 
-To verify that the gradients are correct you can use Tensorflow's gradient checker:
+To verify that the gradients are correct you can use TensorFlow's gradient checker:
 ```python
 x = tf.random_normal([10])
 y = relu(x * x)
@@ -457,7 +457,7 @@ compute_gradient_error() computes the gradient numerically and returns the diffe
 
 Note that this implementation is pretty inefficient, and is only useful for prototyping, since the python code is not parallelizable and won't run on GPU. Once you verified your idea, you definitely would want to write it as a C++ kernel.
 
-In practice we commonly use python ops to do visualization on Tensorboard. Consider the case that you are building an image classification model and want to visualize your model predictions during training. Tensorflow allows visualizing images with tf.summary.image() function:
+In practice we commonly use python ops to do visualization on TensorBoard. Consider the case that you are building an image classification model and want to visualize your model predictions during training. TensorFlow allows visualizing images with the tf.summary.image() function:
 ```python
 image = tf.placeholder(tf.float32)
 tf.summary.image("image", image)
@@ -507,7 +507,7 @@ Note that since summaries are usually only evaluated once in a while (not per st
 
 ## Multi-GPU processing with data parallelism
 <a name="multi_gpu"></a>
- If you write your software in a language like C++ for a single cpu core, making it run on multiple GPUs in parallel would require rewriting the software from scratch. But this is not the case with Tensorflow. Because of its symbolic nature, tensorflow can hide all that complexity, making it effortless to scale your program across many CPUs and GPUs.
+ If you write your software in a language like C++ for a single cpu core, making it run on multiple GPUs in parallel would require rewriting the software from scratch. But this is not the case with TensorFlow. Because of its symbolic nature, TensorFlow can hide all that complexity, making it effortless to scale your program across many CPUs and GPUs.
 
  Let's start with the simple example of adding two vectors on CPU:
  ```python
@@ -615,11 +615,11 @@ train_op = tf.train.AdamOptimizer(0.1).minimize(
 
 The only thing that we need to change to parallelize backpropagation of gradients is to set the colocate_gradients_with_ops flag to true. This ensures that gradient ops run on the same device as the original op.
 
-## Debugging Tensorflow models
+## Debugging TensorFlow models
 <a name="debug"></a>
-Symbolic nature of Tensorflow makes it relatively more difficult to debug Tensorflow code compared to regular python code. Here we introduce a number of tools included with Tensorflow that make debugging much easier.
+Symbolic nature of TensorFlow makes it relatively more difficult to debug TensorFlow code compared to regular python code. Here we introduce a number of tools included with TensorFlow that make debugging much easier.
 
-Probably the most common error one can make when using Tensorflow is passing Tensors of wrong shape to ops. Many Tensorflow ops can operate on tensors of different ranks and shapes. This can be convenient when using the API, but may lead to extra headache when things go wrong.
+Probably the most common error one can make when using TensorFlow is passing Tensors of wrong shape to ops. Many TensorFlow ops can operate on tensors of different ranks and shapes. This can be convenient when using the API, but may lead to extra headache when things go wrong.
 
 For example, consider the tf.matmul op, it can multiply two matrices:
 ```python
@@ -653,7 +653,7 @@ check_b = tf.assert_rank(b, 1)
 with tf.control_dependencies([check_a, check_b]):
     c = a + b  # c is a tensor of shape [2, 2]
 ```
-Remember that assertion nodes like other operations are part of the graph and if not evaluated would get pruned during Session.run(). So make sure to create explicit dependencies to assertion ops, to force Tensorflow to execute them.
+Remember that assertion nodes like other operations are part of the graph and if not evaluated would get pruned during Session.run(). So make sure to create explicit dependencies to assertion ops, to force TensorFlow to execute them.
 
 You can also use assertions to validate the value of tensors at runtime:
 ```python
@@ -680,7 +680,7 @@ Alternatively we could manually define a control dependency.
 
 ### Check your gradients with tf.compute_gradient_error
 
-__Not__ all the operations in Tensorflow come with gradients, and it's easy to unintentionally build graphs for which Tensorflow can not compute the gradients.
+__Not__ all the operations in TensorFlow come with gradients, and it's easy to unintentionally build graphs for which TensorFlow can not compute the gradients.
 
 Let's look at an example:
 ```python
@@ -709,7 +709,7 @@ We are using tf.nn.softmax_cross_entropy_with_logits to define entropy over a ca
 ```
 It turns out tf.nn.softmax_cross_entropy_with_logits has undefined gradients with respect to labels! But how may we spot this if we didn't know?
 
-Fortunately for us Tensorflow comes with a numerical differentiator that can be used to find symbolic gradient errors. Let's see how we can use it:
+Fortunately for us TensorFlow comes with a numerical differentiator that can be used to find symbolic gradient errors. Let's see how we can use it:
 
 ```python
 with tf.Session():
@@ -747,13 +747,13 @@ Now if you run the optimizer again with the correct version you can see the fina
 ```
 which are exactly what we wanted.
 
-Tensorflow summaries, and tfdbg (TensorFlow Debugger) are other tools that can be used for debugging. Please refer to the official docs to learn more.
+TensorFlow summaries, and tfdbg (TensorFlow Debugger) are other tools that can be used for debugging. Please refer to the official docs to learn more.
 
 ## Building a neural network training framework with learn API
 <a name="tf_learn"></a>
-For simplicity, in most of the examples here we manually create sessions and we don't care about saving and loading checkpoints but this is not how we usually do things in practice. You most probably want to use the learn API to take care of session management and logging. We provide a simple but practical framework in the [code/framework](https://github.com/vahidk/EffectiveTensorflow/tree/master/code/framework) directory for training neural networks using Tensorflow. In this item we explain how this framework works.
+For simplicity, in most of the examples here we manually create sessions and we don't care about saving and loading checkpoints but this is not how we usually do things in practice. You most probably want to use the learn API to take care of session management and logging. We provide a simple but practical framework in the [code/framework](https://github.com/vahidk/EffectiveTensorFlow/tree/master/code/framework) directory for training neural networks using TensorFlow. In this item we explain how this framework works.
 
-When experimenting with neural network models you usually have a training/test split. You want to train your model on the training set, and once in a while evaluate it on test set and compute some metrics. You also need to store the model parameters as a checkpoint, and ideally you want to be able to stop and resume training. Tensorflow's learn API is designed to make this job easier, letting us focus on developing the actual model.
+When experimenting with neural network models you usually have a training/test split. You want to train your model on the training set, and once in a while evaluate it on test set and compute some metrics. You also need to store the model parameters as a checkpoint, and ideally you want to be able to stop and resume training. TensorFlow's learn API is designed to make this job easier, letting us focus on developing the actual model.
 
 The most basic way of using tf.learn API is to use tf.Estimator object directly. You need to define a model function that defines a loss function, a train op and one or a set of predictions:
 ```python
@@ -791,7 +791,7 @@ metrics = { 'accuracy': tf.metrics.accuracy }
 estimator.evaluate(input_fn=input_fn, metrics=metrics)
 ```
 
-Estimator object might be good enough for simple cases, but Tensorflow provides an even higher level object called Experiment which provides some additional useful functionality. Creating an experiment object is very easy:
+Estimator object might be good enough for simple cases, but TensorFlow provides an even higher level object called Experiment which provides some additional useful functionality. Creating an experiment object is very easy:
 
 ```python
 experiment = tf.contrib.learn.Experiment(
@@ -874,9 +874,9 @@ def input_fn():
         features=features,
         reader=tf.TFRecordReader)
 ```
-See [mnist.py](https://github.com/vahidk/EffectiveTensorflow/blob/master/code/framework/dataset/mnist.py) for an example of how to convert your data to TFRecords format.
+See [mnist.py](https://github.com/vahidk/EffectiveTensorFlow/blob/master/code/framework/dataset/mnist.py) for an example of how to convert your data to TFRecords format.
 
-The framework also comes with a simple convolutional network classifier in [convnet_classifier.py](https://github.com/vahidk/EffectiveTensorflow/blob/master/code/framework/model/convnet_classifier.py) that includes an example model and evaluation metric:
+The framework also comes with a simple convolutional network classifier in [convnet_classifier.py](https://github.com/vahidk/EffectiveTensorFlow/blob/master/code/framework/model/convnet_classifier.py) that includes an example model and evaluation metric:
 
 ```python
 def model_fn(features, labels, mode, params):
@@ -901,11 +901,11 @@ tf.contrib.learn.MetricSpec(
   prediction_key='predictions')
 ```
 
-And that's it! This is all you need to get started with Tensorflow learn API. I recommend to have a look at the [source code](https://github.com/vahidk/EffectiveTensorflow/tree/master/code/framework) and see the official python API to learn more about the learn API.
+And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the [source code](https://github.com/vahidk/EffectiveTensorFlow/tree/master/code/framework) and see the official python API to learn more about the learn API.
 
-## Tensorflow Cookbook
+## TensorFlow Cookbook
 <a name="cookbook"></a>
-This section includes implementation of a set of common operations in Tensorflow.
+This section includes implementation of a set of common operations in TensorFlow.
 
 ### Beam Search <a name="beam_search"></a>
 ```python

From d00d3faf03b09e81de78d81c53ae3f6eef17c9e5 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 09:46:14 -0700
Subject: [PATCH 09/78] Fix comment in broadcasting.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 96109f3..68331c1 100644
--- a/README.md
+++ b/README.md
@@ -191,7 +191,7 @@ import tensorflow as tf
 
 a = tf.constant([[1., 2.], [3., 4.]])
 b = tf.constant([[1.], [2.]])
-# c = a + tf.tile(a, [1, 2])
+# c = a + tf.tile(b, [1, 2])
 c = a + b
 ```
 

From c5a702ad01399a49bdaba3425e7200be29c44638 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 19:16:54 -0700
Subject: [PATCH 10/78] Remove unused param.

---
 code/framework/model/convnet_classifier.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/code/framework/model/convnet_classifier.py b/code/framework/model/convnet_classifier.py
index 1885e94..1a7c1f0 100644
--- a/code/framework/model/convnet_classifier.py
+++ b/code/framework/model/convnet_classifier.py
@@ -11,8 +11,7 @@
 FLAGS = tf.flags.FLAGS
 
 HPARAMS = {
-  'drop_rate': 0.5,
-  'crop_margin': 8,
+  'drop_rate': 0.5
 }
 
 
@@ -40,7 +39,7 @@ def model_fn(features, labels, mode, params):
   loss = tf.losses.sparse_softmax_cross_entropy(
     labels=labels, logits=logits)
 
-  summary.labeled_image("images", images, predictions)
+  summary.labeled_image('images', images, predictions)
 
   return {'predictions': predictions}, loss
 

From 5cb1da042f99743c399647bdc07064564fcedb8e Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 19:47:35 -0700
Subject: [PATCH 11/78] Added comments.

---
 code/framework/common/ops.py               | 5 +++++
 code/framework/common/summary.py           | 5 +++++
 code/framework/common/utils.py             | 2 ++
 code/framework/dataset/mnist.py            | 8 ++++++++
 code/framework/main.py                     | 7 +++++++
 code/framework/model/convnet_classifier.py | 4 ++++
 6 files changed, 31 insertions(+)

diff --git a/code/framework/common/ops.py b/code/framework/common/ops.py
index e29dd1c..1725ff6 100644
--- a/code/framework/common/ops.py
+++ b/code/framework/common/ops.py
@@ -1,3 +1,5 @@
+"""Common TensorFlow ops."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -60,6 +62,7 @@ def conv_layers(tensor,
                 activation=tf.nn.relu,
                 drop_rate=0.0,
                 **kwargs):
+  """Builds a stack of convolutional layers with dropout and max pooling."""
   for fs, ks, ps in zip(filters, kernels, pools):
     tensor = tf.layers.dropout(tensor, drop_rate)
     tensor = tf.layers.conv2d(
@@ -76,6 +79,7 @@ def conv_layers(tensor,
 
 
 def create_optimizer(optimizer, learning_rate, decay_steps=None, **kwargs):
+  """Create an optimizer object."""
   global_step = tf.train.get_or_create_global_step()
 
   if decay_steps:
@@ -88,6 +92,7 @@ def create_optimizer(optimizer, learning_rate, decay_steps=None, **kwargs):
 
 
 def average_gradients(tower_grads):
+  """Compute average gradients."""
   average_grads = []
   for grad_and_vars in zip(*tower_grads):
     grads = [g for g, _ in grad_and_vars]
diff --git a/code/framework/common/summary.py b/code/framework/common/summary.py
index a9cd6ed..6bd66ea 100644
--- a/code/framework/common/summary.py
+++ b/code/framework/common/summary.py
@@ -1,5 +1,9 @@
 """Utility functions for visualization on tensorboard."""
 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
 import io
 import matplotlib.pyplot as plt
 import numpy as np
@@ -9,6 +13,7 @@
 
 def labeled_image(name, images, labels, max_outputs=3, flip_vertical=False,
                   color='pink', font_size=15):
+    """Writes a summary visualizing given images and corresponding labels."""
     def _visualize_image(image, label):
         # Do the actual drawing in python
         fig = plt.figure(figsize=(3, 3), dpi=80)
diff --git a/code/framework/common/utils.py b/code/framework/common/utils.py
index 263254a..6f900cc 100644
--- a/code/framework/common/utils.py
+++ b/code/framework/common/utils.py
@@ -1,3 +1,5 @@
+"""Auxiliary functions."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/code/framework/dataset/mnist.py b/code/framework/dataset/mnist.py
index 4222ed0..1a8e052 100644
--- a/code/framework/dataset/mnist.py
+++ b/code/framework/dataset/mnist.py
@@ -1,3 +1,5 @@
+"""Mnist dataset preprocessing and specifications."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -36,11 +38,13 @@
 
 
 def get_split(split):
+  """Returns train/test split paths."""
   output_data = os.path.join(LOCAL_DIR, 'data_%s.tfrecord' % split)
   return output_data
 
 
 def map_features(features):
+  """Adapts read data to model input."""
   def _decode_image(image):
     image = tf.to_float(tf.image.decode_image(image, channels=1)) / 255.0
     image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 1])
@@ -52,6 +56,7 @@ def _decode_image(image):
 
 
 def _download_data():
+  """Download the MNIST dataset."""
   if not os.path.exists(LOCAL_DIR):
     os.makedirs(LOCAL_DIR)
   for name in [
@@ -64,6 +69,7 @@ def _download_data():
 
 
 def _image_iterator(split):
+  """An iterator that reads and returns images and labels from MNIST."""
   image_urls = {
     tf.estimator.ModeKeys.TRAIN: TRAIN_IMAGE_URL,
     tf.estimator.ModeKeys.EVAL: TEST_IMAGE_URL
@@ -89,6 +95,7 @@ def _image_iterator(split):
 
 
 def _convert_data(split):
+  """Convert the dataset to TFRecord format."""
   def _create_example(item):
     image, label = item
     example = tf.train.Example(features=tf.train.Features(
@@ -105,6 +112,7 @@ def _create_example(item):
 
 
 def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
+  """Read and visualize the first example from the dataset."""
   path = get_split(split)
   iterator = tf.python_io.tf_record_iterator(path)
   item = next(iterator)
diff --git a/code/framework/main.py b/code/framework/main.py
index b42300b..c7e543d 100644
--- a/code/framework/main.py
+++ b/code/framework/main.py
@@ -1,3 +1,5 @@
+"""Main module."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -41,6 +43,7 @@
 }
 
 def get_hparams():
+  """Aggregates and returns hyper parameters."""
   hparams = HPARAMS
   hparams.update(DATASETS[FLAGS.dataset].HPARAMS)
   hparams.update(MODELS[FLAGS.model].HPARAMS)
@@ -52,6 +55,7 @@ def get_hparams():
 
 
 def make_input_fn(mode, params):
+  """Returns an input function to read the dataset."""
   def _input_fn():
     with tf.device(tf.DeviceSpec(device_type='CPU', device_index=0)):
       dataset = DATASETS[FLAGS.dataset]
@@ -71,6 +75,7 @@ def _input_fn():
 
 
 def make_model_fn():
+  """Returns a model function."""
   def _model_fn(features, labels, mode, params):
     model_fn = MODELS[FLAGS.model].model_fn
 
@@ -136,6 +141,7 @@ def _model_fn(features, labels, mode, params):
 
 
 def experiment_fn(run_config, hparams):
+  """Constructs an experiment object."""
   estimator = learn.Estimator(
     model_fn=make_model_fn(), config=run_config, params=hparams)
   eval_metrics = MODELS[FLAGS.model].eval_metrics_fn(hparams)
@@ -149,6 +155,7 @@ def experiment_fn(run_config, hparams):
 
 
 def main(unused_argv):
+  """Main entry point."""
   if FLAGS.output_dir:
     model_dir = FLAGS.output_dir
   else:
diff --git a/code/framework/model/convnet_classifier.py b/code/framework/model/convnet_classifier.py
index 1a7c1f0..e7464d1 100644
--- a/code/framework/model/convnet_classifier.py
+++ b/code/framework/model/convnet_classifier.py
@@ -1,3 +1,5 @@
+"""Simple convolutional neural network classifier."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -16,6 +18,7 @@
 
 
 def model_fn(features, labels, mode, params):
+  """CNN classifier model."""
   images = features['image']
   labels = labels['label']
 
@@ -45,6 +48,7 @@ def model_fn(features, labels, mode, params):
 
 
 def eval_metrics_fn(params):
+  """Eval metrics."""
   metrics_dict = {}
   metrics_dict['accuracy'] = tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
   return metrics_dict

From 247b0b515c47c2ff845323b597127456438537bb Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 20:15:54 -0700
Subject: [PATCH 12/78] Fixed typo in debugging.

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 68331c1..734a9a9 100644
--- a/README.md
+++ b/README.md
@@ -703,7 +703,7 @@ for i in range(10000):
 
 print(sess.run(tf.nn.softmax(w)))
 ```
-We are using tf.nn.softmax_cross_entropy_with_logits to define entropy over a categorical distribution. We then use Adam optimizer to find the weights with maximum entropy. If you have passed a course on information theory, you would know that uniform distribution contains maximum amount of information. So you would expect for the result to be [0.2, 0.2, 0.2, 0.2, 0.2]. But if you run this you may get unexpected results like this:
+We are using tf.nn.softmax_cross_entropy_with_logits to define entropy over a categorical distribution. We then use Adam optimizer to find the weights with maximum entropy. If you have passed a course on information theory, you would know that uniform distribution contains maximum entropy. So you would expect for the result to be [0.2, 0.2, 0.2, 0.2, 0.2]. But if you run this you may get unexpected results like this:
 ```
 [ 0.34081486  0.24287023  0.23465775  0.08935683  0.09230034]
 ```
@@ -729,8 +729,7 @@ def entropy(logits, dim=-1):
     return tf.reduce_sum(nplogp, dim)
 
 w = tf.get_variable('w', shape=[5])
-y = -non_differentiable_entropy(w)
-# y = -entropy(w)
+y = -entropy(w)
 
 print(w.get_shape())
 print(y.get_shape())

From 1f1de774dec159568e87c16242f6fe88ddb230a8 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 20:28:48 -0700
Subject: [PATCH 13/78] Consistent spacing.

---
 code/framework/common/ops.py     | 12 +++---
 code/framework/common/summary.py | 70 ++++++++++++++++----------------
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/code/framework/common/ops.py b/code/framework/common/ops.py
index 1725ff6..3615332 100644
--- a/code/framework/common/ops.py
+++ b/code/framework/common/ops.py
@@ -66,12 +66,12 @@ def conv_layers(tensor,
   for fs, ks, ps in zip(filters, kernels, pools):
     tensor = tf.layers.dropout(tensor, drop_rate)
     tensor = tf.layers.conv2d(
-        tensor,
-        filters=fs,
-        kernel_size=ks,
-        padding=padding,
-        activation=activation,
-        **kwargs)
+      tensor,
+      filters=fs,
+      kernel_size=ks,
+      padding=padding,
+      activation=activation,
+      **kwargs)
     if ps and ps > 1:
       tensor = tf.layers.max_pooling2d(
         inputs=tensor, pool_size=ps, strides=ps, padding=padding)
diff --git a/code/framework/common/summary.py b/code/framework/common/summary.py
index 6bd66ea..84c23a1 100644
--- a/code/framework/common/summary.py
+++ b/code/framework/common/summary.py
@@ -13,38 +13,38 @@
 
 def labeled_image(name, images, labels, max_outputs=3, flip_vertical=False,
                   color='pink', font_size=15):
-    """Writes a summary visualizing given images and corresponding labels."""
-    def _visualize_image(image, label):
-        # Do the actual drawing in python
-        fig = plt.figure(figsize=(3, 3), dpi=80)
-        ax = fig.add_subplot(111)
-        if flip_vertical:
-            image = image[::-1,...]
-        ax.imshow(image.squeeze())
-        ax.text(0, 0, str(label),
-          horizontalalignment='left',
-          verticalalignment='top',
-          color=color,
-          fontsize=font_size)
-        fig.canvas.draw()
-
-        # Write the plot as a memory file.
-        buf = io.BytesIO()
-        data = fig.savefig(buf, format='png')
-        buf.seek(0)
-
-        # Read the image and convert to numpy array
-        img = PIL.Image.open(buf)
-        return np.array(img.getdata()).reshape(img.size[0], img.size[1], -1)
-
-    def _visualize_images(images, labels):
-        # Only display the given number of examples in the batch
-        outputs = []
-        for i in range(max_outputs):
-            output = _visualize_image(images[i], labels[i])
-            outputs.append(output)
-        return np.array(outputs, dtype=np.uint8)
-
-    # Run the python op.
-    figs = tf.py_func(_visualize_images, [images, labels], tf.uint8)
-    return tf.summary.image(name, figs)
+  """Writes a summary visualizing given images and corresponding labels."""
+  def _visualize_image(image, label):
+    # Do the actual drawing in python
+    fig = plt.figure(figsize=(3, 3), dpi=80)
+    ax = fig.add_subplot(111)
+    if flip_vertical:
+      image = image[::-1,...]
+    ax.imshow(image.squeeze())
+    ax.text(0, 0, str(label),
+      horizontalalignment='left',
+      verticalalignment='top',
+      color=color,
+      fontsize=font_size)
+    fig.canvas.draw()
+
+    # Write the plot as a memory file.
+    buf = io.BytesIO()
+    data = fig.savefig(buf, format='png')
+    buf.seek(0)
+
+    # Read the image and convert to numpy array
+    img = PIL.Image.open(buf)
+    return np.array(img.getdata()).reshape(img.size[0], img.size[1], -1)
+
+  def _visualize_images(images, labels):
+    # Only display the given number of examples in the batch
+    outputs = []
+    for i in range(max_outputs):
+      output = _visualize_image(images[i], labels[i])
+      outputs.append(output)
+    return np.array(outputs, dtype=np.uint8)
+
+  # Run the python op.
+  figs = tf.py_func(_visualize_images, [images, labels], tf.uint8)
+  return tf.summary.image(name, figs)

From 7f77e3f8d33a1a3a48febc7b82691bd193b4a531 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 22:52:11 -0700
Subject: [PATCH 14/78] Added cifar10 and cifar100 datasets.

---
 code/framework/README.md           |  10 +-
 code/framework/dataset/cifar10.py  | 142 +++++++++++++++++++++++++++++
 code/framework/dataset/cifar100.py | 142 +++++++++++++++++++++++++++++
 code/framework/main.py             |   6 +-
 4 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 code/framework/dataset/cifar10.py
 create mode 100644 code/framework/dataset/cifar100.py

diff --git a/code/framework/README.md b/code/framework/README.md
index e9e3162..9beefcc 100644
--- a/code/framework/README.md
+++ b/code/framework/README.md
@@ -8,8 +8,10 @@ examples of how to define custom datasets and models.
 pip install tensorflow numpy pillow matplotlib six
 ```
 
-## Mnist
-To download the mnist dataset run:
+## Preparing datasets
+Currently the framework includes code for preprocessing mnist, cifar10, and cifar100 datasets.
+
+To download and preprocess the mnist dataset run:
 ```
 python -m dataset.mnist convert
 ```
@@ -19,7 +21,9 @@ Run the following to visualize an example:
 python -m dataset.mnist visualize
 ```
 
-## Usage
+In the above snippets you could replace mnist with cifar10 or cifar100 to preprocess the respective datasets.
+
+## Training
 To train an mnist classification model run:
 ```
 python -m main --model=convnet_classifier --dataset=mnist
diff --git a/code/framework/dataset/cifar10.py b/code/framework/dataset/cifar10.py
new file mode 100644
index 0000000..839c158
--- /dev/null
+++ b/code/framework/dataset/cifar10.py
@@ -0,0 +1,142 @@
+"""Mnist dataset preprocessing and specifications."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+from six.moves import cPickle
+from six.moves import urllib
+import struct
+import sys
+import tarfile
+import tensorflow as tf
+
+from common import utils
+
+REMOTE_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
+LOCAL_DIR = os.path.join('data/cifar10/')
+ARCHIVE_NAME = 'cifar-10-python.tar.gz'
+DATA_DIR = 'cifar-10-batches-py/'
+TRAIN_BATCHES = ['data_batch_%d' % (i + 1) for i in range(5)]
+TEST_BATCHES = ['test_batch']
+
+IMAGE_SIZE = 32
+NUM_CLASSES = 10
+
+
+FEATURES = {
+  'image': tf.FixedLenFeature([], tf.string),
+  'label': tf.FixedLenFeature([], tf.int64),
+}
+
+HPARAMS = {
+  'image_size': IMAGE_SIZE,
+  'num_classes': NUM_CLASSES,
+}
+
+
+def get_split(split):
+  """Returns train/test split paths."""
+  output_data = os.path.join(LOCAL_DIR, 'data_%s.tfrecord' % split)
+  return output_data
+
+
+def map_features(features):
+  """Adapts read data to model input."""
+  def _decode_image(image):
+    image = tf.to_float(tf.image.decode_image(image, channels=3)) / 255.0
+    image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
+    return image
+
+  image = tf.map_fn(_decode_image, features['image'], tf.float32)
+  label = features['label']
+  return {'image': image}, {'label': label}
+
+
+def _download_data():
+  """Download the MNIST dataset."""
+  if not os.path.exists(LOCAL_DIR):
+    os.makedirs(LOCAL_DIR)
+  if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
+    print('Downloading...')
+    urllib.request.urlretrieve(REMOTE_URL, LOCAL_DIR + ARCHIVE_NAME)
+  if not os.path.exists(LOCAL_DIR + DATA_DIR):
+    print('Extracting files...')
+    tar = tarfile.open(LOCAL_DIR + ARCHIVE_NAME)
+    tar.extractall(LOCAL_DIR)
+    tar.close()
+
+
+def _image_iterator(split):
+  """An iterator that reads and returns images and labels from MNIST."""
+  batches = {
+    tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
+    tf.estimator.ModeKeys.EVAL: TEST_BATCHES
+  }[split]
+
+  for batch in batches:
+    with open('%s%s%s' % (LOCAL_DIR, DATA_DIR, batch), 'rb') as fo:
+      dict = cPickle.load(fo)
+      images = np.array(dict['data'])
+      labels = np.array(dict['labels'])
+
+      num = images.shape[0]
+      images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
+      images = np.transpose(images, [0, 2, 3, 1])
+      print('Loaded %d examples.' % num)
+
+      for i in range(num):
+        yield utils.encode_image(images[i]), labels[i]
+
+
+def _convert_data(split):
+  """Convert the dataset to TFRecord format."""
+  def _create_example(item):
+    image, label = item
+    example = tf.train.Example(features=tf.train.Features(
+      feature={
+        'image': tf.train.Feature(
+          bytes_list=tf.train.BytesList(value=[image])),
+        'label': tf.train.Feature(
+          int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
+      }))
+    return example
+
+  utils.parallel_record_writer(
+    _image_iterator(split), _create_example, get_split(split))
+
+
+def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
+  """Read and visualize the first example from the dataset."""
+  path = get_split(split)
+  iterator = tf.python_io.tf_record_iterator(path)
+  item = next(iterator)
+
+  example = tf.train.Example()
+  example.ParseFromString(item)
+
+  image = utils.decode_image(
+    example.features.feature['image'].bytes_list.value[0])
+  label = example.features.feature['label'].int64_list.value[0]
+
+  plt.imshow(image.astype(np.uint8))
+  plt.title('Label: %d' % label)
+  plt.show()
+
+
+if __name__ == '__main__':
+  if len(sys.argv) != 2:
+    print('Usage: python dataset.mnist <convert|visualize>')
+    sys.exit(1)
+
+  if sys.argv[1] == 'convert':
+    _download_data()
+    _convert_data(tf.estimator.ModeKeys.TRAIN)
+    _convert_data(tf.estimator.ModeKeys.EVAL)
+  elif sys.argv[1] == 'visualize':
+    _visulize_data()
+  else:
+    print('Unknown command', sys.argv[1])
diff --git a/code/framework/dataset/cifar100.py b/code/framework/dataset/cifar100.py
new file mode 100644
index 0000000..0cad246
--- /dev/null
+++ b/code/framework/dataset/cifar100.py
@@ -0,0 +1,142 @@
+"""Mnist dataset preprocessing and specifications."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+from six.moves import cPickle
+from six.moves import urllib
+import struct
+import sys
+import tarfile
+import tensorflow as tf
+
+from common import utils
+
+REMOTE_URL = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
+LOCAL_DIR = os.path.join('data/cifar100/')
+ARCHIVE_NAME = 'cifar-100-python.tar.gz'
+DATA_DIR = 'cifar-100-python/'
+TRAIN_BATCHES = ['train']
+TEST_BATCHES = ['test']
+
+IMAGE_SIZE = 32
+NUM_CLASSES = 100
+
+
+FEATURES = {
+  'image': tf.FixedLenFeature([], tf.string),
+  'label': tf.FixedLenFeature([], tf.int64),
+}
+
+HPARAMS = {
+  'image_size': IMAGE_SIZE,
+  'num_classes': NUM_CLASSES,
+}
+
+
+def get_split(split):
+  """Returns train/test split paths."""
+  output_data = os.path.join(LOCAL_DIR, 'data_%s.tfrecord' % split)
+  return output_data
+
+
+def map_features(features):
+  """Adapts read data to model input."""
+  def _decode_image(image):
+    image = tf.to_float(tf.image.decode_image(image, channels=3)) / 255.0
+    image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
+    return image
+
+  image = tf.map_fn(_decode_image, features['image'], tf.float32)
+  label = features['label']
+  return {'image': image}, {'label': label}
+
+
+def _download_data():
+  """Download the MNIST dataset."""
+  if not os.path.exists(LOCAL_DIR):
+    os.makedirs(LOCAL_DIR)
+  if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
+    print('Downloading...')
+    urllib.request.urlretrieve(REMOTE_URL, LOCAL_DIR + ARCHIVE_NAME)
+  if not os.path.exists(LOCAL_DIR + DATA_DIR):
+    print('Extracting files...')
+    tar = tarfile.open(LOCAL_DIR + ARCHIVE_NAME)
+    tar.extractall(LOCAL_DIR)
+    tar.close()
+
+
+def _image_iterator(split):
+  """An iterator that reads and returns images and labels from MNIST."""
+  batches = {
+    tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
+    tf.estimator.ModeKeys.EVAL: TEST_BATCHES
+  }[split]
+
+  for batch in batches:
+    with open('%s%s%s' % (LOCAL_DIR, DATA_DIR, batch), 'rb') as fo:
+      dict = cPickle.load(fo)
+      images = np.array(dict['data'])
+      labels = np.array(dict['fine_labels'])
+
+      num = images.shape[0]
+      images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
+      images = np.transpose(images, [0, 2, 3, 1])
+      print('Loaded %d examples.' % num)
+
+      for i in range(num):
+        yield utils.encode_image(images[i]), labels[i]
+
+
+def _convert_data(split):
+  """Convert the dataset to TFRecord format."""
+  def _create_example(item):
+    image, label = item
+    example = tf.train.Example(features=tf.train.Features(
+      feature={
+        'image': tf.train.Feature(
+          bytes_list=tf.train.BytesList(value=[image])),
+        'label': tf.train.Feature(
+          int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
+      }))
+    return example
+
+  utils.parallel_record_writer(
+    _image_iterator(split), _create_example, get_split(split))
+
+
+def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
+  """Read and visualize the first example from the dataset."""
+  path = get_split(split)
+  iterator = tf.python_io.tf_record_iterator(path)
+  item = next(iterator)
+
+  example = tf.train.Example()
+  example.ParseFromString(item)
+
+  image = utils.decode_image(
+    example.features.feature['image'].bytes_list.value[0])
+  label = example.features.feature['label'].int64_list.value[0]
+
+  plt.imshow(image.astype(np.uint8))
+  plt.title('Label: %d' % label)
+  plt.show()
+
+
+if __name__ == '__main__':
+  if len(sys.argv) != 2:
+    print('Usage: python dataset.mnist <convert|visualize>')
+    sys.exit(1)
+
+  if sys.argv[1] == 'convert':
+    _download_data()
+    _convert_data(tf.estimator.ModeKeys.TRAIN)
+    _convert_data(tf.estimator.ModeKeys.EVAL)
+  elif sys.argv[1] == 'visualize':
+    _visulize_data()
+  else:
+    print('Unknown command', sys.argv[1])
diff --git a/code/framework/main.py b/code/framework/main.py
index c7e543d..4f6e3e6 100644
--- a/code/framework/main.py
+++ b/code/framework/main.py
@@ -8,6 +8,8 @@
 import tensorflow as tf
 
 from common import ops
+import dataset.cifar10
+import dataset.cifar100
 import dataset.mnist
 import model.convnet_classifier
 
@@ -32,7 +34,9 @@
 }
 
 DATASETS = {
-  'mnist': dataset.mnist
+  'cifar10': dataset.cifar10,
+  'cifar100': dataset.cifar100,
+  'mnist': dataset.mnist,
 }
 
 HPARAMS = {

From 1bcb06f30a1fdb61d14314975b834f68ac637dec Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 14 Aug 2017 22:53:56 -0700
Subject: [PATCH 15/78] Fix comments.

---
 code/framework/dataset/cifar10.py  | 8 ++++----
 code/framework/dataset/cifar100.py | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/code/framework/dataset/cifar10.py b/code/framework/dataset/cifar10.py
index 839c158..4d45561 100644
--- a/code/framework/dataset/cifar10.py
+++ b/code/framework/dataset/cifar10.py
@@ -1,4 +1,4 @@
-"""Mnist dataset preprocessing and specifications."""
+"""Cifar10 dataset preprocessing and specifications."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -57,7 +57,7 @@ def _decode_image(image):
 
 
 def _download_data():
-  """Download the MNIST dataset."""
+  """Download the cifar dataset."""
   if not os.path.exists(LOCAL_DIR):
     os.makedirs(LOCAL_DIR)
   if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
@@ -71,7 +71,7 @@ def _download_data():
 
 
 def _image_iterator(split):
-  """An iterator that reads and returns images and labels from MNIST."""
+  """An iterator that reads and returns images and labels from cifar."""
   batches = {
     tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
     tf.estimator.ModeKeys.EVAL: TEST_BATCHES
@@ -129,7 +129,7 @@ def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
 
 if __name__ == '__main__':
   if len(sys.argv) != 2:
-    print('Usage: python dataset.mnist <convert|visualize>')
+    print('Usage: python dataset.cifar10 <convert|visualize>')
     sys.exit(1)
 
   if sys.argv[1] == 'convert':
diff --git a/code/framework/dataset/cifar100.py b/code/framework/dataset/cifar100.py
index 0cad246..145484e 100644
--- a/code/framework/dataset/cifar100.py
+++ b/code/framework/dataset/cifar100.py
@@ -1,4 +1,4 @@
-"""Mnist dataset preprocessing and specifications."""
+"""Cifar100 dataset preprocessing and specifications."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -57,7 +57,7 @@ def _decode_image(image):
 
 
 def _download_data():
-  """Download the MNIST dataset."""
+  """Download the cifar dataset."""
   if not os.path.exists(LOCAL_DIR):
     os.makedirs(LOCAL_DIR)
   if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
@@ -71,7 +71,7 @@ def _download_data():
 
 
 def _image_iterator(split):
-  """An iterator that reads and returns images and labels from MNIST."""
+  """An iterator that reads and returns images and labels from cifar."""
   batches = {
     tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
     tf.estimator.ModeKeys.EVAL: TEST_BATCHES
@@ -129,7 +129,7 @@ def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
 
 if __name__ == '__main__':
   if len(sys.argv) != 2:
-    print('Usage: python dataset.mnist <convert|visualize>')
+    print('Usage: python dataset.cifar100 <convert|visualize>')
     sys.exit(1)
 
   if sys.argv[1] == 'convert':

From 2b9553696ecaf612d45ce4823d3ea151452c9b8f Mon Sep 17 00:00:00 2001
From: Jonathan Yan <yanjon@users.noreply.github.com>
Date: Tue, 15 Aug 2017 19:37:57 +0800
Subject: [PATCH 16/78] Fix typo in "Understanding static and dynamic shapes"

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 734a9a9..9f5993a 100644
--- a/README.md
+++ b/README.md
@@ -151,8 +151,8 @@ def get_shape(tensor):
 
 Now imagine we want to convert a Tensor of rank 3 to a tensor of rank 2 by collapsing the second and third dimensions into one. We can use our get_shape() function to do that:
 ```python
-b = placeholder(tf.float32, [None, 10, 32])
-shape = get_shape(tensor)
+b = tf.placeholder(tf.float32, [None, 10, 32])
+shape = get_shape(b)
 b = tf.reshape(b, [shape[0], shape[1] * shape[2]])
 ```
 Note that this works whether the shapes are statically specified or not.

From 79034b416073eced805d6a78db23dc4c09a25891 Mon Sep 17 00:00:00 2001
From: Jonathan Yan <yanjon@users.noreply.github.com>
Date: Tue, 15 Aug 2017 19:55:40 +0800
Subject: [PATCH 17/78] Fix another typo

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9f5993a..d3f6c28 100644
--- a/README.md
+++ b/README.md
@@ -178,8 +178,8 @@ def reshape(tensor, dims_list):
 
 Then collapsing the second dimension becomes very easy:
 ```python
-b = placeholder(tf.float32, [None, 10, 32])
-b = tf.reshape(b, [0, [1, 2]])
+b = tf.placeholder(tf.float32, [None, 10, 32])
+b = reshape(b, [0, [1, 2]])
 ```
 
 ## Broadcasting the good and the ugly

From cdceb2d02996dba4cc36a874c3bced2ddcc7b47d Mon Sep 17 00:00:00 2001
From: Remi Rampin <remirampin@gmail.com>
Date: Tue, 15 Aug 2017 11:15:45 -0400
Subject: [PATCH 18/78] tf.reshape() only need the same number of elements

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d3f6c28..e72dac6 100644
--- a/README.md
+++ b/README.md
@@ -137,7 +137,7 @@ You can reshape a given tensor dynamically using tf.reshape function:
 ```python
 a =  tf.reshape(a, [32, 128])
 ```
-Note that attempts to feed 'a' with values that don't match its shape, will raise InvalidArgumentError exception.
+Note that attempts to feed 'a' with tensors with a different total number of elements, will raise an InvalidArgumentError exception.
 
 It can be convenient to have a function that returns the static shape when available and dynamic shape when it's not. The following utility function does just that:
 ```python

From 32620bd2284daa1e9639bd14f6871570480b6534 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 15 Aug 2017 08:22:34 -0700
Subject: [PATCH 19/78] Update README.md

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index e72dac6..e8d706b 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,6 @@ a = tf.placeholder(tf.float32, [None, 128])
 This means that the first dimension can be of any size and will be determined dynamically during Session.run(). You can query the static shape of a Tensor as follows:
 
 ```python
-static_shape = a.shape  # returns TensorShape([Dimension(None), Dimension(128)])
 static_shape = a.shape.as_list()  # returns [None, 128]
 ```
 
@@ -137,7 +136,6 @@ You can reshape a given tensor dynamically using tf.reshape function:
 ```python
 a =  tf.reshape(a, [32, 128])
 ```
-Note that attempts to feed 'a' with tensors with a different total number of elements, will raise an InvalidArgumentError exception.
 
 It can be convenient to have a function that returns the static shape when available and dynamic shape when it's not. The following utility function does just that:
 ```python

From 30c4b29001caaa4c8b26f619ef5de6490878f3b0 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 15 Aug 2017 18:57:41 -0700
Subject: [PATCH 20/78] Added scopes item.

---
 README.md | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 90 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index e8d706b..92b3240 100644
--- a/README.md
+++ b/README.md
@@ -4,14 +4,15 @@ Table of Contents
 =================
 1.  [TensorFlow Basics](#basics)
 2.  [Understanding static and dynamic shapes](#shapes)
-3.  [Broadcasting the good and the ugly](#broadcast)
-4.  [Understanding order of execution and control dependencies](#control_deps)
-5.  [Control flow operations: conditionals and loops](#control_flow)
-6.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
-7.  [Multi-GPU processing with data parallelism](#multi_gpu)
-8.  [Debugging TensorFlow models](#debug)
-9.  [Building a neural network training framework with learn API](#tf_learn)
-10. [TensorFlow Cookbook](#cookbook)
+3.  [Scopes and when to use them](#scopes)
+4.  [Broadcasting the good and the ugly](#broadcast)
+5.  [Understanding order of execution and control dependencies](#control_deps)
+6.  [Control flow operations: conditionals and loops](#control_flow)
+7.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
+8.  [Multi-GPU processing with data parallelism](#multi_gpu)
+9.  [Debugging TensorFlow models](#debug)
+10. [Building a neural network training framework with learn API](#tf_learn)
+11. [TensorFlow Cookbook](#cookbook)
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
@@ -109,7 +110,7 @@ This is just tip of the iceberg for what TensorFlow can do. Many problems such a
 
 ## Understanding static and dynamic shapes
 <a name="shapes"></a>
-Tensors in TensorFlow have a static shape attribute which is determined during graph construction. The static shape may be underspecified. For example we might define a float32 tensor of shape [None, 128]:
+Tensors in TensorFlow have a static shape attribute which is determined during graph construction. The static shape may be underspecified. For example we might define a tensor of shape [None, 128]:
 ```python
 import tensorflow as tf
 
@@ -180,6 +181,86 @@ b = tf.placeholder(tf.float32, [None, 10, 32])
 b = reshape(b, [0, [1, 2]])
 ```
 
+## Scopes and when to use them
+<a name="scopes"></a>
+
+Variables and tensors in TensorFlow have a name attribute that is used to identify them in the graph. If you don't specify a name when creating a variable or a tensor, TensorFlow automatically assigns a name for you:
+
+```python
+a = tf.Variable(1)
+print(a.name)  # prints "Variable:0"
+
+b = tf.constant(1)
+print(b.name)  # prints "Const:0"
+```
+
+You can overwrite the default name by explicitly specifying it:
+
+```python
+a = tf.Variable(1, name="a")
+print(a.name)  # prints "a:0"
+
+b = tf.constant(1, name="b")
+print(b.name)  # prints "b:0"
+```
+
+TensorFlow introduces two different context managers to alter the name of tensors and variables. The first is tf.name_scope which modifies the name of tensors:
+
+```python
+with tf.name_scope('scope'):
+  a = tf.get_variable(name="a", shape=[])
+  print(a.name)  # prints "a:0"
+
+  b = tf.constant(1, name="b")
+  print(b.name)  # prints "scope/b:0"
+```
+
+The other is tf.variable_scope which modifies the name of both tensors and variables:
+
+```python
+with tf.variable_scope('scope'):
+  a = tf.get_variable(name="a", shape=[])
+  print(a.name)  # prints "scope/a:0"
+
+  b = tf.constant(1, name="b")
+  print(b.name)  # prints "scope/b:0"
+```
+
+Note that there are two ways to define new variables in TensorFlow, by calling tf.get_variable or by creating a tf.Variable object. But we rarely use tf.Variable in practice.
+
+tf.get_variable enables variable sharing, which is useful when building neural network models. Calling tf.get_variable with a new name results in creating a new variable, but if a variable with the same name already exists it will raise a ValueError exception, telling us that re-declaring a variable is not allowed:
+
+```python
+with tf.variable_scope('scope'):
+  a1 = tf.get_variable(name="a", shape=[])
+  a2 = tf.get_variable(name="a", shape=[])  # Disallowed
+```
+
+But what if we actually want to reuse a previously declared variable? Variable scopes also provide the functionality to do that:
+```python
+with tf.variable_scope('scope'):
+  a1 = tf.get_variable(name="a", shape=[])
+with tf.variable_scope('scope', reuse=True):
+  a2 = tf.get_variable(name="a", shape=[])  # OK
+```
+
+This becomes handy for example when using built-in neural network layers:
+```python
+features1 = tf.layers.conv2d(image1, filters=32, kernel_size=3)
+# Use the same convolution weights to process the second image:
+with tf.variable_scope(tf.get_variable_scope(), reuse=True):
+  features2 = tf.layers.conv2d(image2, filters=32, kernel_size=3)
+```
+
+This syntax may not look very clean to some. Especially if you want to do lots of variable sharing, keeping track of when to define new variables and when to reuse them can be cumbersome and error prone. TensorFlow templates are designed to handle this automatically:
+```python
+conv3x32 = tf.make_template("conv3x32", lambda x: tf.layers.conv2d(x, 32, 3))
+features1 = conv3x32(image1)
+features2 = conv3x32(image2)  # Will reuse the convolution weights.
+```
+You can turn any function into a TensorFlow template. Upon the first call to a template, the variables defined inside the function are declared, and in subsequent invocations they are automatically reused.
+
+
 ## Broadcasting the good and the ugly
 <a name="broadcast"></a>
 TensorFlow supports broadcasting elementwise operations. Normally when you want to perform operations like addition and multiplication, you need to make sure that shapes of the operands match, e.g. you can’t add a tensor of shape [3, 2] to a tensor of shape [3, 4]. But there’s a special case and that’s when you have a singular dimension. TensorFlow implicitly tiles the tensor across its singular dimensions to match the shape of the other operand. So it’s valid to add a tensor of shape [3, 2] to a tensor of shape [3, 1]

From a160a6174c07585f99d83cc4c192bdceac30964e Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 15 Aug 2017 20:41:18 -0700
Subject: [PATCH 21/78] Preface note.

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 92b3240..99b37d8 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,8 @@ Table of Contents
     - [Entropy](#entropy)
     - [Make parallel](#make_parallel)
 
+> _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanation ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK) -Vahid K._
+
 ## TensorFlow Basics
 <a name="basics"></a>
 The most striking difference between TensorFlow and other numerical computation libraries such as numpy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as numpy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.

From 83022cad0d6de2f93eef06c8cdac82ee7909b211 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 15 Aug 2017 20:43:31 -0700
Subject: [PATCH 22/78] Fixed typo.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 99b37d8..00be2f8 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Table of Contents
     - [Entropy](#entropy)
     - [Make parallel](#make_parallel)
 
-> _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanation ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK) -Vahid K._
+> _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK) -Vahid K._
 
 ## TensorFlow Basics
 <a name="basics"></a>

From 2b9ecc6b0d85ec405b38e1156946d8c3f522b14e Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Wed, 16 Aug 2017 20:40:39 -0700
Subject: [PATCH 23/78] Numerical stability.

---
 README.md | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 104 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 00be2f8..70d902d 100644
--- a/README.md
+++ b/README.md
@@ -11,8 +11,9 @@ Table of Contents
 7.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
 8.  [Multi-GPU processing with data parallelism](#multi_gpu)
 9.  [Debugging TensorFlow models](#debug)
-10. [Building a neural network training framework with learn API](#tf_learn)
-11. [TensorFlow Cookbook](#cookbook)
+10. [Numerical stability in TensorFlow](#stable)
+11. [Building a neural network training framework with learn API](#tf_learn)
+12. [TensorFlow Cookbook](#cookbook)
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
@@ -22,9 +23,9 @@ Table of Contents
 
 ## TensorFlow Basics
 <a name="basics"></a>
-The most striking difference between TensorFlow and other numerical computation libraries such as numpy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as numpy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.
+The most striking difference between TensorFlow and other numerical computation libraries such as NumPy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as NumPy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.
 
-Let's start with a simple example, we want to multiply two random matrices. First we look at an implementation done in numpy:
+Let's start with a simple example, we want to multiply two random matrices. First we look at an implementation done in NumPy:
 ```python
 import numpy as np
 
@@ -48,7 +49,7 @@ z_val = sess.run(z)
 
 print(z_val)
 ```
-Unlike numpy that immediately performs the computation and produces the result, tensorflow only gives us a handle (of type Tensor) to a node in the graph that represents the result. If we try printing the value of z directly, we get something like this:
+Unlike NumPy that immediately performs the computation and produces the result, TensorFlow only gives us a handle (of type Tensor) to a node in the graph that represents the result. If we try printing the value of z directly, we get something like this:
 ```
 Tensor("MatMul:0", shape=(10, 10), dtype=float32)
 ```
@@ -829,6 +830,104 @@ which are exactly what we wanted.
 
 TensorFlow summaries, and tfdbg (TensorFlow Debugger) are other tools that can be used for debugging. Please refer to the official docs to learn more.
 
+## Numerical stability in TensorFlow
+<a name="stable"></a>
+When using any numerical computation library such as NumPy or TensorFlow, it's important to note that writing mathematically correct code doesn't necessarily lead to correct results. You also need to make sure that the computations are stable.
+
+Let's start with a simple example. From primary school we know that x * y / y is equal to x for any non-zero value of y. But let's see if that's always true in practice:
+```python
+import numpy as np
+
+x = np.float32(1)
+
+y = np.float32(1e-50)  # y would be stored as zero
+z = x * y / y
+
+print(z)  # prints nan
+```
+
+The reason for the incorrect result is that y is simply too small for float32 type. A similar problem occurs when y is too large:
+
+```python
+y = np.float32(1e39)  # y would be stored as inf
+z = x * y / y
+
+print(z)  # prints nan
+```
+
+The smallest positive value that float32 type can represent is 1.4013e-45 and anything below that would be stored as zero. Also, any number beyond 3.40282e+38 would be stored as inf.
+
+```python
+print(np.nextafter(np.float32(0), np.float32(1)))  # prints 1.4013e-45
+print(np.finfo(np.float32).max)  # print 3.40282e+38
+```
+
+To make sure that your computations are stable, you want to avoid values with very small or very large absolute value. This may sound very obvious, but these kinds of problems can become extremely hard to debug, especially when doing gradient descent in TensorFlow. This is because you not only need to make sure that all the values in the forward pass are within the valid range of your data types, but you also need to make sure of the same for the backward pass (during gradient computation).
+
+Let's look at a real example. We want to compute the softmax over a vector of logits. A naive implementation would look something like this:
+```python
+import tensorflow as tf
+
+def unstable_softmax(logits):
+    exp = tf.exp(logits)
+    return exp / tf.reduce_sum(exp)
+
+tf.Session().run(unstable_softmax([1000., 0.]))  # prints [ nan, 0.]
+```
+Note that computing the exponential of even relatively small logits produces gigantic values that are out of float32 range. The largest valid logit for our naive softmax implementation is ln(3.40282e+38) = 88.7; anything beyond that leads to a nan outcome.
+
+But how can we make this more stable? The solution is rather simple. It's easy to see that exp(x - c) / &sum; exp(x - c) = exp(x) / &sum; exp(x). Therefore we can subtract any constant from the logits and the result would remain the same. We choose this constant to be the maximum of logits. This way the domain of the exponential function would be limited to [-inf, 0], and consequently its range would be [0.0, 1.0] which is desirable:
+
+```python
+import tensorflow as tf
+
+def softmax(logits):
+    exp = tf.exp(logits - tf.reduce_max(logits))
+    return exp / tf.reduce_sum(exp)
+
+tf.Session().run(softmax([1000., 0.]))  # prints [ 1., 0.]
+```
+
+Let's look at a more complicated case. Consider we have a classification problem. We use the softmax function to produce probabilities from our logits. We then define our loss function to be the cross entropy between our predictions and the labels. Recall that cross entropy for a categorical distribution can be simply defined as xe(p, q) = -&sum; p_i log(q_i). So a naive implementation of the cross entropy would look like this:
+
+```python
+def unstable_softmax_cross_entropy(labels, logits):
+    logits = tf.log(softmax(logits))
+    return -tf.reduce_sum(labels * logits)
+
+labels = tf.constant([0.5, 0.5])
+logits = tf.constant([1000., 0.])
+
+xe = unstable_softmax_cross_entropy(labels, logits)
+
+print(tf.Session().run(xe))  # prints inf
+```
+
+Note that in this implementation, as the softmax output approaches zero, the log's output approaches negative infinity, which causes instability in our computation. We can rewrite this by expanding the softmax and doing some simplifications:
+
+```python
+def softmax_cross_entropy(labels, logits):
+    scaled_logits = logits - tf.reduce_max(logits)
+    normalized_logits = scaled_logits - tf.reduce_logsumexp(scaled_logits)
+    return -tf.reduce_sum(labels * normalized_logits)
+
+labels = tf.constant([0.5, 0.5])
+logits = tf.constant([1000., 0.])
+
+xe = softmax_cross_entropy(labels, logits)
+
+print(tf.Session().run(xe))  # prints 500.0
+```
+
+We can also verify that the gradients are computed correctly:
+```python
+g = tf.gradients(xe, logits)
+print(tf.Session().run(g))  # prints [0.5, -0.5]
+```
+which is correct.
+
+Let me remind you again that extra care must be taken when doing gradient descent to make sure that the range of your functions as well as the gradients for each layer are within a valid range. Exponential and logarithmic functions when used naively are especially problematic because they can map small numbers to enormous ones and the other way around.
+
 ## Building a neural network training framework with learn API
 <a name="tf_learn"></a>
 For simplicity, in most of the examples here we manually create sessions and we don't care about saving and loading checkpoints but this is not how we usually do things in practice. You most probably want to use the learn API to take care of session management and logging. We provide a simple but practical framework in the [code/framework](https://github.com/vahidk/EffectiveTensorFlow/tree/master/code/framework) directory for training neural networks using TensorFlow. In this item we explain how this framework works.

From df02f32b48c5c3fcc54ea05c93183b4f0619867a Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Thu, 17 Aug 2017 21:53:49 -0700
Subject: [PATCH 24/78] Switch to dataset API.

---
 README.md                                     |  2 +-
 code/framework/README.md                      |  2 +-
 code/framework/common/ops.py                  |  8 +-
 code/framework/common/summary.py              |  8 +-
 code/framework/common/utils.py                |  2 +-
 code/framework/dataset/cifar10.py             | 81 ++++++++++---------
 code/framework/dataset/cifar100.py            | 81 ++++++++++---------
 code/framework/dataset/mnist.py               | 77 +++++++++---------
 code/framework/main.py                        | 73 ++++++++---------
 ...onvnet_classifier.py => cnn_classifier.py} | 12 +--
 10 files changed, 173 insertions(+), 173 deletions(-)
 rename code/framework/model/{convnet_classifier.py => cnn_classifier.py} (80%)

diff --git a/README.md b/README.md
index 70d902d..cfd51e4 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Table of Contents
     - [Entropy](#entropy)
     - [Make parallel](#make_parallel)
 
-> _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK) -Vahid K._
+> _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK) -V. K._
 
 ## TensorFlow Basics
 <a name="basics"></a>
diff --git a/code/framework/README.md b/code/framework/README.md
index 9beefcc..f136bbb 100644
--- a/code/framework/README.md
+++ b/code/framework/README.md
@@ -26,7 +26,7 @@ In the above snippets you could replace mnist with cifar10 or cifar100 to prepro
 ## Training
 To train an mnist classification model run:
 ```
-python -m main --model=convnet_classifier --dataset=mnist
+python -m main --model=cnn_classifier --dataset=mnist
 ```
 
 To visualize the training logs on Tensorboard run:
diff --git a/code/framework/common/ops.py b/code/framework/common/ops.py
index 3615332..c62b200 100644
--- a/code/framework/common/ops.py
+++ b/code/framework/common/ops.py
@@ -40,7 +40,7 @@ def dense_layers(tensor,
                  name=None,
                  **kwargs):
   """Builds a stack of fully connected layers with optional dropout."""
-  with tf.variable_scope(name, default_name='dense_layers'):
+  with tf.variable_scope(name, default_name="dense_layers"):
     for i, size in enumerate(sizes):
       if i == len(sizes) - 1 and linear_top_layer:
         activation = None
@@ -48,7 +48,7 @@ def dense_layers(tensor,
       tensor = tf.layers.dense(
           tensor,
           size,
-          name='dense_layer_%d' % i,
+          name="dense_layer_%d" % i,
           activation=activation,
           **kwargs)
   return tensor
@@ -58,7 +58,7 @@ def conv_layers(tensor,
                 filters,
                 kernels,
                 pools,
-                padding='same',
+                padding="same",
                 activation=tf.nn.relu,
                 drop_rate=0.0,
                 **kwargs):
@@ -85,7 +85,7 @@ def create_optimizer(optimizer, learning_rate, decay_steps=None, **kwargs):
   if decay_steps:
     learning_rate = tf.train.exponential_decay(
       learning_rate, global_step, decay_steps, 0.5, staircase=True)
-    tf.summary.scalar('learning_rate', learning_rate)
+    tf.summary.scalar("learning_rate", learning_rate)
 
   return tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer](
     learning_rate, **kwargs)
diff --git a/code/framework/common/summary.py b/code/framework/common/summary.py
index 84c23a1..cb634b2 100644
--- a/code/framework/common/summary.py
+++ b/code/framework/common/summary.py
@@ -12,7 +12,7 @@
 
 
 def labeled_image(name, images, labels, max_outputs=3, flip_vertical=False,
-                  color='pink', font_size=15):
+                  color="pink", font_size=15):
   """Writes a summary visualizing given images and corresponding labels."""
   def _visualize_image(image, label):
     # Do the actual drawing in python
@@ -22,15 +22,15 @@ def _visualize_image(image, label):
       image = image[::-1,...]
     ax.imshow(image.squeeze())
     ax.text(0, 0, str(label),
-      horizontalalignment='left',
-      verticalalignment='top',
+      horizontalalignment="left",
+      verticalalignment="top",
       color=color,
       fontsize=font_size)
     fig.canvas.draw()
 
     # Write the plot as a memory file.
     buf = io.BytesIO()
-    data = fig.savefig(buf, format='png')
+    data = fig.savefig(buf, format="png")
     buf.seek(0)
 
     # Read the image and convert to numpy array
diff --git a/code/framework/common/utils.py b/code/framework/common/utils.py
index 6f900cc..30a531a 100644
--- a/code/framework/common/utils.py
+++ b/code/framework/common/utils.py
@@ -53,7 +53,7 @@ def _map_fn(inputs, outputs):
   writer.close()
 
 
-def encode_image(data, format='png'):
+def encode_image(data, format="png"):
   """Encodes a numpy array to string."""
   im = PIL.Image.fromarray(data)
   buf = io.BytesIO()
diff --git a/code/framework/dataset/cifar10.py b/code/framework/dataset/cifar10.py
index 4d45561..362e645 100644
--- a/code/framework/dataset/cifar10.py
+++ b/code/framework/dataset/cifar10.py
@@ -16,44 +16,45 @@
 
 from common import utils
 
-REMOTE_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
-LOCAL_DIR = os.path.join('data/cifar10/')
-ARCHIVE_NAME = 'cifar-10-python.tar.gz'
-DATA_DIR = 'cifar-10-batches-py/'
-TRAIN_BATCHES = ['data_batch_%d' % (i + 1) for i in range(5)]
-TEST_BATCHES = ['test_batch']
+REMOTE_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
+LOCAL_DIR = os.path.join("data/cifar10/")
+ARCHIVE_NAME = "cifar-10-python.tar.gz"
+DATA_DIR = "cifar-10-batches-py/"
+TRAIN_BATCHES = ["data_batch_%d" % (i + 1) for i in range(5)]
+TEST_BATCHES = ["test_batch"]
 
 IMAGE_SIZE = 32
 NUM_CLASSES = 10
 
-
-FEATURES = {
-  'image': tf.FixedLenFeature([], tf.string),
-  'label': tf.FixedLenFeature([], tf.int64),
-}
-
 HPARAMS = {
-  'image_size': IMAGE_SIZE,
-  'num_classes': NUM_CLASSES,
+  "image_size": IMAGE_SIZE,
+  "num_classes": NUM_CLASSES,
 }
 
 
 def get_split(split):
   """Returns train/test split paths."""
-  output_data = os.path.join(LOCAL_DIR, 'data_%s.tfrecord' % split)
+  output_data = os.path.join(LOCAL_DIR, "data_%s.tfrecord" % split)
   return output_data
 
 
-def map_features(features):
-  """Adapts read data to model input."""
-  def _decode_image(image):
-    image = tf.to_float(tf.image.decode_image(image, channels=3)) / 255.0
-    image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
-    return image
+def create(split):
+  """Create an instance of the dataset object."""
+  return tf.contrib.data.TFRecordDataset(get_split(split))
+
+
+def parser_fn(record):
+  """Parse input record to features and labels."""
+  features = tf.parse_single_example(record, {
+    "image": tf.FixedLenFeature([], tf.string),
+    "label": tf.FixedLenFeature([], tf.int64),
+  })
+
+  image = tf.to_float(tf.image.decode_image(features["image"], 3)) / 255.0
+  image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
+  label = features["label"]
 
-  image = tf.map_fn(_decode_image, features['image'], tf.float32)
-  label = features['label']
-  return {'image': image}, {'label': label}
+  return {"image": image}, {"label": label}
 
 
 def _download_data():
@@ -61,10 +62,10 @@ def _download_data():
   if not os.path.exists(LOCAL_DIR):
     os.makedirs(LOCAL_DIR)
   if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
-    print('Downloading...')
+    print("Downloading...")
     urllib.request.urlretrieve(REMOTE_URL, LOCAL_DIR + ARCHIVE_NAME)
   if not os.path.exists(LOCAL_DIR + DATA_DIR):
-    print('Extracting files...')
+    print("Extracting files...")
     tar = tarfile.open(LOCAL_DIR + ARCHIVE_NAME)
     tar.extractall(LOCAL_DIR)
     tar.close()
@@ -78,15 +79,15 @@ def _image_iterator(split):
   }[split]
 
   for batch in batches:
-    with open('%s%s%s' % (LOCAL_DIR, DATA_DIR, batch), 'rb') as fo:
+    with open("%s%s%s" % (LOCAL_DIR, DATA_DIR, batch), "rb") as fo:
       dict = cPickle.load(fo)
-      images = np.array(dict['data'])
-      labels = np.array(dict['labels'])
+      images = np.array(dict["data"])
+      labels = np.array(dict["labels"])
 
       num = images.shape[0]
       images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
       images = np.transpose(images, [0, 2, 3, 1])
-      print('Loaded %d examples.' % num)
+      print("Loaded %d examples." % num)
 
       for i in range(num):
         yield utils.encode_image(images[i]), labels[i]
@@ -98,9 +99,9 @@ def _create_example(item):
     image, label = item
     example = tf.train.Example(features=tf.train.Features(
       feature={
-        'image': tf.train.Feature(
+        "image": tf.train.Feature(
           bytes_list=tf.train.BytesList(value=[image])),
-        'label': tf.train.Feature(
+        "label": tf.train.Feature(
           int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
       }))
     return example
@@ -119,24 +120,24 @@ def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
   example.ParseFromString(item)
 
   image = utils.decode_image(
-    example.features.feature['image'].bytes_list.value[0])
-  label = example.features.feature['label'].int64_list.value[0]
+    example.features.feature["image"].bytes_list.value[0])
+  label = example.features.feature["label"].int64_list.value[0]
 
   plt.imshow(image.astype(np.uint8))
-  plt.title('Label: %d' % label)
+  plt.title("Label: %d" % label)
   plt.show()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   if len(sys.argv) != 2:
-    print('Usage: python dataset.cifar10 <convert|visualize>')
+    print("Usage: python dataset.cifar10 <convert|visualize>")
     sys.exit(1)
 
-  if sys.argv[1] == 'convert':
+  if sys.argv[1] == "convert":
     _download_data()
     _convert_data(tf.estimator.ModeKeys.TRAIN)
     _convert_data(tf.estimator.ModeKeys.EVAL)
-  elif sys.argv[1] == 'visualize':
+  elif sys.argv[1] == "visualize":
     _visulize_data()
   else:
-    print('Unknown command', sys.argv[1])
+    print("Unknown command", sys.argv[1])
diff --git a/code/framework/dataset/cifar100.py b/code/framework/dataset/cifar100.py
index 145484e..795eb29 100644
--- a/code/framework/dataset/cifar100.py
+++ b/code/framework/dataset/cifar100.py
@@ -16,44 +16,45 @@
 
 from common import utils
 
-REMOTE_URL = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
-LOCAL_DIR = os.path.join('data/cifar100/')
-ARCHIVE_NAME = 'cifar-100-python.tar.gz'
-DATA_DIR = 'cifar-100-python/'
-TRAIN_BATCHES = ['train']
-TEST_BATCHES = ['test']
+REMOTE_URL = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
+LOCAL_DIR = os.path.join("data/cifar100/")
+ARCHIVE_NAME = "cifar-100-python.tar.gz"
+DATA_DIR = "cifar-100-python/"
+TRAIN_BATCHES = ["train"]
+TEST_BATCHES = ["test"]
 
 IMAGE_SIZE = 32
 NUM_CLASSES = 100
 
-
-FEATURES = {
-  'image': tf.FixedLenFeature([], tf.string),
-  'label': tf.FixedLenFeature([], tf.int64),
-}
-
 HPARAMS = {
-  'image_size': IMAGE_SIZE,
-  'num_classes': NUM_CLASSES,
+  "image_size": IMAGE_SIZE,
+  "num_classes": NUM_CLASSES,
 }
 
 
 def get_split(split):
   """Returns train/test split paths."""
-  output_data = os.path.join(LOCAL_DIR, 'data_%s.tfrecord' % split)
+  output_data = os.path.join(LOCAL_DIR, "data_%s.tfrecord" % split)
   return output_data
 
 
-def map_features(features):
-  """Adapts read data to model input."""
-  def _decode_image(image):
-    image = tf.to_float(tf.image.decode_image(image, channels=3)) / 255.0
-    image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
-    return image
+def create(split):
+  """Create an instance of the dataset object."""
+  return tf.contrib.data.TFRecordDataset(get_split(split))
+
+
+def parser_fn(record):
+  """Parse input record to features and labels."""
+  features = tf.parse_single_example(record, {
+    "image": tf.FixedLenFeature([], tf.string),
+    "label": tf.FixedLenFeature([], tf.int64),
+  })
+
+  image = tf.to_float(tf.image.decode_image(features["image"], 3)) / 255.0
+  image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
+  label = features["label"]
 
-  image = tf.map_fn(_decode_image, features['image'], tf.float32)
-  label = features['label']
-  return {'image': image}, {'label': label}
+  return {"image": image}, {"label": label}
 
 
 def _download_data():
@@ -61,10 +62,10 @@ def _download_data():
   if not os.path.exists(LOCAL_DIR):
     os.makedirs(LOCAL_DIR)
   if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
-    print('Downloading...')
+    print("Downloading...")
     urllib.request.urlretrieve(REMOTE_URL, LOCAL_DIR + ARCHIVE_NAME)
   if not os.path.exists(LOCAL_DIR + DATA_DIR):
-    print('Extracting files...')
+    print("Extracting files...")
     tar = tarfile.open(LOCAL_DIR + ARCHIVE_NAME)
     tar.extractall(LOCAL_DIR)
     tar.close()
@@ -78,15 +79,15 @@ def _image_iterator(split):
   }[split]
 
   for batch in batches:
-    with open('%s%s%s' % (LOCAL_DIR, DATA_DIR, batch), 'rb') as fo:
+    with open("%s%s%s" % (LOCAL_DIR, DATA_DIR, batch), "rb") as fo:
       dict = cPickle.load(fo)
-      images = np.array(dict['data'])
-      labels = np.array(dict['fine_labels'])
+      images = np.array(dict["data"])
+      labels = np.array(dict["fine_labels"])
 
       num = images.shape[0]
       images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
       images = np.transpose(images, [0, 2, 3, 1])
-      print('Loaded %d examples.' % num)
+      print("Loaded %d examples." % num)
 
       for i in range(num):
         yield utils.encode_image(images[i]), labels[i]
@@ -98,9 +99,9 @@ def _create_example(item):
     image, label = item
     example = tf.train.Example(features=tf.train.Features(
       feature={
-        'image': tf.train.Feature(
+        "image": tf.train.Feature(
           bytes_list=tf.train.BytesList(value=[image])),
-        'label': tf.train.Feature(
+        "label": tf.train.Feature(
           int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
       }))
     return example
@@ -119,24 +120,24 @@ def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
   example.ParseFromString(item)
 
   image = utils.decode_image(
-    example.features.feature['image'].bytes_list.value[0])
-  label = example.features.feature['label'].int64_list.value[0]
+    example.features.feature["image"].bytes_list.value[0])
+  label = example.features.feature["label"].int64_list.value[0]
 
   plt.imshow(image.astype(np.uint8))
-  plt.title('Label: %d' % label)
+  plt.title("Label: %d" % label)
   plt.show()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   if len(sys.argv) != 2:
-    print('Usage: python dataset.cifar100 <convert|visualize>')
+    print("Usage: python dataset.cifar100 <convert|visualize>")
     sys.exit(1)
 
-  if sys.argv[1] == 'convert':
+  if sys.argv[1] == "convert":
     _download_data()
     _convert_data(tf.estimator.ModeKeys.TRAIN)
     _convert_data(tf.estimator.ModeKeys.EVAL)
-  elif sys.argv[1] == 'visualize':
+  elif sys.argv[1] == "visualize":
     _visulize_data()
   else:
-    print('Unknown command', sys.argv[1])
+    print("Unknown command", sys.argv[1])
diff --git a/code/framework/dataset/mnist.py b/code/framework/dataset/mnist.py
index 1a8e052..8b9d473 100644
--- a/code/framework/dataset/mnist.py
+++ b/code/framework/dataset/mnist.py
@@ -15,44 +15,45 @@
 
 from common import utils
 
-REMOTE_URL = 'http://yann.lecun.com/exdb/mnist/'
-LOCAL_DIR = os.path.join('data/mnist/')
-TRAIN_IMAGE_URL = 'train-images-idx3-ubyte.gz'
-TRAIN_LABEL_URL = 'train-labels-idx1-ubyte.gz'
-TEST_IMAGE_URL = 't10k-images-idx3-ubyte.gz'
-TEST_LABEL_URL = 't10k-labels-idx1-ubyte.gz'
+REMOTE_URL = "http://yann.lecun.com/exdb/mnist/"
+LOCAL_DIR = os.path.join("data/mnist/")
+TRAIN_IMAGE_URL = "train-images-idx3-ubyte.gz"
+TRAIN_LABEL_URL = "train-labels-idx1-ubyte.gz"
+TEST_IMAGE_URL = "t10k-images-idx3-ubyte.gz"
+TEST_LABEL_URL = "t10k-labels-idx1-ubyte.gz"
 
 IMAGE_SIZE = 28
 NUM_CLASSES = 10
 
-
-FEATURES = {
-  'image': tf.FixedLenFeature([], tf.string),
-  'label': tf.FixedLenFeature([], tf.int64),
-}
-
 HPARAMS = {
-  'image_size': IMAGE_SIZE,
-  'num_classes': NUM_CLASSES,
+  "image_size": IMAGE_SIZE,
+  "num_classes": NUM_CLASSES,
 }
 
 
 def get_split(split):
   """Returns train/test split paths."""
-  output_data = os.path.join(LOCAL_DIR, 'data_%s.tfrecord' % split)
+  output_data = os.path.join(LOCAL_DIR, "data_%s.tfrecord" % split)
   return output_data
 
 
-def map_features(features):
-  """Adapts read data to model input."""
-  def _decode_image(image):
-    image = tf.to_float(tf.image.decode_image(image, channels=1)) / 255.0
-    image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 1])
-    return image
+def create(split):
+  """Create an instance of the dataset object."""
+  return tf.contrib.data.TFRecordDataset(get_split(split))
+
+
+def parser_fn(record):
+  """Parse input record to features and labels."""
+  features = tf.parse_single_example(record, {
+    "image": tf.FixedLenFeature([], tf.string),
+    "label": tf.FixedLenFeature([], tf.int64),
+  })
+
+  image = tf.to_float(tf.image.decode_image(features["image"], 1)) / 255.0
+  image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 1])
+  label = features["label"]
 
-  image = tf.map_fn(_decode_image, features['image'], tf.float32)
-  label = features['label']
-  return {'image': image}, {'label': label}
+  return {"image": image}, {"label": label}
 
 
 def _download_data():
@@ -79,16 +80,16 @@ def _image_iterator(split):
     tf.estimator.ModeKeys.EVAL: TEST_LABEL_URL
   }[split]
 
-  with gzip.open(LOCAL_DIR + image_urls, 'rb') as f:
+  with gzip.open(LOCAL_DIR + image_urls, "rb") as f:
     magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
     images = np.frombuffer(f.read(num * rows * cols), dtype=np.uint8)
     images = np.reshape(images, [num, rows, cols])
-    print('Loaded %d images of size [%d, %d].' % (num, rows, cols))
+    print("Loaded %d images of size [%d, %d]." % (num, rows, cols))
 
-  with gzip.open(LOCAL_DIR + label_urls, 'rb') as f:
+  with gzip.open(LOCAL_DIR + label_urls, "rb") as f:
     magic, num = struct.unpack(">II", f.read(8))
     labels = np.frombuffer(f.read(num), dtype=np.int8)
-    print('Loaded %d labels.' % num)
+    print("Loaded %d labels." % num)
 
   for i in range(num):
     yield utils.encode_image(images[i]), labels[i]
@@ -100,9 +101,9 @@ def _create_example(item):
     image, label = item
     example = tf.train.Example(features=tf.train.Features(
       feature={
-        'image': tf.train.Feature(
+        "image": tf.train.Feature(
           bytes_list=tf.train.BytesList(value=[image])),
-        'label': tf.train.Feature(
+        "label": tf.train.Feature(
           int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
       }))
     return example
@@ -121,24 +122,24 @@ def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
   example.ParseFromString(item)
 
   image = utils.decode_image(
-    example.features.feature['image'].bytes_list.value[0])
-  label = example.features.feature['label'].int64_list.value[0]
+    example.features.feature["image"].bytes_list.value[0])
+  label = example.features.feature["label"].int64_list.value[0]
 
   plt.imshow(image.squeeze())
-  plt.title('Label: %d' % label)
+  plt.title("Label: %d" % label)
   plt.show()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   if len(sys.argv) != 2:
-    print('Usage: python dataset.mnist <convert|visualize>')
+    print("Usage: python dataset.mnist <convert|visualize>")
     sys.exit(1)
 
-  if sys.argv[1] == 'convert':
+  if sys.argv[1] == "convert":
     _download_data()
     _convert_data(tf.estimator.ModeKeys.TRAIN)
     _convert_data(tf.estimator.ModeKeys.EVAL)
-  elif sys.argv[1] == 'visualize':
+  elif sys.argv[1] == "visualize":
     _visulize_data()
   else:
-    print('Unknown command', sys.argv[1])
+    print("Unknown command", sys.argv[1])
diff --git a/code/framework/main.py b/code/framework/main.py
index 4f6e3e6..5b20947 100644
--- a/code/framework/main.py
+++ b/code/framework/main.py
@@ -11,39 +11,40 @@
 import dataset.cifar10
 import dataset.cifar100
 import dataset.mnist
-import model.convnet_classifier
+import model.cnn_classifier
 
 tf.logging.set_verbosity(tf.logging.INFO)
 
-tf.flags.DEFINE_string('model', 'convnet_classifier', 'Model name.')
-tf.flags.DEFINE_string('dataset', 'mnist', 'Dataset name.')
-tf.flags.DEFINE_string('output_dir', '', 'Optional output dir.')
-tf.flags.DEFINE_string('schedule', 'train_and_evaluate', 'Schedule.')
-tf.flags.DEFINE_string('hparams', '', 'Hyper parameters.')
-tf.flags.DEFINE_integer('save_summary_steps', 10, 'Summary steps.')
-tf.flags.DEFINE_integer('save_checkpoints_steps', 10, 'Checkpoint steps.')
-tf.flags.DEFINE_integer('eval_steps', None, 'Number of eval steps.')
-tf.flags.DEFINE_integer('eval_frequency', 10, 'Eval frequency.')
-tf.flags.DEFINE_integer('num_gpus', 0, 'Numner of gpus.')
+tf.flags.DEFINE_string("model", "cnn_classifier", "Model name.")
+tf.flags.DEFINE_string("dataset", "mnist", "Dataset name.")
+tf.flags.DEFINE_string("output_dir", "", "Optional output dir.")
+tf.flags.DEFINE_string("schedule", "train_and_evaluate", "Schedule.")
+tf.flags.DEFINE_string("hparams", "", "Hyper parameters.")
+tf.flags.DEFINE_integer("num_epochs", 100, "Number of training epochs.")
+tf.flags.DEFINE_integer("save_summary_steps", 10, "Summary steps.")
+tf.flags.DEFINE_integer("save_checkpoints_steps", 10, "Checkpoint steps.")
+tf.flags.DEFINE_integer("eval_steps", None, "Number of eval steps.")
+tf.flags.DEFINE_integer("eval_frequency", 10, "Eval frequency.")
+tf.flags.DEFINE_integer("num_gpus", 0, "Numner of gpus.")
 
 FLAGS = tf.flags.FLAGS
 learn = tf.contrib.learn
 
 MODELS = {
-  'convnet_classifier': model.convnet_classifier
+  "cnn_classifier": model.cnn_classifier
 }
 
 DATASETS = {
-  'cifar10': dataset.cifar10,
-  'cifar100': dataset.cifar100,
-  'mnist': dataset.mnist,
+  "cifar10": dataset.cifar10,
+  "cifar100": dataset.cifar100,
+  "mnist": dataset.mnist,
 }
 
 HPARAMS = {
-  'optimizer': 'Adam',
-  'learning_rate': 0.001,
-  'decay_steps': 10000,
-  'batch_size': 128
+  "optimizer": "Adam",
+  "learning_rate": 0.001,
+  "decay_steps": 10000,
+  "batch_size": 128
 }
 
 def get_hparams():
@@ -61,19 +62,15 @@ def get_hparams():
 def make_input_fn(mode, params):
   """Returns an input function to read the dataset."""
   def _input_fn():
-    with tf.device(tf.DeviceSpec(device_type='CPU', device_index=0)):
-      dataset = DATASETS[FLAGS.dataset]
-      tensors = learn.read_batch_features(
-        file_pattern=dataset.get_split(mode),
-        batch_size=params.batch_size,
-        features=dataset.FEATURES,
-        reader=tf.TFRecordReader,
-        randomize_input=True if mode == learn.ModeKeys.TRAIN else False,
-        num_epochs=None if mode == learn.ModeKeys.TRAIN else 1,
-        queue_capacity=params.batch_size*3,
-        feature_queue_capacity=params.batch_size*2,
-        reader_num_threads=8 if mode == learn.ModeKeys.TRAIN else 1)
-      features, labels = dataset.map_features(tensors)
+    with tf.device(tf.DeviceSpec(device_type="CPU", device_index=0)):
+      dataset = DATASETS[FLAGS.dataset].create(mode)
+      if mode == learn.ModeKeys.TRAIN:
+        dataset = dataset.repeat(FLAGS.num_epochs)
+        dataset = dataset.shuffle(params.batch_size * 5)
+      dataset = dataset.map(DATASETS[FLAGS.dataset].parser_fn, num_threads=8)
+      dataset = dataset.batch(params.batch_size)
+      iterator = dataset.make_one_shot_iterator()
+      features, labels = iterator.get_next()
     return features, labels
   return _input_fn
 
@@ -98,8 +95,8 @@ def _model_fn(features, labels, mode, params):
         params.optimizer, params.learning_rate, params.decay_steps)
 
       for i in range(FLAGS.num_gpus):
-        with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
-          with tf.name_scope('tower_%d' % i):
+        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
+          with tf.name_scope("tower_%d" % i):
             with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
               device_features = {k: v[i] for k, v in split_features.iteritems()}
               device_labels = {k: v[i] for k, v in split_labels.iteritems()}
@@ -124,7 +121,7 @@ def _model_fn(features, labels, mode, params):
 
       loss = tf.add_n(losses) if losses else None
     else:
-      with tf.device(tf.DeviceSpec(device_type='GPU', device_index=0)):
+      with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
         predictions, loss = model_fn(features, labels, mode, params)
 
         train_op = None
@@ -133,7 +130,7 @@ def _model_fn(features, labels, mode, params):
             params.optimizer, params.learning_rate, params.decay_steps)
           train_op = opt.minimize(loss, global_step=global_step)
 
-    tf.summary.scalar('loss/loss', loss)
+    tf.summary.scalar("loss/loss", loss)
 
     return tf.contrib.learn.ModelFnOps(
       mode=mode,
@@ -163,7 +160,7 @@ def main(unused_argv):
   if FLAGS.output_dir:
     model_dir = FLAGS.output_dir
   else:
-    model_dir = 'output/%s_%s' % (FLAGS.model, FLAGS.dataset)
+    model_dir = "output/%s_%s" % (FLAGS.model, FLAGS.dataset)
   session_config = tf.ConfigProto()
   session_config.allow_soft_placement = True
   session_config.gpu_options.allow_growth = True
@@ -181,5 +178,5 @@ def main(unused_argv):
     hparams=get_hparams())
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   tf.app.run()
diff --git a/code/framework/model/convnet_classifier.py b/code/framework/model/cnn_classifier.py
similarity index 80%
rename from code/framework/model/convnet_classifier.py
rename to code/framework/model/cnn_classifier.py
index e7464d1..0ea64d6 100644
--- a/code/framework/model/convnet_classifier.py
+++ b/code/framework/model/cnn_classifier.py
@@ -13,14 +13,14 @@
 FLAGS = tf.flags.FLAGS
 
 HPARAMS = {
-  'drop_rate': 0.5
+  "drop_rate": 0.5
 }
 
 
 def model_fn(features, labels, mode, params):
   """CNN classifier model."""
-  images = features['image']
-  labels = labels['label']
+  images = features["image"]
+  labels = labels["label"]
 
   drop_rate = params.drop_rate if mode == tf.estimator.ModeKeys.TRAIN else 0.0
 
@@ -42,13 +42,13 @@ def model_fn(features, labels, mode, params):
   loss = tf.losses.sparse_softmax_cross_entropy(
     labels=labels, logits=logits)
 
-  summary.labeled_image('images', images, predictions)
+  summary.labeled_image("images", images, predictions)
 
-  return {'predictions': predictions}, loss
+  return {"predictions": predictions}, loss
 
 
 def eval_metrics_fn(params):
   """Eval metrics."""
   metrics_dict = {}
-  metrics_dict['accuracy'] = tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
+  metrics_dict["accuracy"] = tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
   return metrics_dict

From 38706191333b8db00b6349d7a9fd83ccc9bb7226 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Thu, 17 Aug 2017 22:08:18 -0700
Subject: [PATCH 25/78] Fixed broken link.

---
 README.md | 68 +++++++++++++++++++++++++++----------------------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index cfd51e4..9b44cd5 100644
--- a/README.md
+++ b/README.md
@@ -210,7 +210,7 @@ print(b.name)  # prints "b:0"
 TensorFlow introduces two different context managers to alter the name of tensors and variables. The first is tf.name_scope which modifies the name of tensors:
 
 ```python
-with tf.name_scope('scope'):
+with tf.name_scope("scope"):
   a = tf.get_variable(name="a", shape=[])
   print(a.name)  # prints "a:0"
 
@@ -221,7 +221,7 @@ with tf.name_scope('scope'):
 The other is tf.variable_scope which modifies the name of both tensors and variables:
 
 ```python
-with tf.variable_scope('scope'):
+with tf.variable_scope("scope"):
   a = tf.get_variable(name="a", shape=[])
   print(a.name)  # prints "scope/a:0"
 
@@ -234,16 +234,16 @@ Note that there are two ways to define new variables in TensorFlow, by calling t
 tf.get_variable enables variable sharing which is useful when building neural network models. Calling tf.get_variable with a new name results in creating a new variable, but if a variable with a same name exists it will raise a ValueError exception, telling us that re-declaring a variable is not allowed:
 
 ```python
-with tf.variable_scope('scope'):
+with tf.variable_scope("scope"):
   a1 = tf.get_variable(name="a", shape=[])
   a2 = tf.get_variable(name="a", shape=[])  # Disallowed
 ```
 
 But what if we actually want to reuse a previously declared variable? Variable scopes also provide the functionality to do that:
 ```python
-with tf.variable_scope('scope'):
+with tf.variable_scope("scope"):
   a1 = tf.get_variable(name="a", shape=[])
-with tf.variable_scope('scope', reuse=True):
+with tf.variable_scope("scope", reuse=True):
   a2 = tf.get_variable(name="a", shape=[])  # OK
 ```
 
@@ -552,20 +552,20 @@ import numpy as np
 import PIL
 import tensorflow as tf
 
-def visualize_labeled_images(images, labels, max_outputs=3, name='image'):
+def visualize_labeled_images(images, labels, max_outputs=3, name="image"):
     def _visualize_image(image, label):
         # Do the actual drawing in python
         fig = plt.figure(figsize=(3, 3), dpi=80)
         ax = fig.add_subplot(111)
         ax.imshow(image[::-1,...])
         ax.text(0, 0, str(label),
-          horizontalalignment='left',
-          verticalalignment='top')
+          horizontalalignment="left",
+          verticalalignment="top")
         fig.canvas.draw()
 
         # Write the plot as a memory file.
         buf = io.BytesIO()
-        data = fig.savefig(buf, format='png')
+        data = fig.savefig(buf, format="png")
         buf.seek(0)
 
         # Read the image and convert to numpy array
@@ -595,7 +595,7 @@ Note that since summaries are usually only evaluated once in a while (not per st
  ```python
  import tensorflow as tf
 
-with tf.device(tf.DeviceSpec(device_type='CPU', device_index=0)):
+with tf.device(tf.DeviceSpec(device_type="CPU", device_index=0)):
     a = tf.random_uniform([1000, 100])
     b = tf.random_uniform([1000, 100])
     c = a + b
@@ -605,7 +605,7 @@ tf.Session().run(c)
 
 The same thing can as simply be done on GPU:
 ```python
-with tf.device(tf.DeviceSpec(device_type='GPU', device_index=0)):
+with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
     a = tf.random_uniform([1000, 100])
     b = tf.random_uniform([1000, 100])
     c = a + b
@@ -618,7 +618,7 @@ split_b = tf.split(b, 2)
 
 split_c = []
 for i in range(2):
-    with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
+    with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
         split_c.append(split_a[i] + split_b[i])
 
 c = tf.concat(split_c, axis=0)
@@ -633,7 +633,7 @@ def make_parallel(fn, num_gpus, **kwargs):
 
     out_split = []
     for i in range(num_gpus):
-        with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
+        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
             with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
                 out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
 
@@ -772,7 +772,7 @@ def non_differentiable_entropy(logits):
     probs = tf.nn.softmax(logits)
     return tf.nn.softmax_cross_entropy_with_logits(labels=probs, logits=logits)
 
-w = tf.get_variable('w', shape=[5])
+w = tf.get_variable("w", shape=[5])
 y = -non_differentiable_entropy(w)
 
 opt = tf.train.AdamOptimizer()
@@ -810,7 +810,7 @@ def entropy(logits, dim=-1):
     nplogp = probs * (tf.reduce_logsumexp(logits, dim, keep_dims=True) - logits)
     return tf.reduce_sum(nplogp, dim)
 
-w = tf.get_variable('w', shape=[5])
+w = tf.get_variable("w", shape=[5])
 y = -entropy(w)
 
 print(w.get_shape())
@@ -966,7 +966,7 @@ estimator.fit(input_fn=input_fn, max_steps=...)
 
 and to evaluate the model, call Estimator.evaluate(), providing a set of metrics:
 ```
-metrics = { 'accuracy': tf.metrics.accuracy }
+metrics = { "accuracy": tf.metrics.accuracy }
 estimator.evaluate(input_fn=input_fn, metrics=metrics)
 ```
 
@@ -989,9 +989,9 @@ An even higher level way of running experiments is by using learn_runner.run() f
 ```python
 import tensorflow as tf
 
-tf.flags.DEFINE_string('output_dir', '', 'Optional output dir.')
-tf.flags.DEFINE_string('schedule', 'train_and_evaluate', 'Schedule.')
-tf.flags.DEFINE_string('hparams', '', 'Hyper parameters.')
+tf.flags.DEFINE_string("output_dir", "", "Optional output dir.")
+tf.flags.DEFINE_string("schedule", "train_and_evaluate", "Schedule.")
+tf.flags.DEFINE_string("hparams", "", "Hyper parameters.")
 
 FLAGS = tf.flags.FLAGS
 learn = tf.contrib.learn
@@ -1016,10 +1016,10 @@ def main(unused_argv):
     schedule=FLAGS.schedule,
     hparams=hparams)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   tf.app.run()
 ```
-The schedule flag decides which member function of the Experiment object gets called. So, if you for example set schedule to 'train_and_evaluate', experiment.train_and_evaluate() would be called.
+The schedule flag decides which member function of the Experiment object gets called. So, if you for example set schedule to "train_and_evaluate", experiment.train_and_evaluate() would be called.
 
 Now let's have a look at how we might actually write an input function. One way to do this  is through python ops (See [this item](#python_ops) for more information on python ops).
 ```python
@@ -1044,8 +1044,8 @@ An alternative way is to write your data as TFRecords format and use the multi-t
 ```python
 def input_fn():
     features = {
-        'image': tf.FixedLenFeature([], tf.string),
-        'label': tf.FixedLenFeature([], tf.int64),
+        "image": tf.FixedLenFeature([], tf.string),
+        "label": tf.FixedLenFeature([], tf.int64),
     }
     tensors = tf.contrib.learn.read_batch_features(
         file_pattern=...,
@@ -1055,29 +1055,29 @@ def input_fn():
 ```
 See [mnist.py](https://github.com/vahidk/EffectiveTensorFlow/blob/master/code/framework/dataset/mnist.py) for an example of how to convert your data to TFRecords format.
 
-The framework also comes with a simple convolutional network classifier in [convnet_classifier.py](https://github.com/vahidk/EffectiveTensorFlow/blob/master/code/framework/model/convnet_classifier.py) that includes an example model and evaluation metric:
+The framework also comes with a simple convolutional network classifier in [cnn_classifier.py](https://github.com/vahidk/EffectiveTensorFlow/blob/master/code/framework/model/cnn_classifier.py) that includes an example model and evaluation metric:
 
 ```python
 def model_fn(features, labels, mode, params):
-  images = features['image']
-  labels = labels['label']
+  images = features["image"]
+  labels = labels["label"]
 
   predictions = ...
   loss = ...
 
-  return {'predictions': predictions}, loss
+  return {"predictions": predictions}, loss
 
 def eval_metrics_fn(params):
   return {
-    'accuracy': tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
+    "accuracy": tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
   }
 ```
 MetricSpec connects our model to the given metric function (e.g. tf.metrics.accuracy). Since our label and predictions solely include a single tensor, everything automagically works. Although if your label/prediction includes multiple tensors, you need to explicitly specify which tensors you want to pass to the metric function:
 ```python
 tf.contrib.learn.MetricSpec(
   tf.metrics.accuracy,
-  label_key='label',
-  prediction_key='predictions')
+  label_key="label",
+  prediction_key="predictions")
 ```
 
 And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the [source code](https://github.com/vahidk/EffectiveTensorFlow/tree/master/code/framework) and see the official python API to learn more about the learn API.
@@ -1123,7 +1123,7 @@ def batch_gather(tensor, indices):
   return output
 
 def rnn_beam_search(update_fn, initial_state, sequence_length, beam_width,
-                    begin_token_id, end_token_id, name='rnn'):
+                    begin_token_id, end_token_id, name="rnn"):
   """Beam-search decoder for recurrent models.
 
   Args:
@@ -1201,13 +1201,13 @@ def merge(tensors, units, activation=tf.nn.relu, name=None, **kwargs):
     tensors: A list of tensor with the same rank.
     units: Number of units in the projection function.
   """
-  with tf.variable_scope(name, default_name='merge'):
+  with tf.variable_scope(name, default_name="merge"):
     # Apply linear projection to input tensors.
     projs = []
     for i, tensor in enumerate(tensors):
       proj = tf.layers.dense(
           tensor, units, activation=None,
-          name='proj_%d' % i,
+          name="proj_%d" % i,
           **kwargs)
       projs.append(proj)
 
@@ -1260,7 +1260,7 @@ def make_parallel(fn, num_gpus, **kwargs):
 
   out_split = []
   for i in range(num_gpus):
-    with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
+    with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
       with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
         out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
 

From 7b940f7bd33c3406644333411c452ab8251bf473 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Fri, 18 Aug 2017 21:40:39 -0700
Subject: [PATCH 26/78] Moved framework.

---
 .gitignore                             |   4 -
 .gitmodules                            |   3 +
 README.md                              |   7 +-
 code/framework                         |   1 +
 code/framework/README.md               |  35 -----
 code/framework/__init__.py             |   0
 code/framework/common/__init__.py      |   0
 code/framework/common/ops.py           | 102 --------------
 code/framework/common/summary.py       |  50 -------
 code/framework/common/utils.py         |  70 ----------
 code/framework/dataset/__init__.py     |   0
 code/framework/dataset/cifar10.py      | 143 -------------------
 code/framework/dataset/cifar100.py     | 143 -------------------
 code/framework/dataset/mnist.py        | 145 --------------------
 code/framework/main.py                 | 182 -------------------------
 code/framework/model/__init__.py       |   0
 code/framework/model/cnn_classifier.py |  54 --------
 17 files changed, 10 insertions(+), 929 deletions(-)
 create mode 100644 .gitmodules
 create mode 160000 code/framework
 delete mode 100644 code/framework/README.md
 delete mode 100644 code/framework/__init__.py
 delete mode 100644 code/framework/common/__init__.py
 delete mode 100644 code/framework/common/ops.py
 delete mode 100644 code/framework/common/summary.py
 delete mode 100644 code/framework/common/utils.py
 delete mode 100644 code/framework/dataset/__init__.py
 delete mode 100644 code/framework/dataset/cifar10.py
 delete mode 100644 code/framework/dataset/cifar100.py
 delete mode 100644 code/framework/dataset/mnist.py
 delete mode 100644 code/framework/main.py
 delete mode 100644 code/framework/model/__init__.py
 delete mode 100644 code/framework/model/cnn_classifier.py

diff --git a/.gitignore b/.gitignore
index 021d7a9..e43b0f9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1 @@
 .DS_Store
-.vscode
-*.pyc
-code/framework/data
-code/framework/output
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..45f0014
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "code/framework"]
+	path = code/framework
+	url = https://github.com/vahidk/TensorflowFramework
diff --git a/README.md b/README.md
index 9b44cd5..22eaf2a 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,12 @@ Table of Contents
     - [Entropy](#entropy)
     - [Make parallel](#make_parallel)
 
-> _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK) -V. K._
+__Note__: _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK). -V. K._
+
+__Update__: In the latest release, the accompanied framework is moved to it's own [repository](https://github.com/vahidk/TensorflowFramework). To download the framework run:
+```
+git clone https://github.com/vahidk/TensorflowFramework.git
+```
 
 ## TensorFlow Basics
 <a name="basics"></a>
diff --git a/code/framework b/code/framework
new file mode 160000
index 0000000..fd52650
--- /dev/null
+++ b/code/framework
@@ -0,0 +1 @@
+Subproject commit fd5265012b70d418130bbf21535a24680c7559ef
diff --git a/code/framework/README.md b/code/framework/README.md
deleted file mode 100644
index f136bbb..0000000
--- a/code/framework/README.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Tensorflow training framework
-
-Follow the example in dataset/mnist.py and model/convnet_classifier.py for
-examples of how to define custom datasets and models.
-
-## Install dependencies
-```
-pip install tensorflow numpy pillow matplotlib six
-```
-
-## Preparing datasets
-Currently the framework includes code for preprocessing mnist, cifar10, and cifar100 datasets.
-
-To download and preprocess the mnist dataset run:
-```
-python -m dataset.mnist convert
-```
-
-Run the following to visualize an example:
-```
-python -m dataset.mnist visualize
-```
-
-In the above snippets you could replace mnist with cifar10 or cifar100 to preprocess the respective datasets.
-
-## Training
-To train an mnist classification model run:
-```
-python -m main --model=cnn_classifier --dataset=mnist
-```
-
-To visualize the training logs on Tensorboard run:
-```
-tensorboard --logdir=output
-```
diff --git a/code/framework/__init__.py b/code/framework/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/code/framework/common/__init__.py b/code/framework/common/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/code/framework/common/ops.py b/code/framework/common/ops.py
deleted file mode 100644
index c62b200..0000000
--- a/code/framework/common/ops.py
+++ /dev/null
@@ -1,102 +0,0 @@
-"""Common TensorFlow ops."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import tensorflow as tf
-
-
-def get_shape(tensor):
-  """Returns static shape if available and dynamic shape otherwise."""
-  static_shape = tensor.shape.as_list()
-  dynamic_shape = tf.unstack(tf.shape(tensor))
-  dims = [s[1] if s[0] is None else s[0]
-          for s in zip(static_shape, dynamic_shape)]
-  return dims
-
-
-def reshape(tensor, dims_list):
-  """Reshape the given tensor by collapsing dimensions."""
-  shape = get_shape(tensor)
-  dims_prod = []
-  for dims in dims_list:
-    if isinstance(dims, int):
-      dims_prod.append(shape[dims])
-    elif all([isinstance(shape[d], int) for d in dims]):
-      dims_prod.append(np.prod([shape[d] for d in dims]))
-    else:
-      dims_prod.append(tf.prod([shape[d] for d in dims]))
-  tensor = tf.reshape(tensor, dims_prod)
-  return tensor
-
-
-def dense_layers(tensor,
-                 sizes,
-                 activation=tf.nn.relu,
-                 linear_top_layer=False,
-                 drop_rate=0.0,
-                 name=None,
-                 **kwargs):
-  """Builds a stack of fully connected layers with optional dropout."""
-  with tf.variable_scope(name, default_name="dense_layers"):
-    for i, size in enumerate(sizes):
-      if i == len(sizes) - 1 and linear_top_layer:
-        activation = None
-      tensor = tf.layers.dropout(tensor, drop_rate)
-      tensor = tf.layers.dense(
-          tensor,
-          size,
-          name="dense_layer_%d" % i,
-          activation=activation,
-          **kwargs)
-  return tensor
-
-
-def conv_layers(tensor,
-                filters,
-                kernels,
-                pools,
-                padding="same",
-                activation=tf.nn.relu,
-                drop_rate=0.0,
-                **kwargs):
-  """Builds a stack of convolutional layers with dropout and max pooling."""
-  for fs, ks, ps in zip(filters, kernels, pools):
-    tensor = tf.layers.dropout(tensor, drop_rate)
-    tensor = tf.layers.conv2d(
-      tensor,
-      filters=fs,
-      kernel_size=ks,
-      padding=padding,
-      activation=activation,
-      **kwargs)
-    if ps and ps > 1:
-      tensor = tf.layers.max_pooling2d(
-        inputs=tensor, pool_size=ps, strides=ps, padding=padding)
-  return tensor
-
-
-def create_optimizer(optimizer, learning_rate, decay_steps=None, **kwargs):
-  """Create an optimizer object."""
-  global_step = tf.train.get_or_create_global_step()
-
-  if decay_steps:
-    learning_rate = tf.train.exponential_decay(
-      learning_rate, global_step, decay_steps, 0.5, staircase=True)
-    tf.summary.scalar("learning_rate", learning_rate)
-
-  return tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer](
-    learning_rate, **kwargs)
-
-
-def average_gradients(tower_grads):
-  """Compute average gradients."""
-  average_grads = []
-  for grad_and_vars in zip(*tower_grads):
-    grads = [g for g, _ in grad_and_vars]
-    grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
-    v = grad_and_vars[0][1]
-    average_grads.append((grad, v))
-  return average_grads
diff --git a/code/framework/common/summary.py b/code/framework/common/summary.py
deleted file mode 100644
index cb634b2..0000000
--- a/code/framework/common/summary.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""Utility functions for visualization on tensorboard."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import io
-import matplotlib.pyplot as plt
-import numpy as np
-import PIL
-import tensorflow as tf
-
-
-def labeled_image(name, images, labels, max_outputs=3, flip_vertical=False,
-                  color="pink", font_size=15):
-  """Writes a summary visualizing given images and corresponding labels."""
-  def _visualize_image(image, label):
-    # Do the actual drawing in python
-    fig = plt.figure(figsize=(3, 3), dpi=80)
-    ax = fig.add_subplot(111)
-    if flip_vertical:
-      image = image[::-1,...]
-    ax.imshow(image.squeeze())
-    ax.text(0, 0, str(label),
-      horizontalalignment="left",
-      verticalalignment="top",
-      color=color,
-      fontsize=font_size)
-    fig.canvas.draw()
-
-    # Write the plot as a memory file.
-    buf = io.BytesIO()
-    data = fig.savefig(buf, format="png")
-    buf.seek(0)
-
-    # Read the image and convert to numpy array
-    img = PIL.Image.open(buf)
-    return np.array(img.getdata()).reshape(img.size[0], img.size[1], -1)
-
-  def _visualize_images(images, labels):
-    # Only display the given number of examples in the batch
-    outputs = []
-    for i in range(max_outputs):
-      output = _visualize_image(images[i], labels[i])
-      outputs.append(output)
-    return np.array(outputs, dtype=np.uint8)
-
-  # Run the python op.
-  figs = tf.py_func(_visualize_images, [images, labels], tf.uint8)
-  return tf.summary.image(name, figs)
diff --git a/code/framework/common/utils.py b/code/framework/common/utils.py
deleted file mode 100644
index 30a531a..0000000
--- a/code/framework/common/utils.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Auxiliary functions."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import io
-import numpy as np
-import PIL
-import multiprocessing as mp
-import tensorflow as tf
-
-
-def parallel_record_writer(iterator, create_example, path, num_threads=4):
-  """Create a RecordIO file from data for efficient reading."""
-
-  def _queue(inputs):
-    for item in iterator:
-      inputs.put(item)
-    for _ in range(num_threads):
-      inputs.put(None)
-
-  def _map_fn(inputs, outputs):
-    while True:
-      item = inputs.get()
-      if item is None:
-        break
-      example = create_example(item)
-      outputs.put(example)
-    outputs.put(None)
-
-  # Read the inputs.
-  inputs = mp.Queue()
-  mp.Process(target=_queue, args=(inputs,)).start()
-
-  # Convert to tf.Example
-  outputs = mp.Queue()
-  for _ in range(num_threads):
-    mp.Process(target=_map_fn, args=(inputs, outputs)).start()
-
-  # Write the output to file.
-  writer = tf.python_io.TFRecordWriter(path)
-  counter = 0
-  while True:
-    example = outputs.get()
-    if example is None:
-      counter += 1
-      if counter == num_threads:
-        break
-      else:
-        continue
-    writer.write(example.SerializeToString())
-  writer.close()
-
-
-def encode_image(data, format="png"):
-  """Encodes a numpy array to string."""
-  im = PIL.Image.fromarray(data)
-  buf = io.BytesIO()
-  data = im.save(buf, format=format)
-  buf.seek(0)
-  return buf.getvalue()
-
-
-def decode_image(data):
-  """Decode the given image to a numpy array."""
-  buf = io.BytesIO(data)
-  im = PIL.Image.open(buf)
-  data = np.array(im.getdata()).reshape([im.height, im.width, -1])
-  return data
diff --git a/code/framework/dataset/__init__.py b/code/framework/dataset/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/code/framework/dataset/cifar10.py b/code/framework/dataset/cifar10.py
deleted file mode 100644
index 362e645..0000000
--- a/code/framework/dataset/cifar10.py
+++ /dev/null
@@ -1,143 +0,0 @@
-"""Cifar10 dataset preprocessing and specifications."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-from six.moves import cPickle
-from six.moves import urllib
-import struct
-import sys
-import tarfile
-import tensorflow as tf
-
-from common import utils
-
-REMOTE_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
-LOCAL_DIR = os.path.join("data/cifar10/")
-ARCHIVE_NAME = "cifar-10-python.tar.gz"
-DATA_DIR = "cifar-10-batches-py/"
-TRAIN_BATCHES = ["data_batch_%d" % (i + 1) for i in range(5)]
-TEST_BATCHES = ["test_batch"]
-
-IMAGE_SIZE = 32
-NUM_CLASSES = 10
-
-HPARAMS = {
-  "image_size": IMAGE_SIZE,
-  "num_classes": NUM_CLASSES,
-}
-
-
-def get_split(split):
-  """Returns train/test split paths."""
-  output_data = os.path.join(LOCAL_DIR, "data_%s.tfrecord" % split)
-  return output_data
-
-
-def create(split):
-  """Create an instance of the dataset object."""
-  return tf.contrib.data.TFRecordDataset(get_split(split))
-
-
-def parser_fn(record):
-  """Parse input record to features and labels."""
-  features = tf.parse_single_example(record, {
-    "image": tf.FixedLenFeature([], tf.string),
-    "label": tf.FixedLenFeature([], tf.int64),
-  })
-
-  image = tf.to_float(tf.image.decode_image(features["image"], 3)) / 255.0
-  image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
-  label = features["label"]
-
-  return {"image": image}, {"label": label}
-
-
-def _download_data():
-  """Download the cifar dataset."""
-  if not os.path.exists(LOCAL_DIR):
-    os.makedirs(LOCAL_DIR)
-  if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
-    print("Downloading...")
-    urllib.request.urlretrieve(REMOTE_URL, LOCAL_DIR + ARCHIVE_NAME)
-  if not os.path.exists(LOCAL_DIR + DATA_DIR):
-    print("Extracting files...")
-    tar = tarfile.open(LOCAL_DIR + ARCHIVE_NAME)
-    tar.extractall(LOCAL_DIR)
-    tar.close()
-
-
-def _image_iterator(split):
-  """An iterator that reads and returns images and labels from cifar."""
-  batches = {
-    tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
-    tf.estimator.ModeKeys.EVAL: TEST_BATCHES
-  }[split]
-
-  for batch in batches:
-    with open("%s%s%s" % (LOCAL_DIR, DATA_DIR, batch), "rb") as fo:
-      dict = cPickle.load(fo)
-      images = np.array(dict["data"])
-      labels = np.array(dict["labels"])
-
-      num = images.shape[0]
-      images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
-      images = np.transpose(images, [0, 2, 3, 1])
-      print("Loaded %d examples." % num)
-
-      for i in range(num):
-        yield utils.encode_image(images[i]), labels[i]
-
-
-def _convert_data(split):
-  """Convert the dataset to TFRecord format."""
-  def _create_example(item):
-    image, label = item
-    example = tf.train.Example(features=tf.train.Features(
-      feature={
-        "image": tf.train.Feature(
-          bytes_list=tf.train.BytesList(value=[image])),
-        "label": tf.train.Feature(
-          int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
-      }))
-    return example
-
-  utils.parallel_record_writer(
-    _image_iterator(split), _create_example, get_split(split))
-
-
-def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
-  """Read an visualize the first example form the dataset."""
-  path = get_split(split)
-  iterator = tf.python_io.tf_record_iterator(path)
-  item = next(iterator)
-
-  example = tf.train.Example()
-  example.ParseFromString(item)
-
-  image = utils.decode_image(
-    example.features.feature["image"].bytes_list.value[0])
-  label = example.features.feature["label"].int64_list.value[0]
-
-  plt.imshow(image.astype(np.uint8))
-  plt.title("Label: %d" % label)
-  plt.show()
-
-
-if __name__ == "__main__":
-  if len(sys.argv) != 2:
-    print("Usage: python dataset.cifar10 <convert|visualize>")
-    sys.exit(1)
-
-  if sys.argv[1] == "convert":
-    _download_data()
-    _convert_data(tf.estimator.ModeKeys.TRAIN)
-    _convert_data(tf.estimator.ModeKeys.EVAL)
-  elif sys.argv[1] == "visualize":
-    _visulize_data()
-  else:
-    print("Unknown command", sys.argv[1])
diff --git a/code/framework/dataset/cifar100.py b/code/framework/dataset/cifar100.py
deleted file mode 100644
index 795eb29..0000000
--- a/code/framework/dataset/cifar100.py
+++ /dev/null
@@ -1,143 +0,0 @@
-"""Cifar100 dataset preprocessing and specifications."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-from six.moves import cPickle
-from six.moves import urllib
-import struct
-import sys
-import tarfile
-import tensorflow as tf
-
-from common import utils
-
-REMOTE_URL = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
-LOCAL_DIR = os.path.join("data/cifar100/")
-ARCHIVE_NAME = "cifar-100-python.tar.gz"
-DATA_DIR = "cifar-100-python/"
-TRAIN_BATCHES = ["train"]
-TEST_BATCHES = ["test"]
-
-IMAGE_SIZE = 32
-NUM_CLASSES = 100
-
-HPARAMS = {
-  "image_size": IMAGE_SIZE,
-  "num_classes": NUM_CLASSES,
-}
-
-
-def get_split(split):
-  """Returns train/test split paths."""
-  output_data = os.path.join(LOCAL_DIR, "data_%s.tfrecord" % split)
-  return output_data
-
-
-def create(split):
-  """Create an instance of the dataset object."""
-  return tf.contrib.data.TFRecordDataset(get_split(split))
-
-
-def parser_fn(record):
-  """Parse input record to features and labels."""
-  features = tf.parse_single_example(record, {
-    "image": tf.FixedLenFeature([], tf.string),
-    "label": tf.FixedLenFeature([], tf.int64),
-  })
-
-  image = tf.to_float(tf.image.decode_image(features["image"], 3)) / 255.0
-  image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
-  label = features["label"]
-
-  return {"image": image}, {"label": label}
-
-
-def _download_data():
-  """Download the cifar dataset."""
-  if not os.path.exists(LOCAL_DIR):
-    os.makedirs(LOCAL_DIR)
-  if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
-    print("Downloading...")
-    urllib.request.urlretrieve(REMOTE_URL, LOCAL_DIR + ARCHIVE_NAME)
-  if not os.path.exists(LOCAL_DIR + DATA_DIR):
-    print("Extracting files...")
-    tar = tarfile.open(LOCAL_DIR + ARCHIVE_NAME)
-    tar.extractall(LOCAL_DIR)
-    tar.close()
-
-
-def _image_iterator(split):
-  """An iterator that reads and returns images and labels from cifar."""
-  batches = {
-    tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
-    tf.estimator.ModeKeys.EVAL: TEST_BATCHES
-  }[split]
-
-  for batch in batches:
-    with open("%s%s%s" % (LOCAL_DIR, DATA_DIR, batch), "rb") as fo:
-      dict = cPickle.load(fo)
-      images = np.array(dict["data"])
-      labels = np.array(dict["fine_labels"])
-
-      num = images.shape[0]
-      images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
-      images = np.transpose(images, [0, 2, 3, 1])
-      print("Loaded %d examples." % num)
-
-      for i in range(num):
-        yield utils.encode_image(images[i]), labels[i]
-
-
-def _convert_data(split):
-  """Convert the dataset to TFRecord format."""
-  def _create_example(item):
-    image, label = item
-    example = tf.train.Example(features=tf.train.Features(
-      feature={
-        "image": tf.train.Feature(
-          bytes_list=tf.train.BytesList(value=[image])),
-        "label": tf.train.Feature(
-          int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
-      }))
-    return example
-
-  utils.parallel_record_writer(
-    _image_iterator(split), _create_example, get_split(split))
-
-
-def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
-  """Read an visualize the first example form the dataset."""
-  path = get_split(split)
-  iterator = tf.python_io.tf_record_iterator(path)
-  item = next(iterator)
-
-  example = tf.train.Example()
-  example.ParseFromString(item)
-
-  image = utils.decode_image(
-    example.features.feature["image"].bytes_list.value[0])
-  label = example.features.feature["label"].int64_list.value[0]
-
-  plt.imshow(image.astype(np.uint8))
-  plt.title("Label: %d" % label)
-  plt.show()
-
-
-if __name__ == "__main__":
-  if len(sys.argv) != 2:
-    print("Usage: python dataset.cifar100 <convert|visualize>")
-    sys.exit(1)
-
-  if sys.argv[1] == "convert":
-    _download_data()
-    _convert_data(tf.estimator.ModeKeys.TRAIN)
-    _convert_data(tf.estimator.ModeKeys.EVAL)
-  elif sys.argv[1] == "visualize":
-    _visulize_data()
-  else:
-    print("Unknown command", sys.argv[1])
diff --git a/code/framework/dataset/mnist.py b/code/framework/dataset/mnist.py
deleted file mode 100644
index 8b9d473..0000000
--- a/code/framework/dataset/mnist.py
+++ /dev/null
@@ -1,145 +0,0 @@
-"""Mnist dataset preprocessing and specifications."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import gzip
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-from six.moves import urllib
-import struct
-import sys
-import tensorflow as tf
-
-from common import utils
-
-REMOTE_URL = "http://yann.lecun.com/exdb/mnist/"
-LOCAL_DIR = os.path.join("data/mnist/")
-TRAIN_IMAGE_URL = "train-images-idx3-ubyte.gz"
-TRAIN_LABEL_URL = "train-labels-idx1-ubyte.gz"
-TEST_IMAGE_URL = "t10k-images-idx3-ubyte.gz"
-TEST_LABEL_URL = "t10k-labels-idx1-ubyte.gz"
-
-IMAGE_SIZE = 28
-NUM_CLASSES = 10
-
-HPARAMS = {
-  "image_size": IMAGE_SIZE,
-  "num_classes": NUM_CLASSES,
-}
-
-
-def get_split(split):
-  """Returns train/test split paths."""
-  output_data = os.path.join(LOCAL_DIR, "data_%s.tfrecord" % split)
-  return output_data
-
-
-def create(split):
-  """Create an instance of the dataset object."""
-  return tf.contrib.data.TFRecordDataset(get_split(split))
-
-
-def parser_fn(record):
-  """Parse input record to features and labels."""
-  features = tf.parse_single_example(record, {
-    "image": tf.FixedLenFeature([], tf.string),
-    "label": tf.FixedLenFeature([], tf.int64),
-  })
-
-  image = tf.to_float(tf.image.decode_image(features["image"], 1)) / 255.0
-  image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 1])
-  label = features["label"]
-
-  return {"image": image}, {"label": label}
-
-
-def _download_data():
-  """Download the MNIST dataset."""
-  if not os.path.exists(LOCAL_DIR):
-    os.makedirs(LOCAL_DIR)
-  for name in [
-    TRAIN_IMAGE_URL,
-    TRAIN_LABEL_URL,
-    TEST_IMAGE_URL,
-    TEST_LABEL_URL]:
-    if not os.path.exists(LOCAL_DIR + name):
-      urllib.request.urlretrieve(REMOTE_URL + name, LOCAL_DIR + name)
-
-
-def _image_iterator(split):
-  """An iterator that reads and returns images and labels from MNIST."""
-  image_urls = {
-    tf.estimator.ModeKeys.TRAIN: TRAIN_IMAGE_URL,
-    tf.estimator.ModeKeys.EVAL: TEST_IMAGE_URL
-  }[split]
-  label_urls = {
-    tf.estimator.ModeKeys.TRAIN: TRAIN_LABEL_URL,
-    tf.estimator.ModeKeys.EVAL: TEST_LABEL_URL
-  }[split]
-
-  with gzip.open(LOCAL_DIR + image_urls, "rb") as f:
-    magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
-    images = np.frombuffer(f.read(num * rows * cols), dtype=np.uint8)
-    images = np.reshape(images, [num, rows, cols])
-    print("Loaded %d images of size [%d, %d]." % (num, rows, cols))
-
-  with gzip.open(LOCAL_DIR + label_urls, "rb") as f:
-    magic, num = struct.unpack(">II", f.read(8))
-    labels = np.frombuffer(f.read(num), dtype=np.int8)
-    print("Loaded %d labels." % num)
-
-  for i in range(num):
-    yield utils.encode_image(images[i]), labels[i]
-
-
-def _convert_data(split):
-  """Convert the dataset to TFRecord format."""
-  def _create_example(item):
-    image, label = item
-    example = tf.train.Example(features=tf.train.Features(
-      feature={
-        "image": tf.train.Feature(
-          bytes_list=tf.train.BytesList(value=[image])),
-        "label": tf.train.Feature(
-          int64_list=tf.train.Int64List(value=[label.astype(np.int64)]))
-      }))
-    return example
-
-  utils.parallel_record_writer(
-    _image_iterator(split), _create_example, get_split(split))
-
-
-def _visulize_data(split=tf.estimator.ModeKeys.TRAIN):
-  """Read an visualize the first example form the dataset."""
-  path = get_split(split)
-  iterator = tf.python_io.tf_record_iterator(path)
-  item = next(iterator)
-
-  example = tf.train.Example()
-  example.ParseFromString(item)
-
-  image = utils.decode_image(
-    example.features.feature["image"].bytes_list.value[0])
-  label = example.features.feature["label"].int64_list.value[0]
-
-  plt.imshow(image.squeeze())
-  plt.title("Label: %d" % label)
-  plt.show()
-
-
-if __name__ == "__main__":
-  if len(sys.argv) != 2:
-    print("Usage: python dataset.mnist <convert|visualize>")
-    sys.exit(1)
-
-  if sys.argv[1] == "convert":
-    _download_data()
-    _convert_data(tf.estimator.ModeKeys.TRAIN)
-    _convert_data(tf.estimator.ModeKeys.EVAL)
-  elif sys.argv[1] == "visualize":
-    _visulize_data()
-  else:
-    print("Unknown command", sys.argv[1])
diff --git a/code/framework/main.py b/code/framework/main.py
deleted file mode 100644
index 5b20947..0000000
--- a/code/framework/main.py
+++ /dev/null
@@ -1,182 +0,0 @@
-"""Main module."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import tensorflow as tf
-
-from common import ops
-import dataset.cifar10
-import dataset.cifar100
-import dataset.mnist
-import model.cnn_classifier
-
-tf.logging.set_verbosity(tf.logging.INFO)
-
-tf.flags.DEFINE_string("model", "cnn_classifier", "Model name.")
-tf.flags.DEFINE_string("dataset", "mnist", "Dataset name.")
-tf.flags.DEFINE_string("output_dir", "", "Optional output dir.")
-tf.flags.DEFINE_string("schedule", "train_and_evaluate", "Schedule.")
-tf.flags.DEFINE_string("hparams", "", "Hyper parameters.")
-tf.flags.DEFINE_integer("num_epochs", 100, "Number of training epochs.")
-tf.flags.DEFINE_integer("save_summary_steps", 10, "Summary steps.")
-tf.flags.DEFINE_integer("save_checkpoints_steps", 10, "Checkpoint steps.")
-tf.flags.DEFINE_integer("eval_steps", None, "Number of eval steps.")
-tf.flags.DEFINE_integer("eval_frequency", 10, "Eval frequency.")
-tf.flags.DEFINE_integer("num_gpus", 0, "Numner of gpus.")
-
-FLAGS = tf.flags.FLAGS
-learn = tf.contrib.learn
-
-MODELS = {
-  "cnn_classifier": model.cnn_classifier
-}
-
-DATASETS = {
-  "cifar10": dataset.cifar10,
-  "cifar100": dataset.cifar100,
-  "mnist": dataset.mnist,
-}
-
-HPARAMS = {
-  "optimizer": "Adam",
-  "learning_rate": 0.001,
-  "decay_steps": 10000,
-  "batch_size": 128
-}
-
-def get_hparams():
-  """Aggregates and returns hyper parameters."""
-  hparams = HPARAMS
-  hparams.update(DATASETS[FLAGS.dataset].HPARAMS)
-  hparams.update(MODELS[FLAGS.model].HPARAMS)
-
-  hparams = tf.contrib.training.HParams(**hparams)
-  hparams.parse(FLAGS.hparams)
-
-  return hparams
-
-
-def make_input_fn(mode, params):
-  """Returns an input function to read the dataset."""
-  def _input_fn():
-    with tf.device(tf.DeviceSpec(device_type="CPU", device_index=0)):
-      dataset = DATASETS[FLAGS.dataset].create(mode)
-      if mode == learn.ModeKeys.TRAIN:
-        dataset = dataset.repeat(FLAGS.num_epochs)
-        dataset = dataset.shuffle(params.batch_size * 5)
-      dataset = dataset.map(DATASETS[FLAGS.dataset].parser_fn, num_threads=8)
-      dataset = dataset.batch(params.batch_size)
-      iterator = dataset.make_one_shot_iterator()
-      features, labels = iterator.get_next()
-    return features, labels
-  return _input_fn
-
-
-def make_model_fn():
-  """Returns a model function."""
-  def _model_fn(features, labels, mode, params):
-    model_fn = MODELS[FLAGS.model].model_fn
-
-    global_step = tf.train.get_or_create_global_step()
-
-    if FLAGS.num_gpus > 0 and mode == learn.ModeKeys.TRAIN:
-      split_features = {k: tf.split(v, FLAGS.num_gpus)
-                        for k, v in features.iteritems()}
-      split_labels = {k: tf.split(v, FLAGS.num_gpus)
-                      for k, v in labels.iteritems()}
-      grads = []
-      predictions = collections.defaultdict(list)
-      losses = []
-
-      opt = ops.create_optimizer(
-        params.optimizer, params.learning_rate, params.decay_steps)
-
-      for i in range(FLAGS.num_gpus):
-        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
-          with tf.name_scope("tower_%d" % i):
-            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
-              device_features = {k: v[i] for k, v in split_features.iteritems()}
-              device_labels = {k: v[i] for k, v in split_labels.iteritems()}
-
-              device_predictions, device_loss = model_fn(
-                device_features, device_labels, mode, params)
-
-              for k, v in device_predictions.iteritems():
-                predictions[k].append(v)
-
-              if device_loss is not None:
-                losses.append(device_loss)
-
-              device_grads = opt.compute_gradients(device_loss)
-              grads.append(device_grads)
-
-      grads = ops.average_gradients(grads)
-      train_op = opt.apply_gradients(grads, global_step=global_step)
-
-      for k, v in predictions.iteritems():
-        predictions[k] = tf.concat(v, axis=0)
-
-      loss = tf.add_n(losses) if losses else None
-    else:
-      with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
-        predictions, loss = model_fn(features, labels, mode, params)
-
-        train_op = None
-        if mode == learn.ModeKeys.TRAIN:
-          opt = ops.create_optimizer(
-            params.optimizer, params.learning_rate, params.decay_steps)
-          train_op = opt.minimize(loss, global_step=global_step)
-
-    tf.summary.scalar("loss/loss", loss)
-
-    return tf.contrib.learn.ModelFnOps(
-      mode=mode,
-      predictions=predictions,
-      loss=loss,
-      train_op=train_op)
-
-  return _model_fn
-
-
-def experiment_fn(run_config, hparams):
-  """Constructs an experiment object."""
-  estimator = learn.Estimator(
-    model_fn=make_model_fn(), config=run_config, params=hparams)
-  eval_metrics = MODELS[FLAGS.model].eval_metrics_fn(hparams)
-  return learn.Experiment(
-    estimator=estimator,
-    train_input_fn=make_input_fn(learn.ModeKeys.TRAIN, hparams),
-    eval_input_fn=make_input_fn(learn.ModeKeys.EVAL, hparams),
-    eval_metrics=eval_metrics,
-    eval_steps=FLAGS.eval_steps,
-    min_eval_frequency=FLAGS.eval_frequency)
-
-
-def main(unused_argv):
-  """Main entry point."""
-  if FLAGS.output_dir:
-    model_dir = FLAGS.output_dir
-  else:
-    model_dir = "output/%s_%s" % (FLAGS.model, FLAGS.dataset)
-  session_config = tf.ConfigProto()
-  session_config.allow_soft_placement = True
-  session_config.gpu_options.allow_growth = True
-  run_config = learn.RunConfig(
-    model_dir=model_dir,
-    save_summary_steps=FLAGS.save_summary_steps,
-    save_checkpoints_steps=FLAGS.save_checkpoints_steps,
-    save_checkpoints_secs=None,
-    session_config=session_config)
-
-  estimator = learn.learn_runner.run(
-    experiment_fn=experiment_fn,
-    run_config=run_config,
-    schedule=FLAGS.schedule,
-    hparams=get_hparams())
-
-
-if __name__ == "__main__":
-  tf.app.run()
diff --git a/code/framework/model/__init__.py b/code/framework/model/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/code/framework/model/cnn_classifier.py b/code/framework/model/cnn_classifier.py
deleted file mode 100644
index 0ea64d6..0000000
--- a/code/framework/model/cnn_classifier.py
+++ /dev/null
@@ -1,54 +0,0 @@
-"""Simple convolutional neural network classififer."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import tensorflow as tf
-
-from common import ops
-from common import summary
-
-FLAGS = tf.flags.FLAGS
-
-HPARAMS = {
-  "drop_rate": 0.5
-}
-
-
-def model_fn(features, labels, mode, params):
-  """CNN classifier model."""
-  images = features["image"]
-  labels = labels["label"]
-
-  drop_rate = params.drop_rate if mode == tf.estimator.ModeKeys.TRAIN else 0.0
-
-  features = ops.conv_layers(
-    images,
-    filters=[32, 64, 128],
-    kernels=[3, 3, 3],
-    pools=[2, 2, 2])
-
-  features = tf.contrib.layers.flatten(features)
-
-  logits = ops.dense_layers(
-    features, [512, params.num_classes],
-    drop_rate=drop_rate,
-    linear_top_layer=True)
-
-  predictions = tf.argmax(logits, axis=1)
-
-  loss = tf.losses.sparse_softmax_cross_entropy(
-    labels=labels, logits=logits)
-
-  summary.labeled_image("images", images, predictions)
-
-  return {"predictions": predictions}, loss
-
-
-def eval_metrics_fn(params):
-  """Eval metrics."""
-  metrics_dict = {}
-  metrics_dict["accuracy"] = tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
-  return metrics_dict

From 9c96cdc6e445aeff3956ff06407fd095eb614b3d Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Fri, 18 Aug 2017 21:44:51 -0700
Subject: [PATCH 27/78] Fix broken links.

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 22eaf2a..9fb665c 100644
--- a/README.md
+++ b/README.md
@@ -935,7 +935,7 @@ Let me remind again that extra care must be taken when doing gradient descent to
 
 ## Building a neural network training framework with learn API
 <a name="tf_learn"></a>
-For simplicity, in most of the examples here we manually create sessions and we don't care about saving and loading checkpoints but this is not how we usually do things in practice. You most probably want to use the learn API to take care of session management and logging. We provide a simple but practical framework in the [code/framework](https://github.com/vahidk/EffectiveTensorFlow/tree/master/code/framework) directory for training neural networks using TensorFlow. In this item we explain how this framework works.
+For simplicity, in most of the examples here we manually create sessions and we don't care about saving and loading checkpoints but this is not how we usually do things in practice. You most probably want to use the learn API to take care of session management and logging. We provide a simple but practical [framework](https://github.com/vahidk/TensorflowFramework/tree/master) for training neural networks using TensorFlow. In this item we explain how this framework works.
 
 When experimenting with neural network models you usually have a training/test split. You want to train your model on the training set, and once in a while evaluate it on test set and compute some metrics. You also need to store the model parameters as a checkpoint, and ideally you want to be able to stop and resume training. TensorFlow's learn API is designed to make this job easier, letting us focus on developing the actual model.
 
@@ -1058,9 +1058,9 @@ def input_fn():
         features=features,
         reader=tf.TFRecordReader)
 ```
-See [mnist.py](https://github.com/vahidk/EffectiveTensorFlow/blob/master/code/framework/dataset/mnist.py) for an example of how to convert your data to TFRecords format.
+See [mnist.py](https://github.com/vahidk/TensorflowFramework/blob/master/dataset/mnist.py) for an example of how to convert your data to TFRecords format.
 
-The framework also comes with a simple convolutional network classifier in [cnn_classifier.py](https://github.com/vahidk/EffectiveTensorFlow/blob/master/code/framework/model/cnn_classifier.py) that includes an example model and evaluation metric:
+The framework also comes with a simple convolutional network classifier in [cnn_classifier.py](https://github.com/vahidk/TensorflowFramework/blob/master/model/cnn_classifier.py) that includes an example model and evaluation metric:
 
 ```python
 def model_fn(features, labels, mode, params):
@@ -1085,7 +1085,7 @@ tf.contrib.learn.MetricSpec(
   prediction_key="predictions")
 ```
 
-And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the [source code](https://github.com/vahidk/EffectiveTensorFlow/tree/master/code/framework) and see the official python API to learn more about the learn API.
+And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the framework [source code](https://github.com/vahidk/TensorFlowFramework) and see the official python API to learn more about the learn API.
 
 ## TensorFlow Cookbook
 <a name="cookbook"></a>

From f1c7de192966d5907e9bdc09aa5eb9af64cd6c87 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Fri, 18 Aug 2017 23:02:42 -0700
Subject: [PATCH 28/78] Fixed typo.

---
 README.md      | 2 +-
 code/framework | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9fb665c..22fbba5 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Table of Contents
 
 __Note__: _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK). -V. K._
 
-__Update__: In the latest release, the accompanied framework is moved to it's own [repository](https://github.com/vahidk/TensorflowFramework). To download the framework run:
+__Update__: The accompanied framework is now moved to its own [repository](https://github.com/vahidk/TensorflowFramework) and can be downloaded separately:
 ```
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```
diff --git a/code/framework b/code/framework
index fd52650..31cf1f0 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit fd5265012b70d418130bbf21535a24680c7559ef
+Subproject commit 31cf1f0896a12a1830fc24796cdf38ad2142435d

From 981fa17a363a281e6470fe9d77b73eeea3e3cdff Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 19 Aug 2017 09:39:03 -0700
Subject: [PATCH 29/78] Update intro.

---
 .gitignore | 2 ++
 README.md  | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index e43b0f9..00a6dd4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 .DS_Store
+.vscode
+*.pyc
\ No newline at end of file
diff --git a/README.md b/README.md
index 22fbba5..c862061 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,11 @@ Table of Contents
     - [Entropy](#entropy)
     - [Make parallel](#make_parallel)
 
-__Note__: _My attempt is to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out on [twitter](https://twitter.com/VahidK). -V. K._
+---
 
-__Update__: The accompanied framework is now moved to its own [repository](https://github.com/vahidk/TensorflowFramework) and can be downloaded separately:
+_We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
+
+ _Readers are encouraged to also checkout the accompanied neural network training framework built on top of tf.contrib.learn API. The [framework](https://github.com/vahidk/TensorflowFramework) can be downloaded separately:_
 ```
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```

From c311549dd95415250a53c3d181237ae9065e4215 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 19 Aug 2017 09:44:44 -0700
Subject: [PATCH 30/78] Fixed typo.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c862061..62e88cf 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ Table of Contents
 
 _We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
 
- _Readers are encouraged to also checkout the accompanied neural network training framework built on top of tf.contrib.learn API. The [framework](https://github.com/vahidk/TensorflowFramework) can be downloaded separately:_
+ _We encourage you to also check out the accompanied neural network training framework built on top of tf.contrib.learn API. The [framework](https://github.com/vahidk/TensorflowFramework) can be downloaded separately:_
 ```
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```

From 7143e942e9bdced836bac6fec61305819547d809 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 20 Aug 2017 13:46:18 -0700
Subject: [PATCH 31/78] Added Item 2: Take advantage of the overloaded
 operators.

---
 README.md | 95 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 84 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 62e88cf..a9100ce 100644
--- a/README.md
+++ b/README.md
@@ -3,17 +3,18 @@
 Table of Contents
 =================
 1.  [TensorFlow Basics](#basics)
-2.  [Understanding static and dynamic shapes](#shapes)
-3.  [Scopes and when to use them](#scopes)
-4.  [Broadcasting the good and the ugly](#broadcast)
-5.  [Understanding order of execution and control dependencies](#control_deps)
-6.  [Control flow operations: conditionals and loops](#control_flow)
-7.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
-8.  [Multi-GPU processing with data parallelism](#multi_gpu)
-9.  [Debugging TensorFlow models](#debug)
-10. [Numerical stability in TensorFlow](#stable)
-11. [Building a neural network training framework with learn API](#tf_learn)
-12. [TensorFlow Cookbook](#cookbook)
+2.  [Take advantage of the overloaded operators](#overloaded_ops)
+3.  [Understanding static and dynamic shapes](#shapes)
+4.  [Scopes and when to use them](#scopes)
+5.  [Broadcasting the good and the ugly](#broadcast)
+6.  [Understanding order of execution and control dependencies](#control_deps)
+7.  [Control flow operations: conditionals and loops](#control_flow)
+8.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
+9.  [Multi-GPU processing with data parallelism](#multi_gpu)
+10. [Debugging TensorFlow models](#debug)
+11. [Numerical stability in TensorFlow](#stable)
+12. [Building a neural network training framework with learn API](#tf_learn)
+13. [TensorFlow Cookbook](#cookbook)
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
@@ -118,6 +119,78 @@ Which is a relatively close approximation to our parameters.
 
 This is just tip of the iceberg for what TensorFlow can do. Many problems such a optimizing large neural networks with millions of parameters can be implemented efficiently in TensorFlow in just a few lines of code. TensorFlow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
 
+## Take advantage of the overloaded operators
+<a name="overloaded_ops"></a>
+Just like NumPy, TensorFlow overloads a number of python operators to make building graphs easier and more readable.
+
+One of the commonly used ops is the slicing operator that can make indexing tensors very easy:
+```python
+z = x[begin:end]  # z = tf.slice(x, [begin], [end-begin])
+```
+Be very careful when using this op though. The slicing op is very inefficient and often better avoided. To understand how inefficient this op can be let's look at an example. We want manually perform reduction across the rows of a matrix:
+```python
+import tensorflow as tf
+import time
+
+x = tf.random_uniform([500, 10])
+
+z = tf.zeros([10])
+for i in range(500):
+    z += x[i]
+
+sess = tf.Session()
+start = time.time()
+sess.run(z)
+print("Took %f seconds." % (time.time() - start))
+```
+On my MacBook Pro, this took 2.67 seconds to run! The reason is that we are calling the slice op 500 times, which is going to be very slow to run. A better choice would have been to use tf.unstack op to slice the matrix into a list of vectors all at once:
+```python
+z = tf.zeros([10])
+for x_i in tf.unstack(x):
+    z += x_i
+```
+This took 0.18 seconds. Of course, the right way to do this simple reduction is to use tf.reduce_sum op:
+```python
+z = tf.reduce_sum(x, axis=0)
+```
+This took 0.008 seconds, which is 300x faster than the original implementation.
+
+TensorFlow also overloads a range of arithmetic and logical operators:
+```python
+z = -x  # z = tf.negative(x)
+z = x + y  # z = tf.add(x, y)
+z = x - y  # z = tf.subtract(x, y)
+z = x * y  # z = tf.mul(x, y)
+z = x / y  # z = tf.div(x, y)
+z = x // y  # z = tf.floordiv(x, y)
+z = x % y  # z = tf.mod(x, y)
+z = x ** y  # z = tf.pow(x, y)
+z = x @ y  # z = tf.matmul(x, y)
+z = x > y  # z = tf.greater(x, y)
+z = x >= y  # z = tf.greater_equal(x, y)
+z = x < y  # z = tf.less(x, y)
+z = x <= y  # z = tf.less_equal(x, y)
+z = abs(x)  # z = tf.abs(x)
+z = x & y  # z = tf.logical_and(x, y)
+z = x | y  # z = tf.logical_or(x, y)
+z = x ^ y  # z = tf.logical_xor(x, y)
+z = ~x  # z = tf.logical_not(x, y)
+```
+
+You can also use the augmented version of these ops. For example `x += y` and `x **= 2` are also valid.
+
+Note that Python doesn't allow overloading "and", "or", and "not" keywords.
+
+TensorFlow also doesn't allow using tensors as booleans, as it may be error prone:
+```python
+x = tf.constant(1.)
+if x:  # This will raise a TypeError error
+    ...
+```
+You can either use tf.cond(x, ...) if you want to check the value of the tensor, or use "if x is None" to check the value of the variable.
+
+Other operators that aren't supported are equal (==) and not equal (!=) operators which are overloaded in NumPy but not in TensorFlow.
+
 ## Understanding static and dynamic shapes
 <a name="shapes"></a>
 Tensors in TensorFlow have a static shape attribute which is determined during graph construction. The static shape may be underspecified. For example we might define a tensor of shape [None, 128]:

From 7044f5f324d93096f8d3592678d37c23ac9c261a Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 20 Aug 2017 14:01:30 -0700
Subject: [PATCH 32/78] Fixed typo.

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a9100ce..2dc519b 100644
--- a/README.md
+++ b/README.md
@@ -121,13 +121,13 @@ This is just tip of the iceberg for what TensorFlow can do. Many problems such a
 
 ## Take advantage of the overloaded operators
 <a name="overloaded_ops"></a>
-Just like NumPy, TensorFlow overloads a number of python operators to make building graphs easier and more readable.
+Just like NumPy, TensorFlow overloads a number of python operators to make building graphs easier and the code more readable.
 
-One of the commonly used ops is the slicing operator that can make indexing tensors very easy:
+The slicing op is one of the overloaded operators that can make indexing tensors very easy:
 ```python
 z = x[begin:end]  # z = tf.slice(x, [begin], [end-begin])
 ```
-Be very careful when using this op though. The slicing op is very inefficient and often better avoided. To understand how inefficient this op can be let's look at an example. We want manually perform reduction across the rows of a matrix:
+Be very careful when using this op though. The slicing op is very inefficient and often better avoided, especially when the number of slices is high. To understand how inefficient this op can be let's look at an example. We want manually perform reduction across the rows of a matrix:
 ```python
 import tensorflow as tf
 import time

From 956865086665e2f6c0b84b2b5a7f2320c27aeaba Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 20 Aug 2017 14:08:27 -0700
Subject: [PATCH 33/78] Fixed typo.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2dc519b..70a3f60 100644
--- a/README.md
+++ b/README.md
@@ -174,7 +174,7 @@ z = abs(x)  # z = tf.abs(x)
 z = x & y  # z = tf.logical_and(x, y)
 z = x | y  # z = tf.logical_or(x, y)
 z = x ^ y  # z = tf.logical_xor(x, y)
-z = ~x  # z = tf.logical_not(x, y)
+z = ~x  # z = tf.logical_not(x)
 ```
 
 You can also use the augmented version of these ops. For example `x += y` and `x **= 2` are also valid.

From 91fa976962239a9e22b08a350af15950f4977121 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 20 Aug 2017 14:12:39 -0700
Subject: [PATCH 34/78] Added note on tf.equal.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 70a3f60..db5c692 100644
--- a/README.md
+++ b/README.md
@@ -189,7 +189,7 @@ if x:  # This will raise a TypeError error
 ```
 You can either use tf.cond(x, ...) if you want to check the value of the tensor, or use "if x is None" to check the value of the variable.
 
-Other operators that aren't supported are equal (==) and not equal (!=) operators which are overloaded in NumPy but not in TensorFlow.
+Other operators that aren't supported are equal (==) and not equal (!=) operators which are overloaded in NumPy but not in TensorFlow. Use the function versions instead which are `tf.equal` and `tf.not_equal`.
 
 ## Understanding static and dynamic shapes
 <a name="shapes"></a>

From a2c1fa63efc09fc4cb1f871854142c64dc45398c Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sun, 20 Aug 2017 17:00:32 -0700
Subject: [PATCH 35/78] Reorder items.

---
 README.md | 153 +++++++++++++++++++++++++++---------------------------
 1 file changed, 77 insertions(+), 76 deletions(-)

diff --git a/README.md b/README.md
index db5c692..e8ffa96 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,10 @@
 Table of Contents
 =================
 1.  [TensorFlow Basics](#basics)
-2.  [Take advantage of the overloaded operators](#overloaded_ops)
-3.  [Understanding static and dynamic shapes](#shapes)
-4.  [Scopes and when to use them](#scopes)
-5.  [Broadcasting the good and the ugly](#broadcast)
+2.  [Understanding static and dynamic shapes](#shapes)
+3.  [Scopes and when to use them](#scopes)
+4.  [Broadcasting the good and the ugly](#broadcast)
+5.  [Take advantage of the overloaded operators](#overloaded_ops)
 6.  [Understanding order of execution and control dependencies](#control_deps)
 7.  [Control flow operations: conditionals and loops](#control_flow)
 8.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
@@ -119,78 +119,6 @@ Which is a relatively close approximation to our parameters.
 
 This is just tip of the iceberg for what TensorFlow can do. Many problems such a optimizing large neural networks with millions of parameters can be implemented efficiently in TensorFlow in just a few lines of code. TensorFlow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
 
-## Take advantage of the overloaded operators
-<a name="overloaded_ops"></a>
-Just like NumPy, TensorFlow overloads a number of python operators to make building graphs easier and the code more readable.
-
-The slicing op is one of the overloaded operators that can make indexing tensors very easy:
-```python
-z = x[begin:end]  # z = tf.slice(x, [begin], [end-begin])
-```
-Be very careful when using this op though. The slicing op is very inefficient and often better avoided, especially when the number of slices is high. To understand how inefficient this op can be let's look at an example. We want manually perform reduction across the rows of a matrix:
-```python
-import tensorflow as tf
-import time
-
-x = tf.random_uniform([500, 10])
-
-z = tf.zeros([10])
-for i in range(500):
-    z += x[i]
-
-sess = tf.Session()
-start = time.time()
-sess.run(z)
-print("Took %f seconds." % (time.time() - start))
-```
-On my MacBook Pro, this took 2.67 seconds to run! The reason is that we are calling the slice op 500 times, which is going to be very slow to run. A better choice would have been to use tf.unstack op to slice the matrix into a list of vectors all at once:
-```python
-z = tf.zeros([10])
-for x_i in tf.unstack(x):
-    z += x_i
-```
-This took 0.18 seconds. Of course, the right way to do this simple reduction is to use tf.reduce_sum op:
-```python
-z = tf.reduce_sum(x, axis=0)
-```
-This took 0.008 seconds, which is 300x faster than the original implementation.
-
-TensorFlow also overloads a range of arithmetic and logical operators:
-```python
-z = -x  # z = tf.negative(x)
-z = x + y  # z = tf.add(x, y)
-z = x - y  # z = tf.subtract(x, y)
-z = x * y  # z = tf.mul(x, y)
-z = x / y  # z = tf.div(x, y)
-z = x // y  # z = tf.floordiv(x, y)
-z = x % y  # z = tf.mod(x, y)
-z = x ** y  # z = tf.pow(x, y)
-z = x @ y  # z = tf.matmul(x, y)
-z = x > y  # z = tf.greater(x, y)
-z = x >= y  # z = tf.greater_equal(x, y)
-z = x < y  # z = tf.less(x, y)
-z = x <= y  # z = tf.less_equal(x, y)
-z = abs(x)  # z = tf.abs(x)
-z = x & y  # z = tf.logical_and(x, y)
-z = x | y  # z = tf.logical_or(x, y)
-z = x ^ y  # z = tf.logical_xor(x, y)
-z = ~x  # z = tf.logical_not(x)
-```
-
-You can also use the augmented version of these ops. For example `x += y` and `x **= 2` are also valid.
-
-Note that Python doesn't allow overloading "and", "or", and "not" keywords.
-
-TensorFlow also doesn't allow using tensors as booleans, as it may be error prone:
-```python
-x = tf.constant(1.)
-if x:  # This will raise a TypeError error
-    ...
-```
-You can either use tf.cond(x, ...) if you want to check the value of the tensor, or use "if x is None" to check the value of the variable.
-
-Other operators that aren't supported are equal (==) and not equal (!=) operators which are overloaded in NumPy but not in TensorFlow. Use the function versions instead which are `tf.equal` and `tf.not_equal`.
-
 ## Understanding static and dynamic shapes
 <a name="shapes"></a>
 Tensors in TensorFlow have a static shape attribute which is determined during graph construction. The static shape may be underspecified. For example we might define a tensor of shape [None, 128]:
@@ -410,6 +338,79 @@ c = tf.reduce_sum(a + b, 0)
 
 Here the value of c would be [5, 7], and we immediately would guess based on the shape of the result that there’s something wrong. A general rule of thumb is to always specify the dimensions in reduction operations and when using tf.squeeze.
 
+## Take advantage of the overloaded operators
+<a name="overloaded_ops"></a>
+Just like NumPy, TensorFlow overloads a number of python operators to make building graphs easier and the code more readable.
+
+The slicing op is one of the overloaded operators that can make indexing tensors very easy:
+```python
+z = x[begin:end]  # z = tf.slice(x, [begin], [end-begin])
+```
+Be very careful when using this op though. The slicing op is very inefficient and often better avoided, especially when the number of slices is high. To understand how inefficient this op can be let's look at an example. We want manually perform reduction across the rows of a matrix:
+```python
+import tensorflow as tf
+import time
+
+x = tf.random_uniform([500, 10])
+
+z = tf.zeros([10])
+for i in range(500):
+    z += x[i]
+
+sess = tf.Session()
+start = time.time()
+sess.run(z)
+print("Took %f seconds." % (time.time() - start))
+```
+On my MacBook Pro, this took 2.67 seconds to run! The reason is that we are calling the slice op 500 times, which is going to be very slow to run. A better choice would have been to use tf.unstack op to slice the matrix into a list of vectors all at once:
+```python
+z = tf.zeros([10])
+for x_i in tf.unstack(x):
+    z += x_i
+```
+This took 0.18 seconds. Of course, the right way to do this simple reduction is to use tf.reduce_sum op:
+```python
+z = tf.reduce_sum(x, axis=0)
+```
+This took 0.008 seconds, which is 300x faster than the original implementation.
+
+TensorFlow also overloads a range of arithmetic and logical operators:
+```python
+z = -x  # z = tf.negative(x)
+z = x + y  # z = tf.add(x, y)
+z = x - y  # z = tf.subtract(x, y)
+z = x * y  # z = tf.mul(x, y)
+z = x / y  # z = tf.div(x, y)
+z = x // y  # z = tf.floordiv(x, y)
+z = x % y  # z = tf.mod(x, y)
+z = x ** y  # z = tf.pow(x, y)
+z = x @ y  # z = tf.matmul(x, y)
+z = x > y  # z = tf.greater(x, y)
+z = x >= y  # z = tf.greater_equal(x, y)
+z = x < y  # z = tf.less(x, y)
+z = x <= y  # z = tf.less_equal(x, y)
+z = abs(x)  # z = tf.abs(x)
+z = x & y  # z = tf.logical_and(x, y)
+z = x | y  # z = tf.logical_or(x, y)
+z = x ^ y  # z = tf.logical_xor(x, y)
+z = ~x  # z = tf.logical_not(x)
+```
+
+You can also use the augmented version of these ops. For example `x += y` and `x **= 2` are also valid.
+
+Note that Python doesn't allow overloading "and", "or", and "not" keywords.
+
+TensorFlow also doesn't allow using tensors as booleans, as it may be error prone:
+```python
+x = tf.constant(1.)
+if x:  # This will raise a TypeError error
+    ...
+```
+You can either use tf.cond(x, ...) if you want to check the value of the tensor, or use "if x is None" to check the value of the variable.
+
+Other operators that aren't supported are equal (==) and not equal (!=) operators which are overloaded in NumPy but not in TensorFlow. Use the function versions instead which are `tf.equal` and `tf.not_equal`.
+
+
 ## Understanding order of execution and control dependencies
 <a name="control_deps"></a>
 As we discussed in the first item, TensorFlow doesn't immediately run the operations that are defined but rather creates corresponding nodes in a graph that can be evaluated with Session.run() method. This also enables TensorFlow to do optimizations at run time to determine the optimal order of execution and possible trimming of unused nodes. If you only have tf.Tensors in your graph you don't need to worry about dependencies but you most probably have tf.Variables too, and tf.Variables make things much more difficult. My advice to is to only use Variables if Tensors don't do the job. This might not make a lot of sense to you now, so let's start with an example.

From e0db0521e983cf04a358f16aa2721bbfea6226f2 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 21 Aug 2017 21:04:04 -0700
Subject: [PATCH 36/78] Better explanation of scopes.

---
 README.md | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index e8ffa96..8e7a1e5 100644
--- a/README.md
+++ b/README.md
@@ -195,51 +195,57 @@ b = reshape(b, [0, [1, 2]])
 ## Scopes and when to use them
 <a name="scopes"></a>
 
-Variables and tensors in TensorFlow have a name attribute that is used to identify them in the graph. If you don't specify a name when creating a variable or a tensor, TensorFlow automatically assigns a name for you:
+Variables and tensors in TensorFlow have a name attribute that is used to identify them in the symbolic graph. If you don't specify a name when creating a variable or a tensor, TensorFlow automatically assigns a name for you:
 
 ```python
-a = tf.Variable(1)
-print(a.name)  # prints "Variable:0"
+a = tf.constant(1)
+print(a.name)  # prints "Const:0"
 
-b = tf.constant(1)
-print(b.name)  # prints "Const:0"
+b = tf.Variable(1)
+print(b.name)  # prints "Variable:0"
 ```
 
 You can overwrite the default name by explicitly specifying it:
 
 ```python
-a = tf.Variable(1, name="a")
-print(a.name)  # prints "a:0"
+a = tf.constant(1, name="a")
+print(a.name)  # prints "a:0"
 
-b = tf.constant(1, name="b")
+b = tf.Variable(1, name="b")
 print(b.name)  # prints "b:0"
 ```
 
-TensorFlow introduces two different context managers to alter the name of tensors and variables. The first is tf.name_scope which modifies the name of tensors:
+TensorFlow introduces two different context managers to alter the name of tensors and variables. The first is tf.name_scope:
 
 ```python
 with tf.name_scope("scope"):
-  a = tf.get_variable(name="a", shape=[])
-  print(a.name)  # prints "a:0"
+  a = tf.constant(1, name="a")
+  print(a.name)  # prints "scope/a:0"
 
-  b = tf.constant(1, name="b")
+  b = tf.Variable(1, name="b")
   print(b.name)  # prints "scope/b:0"
+
+  c = tf.get_variable(name="c", shape=[])
+  print(c.name)  # prints "c:0"
 ```
 
-The other is tf.variable_scope which modifies the name of both tensors and variables:
+Note that there are two ways to define new variables in TensorFlow, by creating a tf.Variable object or by calling tf.get_variable. Calling tf.get_variable with a new name results in creating a new variable, but if a variable with the same name exists it will raise a ValueError exception, telling us that re-declaring a variable is not allowed.
+
+tf.name_scope affects the name of tensors and variables created with tf.Variable, but doesn't impact the variables created with tf.get_variable.
+
+Unlike tf.name_scope, tf.variable_scope modifies the name of variables created with tf.get_variable as well:
 
 ```python
 with tf.variable_scope("scope"):
-  a = tf.get_variable(name="a", shape=[])
+  a = tf.constant(1, name="a")
   print(a.name)  # prints "scope/a:0"
 
-  b = tf.constant(1, name="b")
+  b = tf.Variable(1, name="b")
   print(b.name)  # prints "scope/b:0"
-```
 
-Note that there are two ways to define new variables in TensorFlow, by calling tf.get_variable or by creating a tf.Variable object. But we rarely use tf.Variable in practice.
-
-tf.get_variable enables variable sharing which is useful when building neural network models. Calling tf.get_variable with a new name results in creating a new variable, but if a variable with a same name exists it will raise a ValueError exception, telling us that re-declaring a variable is not allowed:
+  c = tf.get_variable(name="c", shape=[])
+  print(c.name)  # prints "scope/c:0"
+```
 
 ```python
 with tf.variable_scope("scope"):

From 1d650e21cc43f218f75fed4567b8f8ce48e73105 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 21 Aug 2017 22:31:46 -0700
Subject: [PATCH 37/78] Added note about snippets.

---
 README.md      | 2 ++
 code/framework | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8e7a1e5..03ac2f5 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,8 @@ _We aim to gradually expand this series by adding new articles and keep the cont
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```
 
+_If you use Visual Studio Code, make sure to download the [TensorFlow Snippets](https://github.com/vahidk/tensorflow-snippets) extension, which let's you build neural network models and have them running with few keystrokes._
+
 ## TensorFlow Basics
 <a name="basics"></a>
 The most striking difference between TensorFlow and other numerical computation libraries such as NumPy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as NumPy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.
diff --git a/code/framework b/code/framework
index 31cf1f0..f3284af 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit 31cf1f0896a12a1830fc24796cdf38ad2142435d
+Subproject commit f3284af1b1103309045e6caa9a7b543b0e7f18de

From 3af3b03b39f84085bbc2edc66ee1a5f93a8fe21f Mon Sep 17 00:00:00 2001
From: lzhbrian <lzhbrian@gmail.com>
Date: Sat, 26 Aug 2017 20:43:56 +0800
Subject: [PATCH 38/78] Fix some typos

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 03ac2f5..6f1ed9e 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ _We aim to gradually expand this series by adding new articles and keep the cont
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```
 
-_If you use Visual Studio Code, make sure to download the [TensorFlow Snippets](https://github.com/vahidk/tensorflow-snippets) extension, which let's you build neural network models and have them running with few keystrokes._
+_If you use Visual Studio Code, make sure to download the [TensorFlow Snippets](https://github.com/vahidk/tensorflow-snippets) extension, which lets you build neural network models and have them running with few keystrokes._
 
 ## TensorFlow Basics
 <a name="basics"></a>
@@ -119,7 +119,7 @@ By running this piece of code you should see a result close to this:
 ```
 Which is a relatively close approximation to our parameters.
 
-This is just tip of the iceberg for what TensorFlow can do. Many problems such a optimizing large neural networks with millions of parameters can be implemented efficiently in TensorFlow in just a few lines of code. TensorFlow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
+This is just tip of the iceberg for what TensorFlow can do. Many problems such as optimizing large neural networks with millions of parameters can be implemented efficiently in TensorFlow in just a few lines of code. TensorFlow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
 
 ## Understanding static and dynamic shapes
 <a name="shapes"></a>
@@ -334,7 +334,7 @@ b = tf.constant([1., 2.])
 c = tf.reduce_sum(a + b)
 ```
 
-What do you think would the value of c would after evaluation? If you guessed 6, that’s wrong. It’s going to be 12. This is because when rank of two tensors don’t match, TensorFlow automatically expands the first dimension of the tensor with lower rank before the elementwise operation, so the result of addition would be [[2, 3], [3, 4]], and the reducing over all parameters would give us 12.
+What do you think the value of c would be after evaluation? If you guessed 6, that’s wrong. It’s going to be 12. This is because when rank of two tensors don’t match, TensorFlow automatically expands the first dimension of the tensor with lower rank before the elementwise operation, so the result of addition would be [[2, 3], [3, 4]], and the reducing over all parameters would give us 12.
 
 The way to avoid this problem is to be as explicit as possible. Had we specified which dimension we would want to reduce across, catching this bug would have been much easier:
 
@@ -354,7 +354,7 @@ The slicing op is one of the overloaded operators that can make indexing tensors
 ```python
 z = x[begin:end]  # z = tf.slice(x, [begin], [end-begin])
 ```
-Be very careful when using this op though. The slicing op is very inefficient and often better avoided, especially when the number of slices is high. To understand how inefficient this op can be let's look at an example. We want manually perform reduction across the rows of a matrix:
+Be very careful when using this op though. The slicing op is very inefficient and often better avoided, especially when the number of slices is high. To understand how inefficient this op can be let's look at an example. We want to manually perform reduction across the rows of a matrix:
 ```python
 import tensorflow as tf
 import time
@@ -1161,7 +1161,7 @@ def eval_metrics_fn(params):
     "accuracy": tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
   }
 ```
-MetricSpec connects our model to the given metric function (e.g. tf.metrics.accuracy). Since our label and predictions solely include a single tensor, everything automagically works. Although if your label/prediction includes multiple tensors, you need to explicitly specify which tensors you want to pass to the metric function:
+MetricSpec connects our model to the given metric function (e.g. tf.metrics.accuracy). Since our label and predictions solely include a single tensor, everything automatically works. Although if your label/prediction includes multiple tensors, you need to explicitly specify which tensors you want to pass to the metric function:
 ```python
 tf.contrib.learn.MetricSpec(
   tf.metrics.accuracy,
@@ -1194,7 +1194,7 @@ def log_prob_from_logits(logits, axis=-1):
 def batch_gather(tensor, indices):
   """Gather in batch from a tensor of arbitrary size.
 
-  In pseduocode this module will produce the following:
+  In pseudocode this module will produce the following:
   output[i] = tf.gather(tensor[i], indices[i])
 
   Args:

From c0c7f8b8f86b9b74a1f784d8ab5591a22b587348 Mon Sep 17 00:00:00 2001
From: lzhbrian <lzhbrian@gmail.com>
Date: Sat, 26 Aug 2017 21:05:22 +0800
Subject: [PATCH 39/78] Add some links to TensorFlow docs which are mentioned

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6f1ed9e..e417c89 100644
--- a/README.md
+++ b/README.md
@@ -830,7 +830,7 @@ You can also use assertions to validate the value of tensors at runtime:
 ```python
 check_pos = tf.assert_positive(a)
 ```
-See the official docs for a full list of assertion ops.
+See the official docs for a [full list of assertion ops](https://www.tensorflow.org/api_guides/python/check_ops).
 
 ### Logging tensor values with tf.Print
 
@@ -917,7 +917,7 @@ Now if you run the optimizer again with the correct version you can see the fina
 ```
 which are exactly what we wanted.
 
-TensorFlow summaries, and tfdbg (TensorFlow Debugger) are other tools that can be used for debugging. Please refer to the official docs to learn more.
+[TensorFlow summaries](https://www.tensorflow.org/api_guides/python/summary), and [tfdbg (TensorFlow Debugger)](https://www.tensorflow.org/api_guides/python/tfdbg) are other tools that can be used for debugging. Please refer to the official docs to learn more.
 
 ## Numerical stability in TensorFlow
 <a name="stable"></a>

From bf9cbcc1349c308aaca6c71b7255cb59edce636d Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 26 Aug 2017 22:35:43 -0700
Subject: [PATCH 40/78] Added KL-Divergence recipe.

---
 README.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/README.md b/README.md
index e417c89..53bc352 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ Table of Contents
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
+    - [KL-Divergence](#kld)
     - [Make parallel](#make_parallel)
 
 ---
@@ -1328,6 +1329,29 @@ def entropy(logits, dims=-1):
   return tf.reduce_sum(nplogp, dims)
 ```
 
+## KL-Divergence <a name="kld"></a>
+```python
+def gaussian_kl(q, p=(0., 0.)):
+  """Computes KL divergence between two isotropic Gaussian distributions.
+
+  To ensure numerical stability, this op uses mu, log(sigma^2) to represent
+  the distribution. If p is not provided, it's assumed to be unit Gaussian.
+
+  Args:
+    q: A tuple (mu, log(sigma^2)) representing a multivariate Gaussian.
+    p: A tuple (mu, log(sigma^2)) representing a multivariate Gaussian.
+  Returns:
+    A tensor representing KL(q, p).
+  """
+  mu1, log_sigma1_sq = q
+  mu2, log_sigma2_sq = p
+  return tf.reduce_sum(
+    0.5 * (log_sigma2_sq - log_sigma1_sq +
+           tf.exp(log_sigma1_sq - log_sigma2_sq) +
+           tf.square(mu1 - mu2) / tf.exp(log_sigma2_sq) -
+           1), axis=-1)
+```
+
 ## Make parallel <a name="make_parallel"></a>
 
 ```python

From 731e8ae44a05f02a5972134e54c9dcb8e9fc8c9a Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 28 Aug 2017 02:40:11 -0700
Subject: [PATCH 41/78] Reading data.

---
 README.md      | 185 ++++++++++++++++++++++++++++---------------------
 code/framework |   2 +-
 2 files changed, 107 insertions(+), 80 deletions(-)

diff --git a/README.md b/README.md
index 53bc352..c60240c 100644
--- a/README.md
+++ b/README.md
@@ -6,15 +6,16 @@ Table of Contents
 2.  [Understanding static and dynamic shapes](#shapes)
 3.  [Scopes and when to use them](#scopes)
 4.  [Broadcasting the good and the ugly](#broadcast)
-5.  [Take advantage of the overloaded operators](#overloaded_ops)
-6.  [Understanding order of execution and control dependencies](#control_deps)
-7.  [Control flow operations: conditionals and loops](#control_flow)
-8.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
-9.  [Multi-GPU processing with data parallelism](#multi_gpu)
-10. [Debugging TensorFlow models](#debug)
-11. [Numerical stability in TensorFlow](#stable)
-12. [Building a neural network training framework with learn API](#tf_learn)
-13. [TensorFlow Cookbook](#cookbook)
+5.  [Feeding data to TensorFlow](#data)
+6.  [Take advantage of the overloaded operators](#overloaded_ops)
+7.  [Understanding order of execution and control dependencies](#control_deps)
+8.  [Control flow operations: conditionals and loops](#control_flow)
+9.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
+10. [Multi-GPU processing with data parallelism](#multi_gpu)
+11. [Debugging TensorFlow models](#debug)
+12. [Numerical stability in TensorFlow](#stable)
+13. [Building a neural network training framework with learn API](#tf_learn)
+14. [TensorFlow Cookbook](#cookbook)
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
@@ -347,6 +348,79 @@ c = tf.reduce_sum(a + b, 0)
 
 Here the value of c would be [5, 7], and we immediately would guess based on the shape of the result that there’s something wrong. A general rule of thumb is to always specify the dimensions in reduction operations and when using tf.squeeze.
 
+## Feeding data to TensorFlow
+<a name="data"></a>
+
+TensorFlow is designed to work efficiently with large amount of data. So it's important not to starve your TensorFlow model in order to maximize its performance. There are various ways that you can feed your data to TensorFlow.
+
+### Constants
+The simplest approach is to embed the data in your graph as a constant:
+```python
+import tensorflow as tf
+import numpy as np
+
+actual_data = np.random.normal(size=[100])
+
+data = tf.constant(actual_data)
+```
+
+This approach can be very efficient, but it's not very flexible. One problem with this approach is that, in order to use the model with another dataset you have to rewrite the graph. Also, you have to load all of your data at once and keep it in memory, which would only work with small datasets.
+
+### Placeholders
+Using placeholders solves both of these problems:
+```python
+import tensorflow as tf
+import numpy as np
+
+data = tf.placeholder(tf.float32)
+
+prediction = tf.square(data) + 1
+
+actual_data = np.random.normal(size=[100])
+
+tf.Session().run(prediction, feed_dict={data: actual_data})
+```
+The placeholder operator returns a tensor whose value is supplied through the feed_dict argument of the Session.run function. Note that calling Session.run without feeding the value of data in this case will result in an error.
+
+### Python ops
+Another approach to feed the data is by using python ops:
+```python
+def py_input_fn():
+    actual_data = np.random.normal(size=[100])
+    return actual_data
+
+data = tf.py_func(py_input_fn, [], (tf.float64))
+```
+Python ops allow you to convert a regular python function to a TensorFlow operation.
+
+### Dataset API
+The recommended way of reading the data in TensorFlow is through the dataset API.
+```python
+actual_data = np.random.normal(size=[100])
+dataset = tf.contrib.data.Dataset.from_tensor_slices(actual_data)
+data = dataset.make_one_shot_iterator().get_next()
+```
+
+If you need to read your data from file, it may be more efficient to write it in TFrecord format and use TFRecordDataset to read it:
+```python
+dataset = tf.contrib.data.TFRecordDataset(path_to_data)
+```
+See the [official docs](https://www.tensorflow.org/api_guides/python/reading_data#Reading_from_files) for an example of how to write your dataset in TFrecord format.
+
+Dataset API allows you to make efficient data processing pipelines easily. For example this is how we process our data in the accompanied framework (See
+[trainer.py](https://github.com/vahidk/TensorflowFramework/blob/master/trainer.py)):
+
+```python
+dataset = ...
+dataset = dataset.cache()
+if mode == tf.estimator.ModeKeys.TRAIN:
+    dataset = dataset.repeat()
+    dataset = dataset.shuffle(batch_size * 5)
+dataset = dataset.map(parse, num_threads=8)
+dataset = dataset.batch(batch_size)
+```
+After reading the data, we use the Dataset.cache method to cache it in memory for improved efficiency. During training mode, we repeat the dataset indefinitely, allowing us to process the whole dataset many times. We also shuffle the dataset to get batches with different sample distributions. Next, we use the Dataset.map function to perform preprocessing on raw records and convert the data to a format usable by the model. We then create batches of samples by calling Dataset.batch.
+
 ## Take advantage of the overloaded operators
 <a name="overloaded_ops"></a>
 Just like NumPy, TensorFlow overloads a number of python operators to make building graphs easier and the code more readable.
@@ -1024,7 +1098,7 @@ For simplicity, in most of the examples here we manually create sessions and we
 
 When experimenting with neural network models you usually have a training/test split. You want to train your model on the training set, and once in a while evaluate it on test set and compute some metrics. You also need to store the model parameters as a checkpoint, and ideally you want to be able to stop and resume training. TensorFlow's learn API is designed to make this job easier, letting us focus on developing the actual model.
 
-The most basic way of using tf.learn API is to use tf.Estimator object directly. You need to define a model function that defines a loss function, a train op and one or a set of predictions:
+The most basic way of using tf.learn API is to use tf.Estimator object directly. You need to define a model function that defines a loss function, a train op, one or a set of predictions, and optionally a set of metric ops for evaluation:
 ```python
 import tensorflow as tf
 
@@ -1032,35 +1106,36 @@ def model_fn(features, labels, mode, params):
     predictions = ...
     loss = ...
     train_op = ...
-    return tf.contrib.learn.ModelFnOps(
+    metric_ops = ...
+    return tf.estimator.EstimatorSpec(
         mode=mode,
         predictions=predictions,
         loss=loss,
-        train_op=train_op)
+        train_op=train_op,
+        eval_metric_ops=metric_ops)
 
 params = ...
 run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.output_dir)
-estimator = tf.contrib.learn.Estimator(
+estimator = tf.estimator.Estimator(
     model_fn=model_fn, config=run_config, params=params)
 ```
 
-To train the model you would then simply call Estimator.fit() function while providing an input function to read the data.
+To train the model you would then simply call Estimator.train() function while providing an input function to read the data.
 ```python
 def input_fn():
     features = ...
     labels = ...
     return features, labels
 
-estimator.fit(input_fn=input_fn, max_steps=...)
+estimator.train(input_fn=input_fn, max_steps=...)
 ```
 
-and to evaluate the model, call Estimator.evaluate(), providing a set of metrics:
+and to evaluate the model, simply call Estimator.evaluate():
 ```
-metrics = { "accuracy": tf.metrics.accuracy }
-estimator.evaluate(input_fn=input_fn, metrics=metrics)
+estimator.evaluate(input_fn=input_fn)
 ```
 
-Estimator object might be good enough for simple cases, but TensorFlow provides an even higher level object called Experiment which provides some additional useful functionality. Creating an experiment object is very easy:
+Estimator object might be good enough for simple cases, but TensorFlow provides a higher level object called Experiment which provides some additional useful functionality. Creating an experiment object is very easy:
 
 ```python
 experiment = tf.contrib.learn.Experiment(
@@ -1084,23 +1159,22 @@ tf.flags.DEFINE_string("schedule", "train_and_evaluate", "Schedule.")
 tf.flags.DEFINE_string("hparams", "", "Hyper parameters.")
 
 FLAGS = tf.flags.FLAGS
-learn = tf.contrib.learn
 
 def experiment_fn(run_config, hparams):
-  estimator = learn.Estimator(
+  estimator = tf.estimator.Estimator(
     model_fn=make_model_fn(), config=run_config, params=hparams)
-  return learn.Experiment(
+  return tf.contrib.learn.Experiment(
     estimator=estimator,
-    train_input_fn=make_input_fn(learn.ModeKeys.TRAIN, hparams),
-    eval_input_fn=make_input_fn(learn.ModeKeys.EVAL, hparams),
+    train_input_fn=make_input_fn(tf.estimator.ModeKeys.TRAIN, hparams),
+    eval_input_fn=make_input_fn(tf.estimator.ModeKeys.EVAL, hparams),
     eval_metrics=eval_metrics_fn(hparams))
 
 def main(unused_argv):
-  run_config = learn.RunConfig(model_dir=FLAGS.output_dir)
+  run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.output_dir)
   hparams = tf.contrib.training.HParams()
   hparams.parse(FLAGS.hparams)
 
-  estimator = learn.learn_runner.run(
+  estimator = tf.contrib.learn.learn_runner.run(
     experiment_fn=experiment_fn,
     run_config=run_config,
     schedule=FLAGS.schedule,
@@ -1111,64 +1185,17 @@ if __name__ == "__main__":
 ```
 The schedule flag decides which member function of the Experiment object gets called. So, if you for example set schedule to "train_and_evaluate", experiment.train_and_evaluate() would be called.
 
-Now let's have a look at how we might actually write an input function. One way to do this  is through python ops (See [this item](#python_ops) for more information on python ops).
-```python
-def input_fn():
-    def _py_input_fn():
-        # read a new example in python
-        feature = ...
-        label = ...
-        return feature, label
-
-    # Convert that to tensors
-    feature, label = tf.py_func(_py_input_fn, [], (tf.string, tf.int64))
-
-    feature_batch, label_batch = tf.train.shuffle_batch(
-        [feature, label], batch_size=..., capacity=...,
-        min_after_dequeue=...)
-
-    return feature_batch, label_batch
-```
+The input function can return two tensors (or dictionaries of tensors) providing the features and labels to be passed to the model.
 
-An alternative way is to write your data as TFRecords format and use the multi-threaded TFRecordReader object to read the data:
 ```python
 def input_fn():
-    features = {
-        "image": tf.FixedLenFeature([], tf.string),
-        "label": tf.FixedLenFeature([], tf.int64),
-    }
-    tensors = tf.contrib.learn.read_batch_features(
-        file_pattern=...,
-        batch_size=...,
-        features=features,
-        reader=tf.TFRecordReader)
+    features = ...
+    labels = ...
+    return features, labels
 ```
-See [mnist.py](https://github.com/vahidk/TensorflowFramework/blob/master/dataset/mnist.py) for an example of how to convert your data to TFRecords format.
-
-The framework also comes with a simple convolutional network classifier in [cnn_classifier.py](https://github.com/vahidk/TensorflowFramework/blob/master/model/cnn_classifier.py) that includes an example model and evaluation metric:
-
-```python
-def model_fn(features, labels, mode, params):
-  images = features["image"]
-  labels = labels["label"]
-
-  predictions = ...
-  loss = ...
-
-  return {"predictions": predictions}, loss
+See [mnist.py](https://github.com/vahidk/TensorflowFramework/blob/master/dataset/mnist.py) for an example of how to read your data with the dataset API. To learn about various ways of reading your data in TensorFlow refer to [this item](#data).
 
-def eval_metrics_fn(params):
-  return {
-    "accuracy": tf.contrib.learn.MetricSpec(tf.metrics.accuracy)
-  }
-```
-MetricSpec connects our model to the given metric function (e.g. tf.metrics.accuracy). Since our label and predictions solely include a single tensor, everything automatically works. Although if your label/prediction includes multiple tensors, you need to explicitly specify which tensors you want to pass to the metric function:
-```python
-tf.contrib.learn.MetricSpec(
-  tf.metrics.accuracy,
-  label_key="label",
-  prediction_key="predictions")
-```
+The framework also comes with a simple convolutional network classifier in [cnn_classifier.py](https://github.com/vahidk/TensorflowFramework/blob/master/model/cnn_classifier.py) that includes an example model.
 
 And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the framework [source code](https://github.com/vahidk/TensorFlowFramework) and see the official python API to learn more about the learn API.
 
diff --git a/code/framework b/code/framework
index f3284af..82bdcdd 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit f3284af1b1103309045e6caa9a7b543b0e7f18de
+Subproject commit 82bdcdd14856eff6a89d864e56ba7f872a6f99ec

From 8799d046c79a044ff80b2a94d9e61a6ba9549074 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 28 Aug 2017 02:50:19 -0700
Subject: [PATCH 42/78] Fix typos.

---
 README.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index c60240c..f51273e 100644
--- a/README.md
+++ b/README.md
@@ -364,7 +364,7 @@ actual_data = np.random.normal(size=[100])
 data = tf.constant(actual_data)
 ```
 
-This approach can be very efficient, but it's not very flexible. One problem with this approach is that, in order to use the model with another dataset you have to rewrite the graph. Also, you have to load all of your data at once and keep it in memory, which would only work with small datasets.
+This approach can be very efficient, but it's not very flexible. One problem with this approach is that, in order to use your model with another dataset you have to rewrite the graph. Also, you have to load all of your data at once and keep it in memory which would only work with small datasets.
 
 ### Placeholders
 Using placeholders solves both of these problems:
@@ -380,21 +380,21 @@ actual_data = np.random.normal(size=[100])
 
 tf.Session().run(prediction, feed_dic={data: actual_data})
 ```
-Placeholder operator returns a tensor whose value is fetched through the feed_dict argument in the Session:run function. Note that running session.run without feeding the value of data in this case will result in an error.
+Placeholder operator returns a tensor whose value is fetched through the feed_dict argument in Session.run function. Note that running Session.run without feeding the value of data in this case will result in an error.
 
 ### Python ops
-Another approach to feed the data is by using python ops:
+Another approach to feed the data to TensorFlow is by using Python ops:
 ```python
 def py_input_fn():
     actual_data = np.random.normal(size=[100])
     return actual_data
 
-data = tf.py_func(py_input_fn, [], (tf.float32))
+data = tf.py_func(py_input_fn, [], (tf.float32))/
 ```
-Python ops allow you to convert a regular python function to a TensorFlow operation.
+Python ops allow you to convert a regular Python function to a TensorFlow operation.
 
 ### Dataset API
-The recommended way of reading the data in TensorFlow is through the dataset API.
+The recommended way of reading the data in TensorFlow however is through the dataset API:
 ```python
 actual_data = np.random.normal(size=[100])
 dataset = tf.contrib.data.Dataset.from_tensor_slices(actual_data)
@@ -419,7 +419,7 @@ if mode == tf.estimator.ModeKeys.TRAIN:
 dataset = dataset.map(parse, num_threads=8)
 dataset = dataset.batch(batch_size)
 ```
-After reading the data with use Dataset.cache method to cache it into memory for improved efficiency. During training mode, we repeat the dataset indefinitely, allowing us to process the whole dataset many times. We also shuffle the dataset to get batches with different sample distribution. Next, we use the Dataset.map function to perform preprocessing on raw records and convert the data to usable format by the model. We then create batches of samples by calling Dataset.batch.
+After reading the data, we use Dataset.cache method to cache it into memory for improved efficiency. During the training mode, we repeat the dataset indefinitely. This allows us to process the whole dataset many times. We also shuffle the dataset to get batches with different sample distributions. Next, we use the Dataset.map function to perform preprocessing on raw records and convert the data to a usable format for the model. We then create batches of samples by calling Dataset.batch.
 
 ## Take advantage of the overloaded operators
 <a name="overloaded_ops"></a>

From d377c4f8c97714abd8973d82fbbc336ac47e4b8c Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 28 Aug 2017 08:39:00 -0700
Subject: [PATCH 43/78] Fix typo.

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f51273e..d812694 100644
--- a/README.md
+++ b/README.md
@@ -378,7 +378,7 @@ prediction = tf.square(data) + 1
 
 actual_data = np.random.normal(size=[100])
 
-tf.Session().run(prediction, feed_dic={data: actual_data})
+tf.Session().run(prediction, feed_dict={data: actual_data})
 ```
 Placeholder operator returns a tensor whose value is fetched through the feed_dict argument in Session.run function. Note that running Session.run without feeding the value of data in this case will result in an error.
 
@@ -389,7 +389,7 @@ def py_input_fn():
     actual_data = np.random.normal(size=[100])
     return actual_data
 
-data = tf.py_func(py_input_fn, [], (tf.float32))/
+data = tf.py_func(py_input_fn, [], (tf.float32))
 ```
 Python ops allow you to convert a regular Python function to a TensorFlow operation.
 

From 1a669bb8fc9689ba4caf8235f8b941ed926ae327 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 29 Aug 2017 17:03:39 -0700
Subject: [PATCH 44/78] Added leaky relu recipe.

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index d812694..17972c5 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ Table of Contents
     - [Entropy](#entropy)
     - [KL-Divergence](#kld)
     - [Make parallel](#make_parallel)
+    - [Leaky Relu](#leaky_relu)
 
 ---
 
@@ -1406,3 +1407,10 @@ def make_parallel(fn, num_gpus, **kwargs):
 
   return tf.concat(out_split, axis=0)
 ```
+
+## Leaky relu <a name="leaky_relu"></a>
+```python
+def leaky_relu(tensor, alpha=0.1):
+    """Computes the leaky rectified linear activation."""
+    retrun tf.maximum(x, alpha * x)
+```

From 36e9303d75cf30694af7eda702ceccc479d07d9b Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 29 Aug 2017 23:01:26 -0700
Subject: [PATCH 45/78] Updated readme.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 17972c5..14aa778 100644
--- a/README.md
+++ b/README.md
@@ -1412,5 +1412,5 @@ def make_parallel(fn, num_gpus, **kwargs):
 ```python
 def leaky_relu(tensor, alpha=0.1):
     """Computes the leaky rectified linear activation."""
-    retrun tf.maximum(x, alpha * x)
+    return tf.maximum(x, alpha * x)
 ```

From 67b0117e1b8e15b52188556a1519f98f4e5d4a33 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Thu, 31 Aug 2017 10:27:54 -0700
Subject: [PATCH 46/78] Update beam_search recipe.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 14aa778..e5d190a 100644
--- a/README.md
+++ b/README.md
@@ -1264,7 +1264,7 @@ def rnn_beam_search(update_fn, initial_state, sequence_length, beam_width,
   sel_sum_logprobs = tf.log([[1.] + [0.] * (beam_width - 1)])
 
   ids = tf.tile([[begin_token_id]], [batch_size, beam_width])
-  sel_ids = tf.expand_dims(ids, axis=2)
+  sel_ids = tf.zeros([batch_size, beam_width, 0], dtype=ids.dtype)
 
   mask = tf.ones([batch_size, beam_width], dtype=tf.float32)
 

From 208ac5f3ed34403833c21277ff548b57bef09c29 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Fri, 1 Sep 2017 00:15:40 -0700
Subject: [PATCH 47/78] Updated framework section.

---
 README.md | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index e5d190a..dfd3161 100644
--- a/README.md
+++ b/README.md
@@ -1121,7 +1121,7 @@ estimator = tf.estimator.Estimator(
     model_fn=model_fn, config=run_config, params=params)
 ```
 
-To train the model you would then simply call Estimator.train() function while providing an input function to read the data.
+To train the model you would then simply call Estimator.train() function while providing an input function to read the data:
 ```python
 def input_fn():
     features = ...
@@ -1142,11 +1142,10 @@ Estimator object might be good enough for simple cases, but TensorFlow provides
 experiment = tf.contrib.learn.Experiment(
     estimator=estimator,
     train_input_fn=train_input_fn,
-    eval_input_fn=eval_input_fn,
-    eval_metrics=eval_metrics)
+    eval_input_fn=eval_input_fn)
 ```
 
-Now we can call train_and_evaluate function to compute the metrics while training.
+Now we can call train_and_evaluate function to compute the metrics while training:
 ```
 experiment.train_and_evaluate()
 ```
@@ -1163,12 +1162,13 @@ FLAGS = tf.flags.FLAGS
 
 def experiment_fn(run_config, hparams):
   estimator = tf.estimator.Estimator(
-    model_fn=make_model_fn(), config=run_config, params=hparams)
+    model_fn=make_model_fn(),
+    config=run_config,
+    params=hparams)
   return tf.contrib.learn.Experiment(
     estimator=estimator,
     train_input_fn=make_input_fn(tf.estimator.ModeKeys.TRAIN, hparams),
-    eval_input_fn=make_input_fn(tf.estimator.ModeKeys.EVAL, hparams),
-    eval_metrics=eval_metrics_fn(hparams))
+    eval_input_fn=make_input_fn(tf.estimator.ModeKeys.EVAL, hparams))
 
 def main(unused_argv):
   run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.output_dir)
@@ -1186,8 +1186,7 @@ if __name__ == "__main__":
 ```
 The schedule flag decides which member function of the Experiment object gets called. So, if you for example set schedule to "train_and_evaluate", experiment.train_and_evaluate() would be called.
 
-The input function can return two tensors (or dictionaries of tensors) providing the features and labels to be passed to the model.
-
+The input function returns two tensors (or dictionaries of tensors) providing the features and labels to be passed to the model:
 ```python
 def input_fn():
     features = ...

From 1ee9c25edceaa9189c46c59d816ad5ef7d2bcb01 Mon Sep 17 00:00:00 2001
From: "Yubing Dong (Tom)" <tomtung@users.noreply.github.com>
Date: Mon, 4 Sep 2017 18:53:10 -0700
Subject: [PATCH 48/78] Correct typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dfd3161..5e1317c 100644
--- a/README.md
+++ b/README.md
@@ -214,7 +214,7 @@ You can overwrite the default name by explicitly specifying it:
 
 ```python
 a = tf.constant(1, name="a")
-print(a.name)  # prints "b:0"
+print(a.name)  # prints "a:0"
 
 b = tf.Variable(1, name="b")
 print(b.name)  # prints "b:0"

From 51eb429997586bc33de917fd79347c9b2a6c66a6 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 5 Sep 2017 09:28:11 -0700
Subject: [PATCH 49/78] Update entropy recipe.

---
 README.md | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 5e1317c..a54e054 100644
--- a/README.md
+++ b/README.md
@@ -1344,16 +1344,10 @@ def merge(tensors, units, activation=tf.nn.relu, name=None, **kwargs):
 ```python
 import tensorflow as tf
 
-def softmax(logits, dims=-1):
-  """Compute softmax over specified dimensions."""
-  exp = tf.exp(logits - tf.reduce_max(logits, dims, keep_dims=True))
-  return exp / tf.reduce_sum(exp, dims, keep_dims=True)
-
-def entropy(logits, dims=-1):
+def softmax_entropy(logits, dim=-1):
   """Compute entropy over specified dimensions."""
-  probs = softmax(logits, dims)
-  nplogp = probs * (tf.reduce_logsumexp(logits, dims, keep_dims=True) - logits)
-  return tf.reduce_sum(nplogp, dims)
+  plogp = tf.nn.softmax(logits, dim) * tf.nn.log_softmax(logits, dim)
+  return -tf.reduce_sum(plogp, dim)
 ```
 
 ## KL-Divergence <a name="kld"></a>

From ec2a44eccc6032279b51a430a6bef936b8de8344 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Wed, 6 Sep 2017 02:15:17 -0700
Subject: [PATCH 50/78] Simplify entropy function.

---
 README.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index a54e054..9837d51 100644
--- a/README.md
+++ b/README.md
@@ -933,12 +933,12 @@ Let's look at an example:
 ```python
 import tensorflow as tf
 
-def non_differentiable_entropy(logits):
+def non_differentiable_softmax_entropy(logits):
     probs = tf.nn.softmax(logits)
     return tf.nn.softmax_cross_entropy_with_logits(labels=probs, logits=logits)
 
 w = tf.get_variable("w", shape=[5])
-y = -non_differentiable_entropy(w)
+y = -non_differentiable_softmax_entropy(w)
 
 opt = tf.train.AdamOptimizer()
 train_op = opt.minimize(y)
@@ -970,13 +970,12 @@ Now let's fix our function with a differentiable version of the entropy and chec
 import tensorflow as tf
 import numpy as np
 
-def entropy(logits, dim=-1):
-    probs = tf.nn.softmax(logits, dim)
-    nplogp = probs * (tf.reduce_logsumexp(logits, dim, keep_dims=True) - logits)
-    return tf.reduce_sum(nplogp, dim)
+def softmax_entropy(logits, dim=-1):
+    plogp = tf.nn.softmax(logits, dim) * tf.nn.log_softmax(logits, dim)
+    return -tf.reduce_sum(nplogp, dim)
 
 w = tf.get_variable("w", shape=[5])
-y = -entropy(w)
+y = -softmax_entropy(w)
 
 print(w.get_shape())
 print(y.get_shape())

From c8fbf35341cf9a99e67979f6a79a5bca55af5b37 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Wed, 6 Sep 2017 02:17:39 -0700
Subject: [PATCH 51/78] Fix typo.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9837d51..02d671c 100644
--- a/README.md
+++ b/README.md
@@ -972,7 +972,7 @@ import numpy as np
 
 def softmax_entropy(logits, dim=-1):
     plogp = tf.nn.softmax(logits, dim) * tf.nn.log_softmax(logits, dim)
-    return -tf.reduce_sum(nplogp, dim)
+    return -tf.reduce_sum(plogp, dim)
 
 w = tf.get_variable("w", shape=[5])
 y = -softmax_entropy(w)

From 0a799189957ed89d5848c4306951d9730a8584d2 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Wed, 6 Sep 2017 02:23:56 -0700
Subject: [PATCH 52/78] Update cookbook.

---
 README.md | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 02d671c..caf4035 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,8 @@ Table of Contents
 12. [Numerical stability in TensorFlow](#stable)
 13. [Building a neural network training framework with learn API](#tf_learn)
 14. [TensorFlow Cookbook](#cookbook)
+    - [Get shape](#get_shape)
+    - [Batch gather](#batch_gather)
     - [Beam search](#beam_search)
     - [Merge](#merge)
     - [Entropy](#entropy)
@@ -1202,10 +1204,8 @@ And that's it! This is all you need to get started with TensorFlow learn API. I
 <a name="cookbook"></a>
 This section includes implementation of a set of common operations in TensorFlow.
 
-### Beam Search <a name="beam_search"></a>
+### Get shape <a name="get_shape"></a>
 ```python
-import tensorflow as tf
-
 def get_shape(tensor):
   """Returns static shape if available and dynamic shape otherwise."""
   static_shape = tensor.shape.as_list()
@@ -1213,11 +1213,11 @@ def get_shape(tensor):
   dims = [s[1] if s[0] is None else s[0]
           for s in zip(static_shape, dynamic_shape)]
   return dims
+```
 
-def log_prob_from_logits(logits, axis=-1):
-  """Normalize the log-probabilities so that probabilities sum to one."""
-  return logits - tf.reduce_logsumexp(logits, axis=axis, keep_dims=True)
+### Batch Gather <a name="batch_gather"></a>
 
+```python
 def batch_gather(tensor, indices):
   """Gather in batch from a tensor of arbitrary size.
 
@@ -1237,6 +1237,11 @@ def batch_gather(tensor, indices):
   offset = tf.reshape(tf.range(shape[0]) * shape[1], offset_shape)
   output = tf.gather(flat_first, indices + offset)
   return output
+```
+
+### Beam Search <a name="beam_search"></a>
+```python
+import tensorflow as tf
 
 def rnn_beam_search(update_fn, initial_state, sequence_length, beam_width,
                     begin_token_id, end_token_id, name="rnn"):
@@ -1270,7 +1275,7 @@ def rnn_beam_search(update_fn, initial_state, sequence_length, beam_width,
     with tf.variable_scope(name, reuse=True if i > 0 else None):
 
       state, logits = update_fn(state, ids)
-      logits = log_prob_from_logits(logits)
+      logits = tf.nn.log_softmax(logits)
 
       sum_logprobs = (
           tf.expand_dims(sel_sum_logprobs, axis=2) +

From e6ac9f307b2b24bc29d89a70e45d4cdb0e102cde Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Wed, 13 Sep 2017 01:24:13 -0700
Subject: [PATCH 53/78] Updated framework.

---
 README.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/README.md b/README.md
index caf4035..2dc3900 100644
--- a/README.md
+++ b/README.md
@@ -34,8 +34,6 @@ _We aim to gradually expand this series by adding new articles and keep the cont
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```
 
-_If you use Visual Studio Code, make sure to download the [TensorFlow Snippets](https://github.com/vahidk/tensorflow-snippets) extension, which lets you build neural network models and have them running with few keystrokes._
-
 ## TensorFlow Basics
 <a name="basics"></a>
 The most striking difference between TensorFlow and other numerical computation libraries such as NumPy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as NumPy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.
@@ -1196,7 +1194,7 @@ def input_fn():
 ```
 See [mnist.py](https://github.com/vahidk/TensorflowFramework/blob/master/dataset/mnist.py) for an example of how to read your data with the dataset API. To learn about various ways of reading your data in TensorFlow refer to [this item](#data).
 
-The framework also comes with a simple convolutional network classifier in [cnn_classifier.py](https://github.com/vahidk/TensorflowFramework/blob/master/model/cnn_classifier.py) that includes an example model.
+The framework also comes with a simple convolutional network classifier in [alexnet.py](https://github.com/vahidk/TensorflowFramework/blob/master/model/alexnet.py) that includes an example model.
 
 And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the framework [source code](https://github.com/vahidk/TensorFlowFramework) and see the official python API to learn more about the learn API.
 

From d6becaf2324e99326ccf73f9748ef69b9bad2311 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Sep 2017 03:02:00 -0700
Subject: [PATCH 54/78] Added Squeeze/Excite and BatchNorm recipes.

---
 README.md | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2dc3900..ee69989 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,8 @@ Table of Contents
     - [KL-Divergence](#kld)
     - [Make parallel](#make_parallel)
     - [Leaky Relu](#leaky_relu)
-
+    - [Batch normalization](#batch_norm)
+    - [Squeeze and excitation](#squeeze_excite)
 ---
 
 _We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
@@ -1409,3 +1410,74 @@ def leaky_relu(tensor, alpha=0.1):
     """Computes the leaky rectified linear activation."""
     return tf.maximum(x, alpha * x)
 ```
+
+## Batch normalization <a name="batch_norm"></a>
+```python
+def batch_normalization(tensor, training=False, epsilon=0.001, momentum=0.9, 
+                        fused_batch_norm=False, name=None):
+  """Performs batch normalization on given 4-D tensor.
+  
+  The features are assumed to be in NHWC format. Noe that you need to 
+  run UPDATE_OPS in order for this function to perform correctly, e.g.:
+
+  with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
+    train_op = optimizer.minimize(loss)
+  """
+  with tf.variable_scope(name, default_name="batch_normalization"):
+    channels = tensor.shape.as_list()[-1]
+    axes = list(range(tensor.shape.ndims - 1))
+
+    beta = tf.get_variable(
+      'beta', channels, initializer=tf.zeros_initializer())
+    gamma = tf.get_variable(
+      'gamma', channels, initializer=tf.ones_initializer())
+
+    avg_mean = tf.get_variable(
+      "avg_mean", channels, initializer=tf.zeros_initializer(),
+      trainable=False)
+    avg_variance = tf.get_variable(
+      "avg_variance", channels, initializer=tf.ones_initializer(),
+      trainable=False)
+
+    if training:
+      if fused_batch_norm:
+        mean, variance = None, None
+      else:
+        mean, variance = tf.nn.moments(tensor, axes=axes)
+    else:
+      mean, variance = avg_mean, avg_variance
+   
+    if fused_batch_norm:
+      tensor, mean, variance = tf.nn.fused_batch_norm(
+        tensor, scale=gamma, offset=beta, mean=mean, variance=variance, 
+        epsilon=epsilon, is_training=training)
+    else:
+      tensor = tf.nn.batch_normalization(
+        tensor, mean, variance, beta, gamma, epsilon)
+
+    if training:
+      update_mean = tf.assign(
+        avg_mean, avg_mean * momentum + mean * (1.0 - momentum))
+      update_variance = tf.assign(
+        avg_variance, avg_variance * momentum + variance * (1.0 - momentum))
+
+      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mean)
+      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_variance)
+
+  return tensor
+```
+
+## Squeeze and excitation <a name="squeeze_excite"></a>
+```python
+def squeeze_and_excite(tensor, ratio):
+  """Squeeze and excite layer."""
+  original = tensor
+  units = tensor.shape.as_list()[-1]
+  tensor = tf.reduce_mean(tensor, [1, 2], keep_dims=True)
+  tensor = tf.layers.dense(tensor, units / ratio, use_bias=False)
+  tensor = tf.nn.relu(tensor)
+  tensor = tf.layers.dense(tensor, units, use_bias=False)
+  tensor = tf.nn.sigmoid(tensor)
+  tensor = original * tensor
+  return tensor
+```

From 6156404da53c2502b2c155aa5d225e9957e61fa4 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Sep 2017 03:08:31 -0700
Subject: [PATCH 55/78] Updated refs.

---
 README.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index ee69989..e798cf9 100644
--- a/README.md
+++ b/README.md
@@ -1422,6 +1422,8 @@ def batch_normalization(tensor, training=False, epsilon=0.001, momentum=0.9,
 
   with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
     train_op = optimizer.minimize(loss)
+
+  Based on: https://arxiv.org/abs/1502.03167
   """
   with tf.variable_scope(name, default_name="batch_normalization"):
     channels = tensor.shape.as_list()[-1]
@@ -1469,8 +1471,11 @@ def batch_normalization(tensor, training=False, epsilon=0.001, momentum=0.9,
 
 ## Squeeze and excitation <a name="squeeze_excite"></a>
 ```python
-def squeeze_and_excite(tensor, ratio):
-  """Squeeze and excite layer."""
+def squeeze_and_excite(tensor, ratio=16):
+  """Apply squeeze/excite on given 4-D tensor.
+  
+  Based on: https://arxiv.org/abs/1709.01507
+  """
   original = tensor
   units = tensor.shape.as_list()[-1]
   tensor = tf.reduce_mean(tensor, [1, 2], keep_dims=True)

From c085b700ee505db3f2ef8da7b5f2e9e26fd993aa Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Sep 2017 03:13:57 -0700
Subject: [PATCH 56/78] Added var scope.

---
 README.md | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index e798cf9..f8766c0 100644
--- a/README.md
+++ b/README.md
@@ -1471,18 +1471,19 @@ def batch_normalization(tensor, training=False, epsilon=0.001, momentum=0.9,
 
 ## Squeeze and excitation <a name="squeeze_excite"></a>
 ```python
-def squeeze_and_excite(tensor, ratio=16):
+def squeeze_and_excite(tensor, ratio=16, name=None):
   """Apply squeeze/excite on given 4-D tensor.
   
   Based on: https://arxiv.org/abs/1709.01507
   """
-  original = tensor
-  units = tensor.shape.as_list()[-1]
-  tensor = tf.reduce_mean(tensor, [1, 2], keep_dims=True)
-  tensor = tf.layers.dense(tensor, units / ratio, use_bias=False)
-  tensor = tf.nn.relu(tensor)
-  tensor = tf.layers.dense(tensor, units, use_bias=False)
-  tensor = tf.nn.sigmoid(tensor)
-  tensor = original * tensor
+  with tf.variable_scope(name, default_name="squeeze_and_excite"):
+    original = tensor
+    units = tensor.shape.as_list()[-1]
+    tensor = tf.reduce_mean(tensor, [1, 2], keep_dims=True)
+    tensor = tf.layers.dense(tensor, units / ratio, use_bias=False)
+    tensor = tf.nn.relu(tensor)
+    tensor = tf.layers.dense(tensor, units, use_bias=False)
+    tensor = tf.nn.sigmoid(tensor)
+    tensor = original * tensor
   return tensor
 ```

From f1201715c0ce86dac8950cad6075c9174517e0ea Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Sep 2017 21:46:14 -0700
Subject: [PATCH 57/78] Fix typo.

---
 README.md      | 2 +-
 code/framework | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f8766c0..cbd851a 100644
--- a/README.md
+++ b/README.md
@@ -1408,7 +1408,7 @@ def make_parallel(fn, num_gpus, **kwargs):
 ```python
 def leaky_relu(tensor, alpha=0.1):
     """Computes the leaky rectified linear activation."""
-    return tf.maximum(x, alpha * x)
+    return tf.maximum(tensor, alpha * tensor)
 ```
 
 ## Batch normalization <a name="batch_norm"></a>
diff --git a/code/framework b/code/framework
index 82bdcdd..75ae150 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit 82bdcdd14856eff6a89d864e56ba7f872a6f99ec
+Subproject commit 75ae150b45291dd2e973f1d06628ce594c569024

From bbb49b721255f31d07fa8c4ae451829ebc0f6a60 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Thu, 26 Oct 2017 16:18:31 -0700
Subject: [PATCH 58/78] Update readme.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cbd851a..9513b83 100644
--- a/README.md
+++ b/README.md
@@ -405,7 +405,7 @@ data = dataset.make_one_shot_iterator().get_next()
 
 If you need to read your data from file, it may be more efficient to write it in TFrecord format and use TFRecordDataset to read it:
 ```python
-dataset = tf.contrib.data.Dataset.TFRecordDataset(path_to_data)
+dataset = tf.contrib.data.TFRecordDataset(path_to_data)
 ```
 See the [official docs](https://www.tensorflow.org/api_guides/python/reading_data#Reading_from_files) for an example of how to write your dataset in TFrecord format.
 

From bd9a8dd7b71f3b3f1918e54a2e0992c245fdcb14 Mon Sep 17 00:00:00 2001
From: Ilya Shutov <is@mactelabs.com>
Date: Sun, 21 Jan 2018 02:13:13 +0700
Subject: [PATCH 59/78] Typo fix

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9513b83..ed93ac1 100644
--- a/README.md
+++ b/README.md
@@ -1099,7 +1099,7 @@ For simplicity, in most of the examples here we manually create sessions and we
 
 When experimenting with neural network models you usually have a training/test split. You want to train your model on the training set, and once in a while evaluate it on test set and compute some metrics. You also need to store the model parameters as a checkpoint, and ideally you want to be able to stop and resume training. TensorFlow's learn API is designed to make this job easier, letting us focus on developing the actual model.
 
-The most basic way of using tf.learn API is to use tf.Estimator object directly. You need to define a model function that defines a loss function, a train op, one or a set of predictions, and optinoally a set of metric ops for evaluation:
+The most basic way of using tf.learn API is to use tf.Estimator object directly. You need to define a model function that defines a loss function, a train op, one or a set of predictions, and optionally a set of metric ops for evaluation:
 ```python
 import tensorflow as tf
 

From b94b50384d67e160798c0e11b31022ad9a97032a Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Fri, 2 Mar 2018 23:21:15 -0800
Subject: [PATCH 60/78] Update README.md

---
 README.md | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index ed93ac1..a994240 100644
--- a/README.md
+++ b/README.md
@@ -269,13 +269,21 @@ with tf.variable_scope("scope", reuse=True):
 
 This becomes handy for example when using built-in neural network layers:
 ```python
-features1 = tf.layers.conv2d(image1, filters=32, kernel_size=3)
+with tf.variable_scope('my_scope'):
+  features1 = tf.layers.conv2d(image1, filters=32, kernel_size=3)
 # Use the same convolution weights to process the second image:
-with tf.variable_scope(tf.get_variable_scope(), reuse=True):
+with tf.variable_scope('my_scope', reuse=True):
   features2 = tf.layers.conv2d(image2, filters=32, kernel_size=3)
 ```
 
-This syntax may not look very clean to some. Especially if you want to do lots of variable sharing keeping track of when to define new variables and when to reuse them can be cumbersome and error prone. TensorFlow templates are designed to handle this automatically:
+Alternatively you can set reuse to tf.AUTO_REUSE which tells TensorFlow to create a new variable if a variable with the same name doesn't exist, and reuse otherwise:
+```python
+with tf.variable_scope("scope", reuse=tf.AUTO_REUSE):
+  features1 = tf.layers.conv2d(image1, filters=32, kernel_size=3)
+  features2 = tf.layers.conv2d(image2, filters=32, kernel_size=3)
+```
+
+If you want to do lots of variable sharing keeping track of when to define new variables and when to reuse them can be cumbersome and error prone. tf.AUTO_REUSE simplifies this task but adds the risk of sharing variables that weren't supposed to be shared. TensorFlow templates are another way of tackling the same problem without this risk:
 ```python
 conv3x32 = tf.make_template("conv3x32", lambda x: tf.layers.conv2d(x, 32, 3))
 features1 = conv3x32(image1)
@@ -800,7 +808,7 @@ def make_parallel(fn, num_gpus, **kwargs):
     out_split = []
     for i in range(num_gpus):
         with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
-            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
+            with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
                 out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
 
     return tf.concat(out_split, axis=0)
@@ -1398,7 +1406,7 @@ def make_parallel(fn, num_gpus, **kwargs):
   out_split = []
   for i in range(num_gpus):
     with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
-      with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
+      with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
         out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
 
   return tf.concat(out_split, axis=0)

From 65d87c259897981076ae6ca08d48c9ff7be9a9b8 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Mon, 5 Mar 2018 00:31:48 -0800
Subject: [PATCH 61/78] Update code.

---
 code/framework | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/framework b/code/framework
index 75ae150..fae174c 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit 75ae150b45291dd2e973f1d06628ce594c569024
+Subproject commit fae174ccf14be9f6581c03aa60e3c3d409ce261d

From ed14d64015f4762dad1ea343829c0a2bf20d844a Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 10 Jul 2018 21:05:13 -0700
Subject: [PATCH 62/78] Fixed the autoreuse code snippet.

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index a994240..05310c9 100644
--- a/README.md
+++ b/README.md
@@ -280,6 +280,8 @@ Alternatively you can set reuse to tf.AUTO_REUSE which tells TensorFlow to creat
 ```python
 with tf.variable_scope("scope", reuse=tf.AUTO_REUSE):
   features1 = tf.layers.conv2d(image1, filters=32, kernel_size=3)
+  
+with tf.variable_scope("scope", reuse=tf.AUTO_REUSE):
   features2 = tf.layers.conv2d(image2, filters=32, kernel_size=3)
 ```
 

From a504a54b73a5098049f17f70cc767f1ceabe3d29 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 10 Jul 2018 22:10:54 -0700
Subject: [PATCH 63/78] Update README.md

---
 README.md | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/README.md b/README.md
index 05310c9..214a313 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,6 @@ Table of Contents
     - [Make parallel](#make_parallel)
     - [Leaky Relu](#leaky_relu)
     - [Batch normalization](#batch_norm)
-    - [Squeeze and excitation](#squeeze_excite)
 ---
 
 _We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
@@ -1478,22 +1477,3 @@ def batch_normalization(tensor, training=False, epsilon=0.001, momentum=0.9,
 
   return tensor
 ```
-
-## Squeeze and excitation <a name="squeeze_excite"></a>
-```python
-def squeeze_and_excite(tensor, ratio=16, name=None):
-  """Apply squeeze/excite on given 4-D tensor.
-  
-  Based on: https://arxiv.org/abs/1709.01507
-  """
-  with tf.variable_scope(name, default_name="squeeze_and_excite"):
-    original = tensor
-    units = tensor.shape.as_list()[-1]
-    tensor = tf.reduce_mean(tensor, [1, 2], keep_dims=True)
-    tensor = tf.layers.dense(tensor, units / ratio, use_bias=False)
-    tensor = tf.nn.relu(tensor)
-    tensor = tf.layers.dense(tensor, units, use_bias=False)
-    tensor = tf.nn.sigmoid(tensor)
-    tensor = original * tensor
-  return tensor
-```

From b5c1939a931f1a8393e939bb4df8e0a34d69a66c Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Wed, 11 Jul 2018 13:58:08 -0700
Subject: [PATCH 64/78] Updated format.

---
 README.md | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 214a313..f8ee979 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
 
 Table of Contents
 =================
+## Part I: TensorFlow Fundamentals
 1.  [TensorFlow Basics](#basics)
 2.  [Understanding static and dynamic shapes](#shapes)
 3.  [Scopes and when to use them](#scopes)
@@ -15,16 +16,17 @@ Table of Contents
 11. [Debugging TensorFlow models](#debug)
 12. [Numerical stability in TensorFlow](#stable)
 13. [Building a neural network training framework with learn API](#tf_learn)
-14. [TensorFlow Cookbook](#cookbook)
-    - [Get shape](#get_shape)
-    - [Batch gather](#batch_gather)
-    - [Beam search](#beam_search)
-    - [Merge](#merge)
-    - [Entropy](#entropy)
-    - [KL-Divergence](#kld)
-    - [Make parallel](#make_parallel)
-    - [Leaky Relu](#leaky_relu)
-    - [Batch normalization](#batch_norm)
+
+## Part II: TensorFlow Cookbook
+1. [Get shape](#get_shape)
+2. [Batch gather](#batch_gather)
+3. [Beam search](#beam_search)
+4. [Merge](#merge)
+5. [Entropy](#entropy)
+6. [KL-Divergence](#kld)
+7. [Make parallel](#make_parallel)
+8. [Leaky Relu](#leaky_relu)
+9. [Batch normalization](#batch_norm)
 ---
 
 _We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
@@ -34,6 +36,9 @@ _We aim to gradually expand this series by adding new articles and keep the cont
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```
 
+# Part I: TensorFlow Fundamentals
+<a name="fundamentals"></a>
+
 ## TensorFlow Basics
 <a name="basics"></a>
 The most striking difference between TensorFlow and other numerical computation libraries such as NumPy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as NumPy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.
@@ -1208,7 +1213,7 @@ The framework also comes with a simple convolutional network classifier in [alex
 
 And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the framework [source code](https://github.com/vahidk/TensorFlowFramework) and see the official python API to learn more about the learn API.
 
-## TensorFlow Cookbook
+# Part II: TensorFlow Cookbook
 <a name="cookbook"></a>
 This section includes implementation of a set of common operations in TensorFlow.
 

From ca551331a013fbf07e950c68c8e3007cefe5a69d Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Fri, 28 Sep 2018 00:26:50 -0700
Subject: [PATCH 65/78] Updated code.

---
 code/framework | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/framework b/code/framework
index fae174c..4ece21d 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit fae174ccf14be9f6581c03aa60e3c3d409ce261d
+Subproject commit 4ece21d1345483fb5905d7d0452de9a2ffa49863

From ba567d3ddaaba40ab7a041ad0a16c3a26978de46 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Fri, 28 Sep 2018 16:23:14 -0700
Subject: [PATCH 66/78] Updated the code.

---
 code/framework | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/framework b/code/framework
index 4ece21d..cdfddfc 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit 4ece21d1345483fb5905d7d0452de9a2ffa49863
+Subproject commit cdfddfc81f0fd8f410ced44d1390dbde8f02aa2b

From 67b688e8dda2633b96964d8cfadb2e239ac06429 Mon Sep 17 00:00:00 2001
From: Mahmoud Akl <mahmoud.akl@gmail.com>
Date: Thu, 14 Feb 2019 12:32:37 +0100
Subject: [PATCH 67/78] Replaced tf.prod with tf.reduce_prod

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f8ee979..f016fb5 100644
--- a/README.md
+++ b/README.md
@@ -191,7 +191,7 @@ def reshape(tensor, dims_list):
     elif all([isinstance(shape[d], int) for d in dims]):
       dims_prod.append(np.prod([shape[d] for d in dims]))
     else:
-      dims_prod.append(tf.prod([shape[d] for d in dims]))
+      dims_prod.append(tf.reduce_prod([shape[d] for d in dims]))
   tensor = tf.reshape(tensor, dims_prod)
   return tensor
 ```

From 37b0acafbe5443377b55942df723cf7a2e81747b Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Tue, 19 Feb 2019 21:50:08 -0800
Subject: [PATCH 68/78] Update README.md

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index f016fb5..3a71e75 100644
--- a/README.md
+++ b/README.md
@@ -326,8 +326,8 @@ d = tf.layers.dense(c, 10, activation=tf.nn.relu)
 But this can be done more efficiently with broadcasting. We use the fact that f(m(x + y)) is equal to f(mx + my). So we can do the linear operations separately and use broadcasting to do implicit concatenation:
 
 ```python
-pa = tf.layers.dense(a, 10, activation=None)
-pb = tf.layers.dense(b, 10, activation=None)
+pa = tf.layers.dense(a, 10)
+pb = tf.layers.dense(b, 10)
 d = tf.nn.relu(pa + pb)
 ```
 
@@ -335,8 +335,8 @@ In fact this piece of code is pretty general and can be applied to tensors of ar
 
 ```python
 def merge(a, b, units, activation=tf.nn.relu):
-    pa = tf.layers.dense(a, units, activation=None)
-    pb = tf.layers.dense(b, units, activation=None)
+    pa = tf.layers.dense(a, units)
+    pb = tf.layers.dense(b, units)
     c = pa + pb
     if activation is not None:
         c = activation(c)
@@ -413,7 +413,7 @@ Python ops allow you to convert a regular Python function to a TensorFlow operat
 The recommended way of reading the data in TensorFlow however is through the dataset API:
 ```python
 actual_data = np.random.normal(size=[100])
-dataset = tf.contrib.data.Dataset.from_tensor_slices(actual_data)
+dataset = tf.data.Dataset.from_tensor_slices(actual_data)
 data = dataset.make_one_shot_iterator().get_next()
 ```
 
@@ -1340,7 +1340,7 @@ def merge(tensors, units, activation=tf.nn.relu, name=None, **kwargs):
     projs = []
     for i, tensor in enumerate(tensors):
       proj = tf.layers.dense(
-          tensor, units, activation=None,
+          tensor, units,
           name="proj_%d" % i,
           **kwargs)
       projs.append(proj)

From 964ca0c872a8855e52225568c2d2520fccdfcb6f Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Mar 2019 11:29:01 -0700
Subject: [PATCH 69/78] Update framework.

---
 code/framework | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/framework b/code/framework
index cdfddfc..30a3391 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit cdfddfc81f0fd8f410ced44d1390dbde8f02aa2b
+Subproject commit 30a33914c980d0e5828c1534490379c02140d845

From 5230a4fc42a63596d0694dc5f8c2ab589a40bd1e Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Mar 2019 16:39:08 -0700
Subject: [PATCH 70/78] First v2 draft.

---
 README.md | 1293 ++++++++---------------------------------------------
 1 file changed, 197 insertions(+), 1096 deletions(-)

diff --git a/README.md b/README.md
index 3a71e75..442ee9e 100644
--- a/README.md
+++ b/README.md
@@ -1,47 +1,32 @@
-# Effective TensorFlow
+# Effective TensorFlow 2
 
 Table of Contents
 =================
-## Part I: TensorFlow Fundamentals
-1.  [TensorFlow Basics](#basics)
-2.  [Understanding static and dynamic shapes](#shapes)
-3.  [Scopes and when to use them](#scopes)
-4.  [Broadcasting the good and the ugly](#broadcast)
-5.  [Feeding data to TensorFlow](#data)
-6.  [Take advantage of the overloaded operators](#overloaded_ops)
-7.  [Understanding order of execution and control dependencies](#control_deps)
-8.  [Control flow operations: conditionals and loops](#control_flow)
-9.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
-10. [Multi-GPU processing with data parallelism](#multi_gpu)
-11. [Debugging TensorFlow models](#debug)
-12. [Numerical stability in TensorFlow](#stable)
-13. [Building a neural network training framework with learn API](#tf_learn)
-
-## Part II: TensorFlow Cookbook
-1. [Get shape](#get_shape)
-2. [Batch gather](#batch_gather)
-3. [Beam search](#beam_search)
-4. [Merge](#merge)
-5. [Entropy](#entropy)
-6. [KL-Divergence](#kld)
-7. [Make parallel](#make_parallel)
-8. [Leaky Relu](#leaky_relu)
-9. [Batch normalization](#batch_norm)
+## Part I: TensorFlow 2 Fundamentals
+1.  [TensorFlow 2 Basics](#basics)
+2.  [Broadcasting the good and the ugly](#broadcast)
+3.  [Take advantage of the overloaded operators](#overloaded_ops)
+4.  [Control flow operations: conditionals and loops](#control_flow)
+5.  [Prototyping kernels and advanced visualization with Python ops](#python_ops)
+6.  [Numerical stability in TensorFlow](#stable)
 ---
 
-_We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
+_We updated the guide to follow the newly released TensorFlow 2.x API. If you want the original guide for TensorFlow 1.x see the [v1 branch](https://github.com/vahidk/EffectiveTensorflow/tree/v1)._
 
- _We encourage you to also check out the accompanied neural network training framework built on top of tf.contrib.learn API. The [framework](https://github.com/vahidk/TensorflowFramework) can be downloaded separately:_
+_To install TensorFlow 2.0 (alpha) follow the [instructions on the official website](https://www.tensorflow.org/install/pip):_
+```
+pip install tensorflow==2.0.0-alpha0
 ```
-git clone https://github.com/vahidk/TensorflowFramework.git
+
+_We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
 ```
 
-# Part I: TensorFlow Fundamentals
+# Part I: TensorFlow 2.0 Fundamentals
 <a name="fundamentals"></a>
 
 ## TensorFlow Basics
 <a name="basics"></a>
-The most striking difference between TensorFlow and other numerical computation libraries such as NumPy is that operations in TensorFlow are symbolic. This is a powerful concept that allows TensorFlow to do all sort of things (e.g. automatic differentiation) that are not possible with imperative libraries such as NumPy. But it also comes at the cost of making it harder to grasp. Our attempt here is to demystify TensorFlow and provide some guidelines and best practices for more effective use of TensorFlow.
+TensorFlow 2 underwent a massive redesign to make the API more accessible and easier to use. If you are familiar with NumPy you will find yourself right at home when using TensorFlow 2. Unlike TensorFlow 1, which was purely symbolic, TensorFlow 2 hides its symbolic nature under the hood to look like any other imperative library such as NumPy. It's important to note the change is mostly an interface change, and TensorFlow 2 is still able to take advantage of its symbolic machinery to do everything that TensorFlow 1.x can do (e.g. automatic differentiation and massively parallel computation on TPUs/GPUs).
 
 Let's start with a simple example, we want to multiply two random matrices. First we look at an implementation done in NumPy:
 ```python
@@ -54,28 +39,21 @@ z = np.dot(x, y)
 print(z)
 ```
 
-Now we perform the exact same computation this time in TensorFlow:
+Now we perform the exact same computation this time in TensorFlow 2.0:
 ```python
 import tensorflow as tf
 
-x = tf.random_normal([10, 10])
-y = tf.random_normal([10, 10])
+x = tf.random.normal([10, 10])
+y = tf.random.normal([10, 10])
 z = tf.matmul(x, y)
 
-sess = tf.Session()
-z_val = sess.run(z)
-
-print(z_val)
+print(z)
 ```
-Unlike NumPy that immediately performs the computation and produces the result, tensorflow only gives us a handle (of type Tensor) to a node in the graph that represents the result. If we try printing the value of z directly, we get something like this:
+Similar to NumPy, TensorFlow 2 also immediately performs the computation and produces the result. The only difference is that TensorFlow uses the tf.Tensor type to store the results, which can be easily converted to a NumPy array by calling the tf.Tensor.numpy() member function:
+
 ```
-Tensor("MatMul:0", shape=(10, 10), dtype=float32)
+print(z.numpy())
 ```
-Since both the inputs have a fully defined shape, tensorflow is able to infer the shape of the tensor as well as its type. In order to compute the value of the tensor we need to create a session and evaluate it using Session.run() method.
-
-***
-__Tip__: When using Jupyter notebook make sure to call tf.reset_default_graph() at the beginning to clear the symbolic graph before defining new nodes.
-***
 
 To understand how powerful symbolic computation can be let's have a look at another example. Assume that we have samples from a curve (say f(x) = 5x^2 + 3) and we want to estimate f(x) based on these samples. We define a parametric function g(x, w) = w0 x^2 + w1 x + w2, which is a function of the input x and latent parameters w, our goal is then to find the latent parameters such that g(x, w) ≈ f(x). This can be done by minimizing the following loss function: L(w) = &sum; (f(x) - g(x, w))^2. Although there's a closed form solution for this simple problem, we opt to use a more general approach that can be applied to any arbitrary differentiable function, and that is using stochastic gradient descent. We simply compute the average gradient of L(w) with respect to w over a set of sample points and move in the opposite direction.
 
@@ -85,41 +63,47 @@ Here's how it can be done in TensorFlow:
 import numpy as np
 import tensorflow as tf
 
-# Placeholders are used to feed values from python to TensorFlow ops. We define
-# two placeholders, one for input feature x, and one for output y.
-x = tf.placeholder(tf.float32)
-y = tf.placeholder(tf.float32)
-
 # Assuming we know that the desired function is a polynomial of 2nd degree, we
-# allocate a vector of size 3 to hold the coefficients. The variable will be
-# automatically initialized with random noise.
-w = tf.get_variable("w", shape=[3, 1])
+# allocate a vector of size 3 to hold the coefficients and initialize it with
+# random noise.
+w = tf.Variable(tf.random.normal([3, 1]))
 
-# We define yhat to be our estimate of y.
-f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
-yhat = tf.squeeze(tf.matmul(f, w), 1)
+# We use the Adam optimizer with learning rate set to 0.1 to minimize the loss.
+opt = tf.optimizers.Adam(0.1)
 
-# The loss is defined to be the l2 distance between our estimate of y and its
-# true value. We also added a shrinkage term, to ensure the resulting weights
-# would be small.
-loss = tf.nn.l2_loss(yhat - y) + 0.1 * tf.nn.l2_loss(w)
+def model(x):
+    # We define yhat to be our estimate of y.
+    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
+    yhat = tf.squeeze(tf.matmul(f, w), 1)
+    return yhat
 
-# We use the Adam optimizer with learning rate set to 0.1 to minimize the loss.
-train_op = tf.train.AdamOptimizer(0.1).minimize(loss)
+def compute_loss(y, yhat):
+    # The loss is defined to be the l2 distance between our estimate of y and its
+    # true value. We also added a shrinkage term, to ensure the resulting weights
+    # would be small.
+    loss = tf.nn.l2_loss(yhat - y) + 0.1 * tf.nn.l2_loss(w)
+    return loss
 
 def generate_data():
-    x_val = np.random.uniform(-10.0, 10.0, size=100)
-    y_val = 5 * np.square(x_val) + 3
-    return x_val, y_val
+    # Generate some training data based on the true function
+    x = np.random.uniform(-10.0, 10.0, size=100).astype(np.float32)
+    y = 5 * np.square(x) + 3
+    return x, y
+
+def train_step():
+    x, y = generate_data()
+
+    def _loss_fn():
+        yhat = model(x)
+        loss = compute_loss(y, yhat)
+        return loss
+    
+    opt.minimize(_loss_fn, [w])
 
-sess = tf.Session()
-# Since we are using variables we first need to initialize them.
-sess.run(tf.global_variables_initializer())
 for _ in range(1000):
-    x_val, y_val = generate_data()
-    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
-    print(loss_val)
-print(sess.run([w]))
+    train_step()
+
+print(w.numpy())
 ```
 By running this piece of code you should see a result close to this:
 ```
@@ -127,176 +111,24 @@ By running this piece of code you should see a result close to this:
 ```
 Which is a relatively close approximation to our parameters.
 
-This is just tip of the iceberg for what TensorFlow can do. Many problems such as optimizing large neural networks with millions of parameters can be implemented efficiently in TensorFlow in just a few lines of code. TensorFlow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
-
-## Understanding static and dynamic shapes
-<a name="shapes"></a>
-Tensors in TensorFlow have a static shape attribute which is determined during graph construction. The static shape may be underspecified. For example we might define a tensor of shape [None, 128]:
-```python
-import tensorflow as tf
+Note that in the above code we are running TensorFlow in imperative mode (i.e. operations get instantly executed), which is not very efficient. TensorFlow 2.0 can also turn a given piece of Python code into a graph which can then be optimized and efficiently parallelized on GPUs and TPUs. To get all those benefits we simply need to decorate the train_step function with the tf.function decorator:
 
-a = tf.placeholder(tf.float32, [None, 128])
 ```
-This means that the first dimension can be of any size and will be determined dynamically during Session.run(). You can query the static shape of a Tensor as follows:
+@tf.function
+def train_step():
+    x, y = generate_data()
 
-```python
-static_shape = a.shape.as_list()  # returns [None, 128]
+    def _loss_fn():
+        yhat = model(x)
+        loss = compute_loss(y, yhat)
+        return loss
+    
+    opt.minimize(_loss_fn, [w])
 ```
 
-To get the dynamic shape of the tensor you can call tf.shape op, which returns a tensor representing the shape of the given tensor:
-```python
-dynamic_shape = tf.shape(a)
-```
-
-The static shape of a tensor can be set with Tensor.set_shape() method:
-```python
-a.set_shape([32, 128])  # static shape of a is [32, 128]
-a.set_shape([None, 128])  # first dimension of a is determined dynamically
-```
-
-You can reshape a given tensor dynamically using tf.reshape function:
-```python
-a =  tf.reshape(a, [32, 128])
-```
-
-It can be convenient to have a function that returns the static shape when available and dynamic shape when it's not. The following utility function does just that:
-```python
-def get_shape(tensor):
-  static_shape = tensor.shape.as_list()
-  dynamic_shape = tf.unstack(tf.shape(tensor))
-  dims = [s[1] if s[0] is None else s[0]
-          for s in zip(static_shape, dynamic_shape)]
-  return dims
-```
-
-Now imagine we want to convert a Tensor of rank 3 to a tensor of rank 2 by collapsing the second and third dimensions into one. We can use our get_shape() function to do that:
-```python
-b = tf.placeholder(tf.float32, [None, 10, 32])
-shape = get_shape(b)
-b = tf.reshape(b, [shape[0], shape[1] * shape[2]])
-```
-Note that this works whether the shapes are statically specified or not.
-
-In fact we can write a general purpose reshape function to collapse any list of dimensions:
-```python
-import tensorflow as tf
-import numpy as np
-
-def reshape(tensor, dims_list):
-  shape = get_shape(tensor)
-  dims_prod = []
-  for dims in dims_list:
-    if isinstance(dims, int):
-      dims_prod.append(shape[dims])
-    elif all([isinstance(shape[d], int) for d in dims]):
-      dims_prod.append(np.prod([shape[d] for d in dims]))
-    else:
-      dims_prod.append(tf.reduce_prod([shape[d] for d in dims]))
-  tensor = tf.reshape(tensor, dims_prod)
-  return tensor
-```
-
-Then collapsing the second dimension becomes very easy:
-```python
-b = tf.placeholder(tf.float32, [None, 10, 32])
-b = reshape(b, [0, [1, 2]])
-```
-
-## Scopes and when to use them
-<a name="scopes"></a>
-
-Variables and tensors in TensorFlow have a name attribute that is used to identify them in the symbolic graph. If you don't specify a name when creating a variable or a tensor, TensorFlow automatically assigns a name for you:
-
-```python
-a = tf.constant(1)
-print(a.name)  # prints "Const:0"
-
-b = tf.Variable(1)
-print(b.name)  # prints "Variable:0"
-```
-
-You can overwrite the default name by explicitly specifying it:
-
-```python
-a = tf.constant(1, name="a")
-print(a.name)  # prints "a:0"
-
-b = tf.Variable(1, name="b")
-print(b.name)  # prints "b:0"
-```
-
-TensorFlow introduces two different context managers to alter the name of tensors and variables. The first is tf.name_scope:
-
-```python
-with tf.name_scope("scope"):
-  a = tf.constant(1, name="a")
-  print(a.name)  # prints "scope/a:0"
-
-  b = tf.Variable(1, name="b")
-  print(b.name)  # prints "scope/b:0"
-
-  c = tf.get_variable(name="c", shape=[])
-  print(c.name)  # prints "c:0"
-```
-
-Note that there are two ways to define new variables in TensorFlow, by creating a tf.Variable object or by calling tf.get_variable. Calling tf.get_variable with a new name results in creating a new variable, but if a variable with the same name exists it will raise a ValueError exception, telling us that re-declaring a variable is not allowed.
-
-tf.name_scope affects the name of tensors and variables created with tf.Variable, but doesn't impact the variables created with tf.get_variable.
-
-Unlike tf.name_scope, tf.variable_scope modifies the name of variables created with tf.get_variable as well:
-
-```python
-with tf.variable_scope("scope"):
-  a = tf.constant(1, name="a")
-  print(a.name)  # prints "scope/a:0"
-
-  b = tf.Variable(1, name="b")
-  print(b.name)  # prints "scope/b:0"
-
-  c = tf.get_variable(name="c", shape=[])
-  print(c.name)  # prints "scope/c:0"
-```
-
-```python
-with tf.variable_scope("scope"):
-  a1 = tf.get_variable(name="a", shape=[])
-  a2 = tf.get_variable(name="a", shape=[])  # Disallowed
-```
-
-But what if we actually want to reuse a previously declared variable? Variable scopes also provide the functionality to do that:
-```python
-with tf.variable_scope("scope"):
-  a1 = tf.get_variable(name="a", shape=[])
-with tf.variable_scope("scope", reuse=True):
-  a2 = tf.get_variable(name="a", shape=[])  # OK
-```
-
-This becomes handy for example when using built-in neural network layers:
-```python
-with tf.variable_scope('my_scope'):
-  features1 = tf.layers.conv2d(image1, filters=32, kernel_size=3)
-# Use the same convolution weights to process the second image:
-with tf.variable_scope('my_scope', reuse=True):
-  features2 = tf.layers.conv2d(image2, filters=32, kernel_size=3)
-```
-
-Alternatively you can set reuse to tf.AUTO_REUSE which tells TensorFlow to create a new variable if a variable with the same name doesn't exist, and reuse otherwise:
-```python
-with tf.variable_scope("scope", reuse=tf.AUTO_REUSE):
-  features1 = tf.layers.conv2d(image1, filters=32, kernel_size=3)
-  
-with tf.variable_scope("scope", reuse=tf.AUTO_REUSE):
-  features2 = tf.layers.conv2d(image2, filters=32, kernel_size=3)
-```
-
-If you want to do lots of variable sharing keeping track of when to define new variables and when to reuse them can be cumbersome and error prone. tf.AUTO_REUSE simplifies this task but adds the risk of sharing variables that weren't supposed to be shared. TensorFlow templates are another way of tackling the same problem without this risk:
-```python
-conv3x32 = tf.make_template("conv3x32", lambda x: tf.layers.conv2d(x, 32, 3))
-features1 = conv3x32(image1)
-features2 = conv3x32(image2)  # Will reuse the convolution weights.
-```
-You can turn any function to a TensorFlow template. Upon the first call to a template, the variables defined inside the function would be declared and in the consecutive invocations they would automatically get reused.
+What's cool about tf.function is that it's also able to convert basic Python statements like while, for and if into native TensorFlow functions. We will get to that later.
 
+This is just the tip of the iceberg for what TensorFlow can do. Many problems such as optimizing large neural networks with millions of parameters can be implemented efficiently in TensorFlow in just a few lines of code. TensorFlow takes care of scaling across multiple devices, and threads, and supports a variety of platforms.
 
 ## Broadcasting the good and the ugly
 <a name="broadcast"></a>
@@ -309,40 +141,45 @@ a = tf.constant([[1., 2.], [3., 4.]])
 b = tf.constant([[1.], [2.]])
 # c = a + tf.tile(b, [1, 2])
 c = a + b
+
+print(c)
 ```
 
 Broadcasting allows us to perform implicit tiling which makes the code shorter, and more memory efficient, since we don’t need to store the result of the tiling operation. One neat place that this can be used is when combining features of varying length. In order to concatenate features of varying length we commonly tile the input tensors, concatenate the result and apply some nonlinearity. This is a common pattern across a variety of neural network architectures:
 
 ```python
-a = tf.random_uniform([5, 3, 5])
-b = tf.random_uniform([5, 1, 6])
+a = tf.random.uniform([5, 3, 5])
+b = tf.random.uniform([5, 1, 6])
 
 # concat a and b and apply nonlinearity
 tiled_b = tf.tile(b, [1, 3, 1])
 c = tf.concat([a, tiled_b], 2)
-d = tf.layers.dense(c, 10, activation=tf.nn.relu)
+d = tf.keras.layers.Dense(10, activation=tf.nn.relu).apply(c)
+
+print(d)
 ```
 
 But this can be done more efficiently with broadcasting. We use the fact that f(m(x + y)) is equal to f(mx + my). So we can do the linear operations separately and use broadcasting to do implicit concatenation:
 
 ```python
-pa = tf.layers.dense(a, 10)
-pb = tf.layers.dense(b, 10)
+pa = tf.keras.layers.Dense(10).apply(a)
+pb = tf.keras.layers.Dense(10).apply(b)
 d = tf.nn.relu(pa + pb)
+
+print(d)
 ```
 
 In fact this piece of code is pretty general and can be applied to tensors of arbitrary shape as long as broadcasting between tensors is possible:
 
 ```python
-def merge(a, b, units, activation=tf.nn.relu):
-    pa = tf.layers.dense(a, units)
-    pb = tf.layers.dense(b, units)
+def merge(a, b, units, activation=None):
+    pa = tf.keras.layers.Dense(units).apply(a)
+    pb = tf.keras.layers.Dense(units).apply(b)
     c = pa + pb
     if activation is not None:
         c = activation(c)
     return c
 ```
-A slightly more general form of this function is [included](#merge) in the cookbook.
 
 So far we discussed the good part of broadcasting. But what’s the ugly part you may ask? Implicit assumptions almost always make debugging harder to do. Consider the following example:
 
@@ -350,6 +187,8 @@ So far we discussed the good part of broadcasting. But what’s the ugly part yo
 a = tf.constant([[1.], [2.]])
 b = tf.constant([1., 2.])
 c = tf.reduce_sum(a + b)
+
+print(c)
 ```
 
 What do you think the value of c would be after evaluation? If you guessed 6, that’s wrong. It’s going to be 12. This is because when rank of two tensors don’t match, TensorFlow automatically expands the first dimension of the tensor with lower rank before the elementwise operation, so the result of addition would be [[2, 3], [3, 4]], and the reducing over all parameters would give us 12.
@@ -360,82 +199,11 @@ The way to avoid this problem is to be as explicit as possible. Had we specified
 a = tf.constant([[1.], [2.]])
 b = tf.constant([1., 2.])
 c = tf.reduce_sum(a + b, 0)
-```
-
-Here the value of c would be [5, 7], and we immediately would guess based on the shape of the result that there’s something wrong. A general rule of thumb is to always specify the dimensions in reduction operations and when using tf.squeeze.
 
-## Feeding data to TensorFlow
-<a name="data"></a>
-
-TensorFlow is designed to work efficiently with large amount of data. So it's important not to starve your TensorFlow model in order to maximize its performance. There are various ways that you can feed your data to TensorFlow.
-
-### Constants
-The simplest approach is to embed the data in your graph as a constant:
-```python
-import tensorflow as tf
-import numpy as np
-
-actual_data = np.random.normal(size=[100])
-
-data = tf.constant(actual_data)
-```
-
-This approach can be very efficient, but it's not very flexible. One problem with this approach is that, in order to use your model with another dataset you have to rewrite the graph. Also, you have to load all of your data at once and keep it in memory which would only work with small datasets.
-
-### Placeholders
-Using placeholders solves both of these problems:
-```python
-import tensorflow as tf
-import numpy as np
-
-data = tf.placeholder(tf.float32)
-
-prediction = tf.square(data) + 1
-
-actual_data = np.random.normal(size=[100])
-
-tf.Session().run(prediction, feed_dict={data: actual_data})
-```
-Placeholder operator returns a tensor whose value is fetched through the feed_dict argument in Session.run function. Note that running Session.run without feeding the value of data in this case will result in an error.
-
-### Python ops
-Another approach to feed the data to TensorFlow is by using Python ops:
-```python
-def py_input_fn():
-    actual_data = np.random.normal(size=[100])
-    return actual_data
-
-data = tf.py_func(py_input_fn, [], (tf.float32))
-```
-Python ops allow you to convert a regular Python function to a TensorFlow operation.
-
-### Dataset API
-The recommended way of reading the data in TensorFlow however is through the dataset API:
-```python
-actual_data = np.random.normal(size=[100])
-dataset = tf.data.Dataset.from_tensor_slices(actual_data)
-data = dataset.make_one_shot_iterator().get_next()
-```
-
-If you need to read your data from file, it may be more efficient to write it in TFrecord format and use TFRecordDataset to read it:
-```python
-dataset = tf.contrib.data.TFRecordDataset(path_to_data)
+print(c)
 ```
-See the [official docs](https://www.tensorflow.org/api_guides/python/reading_data#Reading_from_files) for an example of how to write your dataset in TFrecord format.
 
-Dataset API allows you to make efficient data processing pipelines easily. For example this is how we process our data in the accompanied framework (See
-[trainer.py](https://github.com/vahidk/TensorflowFramework/blob/master/trainer.py)):
-
-```python
-dataset = ...
-dataset = dataset.cache()
-if mode == tf.estimator.ModeKeys.TRAIN:
-    dataset = dataset.repeat()
-    dataset = dataset.shuffle(batch_size * 5)
-dataset = dataset.map(parse, num_threads=8)
-dataset = dataset.batch(batch_size)
-```
-After reading the data, we use Dataset.cache method to cache it into memory for improved efficiency. During the training mode, we repeat the dataset indefinitely. This allows us to process the whole dataset many times. We also shuffle the dataset to get batches with different sample distributions. Next, we use the Dataset.map function to perform preprocessing on raw records and convert the data to a usable format for the model. We then create batches of samples by calling Dataset.batch.
+Here the value of c would be [5, 7], and we immediately would guess based on the shape of the result that there’s something wrong. A general rule of thumb is to always specify the dimensions in reduction operations and when using tf.squeeze.
 
 ## Take advantage of the overloaded operators
 <a name="overloaded_ops"></a>
@@ -450,28 +218,26 @@ Be very careful when using this op though. The slicing op is very inefficient an
 import tensorflow as tf
 import time
 
-x = tf.random_uniform([500, 10])
+x = tf.random.uniform([500, 10])
 
 z = tf.zeros([10])
-for i in range(500):
-    z += x[i]
 
-sess = tf.Session()
 start = time.time()
-sess.run(z)
+for i in range(500):
+    z += x[i]
 print("Took %f seconds." % (time.time() - start))
 ```
-On my MacBook Pro, this took 2.67 seconds to run! The reason is that we are calling the slice op 500 times, which is going to be very slow to run. A better choice would have been to use tf.unstack op to slice the matrix into a list of vectors all at once:
+On my MacBook Pro, this took 0.045 seconds to run which is quite slow. The reason is that we are calling the slice op 500 times, which is going to be very slow to run. A better choice would have been to use tf.unstack op to slice the matrix into a list of vectors all at once:
 ```python
 z = tf.zeros([10])
 for x_i in tf.unstack(x):
     z += x_i
 ```
-This took 0.18 seconds. Of course, the right way to do this simple reduction is to use tf.reduce_sum op:
+This took 0.01 seconds. Of course, the right way to do this simple reduction is to use tf.reduce_sum op:
 ```python
 z = tf.reduce_sum(x, axis=0)
 ```
-This took 0.008 seconds, which is 300x faster than the original implementation.
+This took 0.0001 seconds, which is about 450x faster than the original implementation.
 
 TensorFlow also overloads a range of arithmetic and logical operators:
 ```python
@@ -499,99 +265,24 @@ You can also use the augmented version of these ops. For example `x += y` and `x
 
 Note that Python doesn't allow overloading "and", "or", and "not" keywords.
 
-TensorFlow also doesn't allow using tensors as booleans, as it may be error prone:
-```python
-x = tf.constant(1.)
-if x:  # This will raise a TypeError error
-    ...
-```
-You can either use tf.cond(x, ...) if you want to check the value of the tensor, or use "if x is None" to check the value of the variable.
-
 Other operators that aren't supported are equal (==) and not equal (!=) operators which are overloaded in NumPy but not in TensorFlow. Use the function versions instead which are `tf.equal` and `tf.not_equal`.
 
-
-## Understanding order of execution and control dependencies
-<a name="control_deps"></a>
-As we discussed in the first item, TensorFlow doesn't immediately run the operations that are defined but rather creates corresponding nodes in a graph that can be evaluated with Session.run() method. This also enables TensorFlow to do optimizations at run time to determine the optimal order of execution and possible trimming of unused nodes. If you only have tf.Tensors in your graph you don't need to worry about dependencies but you most probably have tf.Variables too, and tf.Variables make things much more difficult. My advice to is to only use Variables if Tensors don't do the job. This might not make a lot of sense to you now, so let's start with an example.
-
-```python
-import tensorflow as tf
-
-a = tf.constant(1)
-b = tf.constant(2)
-a = a + b
-
-tf.Session().run(a)
-```
-
-Evaluating "a" will return the value 3 as expected.  Note that here we are creating 3 tensors, two constant tensors and another tensor that stores the result of the addition. Note that you can't overwrite the value of a tensor. If you want to modify it you have to create a new tensor. As we did here.
-
-***
-__TIP__: If you don't define a new graph, TensorFlow automatically creates a graph for you by default. You can use tf.get_default_graph() to get a handle to the graph. You can then inspect the graph, for example by printing all its tensors:
-```python
-print(tf.contrib.graph_editor.get_tensors(tf.get_default_graph()))
-```
-***
-
-Unlike tensors, variables can be updated. So let's see how we may use variables to do the same thing:
-```python
-a = tf.Variable(1)
-b = tf.constant(2)
-assign = tf.assign(a, a + b)
-
-sess = tf.Session()
-sess.run(tf.global_variables_initializer())
-print(sess.run(assign))
-```
-Again, we get 3 as expected. Note that tf.assign returns a tensor representing the value of the assignment.
-So far everything seemed to be fine, but let's look at a slightly more complicated example:
-
-```python
-a = tf.Variable(1)
-b = tf.constant(2)
-c = a + b
-
-assign = tf.assign(a, 5)
-
-sess = tf.Session()
-for i in range(10):
-    sess.run(tf.global_variables_initializer())
-    print(sess.run([assign, c]))
-```
-Note that the tensor c here won't have a deterministic value. This value might be 3 or 7 depending on whether addition or assignment gets executed first.
-
-You should note that the order that you define ops in your code doesn't matter to TensorFlow runtime. The only thing that matters is the control dependencies. Control dependencies for tensors are straightforward. Every time you use a tensor in an operation that op will define an implicit dependency to that tensor. But things get complicated with variables because they can take many values.
-
-When dealing with variables, you may need to explicitly define dependencies using tf.control_dependencies() as follows:
-```python
-a = tf.Variable(1)
-b = tf.constant(2)
-c = a + b
-
-with tf.control_dependencies([c]):
-    assign = tf.assign(a, 5)
-
-sess = tf.Session()
-for i in range(10):
-    sess.run(tf.global_variables_initializer())
-    print(sess.run([assign, c]))
-```
-This will make sure that the assign op will be called after the addition.
-
 ## Control flow operations: conditionals and loops
 <a name="control_flow"></a>
 When building complex models such as recurrent neural networks you may need to control the flow of operations through conditionals and loops. In this section we introduce a number of commonly used control flow ops.
 
-Let's assume you want to decide whether to multiply to or add two given tensors based on a predicate. This can be simply implemented with tf.cond which acts as a python "if" function:
+Let's assume you want to decide whether to multiply or add two given tensors based on a predicate. This can be simply implemented with either Python's built-in if statement or the tf.cond function:
 ```python
 a = tf.constant(1)
 b = tf.constant(2)
 
 p = tf.constant(True)
 
-x = tf.cond(p, lambda: a + b, lambda: a * b)
+# Alternatively:
+# x = tf.cond(p, lambda: a + b, lambda: a * b)
+x = a + b if p else a * b
 
-print(tf.Session().run(x))
+print(x.numpy())
 ```
 Since the predicate is True in this case, the output would be the result of the addition, which is 3.
 
@@ -604,27 +295,54 @@ p = tf.constant([True, False])
 
 x = tf.where(p, a + b, a * b)
 
-print(tf.Session().run(x))
+print(x.numpy())
 ```
 This will return [3, 2].
 
 Another widely used control flow operation is tf.while_loop. It allows building dynamic loops in TensorFlow that operate on sequences of variable length. Let's see how we can generate Fibonacci sequence with tf.while_loops:
+
 ```python
+@tf.function
+def fibonacci(n):
+    a = tf.constant(1)
+    b = tf.constant(1)
+
+    for i in range(2, n):
+        a, b = b, a + b
+    
+    return b
+    
 n = tf.constant(5)
+b = fibonacci(n)
+    
+print(b.numpy())
+```
+This will print 5. Note that tf.function automatically converts the given python code to use tf.while_loop so we don't need to directly interact with the TF API.
 
-def cond(i, a, b):
-    return i < n
+Now imagine we want to keep the whole series of Fibonacci sequence. We may update our body to keep a record of the history of current values:
+```python
+@tf.function
+def fibonacci(n):
+    a = tf.constant(1)
+    b = tf.constant(1)
+    c = tf.constant([1, 1])
 
-def body(i, a, b):
-    return i + 1, b, a + b
+    for i in range(2, n):
+        a, b = b, a + b
+        c = tf.concat([c, [b]], 0)
+    
+    return c
+    
+n = tf.constant(5)
+b = fibonacci(n)
+    
+print(b.numpy())
+```
 
-i, a, b = tf.while_loop(cond, body, (2, 1, 1))
+Now if you try running this, TensorFlow will complain that the shape of one of the loop variables is changing.
+One way to fix this is to use "shape invariants", but this functionality is only available when using the low-level tf.while_loop API:
 
-print(tf.Session().run(b))
-```
-This will print 5. tf.while_loops takes a condition function, and a loop body function, in addition to initial values for loop variables. These loop variables are then updated by multiple calls to the body function until the condition returns false.
 
-Now imagine we want to keep the whole series of Fibonacci sequence. We may update our body to keep a record of the history of current values:
 ```python
 n = tf.constant(5)
 
@@ -632,47 +350,47 @@ def cond(i, a, b, c):
     return i < n
 
 def body(i, a, b, c):
-    return i + 1, b, a + b, tf.concat([c, [a + b]], 0)
-
-i, a, b, c = tf.while_loop(cond, body, (2, 1, 1, tf.constant([1, 1])))
+    a, b = b, a + b
+    c = tf.concat([c, [b]], 0)
+    return i + 1, a, b, c
 
-print(tf.Session().run(c))
-```
-Now if you try running this, TensorFlow will complain that the shape of the the fourth loop variable is changing. So you must make that explicit that it's intentional:
-```
 i, a, b, c = tf.while_loop(
     cond, body, (2, 1, 1, tf.constant([1, 1])),
     shape_invariants=(tf.TensorShape([]),
                       tf.TensorShape([]),
                       tf.TensorShape([]),
                       tf.TensorShape([None])))
-```
-This is not only getting ugly, but is also somewhat inefficient. Note that we are building a lot of intermediary tensors that we don't use. TensorFlow has a better solution for this kind of growing arrays. Meet tf.TensorArray. Let's do the same thing this time with tensor arrays:
-```python
-n = tf.constant(5)
-
-c = tf.TensorArray(tf.int32, n)
-c = c.write(0, 1)
-c = c.write(1, 1)
 
-def cond(i, a, b, c):
-    return i < n
+print(c.numpy())
+```
 
-def body(i, a, b, c):
-    c = c.write(i, a + b)
-    return i + 1, b, a + b, c
+This is not only getting ugly, but is also pretty inefficient. Note that we are building a lot of intermediary tensors that we don't use. TensorFlow has a better solution for this kind of growing arrays. Meet tf.TensorArray. Let's do the same thing this time with tensor arrays:
+```python
+@tf.function
+def fibonacci(n):
+    a = tf.constant(1)
+    b = tf.constant(1)
 
-i, a, b, c = tf.while_loop(cond, body, (2, 1, 1, c))
+    c = tf.TensorArray(tf.int32, n)
+    c = c.write(0, a)
+    c = c.write(1, b)
 
-c = c.stack()
+    for i in range(2, n):
+        a, b = b, a + b
+        c = c.write(i, b)
+    
+    return c.stack()
 
-print(tf.Session().run(c))
+n = tf.constant(5)
+c = fibonacci(n)
+    
+print(c.numpy())
 ```
 TensorFlow while loops and tensor arrays are essential tools for building complex recurrent neural networks. As an exercise try implementing [beam search](https://en.wikipedia.org/wiki/Beam_search) using tf.while_loops. Can you make it more efficient with tensor arrays?
 
 ## Prototyping kernels and advanced visualization with Python ops
 <a name="python_ops"></a>
-Operation kernels in TensorFlow are entirely written in C++ for efficiency. But writing a TensorFlow kernel in C++ can be quite a pain. So, before spending hours implementing your kernel you may want to prototype something quickly, however inefficient. With tf.py_func() you can turn any piece of python code to a TensorFlow operation.
+Operation kernels in TensorFlow are entirely written in C++ for efficiency. But writing a TensorFlow kernel in C++ can be quite a pain. So, before spending hours implementing your kernel you may want to prototype something quickly, however inefficient. With tf.py_function() you can turn any piece of python code to a TensorFlow operation.
 
 For example this is how you can implement a simple ReLU nonlinearity kernel in TensorFlow as a python op:
 ```python
@@ -682,40 +400,42 @@ import uuid
 
 def relu(inputs):
     # Define the op in python
-    def _relu(x):
+    def _py_relu(x):
         return np.maximum(x, 0.)
 
     # Define the op's gradient in python
-    def _relu_grad(x):
+    def _py_relu_grad(x):
         return np.float32(x > 0)
+    
+    @tf.custom_gradient
+    def _relu(x):
+        y = tf.py_function(_py_relu, [x], tf.float32)
+        
+        def _relu_grad(dy):
+            return dy * tf.py_function(_py_relu_grad, [x], tf.float32)
+
+        return y, _relu_grad
 
-    # An adapter that defines a gradient op compatible with TensorFlow
-    def _relu_grad_op(op, grad):
-        x = op.inputs[0]
-        x_grad = grad * tf.py_func(_relu_grad, [x], tf.float32)
-        return x_grad
-
-    # Register the gradient with a unique id
-    grad_name = "MyReluGrad_" + str(uuid.uuid4())
-    tf.RegisterGradient(grad_name)(_relu_grad_op)
-
-    # Override the gradient of the custom op
-    g = tf.get_default_graph()
-    with g.gradient_override_map({"PyFunc": grad_name}):
-        output = tf.py_func(_relu, [inputs], tf.float32)
-    return output
+    return _relu(inputs)
 ```
 
-To verify that the gradients are correct you can use TensorFlow's gradient checker:
+To verify that the gradients are correct you can compute the numerical and analytical gradients and compare the values.
 ```python
-x = tf.random_normal([10])
-y = relu(x * x)
+# Compute analytical gradient
+x = tf.random.normal([10], dtype=np.float32)
+with tf.GradientTape() as tape:
+    tape.watch(x)
+    y = relu(x)
+g = tape.gradient(y, x)
+print(g)
 
-with tf.Session():
-    diff = tf.test.compute_gradient_error(x, [10], y, [10])
-    print(diff)
+# Compute numerical gradient
+dx_n = 1e-5
+dy_n = relu(x + dx_n) - relu(x)
+g_n = dy_n / dx_n
+print(g_n)
 ```
-compute_gradient_error() computes the gradient numerically and returns the difference with the provided gradient. What we want is a very low difference.
+The numbers should be very close.
 
 Note that this implementation is pretty inefficient, and is only useful for prototyping, since the python code is not parallelizable and won't run on GPU. Once you verified your idea, you definitely would want to write it as a C++ kernel.
 
@@ -726,12 +446,6 @@ tf.summary.image("image", image)
 ```
 But this only visualizes the input image. In order to visualize the predictions you have to find a way to add annotations to the image which may be almost impossible with existing ops. An easier way to do this is to do the drawing in python, and wrap it in a python op:
 ```python
-import io
-import matplotlib.pyplot as plt
-import numpy as np
-import PIL
-import tensorflow as tf
-
 def visualize_labeled_images(images, labels, max_outputs=3, name="image"):
     def _visualize_image(image, label):
         # Do the actual drawing in python
@@ -761,254 +475,12 @@ def visualize_labeled_images(images, labels, max_outputs=3, name="image"):
         return np.array(outputs, dtype=np.uint8)
 
     # Run the python op.
-    figs = tf.py_func(_visualize_images, [images, labels], tf.uint8)
+    figs = tf.py_function(_visualize_images, [images, labels], tf.uint8)
     return tf.summary.image(name, figs)
 ```
 
 Note that since summaries are usually only evaluated once in a while (not per step), this implementation may be used in practice without worrying about efficiency.
 
-## Multi-GPU processing with data parallelism
-<a name="multi_gpu"></a>
- If you write your software in a language like C++ for a single cpu core, making it run on multiple GPUs in parallel would require rewriting the software from scratch. But this is not the case with TensorFlow. Because of its symbolic nature, tensorflow can hide all that complexity, making it effortless to scale your program across many CPUs and GPUs.
-
- Let's start with the simple example of adding two vectors on CPU:
- ```python
- import tensorflow as tf
-
-with tf.device(tf.DeviceSpec(device_type="CPU", device_index=0)):
-    a = tf.random_uniform([1000, 100])
-    b = tf.random_uniform([1000, 100])
-    c = a + b
-
-tf.Session().run(c)
- ```
-
-The same thing can as simply be done on GPU:
-```python
-with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
-    a = tf.random_uniform([1000, 100])
-    b = tf.random_uniform([1000, 100])
-    c = a + b
- ```
-
-But what if we have two GPUs and want to utilize both? To do that, we can split the data and use a separate GPU for processing each half:
-```python
-split_a = tf.split(a, 2)
-split_b = tf.split(b, 2)
-
-split_c = []
-for i in range(2):
-    with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
-        split_c.append(split_a[i] + split_b[i])
-
-c = tf.concat(split_c, axis=0)
- ```
-
-Let's rewrite this in a more general form so that we can replace addition with any other set of operations:
-```python
-def make_parallel(fn, num_gpus, **kwargs):
-    in_splits = {}
-    for k, v in kwargs.items():
-        in_splits[k] = tf.split(v, num_gpus)
-
-    out_split = []
-    for i in range(num_gpus):
-        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
-            with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
-                out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
-
-    return tf.concat(out_split, axis=0)
-
-
-def model(a, b):
-    return a + b
-
-c = make_parallel(model, 2, a=a, b=b)
-```
-You can replace the model with any function that takes a set of tensors as input and returns a tensor as result with the condition that both the input and output are in batch. Note that we also added a variable scope and set the reuse to true. This makes sure that we use the same variables for processing both splits. This is something that will become handy in our next example.
-
-Let's look at a slightly more practical example. We want to train a neural network on multiple GPUs. During training we not only need to compute the forward pass but also need to compute the backward pass (the gradients). But how can we parallelize the gradient computation? This turns out to be pretty easy.
-
-Recall from the first item that we wanted to fit a second degree polynomial to a set of samples. We reorganized the code a bit to have the bulk of the operations in the model function:
-```python
-import numpy as np
-import tensorflow as tf
-
-def model(x, y):
-    w = tf.get_variable("w", shape=[3, 1])
-
-    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
-    yhat = tf.squeeze(tf.matmul(f, w), 1)
-
-    loss = tf.square(yhat - y)
-    return loss
-
-x = tf.placeholder(tf.float32)
-y = tf.placeholder(tf.float32)
-
-loss = model(x, y)
-
-train_op = tf.train.AdamOptimizer(0.1).minimize(
-    tf.reduce_mean(loss))
-
-def generate_data():
-    x_val = np.random.uniform(-10.0, 10.0, size=100)
-    y_val = 5 * np.square(x_val) + 3
-    return x_val, y_val
-
-sess = tf.Session()
-sess.run(tf.global_variables_initializer())
-for _ in range(1000):
-    x_val, y_val = generate_data()
-    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
-
-_, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
-print(sess.run(tf.contrib.framework.get_variables_by_name("w")))
-```
-
-Now let's use make_parallel that we just wrote to parallelize this. We only need to change two lines of code from the above code:
-```python
-loss = make_parallel(model, 2, x=x, y=y)
-
-train_op = tf.train.AdamOptimizer(0.1).minimize(
-    tf.reduce_mean(loss),
-    colocate_gradients_with_ops=True)
-```
-
-The only thing that we need to change to parallelize backpropagation of gradients is to set the colocate_gradients_with_ops flag to true. This ensures that gradient ops run on the same device as the original op.
-
-## Debugging TensorFlow models
-<a name="debug"></a>
-Symbolic nature of TensorFlow makes it relatively more difficult to debug TensorFlow code compared to regular python code. Here we introduce a number of tools included with TensorFlow that make debugging much easier.
-
-Probably the most common error one can make when using TensorFlow is passing Tensors of wrong shape to ops. Many TensorFlow ops can operate on tensors of different ranks and shapes. This can be convenient when using the API, but may lead to extra headache when things go wrong.
-
-For example, consider the tf.matmul op, it can multiply two matrices:
-```python
-a = tf.random_uniform([2, 3])
-b = tf.random_uniform([3, 4])
-c = tf.matmul(a, b)  # c is a tensor of shape [2, 4]
-```
-
-But the same function also does batch matrix multiplication:
-```python
-a = tf.random_uniform([10, 2, 3])
-b = tf.random_uniform([10, 3, 4])
-tf.matmul(a, b)  # c is a tensor of shape [10, 2, 4]
-```
-
-Another example that we talked about before in the [broadcasting](#broadcast) section is add operation which supports broadcasting:
-```python
-a = tf.constant([[1.], [2.]])
-b = tf.constant([1., 2.])
-c = a + b  # c is a tensor of shape [2, 2]
-```
-
-### Validating your tensors with tf.assert* ops
-
-One way to reduce the chance of unwanted behavior is to explicitly verify the rank or shape of intermediate tensors with tf.assert* ops.
-```python
-a = tf.constant([[1.], [2.]])
-b = tf.constant([1., 2.])
-check_a = tf.assert_rank(a, 1)  # This will raise an InvalidArgumentError exception
-check_b = tf.assert_rank(b, 1)
-with tf.control_dependencies([check_a, check_b]):
-    c = a + b  # c is a tensor of shape [2, 2]
-```
-Remember that assertion nodes like other operations are part of the graph and if not evaluated would get pruned during Session.run(). So make sure to create explicit dependencies to assertion ops, to force TensorFlow to execute them.
-
-You can also use assertions to validate the value of tensors at runtime:
-```python
-check_pos = tf.assert_positive(a)
-```
-See the official docs for a [full list of assertion ops](https://www.tensorflow.org/api_guides/python/check_ops).
-
-### Logging tensor values with tf.Print
-
-Another useful built-in function for debugging is tf.Print which logs the given tensors to the standard error:
-
-```python
-input_copy = tf.Print(input, tensors_to_print_list)
-```
-Note that tf.Print returns a copy of its first argument as output. One way to force tf.Print to run is to pass its output to another op that gets executed. For example if we want to print the value of tensors a and b before adding them we could do something like this:
-```python
-a = ...
-b = ...
-a = tf.Print(a, [a, b])
-c = a + b
-```
-
-Alternatively we could manually define a control dependency.
-
-### Check your gradients with tf.compute_gradient_error
-
-__Not__ all the operations in TensorFlow come with gradients, and it's easy to unintentionally build graphs for which TensorFlow can not compute the gradients.
-
-Let's look at an example:
-```python
-import tensorflow as tf
-
-def non_differentiable_softmax_entropy(logits):
-    probs = tf.nn.softmax(logits)
-    return tf.nn.softmax_cross_entropy_with_logits(labels=probs, logits=logits)
-
-w = tf.get_variable("w", shape=[5])
-y = -non_differentiable_softmax_entropy(w)
-
-opt = tf.train.AdamOptimizer()
-train_op = opt.minimize(y)
-
-sess = tf.Session()
-sess.run(tf.global_variables_initializer())
-for i in range(10000):
-    sess.run(train_op)
-
-print(sess.run(tf.nn.softmax(w)))
-```
-We are using tf.nn.softmax_cross_entropy_with_logits to define entropy over a categorical distribution. We then use Adam optimizer to find the weights with maximum entropy. If you have passed a course on information theory, you would know that uniform distribution contains maximum entropy. So you would expect for the result to be [0.2, 0.2, 0.2, 0.2, 0.2]. But if you run this you may get unexpected results like this:
-```
-[ 0.34081486  0.24287023  0.23465775  0.08935683  0.09230034]
-```
-It turns out tf.nn.softmax_cross_entropy_with_logits has undefined gradients with respect to labels! But how may we spot this if we didn't know?
-
-Fortunately for us TensorFlow comes with a numerical differentiator that can be used to find symbolic gradient errors. Let's see how we can use it:
-
-```python
-with tf.Session():
-    diff = tf.test.compute_gradient_error(w, [5], y, [])
-    print(diff)
-```
-If you run this, you would see that the difference between the numerical and symbolic gradients are pretty high (0.06 - 0.1 in my tries).
-
-Now let's fix our function with a differentiable version of the entropy and check again:
-```python
-import tensorflow as tf
-import numpy as np
-
-def softmax_entropy(logits, dim=-1):
-    plogp = tf.nn.softmax(logits, dim) * tf.nn.log_softmax(logits, dim)
-    return -tf.reduce_sum(plogp, dim)
-
-w = tf.get_variable("w", shape=[5])
-y = -softmax_entropy(w)
-
-print(w.get_shape())
-print(y.get_shape())
-
-with tf.Session() as sess:
-    diff = tf.test.compute_gradient_error(w, [5], y, [])
-    print(diff)
-```
-The difference should be ~0.0001 which looks much better.
-
-Now if you run the optimizer again with the correct version you can see the final weights would be:
-```
-[ 0.2  0.2  0.2  0.2  0.2]
-```
-which are exactly what we wanted.
-
-[TensorFlow summaries](https://www.tensorflow.org/api_guides/python/summary), and [tfdbg (TensorFlow Debugger)](https://www.tensorflow.org/api_guides/python/tfdbg) are other tools that can be used for debugging. Please refer to the official docs to learn more.
-
 ## Numerical stability in TensorFlow
 <a name="stable"></a>
 When using any numerical computation library such as NumPy or TensorFlow, it's important to note that writing mathematically correct code doesn't necessarily lead to correct results. You also need to make sure that the computations are stable.
@@ -1031,7 +503,7 @@ The reason for the incorrect result is that y is simply too small for float32 ty
 y = np.float32(1e39)  # y would be stored as inf
 z = x * y / y
 
-print(z)  # prints 0
+print(z)  # prints nan
 ```
 
 The smallest positive value that float32 type can represent is 1.4013e-45 and anything below that would be stored as zero. Also, any number beyond 3.40282e+38, would be stored as inf.
@@ -1051,7 +523,7 @@ def unstable_softmax(logits):
     exp = tf.exp(logits)
     return exp / tf.reduce_sum(exp)
 
-tf.Session().run(unstable_softmax([1000., 0.]))  # prints [ nan, 0.]
+print(unstable_softmax([1000., 0.]).numpy())  # prints [ nan, 0.]
 ```
 Note that computing the exponential of logits for relatively small numbers results to gigantic results that are out of float32 range. The largest valid logit for our naive softmax implementation is ln(3.40282e+38) = 88.7, anything beyond that leads to a nan outcome.
 
@@ -1064,14 +536,14 @@ def softmax(logits):
     exp = tf.exp(logits - tf.reduce_max(logits))
     return exp / tf.reduce_sum(exp)
 
-tf.Session().run(softmax([1000., 0.]))  # prints [ 1., 0.]
+print(softmax([1000., 0.]).numpy())  # prints [ 1., 0.]
 ```
 
 Let's look at a more complicated case. Consider we have a classification problem. We use the softmax function to produce probabilities from our logits. We then define our loss function to be the cross entropy between our predictions and the labels. Recall that cross entropy for a categorical distribution can be simply defined as xe(p, q) = -&sum; p_i log(q_i). So a naive implementation of the cross entropy would look like this:
 
 ```python
 def unstable_softmax_cross_entropy(labels, logits):
-    logits = tf.log(softmax(logits))
+    logits = tf.math.log(softmax(logits))
     return -tf.reduce_sum(labels * logits)
 
 labels = tf.constant([0.5, 0.5])
@@ -1079,7 +551,7 @@ logits = tf.constant([1000., 0.])
 
 xe = unstable_softmax_cross_entropy(labels, logits)
 
-print(tf.Session().run(xe))  # prints inf
+print(xe.numpy())  # prints inf
 ```
 
 Note that in this implementation as the softmax output approaches zero, the log's output approaches infinity which causes instability in our computation. We can rewrite this by expanding the softmax and doing some simplifications:
@@ -1095,390 +567,19 @@ logits = tf.constant([1000., 0.])
 
 xe = softmax_cross_entropy(labels, logits)
 
-print(tf.Session().run(xe))  # prints 500.0
+print(xe.numpy())  # prints 500.0
 ```
 
 We can also verify that the gradients are also computed correctly:
 ```python
-g = tf.gradients(xe, logits)
-print(tf.Session().run(g))  # prints [0.5, -0.5]
+with tf.GradientTape() as tape:
+    tape.watch(logits)
+    xe = softmax_cross_entropy(labels, logits)
+    
+g = tape.gradient(xe, logits)
+print(g.numpy())  # prints [0.5, -0.5]
 ```
 which is correct.
 
 Let me remind again that extra care must be taken when doing gradient descent to make sure that the range of your functions as well as the gradients for each layer are within a valid range. Exponential and logarithmic functions when used naively are especially problematic because they can map small numbers to enormous ones and the other way around.
 
-## Building a neural network training framework with learn API
-<a name="tf_learn"></a>
-For simplicity, in most of the examples here we manually create sessions and we don't care about saving and loading checkpoints but this is not how we usually do things in practice. You most probably want to use the learn API to take care of session management and logging. We provide a simple but practical [framework](https://github.com/vahidk/TensorflowFramework/tree/master) for training neural networks using TensorFlow. In this item we explain how this framework works.
-
-When experimenting with neural network models you usually have a training/test split. You want to train your model on the training set, and once in a while evaluate it on test set and compute some metrics. You also need to store the model parameters as a checkpoint, and ideally you want to be able to stop and resume training. TensorFlow's learn API is designed to make this job easier, letting us focus on developing the actual model.
-
-The most basic way of using tf.learn API is to use tf.Estimator object directly. You need to define a model function that defines a loss function, a train op, one or a set of predictions, and optionally a set of metric ops for evaluation:
-```python
-import tensorflow as tf
-
-def model_fn(features, labels, mode, params):
-    predictions = ...
-    loss = ...
-    train_op = ...
-    metric_ops = ...
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metric_ops)
-
-params = ...
-run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.output_dir)
-estimator = tf.estimator.Estimator(
-    model_fn=model_fn, config=run_config, params=params)
-```
-
-To train the model you would then simply call Estimator.train() function while providing an input function to read the data:
-```python
-def input_fn():
-    features = ...
-    labels = ...
-    return features, labels
-
-estimator.train(input_fn=input_fn, max_steps=...)
-```
-
-and to evaluate the model, simply call Estimator.evaluate():
-```
-estimator.evaluate(input_fn=input_fn)
-```
-
-Estimator object might be good enough for simple cases, but TensorFlow provides a higher level object called Experiment which provides some additional useful functionality. Creating an experiment object is very easy:
-
-```python
-experiment = tf.contrib.learn.Experiment(
-    estimator=estimator,
-    train_input_fn=train_input_fn,
-    eval_input_fn=eval_input_fn)
-```
-
-Now we can call train_and_evaluate function to compute the metrics while training:
-```
-experiment.train_and_evaluate()
-```
-
-An even higher level way of running experiments is by using learn_runner.run() function. Here's how our main function looks like in the provided framework:
-```python
-import tensorflow as tf
-
-tf.flags.DEFINE_string("output_dir", "", "Optional output dir.")
-tf.flags.DEFINE_string("schedule", "train_and_evaluate", "Schedule.")
-tf.flags.DEFINE_string("hparams", "", "Hyper parameters.")
-
-FLAGS = tf.flags.FLAGS
-
-def experiment_fn(run_config, hparams):
-  estimator = tf.estimator.Estimator(
-    model_fn=make_model_fn(),
-    config=run_config,
-    params=hparams)
-  return tf.contrib.learn.Experiment(
-    estimator=estimator,
-    train_input_fn=make_input_fn(tf.estimator.ModeKeys.TRAIN, hparams),
-    eval_input_fn=make_input_fn(tf.estimator.ModeKeys.EVAL, hparams))
-
-def main(unused_argv):
-  run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.output_dir)
-  hparams = tf.contrib.training.HParams()
-  hparams.parse(FLAGS.hparams)
-
-  estimator = tf.contrib.learn.learn_runner.run(
-    experiment_fn=experiment_fn,
-    run_config=run_config,
-    schedule=FLAGS.schedule,
-    hparams=hparams)
-
-if __name__ == "__main__":
-  tf.app.run()
-```
-The schedule flag decides which member function of the Experiment object gets called. So, if you for example set schedule to "train_and_evaluate", experiment.train_and_evaluate() would be called.
-
-The input function returns two tensors (or dictionaries of tensors) providing the features and labels to be passed to the model:
-```python
-def input_fn():
-    features = ...
-    labels = ...
-    return features, labels
-```
-See [mnist.py](https://github.com/vahidk/TensorflowFramework/blob/master/dataset/mnist.py) for an example of how to read your data with the dataset API. To learn about various ways of reading your data in TensorFlow refer to [this item](#data).
-
-The framework also comes with a simple convolutional network classifier in [alexnet.py](https://github.com/vahidk/TensorflowFramework/blob/master/model/alexnet.py) that includes an example model.
-
-And that's it! This is all you need to get started with TensorFlow learn API. I recommend to have a look at the framework [source code](https://github.com/vahidk/TensorFlowFramework) and see the official python API to learn more about the learn API.
-
-# Part II: TensorFlow Cookbook
-<a name="cookbook"></a>
-This section includes implementation of a set of common operations in TensorFlow.
-
-### Get shape <a name="get_shape"></a>
-```python
-def get_shape(tensor):
-  """Returns static shape if available and dynamic shape otherwise."""
-  static_shape = tensor.shape.as_list()
-  dynamic_shape = tf.unstack(tf.shape(tensor))
-  dims = [s[1] if s[0] is None else s[0]
-          for s in zip(static_shape, dynamic_shape)]
-  return dims
-```
-
-### Batch Gather <a name="batch_gather"></a>
-
-```python
-def batch_gather(tensor, indices):
-  """Gather in batch from a tensor of arbitrary size.
-
-  In pseudocode this module will produce the following:
-  output[i] = tf.gather(tensor[i], indices[i])
-
-  Args:
-    tensor: Tensor of arbitrary size.
-    indices: Vector of indices.
-  Returns:
-    output: A tensor of gathered values.
-  """
-  shape = get_shape(tensor)
-  flat_first = tf.reshape(tensor, [shape[0] * shape[1]] + shape[2:])
-  indices = tf.convert_to_tensor(indices)
-  offset_shape = [shape[0]] + [1] * (indices.shape.ndims - 1)
-  offset = tf.reshape(tf.range(shape[0]) * shape[1], offset_shape)
-  output = tf.gather(flat_first, indices + offset)
-  return output
-```
-
-### Beam Search <a name="beam_search"></a>
-```python
-import tensorflow as tf
-
-def rnn_beam_search(update_fn, initial_state, sequence_length, beam_width,
-                    begin_token_id, end_token_id, name="rnn"):
-  """Beam-search decoder for recurrent models.
-
-  Args:
-    update_fn: Function to compute the next state and logits given the current
-               state and ids.
-    initial_state: Recurrent model states.
-    sequence_length: Length of the generated sequence.
-    beam_width: Beam width.
-    begin_token_id: Begin token id.
-    end_token_id: End token id.
-    name: Scope of the variables.
-  Returns:
-    ids: Output indices.
-    logprobs: Output log probabilities probabilities.
-  """
-  batch_size = initial_state.shape.as_list()[0]
-
-  state = tf.tile(tf.expand_dims(initial_state, axis=1), [1, beam_width, 1])
-
-  sel_sum_logprobs = tf.log([[1.] + [0.] * (beam_width - 1)])
-
-  ids = tf.tile([[begin_token_id]], [batch_size, beam_width])
-  sel_ids = tf.zeros([batch_size, beam_width, 0], dtype=ids.dtype)
-
-  mask = tf.ones([batch_size, beam_width], dtype=tf.float32)
-
-  for i in range(sequence_length):
-    with tf.variable_scope(name, reuse=True if i > 0 else None):
-
-      state, logits = update_fn(state, ids)
-      logits = tf.nn.log_softmax(logits)
-
-      sum_logprobs = (
-          tf.expand_dims(sel_sum_logprobs, axis=2) +
-          (logits * tf.expand_dims(mask, axis=2)))
-
-      num_classes = logits.shape.as_list()[-1]
-
-      sel_sum_logprobs, indices = tf.nn.top_k(
-          tf.reshape(sum_logprobs, [batch_size, num_classes * beam_width]),
-          k=beam_width)
-
-      ids = indices % num_classes
-
-      beam_ids = indices // num_classes
-
-      state = batch_gather(state, beam_ids)
-
-      sel_ids = tf.concat([batch_gather(sel_ids, beam_ids),
-                           tf.expand_dims(ids, axis=2)], axis=2)
-
-      mask = (batch_gather(mask, beam_ids) *
-              tf.to_float(tf.not_equal(ids, end_token_id)))
-
-  return sel_ids, sel_sum_logprobs
-```
-
-## Merge <a name="merge"></a>
-
-```python
-import tensorflow as tf
-
-def merge(tensors, units, activation=tf.nn.relu, name=None, **kwargs):
-  """Merge features with broadcasting support.
-
-  This operation concatenates multiple features of varying length and applies
-  non-linear transformation to the outcome.
-
-  Example:
-    a = tf.zeros([m, 1, d1])
-    b = tf.zeros([1, n, d2])
-    c = merge([a, b], d3)  # shape of c would be [m, n, d3].
-
-  Args:
-    tensors: A list of tensor with the same rank.
-    units: Number of units in the projection function.
-  """
-  with tf.variable_scope(name, default_name="merge"):
-    # Apply linear projection to input tensors.
-    projs = []
-    for i, tensor in enumerate(tensors):
-      proj = tf.layers.dense(
-          tensor, units,
-          name="proj_%d" % i,
-          **kwargs)
-      projs.append(proj)
-
-    # Compute sum of tensors.
-    result = projs.pop()
-    for proj in projs:
-      result = result + proj
-
-    # Apply nonlinearity.
-    if activation:
-      result = activation(result)
-  return result
-```
-
-## Entropy <a name="entropy"></a>
-
-```python
-import tensorflow as tf
-
-def softmax_entropy(logits, dim=-1):
-  """Compute entropy over specified dimensions."""
-  plogp = tf.nn.softmax(logits, dim) * tf.nn.log_softmax(logits, dim)
-  return -tf.reduce_sum(plogp, dim)
-```
-
-## KL-Divergence <a name="kld"></a>
-```python
-def gaussian_kl(q, p=(0., 0.)):
-  """Computes KL divergence between two isotropic Gaussian distributions.
-
-  To ensure numerical stability, this op uses mu, log(sigma^2) to represent
-  the distribution. If q is not provided, it's assumed to be unit Gaussian.
-
-  Args:
-    q: A tuple (mu, log(sigma^2)) representing a multi-variatie Gaussian.
-    p: A tuple (mu, log(sigma^2)) representing a multi-variatie Gaussian.
-  Returns:
-    A tensor representing KL(q, p).
-  """
-  mu1, log_sigma1_sq = q
-  mu2, log_sigma2_sq = p
-  return tf.reduce_sum(
-    0.5 * (log_sigma2_sq - log_sigma1_sq +
-           tf.exp(log_sigma1_sq - log_sigma2_sq) +
-           tf.square(mu1 - mu2) / tf.exp(log_sigma2_sq) -
-           1), axis=-1)
-```
-
-## Make parallel <a name="make_parallel"></a>
-
-```python
-def make_parallel(fn, num_gpus, **kwargs):
-  """Parallelize given model on multiple gpu devices.
-
-  Args:
-    fn: Arbitrary function that takes a set of input tensors and outputs a
-        single tensor. First dimension of inputs and output tensor are assumed
-        to be batch dimension.
-    num_gpus: Number of GPU devices.
-    **kwargs: Keyword arguments to be passed to the model.
-  Returns:
-    A tensor corresponding to the model output.
-  """
-  in_splits = {}
-  for k, v in kwargs.items():
-    in_splits[k] = tf.split(v, num_gpus)
-
-  out_split = []
-  for i in range(num_gpus):
-    with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
-      with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
-        out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))
-
-  return tf.concat(out_split, axis=0)
-```
-
-## Leaky relu <a name="leaky_relu"></a>
-```python
-def leaky_relu(tensor, alpha=0.1):
-    """Computes the leaky rectified linear activation."""
-    return tf.maximum(tensor, alpha * tensor)
-```
-
-## Batch normalization <a name="batch_norm"></a>
-```python
-def batch_normalization(tensor, training=False, epsilon=0.001, momentum=0.9, 
-                        fused_batch_norm=False, name=None):
-  """Performs batch normalization on given 4-D tensor.
-  
-  The features are assumed to be in NHWC format. Noe that you need to 
-  run UPDATE_OPS in order for this function to perform correctly, e.g.:
-
-  with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
-    train_op = optimizer.minimize(loss)
-
-  Based on: https://arxiv.org/abs/1502.03167
-  """
-  with tf.variable_scope(name, default_name="batch_normalization"):
-    channels = tensor.shape.as_list()[-1]
-    axes = list(range(tensor.shape.ndims - 1))
-
-    beta = tf.get_variable(
-      'beta', channels, initializer=tf.zeros_initializer())
-    gamma = tf.get_variable(
-      'gamma', channels, initializer=tf.ones_initializer())
-
-    avg_mean = tf.get_variable(
-      "avg_mean", channels, initializer=tf.zeros_initializer(),
-      trainable=False)
-    avg_variance = tf.get_variable(
-      "avg_variance", channels, initializer=tf.ones_initializer(),
-      trainable=False)
-
-    if training:
-      if fused_batch_norm:
-        mean, variance = None, None
-      else:
-        mean, variance = tf.nn.moments(tensor, axes=axes)
-    else:
-      mean, variance = avg_mean, avg_variance
-   
-    if fused_batch_norm:
-      tensor, mean, variance = tf.nn.fused_batch_norm(
-        tensor, scale=gamma, offset=beta, mean=mean, variance=variance, 
-        epsilon=epsilon, is_training=training)
-    else:
-      tensor = tf.nn.batch_normalization(
-        tensor, mean, variance, beta, gamma, epsilon)
-
-    if training:
-      update_mean = tf.assign(
-        avg_mean, avg_mean * momentum + mean * (1.0 - momentum))
-      update_variance = tf.assign(
-        avg_variance, avg_variance * momentum + variance * (1.0 - momentum))
-
-      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mean)
-      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_variance)
-
-  return tensor
-```

From 178b0146917f5e1e101638f78346162567d406d7 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Mar 2019 16:42:24 -0700
Subject: [PATCH 71/78] Bug fix.

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 442ee9e..d98a994 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,6 @@ pip install tensorflow==2.0.0-alpha0
 ```
 
 _We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
-```
 
 # Part I: TensorFlow 2.0 Fundamentals
 <a name="fundamentals"></a>

From 5b35bbe7ba417544c55ea18828dfdbee181d4ab9 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Mar 2019 16:43:30 -0700
Subject: [PATCH 72/78] Enable syntax highlighting.

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index d98a994..619ce5c 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ print(z)
 ```
 Similar to NumPy TensorFlow 2 also immediately performs the computation and produces the result. The only difference is that TensorFlow uses tf.Tensor type to store the results which can be easily converted to NumPy, by calling tf.Tensor.numpy() member function: 
 
-```
+```python
 print(z.numpy())
 ```
 
@@ -105,14 +105,14 @@ for _ in range(1000):
 print(w.numpy())
 ```
 By running this piece of code you should see a result close to this:
-```
+```python
 [4.9924135, 0.00040895029, 3.4504161]
 ```
 Which is a relatively close approximation to our parameters.
 
 Note that in the above code we are running Tensorflow in imperative mode (i.e. operations get instantly executed), which is not very efficient. TensorFlow 2.0 can also turn a given piece of python code into a graph which can then optimized and efficiently parallelized on GPUs and TPUs. To get all those benefits we simply need to decorate the train_step function with tf.function decorator:
 
-```
+```python
 @tf.function
 def train_step():
     x, y = generate_data()

From 933a7dd04133f45c63baedb3c6ba1455d5d749bc Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Mar 2019 16:47:45 -0700
Subject: [PATCH 73/78] Added note for TensorFlow 2.0 version.

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 3a71e75..b7aa109 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,8 @@ Table of Contents
 9. [Batch normalization](#batch_norm)
 ---
 
+_We updated the guide to follow the newly released TensorFlow 2.x API. Click [here for v1 branch](https://github.com/vahidk/EffectiveTensorflow/tree/v1), and [here for v2 branch](https://github.com/vahidk/EffectiveTensorflow/tree/v2)._
+
 _We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
 
  _We encourage you to also check out the accompanied neural network training framework built on top of tf.contrib.learn API. The [framework](https://github.com/vahidk/TensorflowFramework) can be downloaded separately:_

From 2d6aeb262cf055bee80143da2d7ab3a44bd1240d Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Mar 2019 16:56:15 -0700
Subject: [PATCH 74/78] Bug fix.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 619ce5c..26fb625 100644
--- a/README.md
+++ b/README.md
@@ -236,7 +236,7 @@ This took 0.01 seconds. Of course, the right way to do this simple reduction is
 ```python
 z = tf.reduce_sum(x, axis=0)
 ```
-This took 0.0001 seconds, which is 300x faster than the original implementation.
+This took 0.0001 seconds, which is 100x faster than the original implementation.
 
 TensorFlow also overloads a range of arithmetic and logical operators:
 ```python

From d998bef4512a28b22e42477b6d9bfdeacab6b0e6 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Sat, 16 Mar 2019 17:41:51 -0700
Subject: [PATCH 75/78] Remove reference to depreacted API.

---
 README.md | 63 ++++---------------------------------------------------
 1 file changed, 4 insertions(+), 59 deletions(-)

diff --git a/README.md b/README.md
index b7aa109..74ea689 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ _We updated the guide to follow the newly released TensorFlow 2.x API. Click [he
 
 _We aim to gradually expand this series by adding new articles and keep the content up to date with the latest releases of TensorFlow API. If you have suggestions on how to improve this series or find the explanations ambiguous, feel free to create an issue, send patches, or reach out by email._
 
- _We encourage you to also check out the accompanied neural network training framework built on top of tf.contrib.learn API. The [framework](https://github.com/vahidk/TensorflowFramework) can be downloaded separately:_
+ _We encourage you to also check out the accompanied neural network training framework built on top of tf.estimator API. The [framework](https://github.com/vahidk/TensorflowFramework) can be downloaded separately:_
 ```
 git clone https://github.com/vahidk/TensorflowFramework.git
 ```
@@ -421,7 +421,7 @@ data = dataset.make_one_shot_iterator().get_next()
 
 If you need to read your data from file, it may be more efficient to write it in TFrecord format and use TFRecordDataset to read it:
 ```python
-dataset = tf.contrib.data.TFRecordDataset(path_to_data)
+dataset = tf.data.TFRecordDataset(path_to_data)
 ```
 See the [official docs](https://www.tensorflow.org/api_guides/python/reading_data#Reading_from_files) for an example of how to write your dataset in TFrecord format.
 
@@ -1132,76 +1132,21 @@ def model_fn(features, labels, mode, params):
         eval_metric_ops=metric_ops)
 
 params = ...
-run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.output_dir)
+run_config = tf.estimator.RunConfig(model_dir=FLAGS.output_dir)
 estimator = tf.estimator.Estimator(
     model_fn=model_fn, config=run_config, params=params)
 ```
 
 To train the model you would then simply call Estimator.train() function while providing an input function to read the data:
 ```python
-def input_fn():
-    features = ...
-    labels = ...
-    return features, labels
-
 estimator.train(input_fn=input_fn, max_steps=...)
 ```
 
 and to evaluate the model, simply call Estimator.evaluate():
-```
-estimator.evaluate(input_fn=input_fn)
-```
-
-Estimator object might be good enough for simple cases, but TensorFlow provides a higher level object called Experiment which provides some additional useful functionality. Creating an experiment object is very easy:
-
 ```python
-experiment = tf.contrib.learn.Experiment(
-    estimator=estimator,
-    train_input_fn=train_input_fn,
-    eval_input_fn=eval_input_fn)
-```
-
-Now we can call train_and_evaluate function to compute the metrics while training:
-```
-experiment.train_and_evaluate()
+estimator.evaluate(input_fn=input_fn)
 ```
 
-An even higher level way of running experiments is by using learn_runner.run() function. Here's how our main function looks like in the provided framework:
-```python
-import tensorflow as tf
-
-tf.flags.DEFINE_string("output_dir", "", "Optional output dir.")
-tf.flags.DEFINE_string("schedule", "train_and_evaluate", "Schedule.")
-tf.flags.DEFINE_string("hparams", "", "Hyper parameters.")
-
-FLAGS = tf.flags.FLAGS
-
-def experiment_fn(run_config, hparams):
-  estimator = tf.estimator.Estimator(
-    model_fn=make_model_fn(),
-    config=run_config,
-    params=hparams)
-  return tf.contrib.learn.Experiment(
-    estimator=estimator,
-    train_input_fn=make_input_fn(tf.estimator.ModeKeys.TRAIN, hparams),
-    eval_input_fn=make_input_fn(tf.estimator.ModeKeys.EVAL, hparams))
-
-def main(unused_argv):
-  run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.output_dir)
-  hparams = tf.contrib.training.HParams()
-  hparams.parse(FLAGS.hparams)
-
-  estimator = tf.contrib.learn.learn_runner.run(
-    experiment_fn=experiment_fn,
-    run_config=run_config,
-    schedule=FLAGS.schedule,
-    hparams=hparams)
-
-if __name__ == "__main__":
-  tf.app.run()
-```
-The schedule flag decides which member function of the Experiment object gets called. So, if you for example set schedule to "train_and_evaluate", experiment.train_and_evaluate() would be called.
-
 The input function returns two tensors (or dictionaries of tensors) providing the features and labels to be passed to the model:
 ```python
 def input_fn():

From 0c2b285e8686b20cb940927fd4d87427a9bbcd07 Mon Sep 17 00:00:00 2001
From: Windaway <coldswamp@gmail.com>
Date: Mon, 22 Apr 2019 22:31:42 +0800
Subject: [PATCH 76/78] Create Readme(chs).md

Add Chinese edition.
---
 README(chs).md | 583 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 583 insertions(+)
 create mode 100644 README(chs).md

diff --git a/README(chs).md b/README(chs).md
new file mode 100644
index 0000000..c9bcfce
--- /dev/null
+++ b/README(chs).md
@@ -0,0 +1,583 @@
+# Effective TensorFlow 2 中文版
+
+目录
+=================
+## Part I: TensorFlow 2 基础
+1.  [TensorFlow 2 基础](#basics)
+2.  [广播](#broadcast)
+3.  [利用重载OPs](#overloaded_ops)
+4.  [控制流操作: 条件与循环](#control_flow)
+5.  [原型核和使用Python OPs可视化](#python_ops)
+6.  [TensorFlow中的数值稳定性](#stable)
+---
+
+_我们针对新发布的 TensorFlow 2.x API 更新了教程. 如果你想看 TensorFlow 1.x 的教程请移步 [v1 branch](https://github.com/vahidk/EffectiveTensorflow/tree/v1)._
+
+_安装 TensorFlow 2.0 (alpha) 请参照 [官方网站](https://www.tensorflow.org/install/pip):_
+```
+pip install tensorflow==2.0.0-alpha0
+```
+
+_我们致力于逐步扩展新的文章，并保持与Tensorflow API更新同步。如果你有任何建议请提出来。_
+
+# Part I: TensorFlow 2.0 基础
+<a name="fundamentals"></a>
+
+## TensorFlow 基础
+<a name="basics"></a>
+重新设计的TensorFlow 2带来了更方便使用的API。如果你熟悉numpy，你用Tensorflow 2会很爽。不像完全静态图符号计算的Tensorflow 1，TF2隐藏静态图那部分，变得像个numpy。值得注意的是，虽然交互变化了，但是TF2仍然有静态图抽象的优势，TF1能做的TF2都能做。 
+
+让我们从一个简单的例子开始吧，我们把俩随机矩阵乘起来。我们先看看NumPy怎么做这事。
+```python
+import numpy as np
+
+x = np.random.normal(size=[10, 10])
+y = np.random.normal(size=[10, 10])
+z = np.dot(x, y)
+
+print(z)
+```
+
+现在看看用TensorFlow 2.0怎么办:
+```python
+import tensorflow as tf
+
+x = tf.random.normal([10, 10])
+y = tf.random.normal([10, 10])
+z = tf.matmul(x, y)
+
+print(z)
+```
+与NumPy差不多，TensorFlow 2也马上执行并返回结果。唯一的不同是TensorFlow用tf.Tensor类型存储结果，当然这种数据可以方便的转换为NumPy数据，调用tf.Tensor.numpy()成员函数就行: 
+
+```python
+print(z.numpy())
+```
+
+为了理解符号计算的强大，让我们看看另一个例子。假设我们有从一个曲线(举个栗子 f(x) = 5x^2 + 3)上采集的样本点，并且我们要基于这些样本估计f(x)。我们建立了一个参数化函数g(x, w) = w0 x^2 + w1 x + w2，这个函数有输入x和隐藏参数w，我们的目标就是找出隐藏参数让g(x, w) ≈ f(x)。这个可以通过最小化以下的loss函数来实现:L(w) = &sum; (f(x) - g(x, w))^2。虽然这个问题有解析解，但是我们更乐意用一个可以应用到任意可微函数上的通用方法，嗯，SGD。我们仅需要计算L(w) 在不同样本点上关于w的平均梯度，然后往梯度反方向调整就行。
+
+
+那么，怎么用TensorFlow实现呢:
+
+```python
+import numpy as np
+import tensorflow as tf
+
+# 假设我们知道我们期望的多项式方程是二阶方程，
+# 我们分配一个长3的向量并用随机噪声初始化。
+
+w = tf.Variable(tf.random.normal([3, 1]))
+
+# 用Adam优化器优化，初始学习率0.1
+opt = tf.optimizers.Adam(0.1)
+
+def model(x):
+    # 定义yhat为y的估计
+    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
+    yhat = tf.squeeze(tf.matmul(f, w), 1)
+    return yhat
+
+def compute_loss(y, yhat):
+    # loss用y和yhat之间的L2距离估计。
+    # 对w加了正则项保证w较小。
+    loss = tf.nn.l2_loss(yhat - y) + 0.1 * tf.nn.l2_loss(w)
+    return loss
+
+def generate_data():
+    # 根据真实函数生成一些训练样本
+    x = np.random.uniform(-10.0, 10.0, size=100).astype(np.float32)
+    y = 5 * np.square(x) + 3
+    return x, y
+
+def train_step():
+    x, y = generate_data()
+
+    def _loss_fn():
+        yhat = model(x)
+        loss = compute_loss(y, yhat)
+        return loss
+    
+    opt.minimize(_loss_fn, [w])
+
+for _ in range(1000):
+    train_step()
+
+print(w.numpy())
+```
+运行这段代码你会看到近似下面这个的结果:
+```python
+[4.9924135, 0.00040895029, 3.4504161]
+```
+这和我们的参数很接近了.
+
+注意，上面的代码是交互式执行 (i.e. eager模式下ops直接执行)，这种操作并不高效. TensorFlow 2.0也提供静态图执行的法子，方便在GPUs和TPUs上快速并行执行。开启也很简单对于训练阶段函数用tf.function修饰就OK:
+
+```python
+@tf.function
+def train_step():
+    x, y = generate_data()
+
+    def _loss_fn():
+        yhat = model(x)
+        loss = compute_loss(y, yhat)
+        return loss
+    
+    opt.minimize(_loss_fn, [w])
+```
+
+tf.function多牛逼，它也可以把while、for之类的循环语句转换进去。我们后面细说。
+
+这些只是TF能做的冰山一角。很多有几百万参数的复杂神经网络可以在TF用几行代码搞定。TF也可以在不同设备，不同线程上处理。
+
+## 广播操作
+<a name="broadcast"></a>
+TF支持广播元素操作。一般来说，如果你想执行加法或者乘法之类操作，你得确保相加或者相乘元素形状匹配，比如你不能把形状为[3, 2]的tensor加到形状为[3, 4]的tensor上。但是有个特例，就是当你把一个tensor和另一个在某个维度上长度为1的tensor相加或相乘时，TF会隐含地把那个维度扩展，让两个tensor可操作。（去看numpy的广播机制吧）
+
+```python
+import tensorflow as tf
+
+a = tf.constant([[1., 2.], [3., 4.]])
+b = tf.constant([[1.], [2.]])
+# c = a + tf.tile(b, [1, 2])
+c = a + b
+
+print(c)
+```
+
+广播可以让我们代码更短更高效。一个常见的场景是把不同长度的特征连接起来：先沿特定维度复制（tile）输入，拼接之后再施加一个非线性变换，这在很多神经网络里经常用得到：
+
+
+```python
+a = tf.random.uniform([5, 3, 5])
+b = tf.random.uniform([5, 1, 6])
+
+# 连接a和b
+tiled_b = tf.tile(b, [1, 3, 1])
+c = tf.concat([a, tiled_b], 2)
+d = tf.keras.layers.Dense(10, activation=tf.nn.relu).apply(c)
+
+print(d)
+```
+
+但这个用了广播就更简单了，我们可以用f(m(x + y))等效f(mx + my)这个特性。然后隐含用广播来做连接。
+
+```python
+pa = tf.keras.layers.Dense(10).apply(a)
+pb = tf.keras.layers.Dense(10).apply(b)
+d = tf.nn.relu(pa + pb)
+
+print(d)
+```
+
+事实上，下面这段代码相当通用，只要tensor之间可以广播，就能用于任意形状的tensor。
+
+```python
+def merge(a, b, units, activation=None):
+    pa = tf.keras.layers.Dense(units).apply(a)
+    pb = tf.keras.layers.Dense(units).apply(b)
+    c = pa + pb
+    if activation is not None:
+        c = activation(c)
+    return c
+```
+
+所以，我们说了广播的好处，那么广播有啥坏处呢。隐含的广播可能导致debug麻烦。
+
+```python
+a = tf.constant([[1.], [2.]])
+b = tf.constant([1., 2.])
+c = tf.reduce_sum(a + b)
+
+print(c)
+```
+
+所以c的结果是啥？正确答案是12，当tensor形状不一样，TF自动的进行了广播。
+
+避免这个问题的法子就是尽量显式，比如reduce时候注明维度。
+
+```python
+a = tf.constant([[1.], [2.]])
+b = tf.constant([1., 2.])
+c = tf.reduce_sum(a + b, 0)
+
+print(c)
+```
+
+这里c得到[5, 7], 然后很容易发现问题。以后用reduce和tf.squeeze操作时最好注明维度。
+
+## 利用重载函数
+<a name="overloaded_ops"></a>
+就像numpy，TF重载一些python操作来让graph构建更容易更可读。
+
+切片操作可以方便的索引tensor:
+```python
+z = x[begin:end]  # z = tf.slice(x, [begin], [end-begin])
+```
+尽量不要用切片，因为这个效率很逊。为了理解这玩意效率到底有多逊，让我们康康一个例子。下面将做一个列方向上的reduce_sum。
+
+```python
+import tensorflow as tf
+import time
+
+x = tf.random.uniform([500, 10])
+
+z = tf.zeros([10])
+
+start = time.time()
+for i in range(500):
+    z += x[i]
+print("Took %f seconds." % (time.time() - start))
+```
+我的MacBook Pro上执行这段花了0.045秒，好逊。这是因为执行了500次切片，很慢的，更好的法子是用tf.unstack一次性把矩阵拆成一个向量列表。
+```python
+z = tf.zeros([10])
+for x_i in tf.unstack(x):
+    z += x_i
+```
+花了0.01秒，当然，最勇的法子是用tf.reduce_sum操作:
+```python
+z = tf.reduce_sum(x, axis=0)
+```
+这个操作用了0.0001秒, 比最初的方法快了100倍。
+
+TF也重载了一堆算数和逻辑操作
+```python
+z = -x  # z = tf.negative(x)
+z = x + y  # z = tf.add(x, y)
+z = x - y  # z = tf.subtract(x, y)
+z = x * y  # z = tf.multiply(x, y)
+z = x / y  # z = tf.divide(x, y)
+z = x // y  # z = tf.math.floordiv(x, y)
+z = x % y  # z = tf.math.mod(x, y)
+z = x ** y  # z = tf.pow(x, y)
+z = x @ y  # z = tf.matmul(x, y)
+z = x > y  # z = tf.greater(x, y)
+z = x >= y  # z = tf.greater_equal(x, y)
+z = x < y  # z = tf.less(x, y)
+z = x <= y  # z = tf.less_equal(x, y)
+z = abs(x)  # z = tf.abs(x)
+z = x & y  # z = tf.logical_and(x, y)
+z = x | y  # z = tf.logical_or(x, y)
+z = x ^ y  # z = tf.logical_xor(x, y)
+z = ~x  # z = tf.logical_not(x)
+```
+
+你也可以使用这些操作的增量赋值版本，比如`x += y` 和 `x **= 2`。
+
+注意，py不允许and or not之类的重载。
+
+其他比如等于(==) 和不等(!=) 等被NumPy重载的操作并没有被TensorFlow实现，请用函数版本的 `tf.equal` 和 `tf.not_equal`。（而`<`、`<=`、`>`、`>=`这些比较运算符如上面的列表所示已被重载，可以直接使用。）
+
+## 控制流，条件与循环
+<a name="control_flow"></a>
+当我们构建一个复杂的模型，比如递归神经网络，我们需要用条件或者循环来控制操作流。这一节里我们介绍一些常用的流控制操作。
+
+假设你想根据一个判断式来决定是把俩tensor相乘还是相加。这个可以用Python内置的if语句或者用tf.cond函数。
+
+```python
+a = tf.constant(1)
+b = tf.constant(2)
+
+p = tf.constant(True)
+
+# 或者:
+# x = tf.cond(p, lambda: a + b, lambda: a * b)
+x = a + b if p else a * b
+
+print(x.numpy())
+```
+由于判断式为真，因此输出相加结果，等于3。
+
+大多数时候你在TF里用的是很大的tensor，并且想对整个batch做操作。这时可以用tf.where：它和tf.cond一样接受一个判断式，但会按元素逐个根据条件选择输出。
+```python
+a = tf.constant([1, 1])
+b = tf.constant([2, 2])
+
+p = tf.constant([True, False])
+
+x = tf.where(p, a + b, a * b)
+
+print(x.numpy())
+```
+结果得到[3, 2].
+
+另一个常用的操作是tf.while_loop，他允许在TF里用动态循环处理可变长度序列。来个例子:
+
+```python
+@tf.function
+def fibonacci(n):
+    a = tf.constant(1)
+    b = tf.constant(1)
+
+    for i in range(2, n):
+        a, b = b, a + b
+    
+    return b
+    
+n = tf.constant(5)
+b = fibonacci(n)
+    
+print(b.numpy())
+```
+输出5. 注意tf.function装饰器自动把python代码转换为tf.while_loop因此我们不用折腾TF API。
+
+现在想一下，我们想要保持完整的斐波那契数列的话，我们需要更新代码来保存历史值:
+```python
+@tf.function
+def fibonacci(n):
+    a = tf.constant(1)
+    b = tf.constant(1)
+    c = tf.constant([1, 1])
+
+    for i in range(2, n):
+        a, b = b, a + b
+        c = tf.concat([c, [b]], 0)
+    
+    return c
+    
+n = tf.constant(5)
+b = fibonacci(n)
+    
+print(b.numpy())
+```
+
+如果你这么执行了，TF会报错，提示某个循环变量的形状发生了变化。
+解决这个问题可以用 "shape invariants"，但是这个只能在底层tf.while_loop API里用。
+
+
+```python
+n = tf.constant(5)
+
+def cond(i, a, b, c):
+    return i < n
+
+def body(i, a, b, c):
+    a, b = b, a + b
+    c = tf.concat([c, [b]], 0)
+    return i + 1, a, b, c
+
+i, a, b, c = tf.while_loop(
+    cond, body, (2, 1, 1, tf.constant([1, 1])),
+    shape_invariants=(tf.TensorShape([]),
+                      tf.TensorShape([]),
+                      tf.TensorShape([]),
+                      tf.TensorShape([None])))
+
+print(c.numpy())
+```
+这个又丑又慢。我们建立一堆没用的中间tensor。TF有更好的解决方法，用tf.TensorArray就行了:
+```python
+@tf.function
+def fibonacci(n):
+    a = tf.constant(1)
+    b = tf.constant(1)
+
+    c = tf.TensorArray(tf.int32, n)
+    c = c.write(0, a)
+    c = c.write(1, b)
+
+    for i in range(2, n):
+        a, b = b, a + b
+        c = c.write(i, b)
+    
+    return c.stack()
+
+n = tf.constant(5)
+c = fibonacci(n)
+    
+print(c.numpy())
+```
+TF的while循环和tensor array在建立复杂的递归神经网络时很有用。这里有个练习：试着用tf.while_loop实现[beam search](https://en.wikipedia.org/wiki/Beam_search)，你那么勇应该可以用tensor arrays实现得更高效吧。
+
+## 原型核和用Python OPs可视化
+<a name="python_ops"></a>
+TF里操作kernel使用C++实现来保证效率。但用C++写TensorFlow kernel很烦诶，所以你在实现自己的kernel前可以先实验下自己想法是否奏效。用tf.py_function() 你可以把任何python代码变成tf操作。
+
+下面就是自己实现一个非线性的Relu:
+```python
+import numpy as np
+import tensorflow as tf
+import uuid
+
+def relu(inputs):
+    # Define the op in python
+    def _py_relu(x):
+        return np.maximum(x, 0.)
+
+    # Define the op's gradient in python
+    def _py_relu_grad(x):
+        return np.float32(x > 0)
+    
+    @tf.custom_gradient
+    def _relu(x):
+        y = tf.py_function(_py_relu, [x], tf.float32)
+        
+        def _relu_grad(dy):
+            return dy * tf.py_function(_py_relu_grad, [x], tf.float32)
+
+        return y, _relu_grad
+
+    return _relu(inputs)
+```
+为了验证梯度的正确性，你应该比较解析和数值梯度。
+```python
+# 计算解析梯度
+x = tf.random.normal([10], dtype=np.float32)
+with tf.GradientTape() as tape:
+    tape.watch(x)
+    y = relu(x)
+g = tape.gradient(y, x)
+print(g)
+
+# 计算数值梯度
+dx_n = 1e-5
+dy_n = relu(x + dx_n) - relu(x)
+g_n = dy_n / dx_n
+print(g_n)
+```
+这俩值应该很接近。
+
+注意这个实现很低效，因此只应该用在原型里，因为python代码超慢，后面你大概会想用C++重新实现计算kernel。
+
+实际，我们通常用python操作来做可视化。比如你做图像分类，你在训练时想可视化你的模型预测，用Tensorboard看tf.summary.image()保存的结果吧:
+```python
+image = tf.placeholder(tf.float32)
+tf.summary.image("image", image)
+```
+但是你这只能可视化输入图，没法知道预测值，用tf的操作肯定嗝屁了，你可以用python操作:
+```python
+def visualize_labeled_images(images, labels, max_outputs=3, name="image"):
+    def _visualize_image(image, label):
+        #  python里绘图
+        fig = plt.figure(figsize=(3, 3), dpi=80)
+        ax = fig.add_subplot(111)
+        ax.imshow(image[::-1,...])
+        ax.text(0, 0, str(label),
+          horizontalalignment="left",
+          verticalalignment="top")
+        fig.canvas.draw()
+
+        # 写入内存中
+        buf = io.BytesIO()
+        data = fig.savefig(buf, format="png")
+        buf.seek(0)
+
+        # Pillow解码图像
+        img = PIL.Image.open(buf)
+        return np.array(img.getdata()).reshape(img.size[0], img.size[1], -1)
+
+    def _visualize_images(images, labels):
+        # 只显示batch中部分图
+        outputs = []
+        for i in range(max_outputs):
+            output = _visualize_image(images[i], labels[i])
+            outputs.append(output)
+        return np.array(outputs, dtype=np.uint8)
+
+    # 、运行python op.
+    figs = tf.py_function(_visualize_images, [images, labels], tf.uint8)
+    return tf.summary.image(name, figs)
+```
+
+由于summary通常隔一段时间才计算一次（而不是每一步都算），所以这种实现在实际使用中不用担心效率。
+
+## Numerical stability in TensorFlow
+<a name="stable"></a>
+用TF或者Numpy之类数学计算库的时候，既要考虑数学计算的正确性，也要注意数值计算的稳定性。
+
+举个例子，小学就教了x * y / y在y不等于0情况下等于x，但是实际:
+```python
+import numpy as np
+
+x = np.float32(1)
+
+y = np.float32(1e-50)  # y 被当成0了
+z = x * y / y
+
+print(z)  # prints nan
+```
+
+对于单精度浮点y太小了，直接被当成0了，当然y很大的时候也有问题:
+
+```python
+y = np.float32(1e39)  # y 被当成无穷大
+z = x * y / y
+
+print(z)  # prints nan
+```
+
+单精度浮点能表示的最小正数是1.4013e-45，任何比它小的正值都会被当成0；同样地，任何大于3.40282e+38的值会被当成无穷大。
+
+```python
+print(np.nextafter(np.float32(0), np.float32(1)))  # prints 1.4013e-45
+print(np.finfo(np.float32).max)  # print 3.40282e+38
+```
+为了保证你计算的稳定，你必须避免过小值或者过大值。这个听起来理所当然，但是在TF进行梯度下降的时候可能很难debug。因为你不仅要保证前向传播中的数值稳定，还要保证反向传播（计算梯度）时也稳定。
+
+让我们看一个例子，我们想要在一个logits向量上计算softmax，一个naive的实现就像：
+```python
+import tensorflow as tf
+
+def unstable_softmax(logits):
+    exp = tf.exp(logits)
+    return exp / tf.reduce_sum(exp)
+
+print(unstable_softmax([1000., 0.]).numpy())  # prints [ nan, 0.]
+```
+注意，即使logits的值并不算大，对它取exp也会得到巨大的结果，很容易超出单精度的范围。对这个naive实现来说，最大的不溢出logit值是ln(3.40282e+38) = 88.7，比它大的就会导致nan。
+
+所以怎么让这玩意稳定，exp(x - c) / &sum; exp(x - c) = exp(x) / &sum; exp(x)就搞掂了。如果我们logits减去一个数，结果还是一样的，一般减去logits最大值。这样exp函数的输入被限定在[-inf, 0]，然后输出就是[0.0, 1.0]，就很棒:
+
+```python
+import tensorflow as tf
+
+def softmax(logits):
+    exp = tf.exp(logits - tf.reduce_max(logits))
+    return exp / tf.reduce_sum(exp)
+
+print(softmax([1000., 0.]).numpy())  # prints [ 1., 0.]
+```
+
+我们看一个更加复杂的情况，考虑一个分类问题，我们用softmax把logits转换成概率，之后计算预测和真值之间的交叉熵。交叉熵的定义是xe(p, q) = -&sum; p_i log(q_i)。一个naive的实现如下:
+
+```python
+def unstable_softmax_cross_entropy(labels, logits):
+    logits = tf.math.log(softmax(logits))
+    return -tf.reduce_sum(labels * logits)
+
+labels = tf.constant([0.5, 0.5])
+logits = tf.constant([1000., 0.])
+
+xe = unstable_softmax_cross_entropy(labels, logits)
+
+print(xe.numpy())  # prints inf
+```
+
+由于softmax输出结果接近0，log的输出接近无限导致了计算的不稳定，我们扩展softmax做了简化:
+
+```python
+def softmax_cross_entropy(labels, logits):
+    scaled_logits = logits - tf.reduce_max(logits)
+    normalized_logits = scaled_logits - tf.reduce_logsumexp(scaled_logits)
+    return -tf.reduce_sum(labels * normalized_logits)
+
+labels = tf.constant([0.5, 0.5])
+logits = tf.constant([1000., 0.])
+
+xe = softmax_cross_entropy(labels, logits)
+
+print(xe.numpy())  # prints 500.0
+```
+
+我们也可以验证梯度计算的正确性:
+```python
+with tf.GradientTape() as tape:
+    tape.watch(logits)
+    xe = softmax_cross_entropy(labels, logits)
+    
+g = tape.gradient(xe, logits)
+print(g.numpy())  # prints [0.5, -0.5]
+```
+这就对了。
+
+必须再次提醒，在做梯度相关操作时候必须注意保证每一层梯度都在有效范围内，exp和log操作由于可以把小数变得很大，因此可能让计算变得不稳定，所以使用exp和log操作必须十分谨慎。

From 6db561a5d7297061558589178a9b02945aed6f35 Mon Sep 17 00:00:00 2001
From: Windaway <coldswamp@gmail.com>
Date: Mon, 22 Apr 2019 22:59:23 +0800
Subject: [PATCH 77/78] Update README(chs).md

---
 README(chs).md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README(chs).md b/README(chs).md
index c9bcfce..3b422ab 100644
--- a/README(chs).md
+++ b/README(chs).md
@@ -553,7 +553,7 @@ xe = unstable_softmax_cross_entropy(labels, logits)
 print(xe.numpy())  # prints inf
 ```
 
-由于softmax输出结果接近0，log的输出接近无限导致了计算的不稳定，我们扩展softmax做了简化:
+由于softmax输出结果接近0，log的输出接近无限导致了计算的不稳定，我们扩展softmax并简化了计算交叉熵:
 
 ```python
 def softmax_cross_entropy(labels, logits):

From 4f7ac847a81930e93015f9b7fcfb8e11dd0ebdb7 Mon Sep 17 00:00:00 2001
From: Vahid Kazemi <vkazemi@gmail.com>
Date: Wed, 21 Oct 2020 22:23:20 -0700
Subject: [PATCH 78/78] Update framework.

---
 code/framework | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/framework b/code/framework
index 30a3391..a9377d0 160000
--- a/code/framework
+++ b/code/framework
@@ -1 +1 @@
-Subproject commit 30a33914c980d0e5828c1534490379c02140d845
+Subproject commit a9377d0dd8f5ac93e810876fbe8987990e3c728f