From fd34749139e026cf6a7a3003e1ea5286cc5d27a1 Mon Sep 17 00:00:00 2001
From: Boutaounte Faissal <b.faissal@gmail.com>
Date: Sun, 10 Apr 2022 20:24:02 -0400
Subject: [PATCH 01/24] Update
 chapter11_part04_sequence-to-sequence-learning.ipynb

---
 chapter11_part04_sequence-to-sequence-learning.ipynb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/chapter11_part04_sequence-to-sequence-learning.ipynb b/chapter11_part04_sequence-to-sequence-learning.ipynb
index a08929dedb..9dca630eb5 100644
--- a/chapter11_part04_sequence-to-sequence-learning.ipynb
+++ b/chapter11_part04_sequence-to-sequence-learning.ipynb
@@ -106,6 +106,8 @@
     "import tensorflow as tf\n",
     "import string\n",
     "import re\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
     "\n",
     "strip_chars = string.punctuation + \"\u00bf\"\n",
     "strip_chars = strip_chars.replace(\"[\", \"\")\n",
@@ -618,4 +620,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}

From 37b121362c0c1ff3a30b52bc172549bbfb3da548 Mon Sep 17 00:00:00 2001
From: Rama Ramakrishnan <rama@alum.mit.edu>
Date: Thu, 5 May 2022 14:55:21 -0400
Subject: [PATCH 02/24] Added line to build model before calling model.fit.

---
 chapter13_best-practices-for-the-real-world.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/chapter13_best-practices-for-the-real-world.ipynb b/chapter13_best-practices-for-the-real-world.ipynb
index 68736349e6..1d4b3b28c6 100644
--- a/chapter13_best-practices-for-the-real-world.ipynb
+++ b/chapter13_best-practices-for-the-real-world.ipynb
@@ -244,6 +244,7 @@
    "source": [
     "def get_best_trained_model(hp):\n",
     "    best_epoch = get_best_epoch(hp)\n",
+    "    model = build_model(hp)\n",
     "    model.fit(\n",
     "        x_train_full, y_train_full,\n",
     "        batch_size=128, epochs=int(best_epoch * 1.2))\n",

From dca5bad52f77955a16799045faa74d04fe3d55eb Mon Sep 17 00:00:00 2001
From: Kuz-man <kuzman.belev@gmail.com>
Date: Sat, 14 Jan 2023 21:09:15 +0200
Subject: [PATCH 03/24] fix typo of downloaded file

the downloaded file is named `dogs-vs-cats.zip` and not `train.zip`.
---
 chapter08_intro-to-dl-for-computer-vision.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapter08_intro-to-dl-for-computer-vision.ipynb b/chapter08_intro-to-dl-for-computer-vision.ipynb
index 60072bce8a..85394fa258 100644
--- a/chapter08_intro-to-dl-for-computer-vision.ipynb
+++ b/chapter08_intro-to-dl-for-computer-vision.ipynb
@@ -272,7 +272,7 @@
    },
    "outputs": [],
    "source": [
-    "!unzip -qq train.zip"
+    "!unzip -qq dogs-vs-cats.zip"
    ]
   },
   {
@@ -1210,4 +1210,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}

From 8fa694cd5a9d4e710f41e5702ca287c859f73210 Mon Sep 17 00:00:00 2001
From: Kuz-man <kuzman.belev@gmail.com>
Date: Sat, 14 Jan 2023 21:16:54 +0200
Subject: [PATCH 04/24] add a new cell with unzipping train.zip

Once dogs-vs-cats.zip is unzipped, it produces train.zip and test1.zip, so we also need to unzip train.zip.
---
 chapter08_intro-to-dl-for-computer-vision.ipynb | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/chapter08_intro-to-dl-for-computer-vision.ipynb b/chapter08_intro-to-dl-for-computer-vision.ipynb
index 85394fa258..2459d444c4 100644
--- a/chapter08_intro-to-dl-for-computer-vision.ipynb
+++ b/chapter08_intro-to-dl-for-computer-vision.ipynb
@@ -275,6 +275,17 @@
     "!unzip -qq dogs-vs-cats.zip"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!unzip -qq train.zip"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {

From a6a24eda56b9933e65ba290abb63febedeee98a3 Mon Sep 17 00:00:00 2001
From: Francois Chollet <francois.chollet@gmail.com>
Date: Sun, 12 Feb 2023 16:49:44 -0800
Subject: [PATCH 05/24] Round of fixes

---
 chapter11_part04_sequence-to-sequence-learning.ipynb | 2 ++
 chapter12_part01_text-generation.ipynb               | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/chapter11_part04_sequence-to-sequence-learning.ipynb b/chapter11_part04_sequence-to-sequence-learning.ipynb
index 9dca630eb5..8f7bf72641 100644
--- a/chapter11_part04_sequence-to-sequence-learning.ipynb
+++ b/chapter11_part04_sequence-to-sequence-learning.ipynb
@@ -405,6 +405,8 @@
     "            padding_mask = tf.cast(\n",
     "                mask[:, tf.newaxis, :], dtype=\"int32\")\n",
     "            padding_mask = tf.minimum(padding_mask, causal_mask)\n",
+    "        else:\n",
+    "            padding_mask = mask\n",
     "        attention_output_1 = self.attention_1(\n",
     "            query=inputs,\n",
     "            value=inputs,\n",
diff --git a/chapter12_part01_text-generation.ipynb b/chapter12_part01_text-generation.ipynb
index 1c43438d3b..f683c1d73b 100644
--- a/chapter12_part01_text-generation.ipynb
+++ b/chapter12_part01_text-generation.ipynb
@@ -293,6 +293,8 @@
     "            padding_mask = tf.cast(\n",
     "                mask[:, tf.newaxis, :], dtype=\"int32\")\n",
     "            padding_mask = tf.minimum(padding_mask, causal_mask)\n",
+    "        else:\n",
+    "            padding_mask = mask\n",
     "        attention_output_1 = self.attention_1(\n",
     "            query=inputs,\n",
     "            value=inputs,\n",
@@ -391,6 +393,8 @@
     "        self.model_input_length = model_input_length\n",
     "        self.temperatures = temperatures\n",
     "        self.print_freq = print_freq\n",
+    "        vectorized_prompt = text_vectorization([prompt])[0].numpy()\n",
+    "        self.prompt_length = np.nonzero(vectorized_prompt == 0)[0][0]\n",
     "\n",
     "    def on_epoch_end(self, epoch, logs=None):\n",
     "        if (epoch + 1) % self.print_freq != 0:\n",
@@ -401,7 +405,9 @@
     "            for i in range(self.generate_length):\n",
     "                tokenized_sentence = text_vectorization([sentence])\n",
     "                predictions = self.model(tokenized_sentence)\n",
-    "                next_token = sample_next(predictions[0, i, :])\n",
+    "                next_token = sample_next(\n",
+    "                    predictions[0, self.prompt_length - 1 + i, :]\n",
+    "                )\n",
     "                sampled_token = tokens_index[next_token]\n",
     "                sentence += \" \" + sampled_token\n",
     "            print(sentence)\n",

From 03def3a9d2b127f5ee7e136f4afdd2a546a7f5d0 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Sat, 22 Mar 2025 16:11:34 -0700
Subject: [PATCH 06/24] Add third edition notebooks

---
 chapter02_mathematical-building-blocks.ipynb  |  433 ++--
 chapter03_introduction-to-ml-frameworks.ipynb | 1761 +++++++++++++++++
 ...getting-started-with-neural-networks.ipynb |  743 +++----
 chapter05_fundamentals-of-ml.ipynb            |  671 ++++---
 chapter07_working-with-keras.ipynb            | 1256 ++++++++----
 chapter08_image-classification.ipynb          | 1004 ++++++++++
 ..._convnet-architecture-best-practices.ipynb |  356 ++++
 ...nterpreting-what-vision-models-learn.ipynb |  962 +++++++++
 chapter11_image-segmentation.ipynb            |  659 ++++++
 chapter12_object-detection.ipynb              |  506 +++++
 chapter13_timeseries-forecasting.ipynb        |  689 +++++++
 chapter14_text-classification.ipynb           | 1400 +++++++++++++
 ..._language-models-and-the-transformer.ipynb | 1180 +++++++++++
 ...r16_generative-large-language-models.ipynb | 1120 +++++++++++
 chapter17_image-generation.ipynb              | 1145 +++++++++++
 ...18_best-practices-for-the-real-world.ipynb |  801 ++++++++
 ...apter02_mathematical-building-blocks.ipynb | 1469 ++++++++++++++
 ...apter03_introduction-to-keras-and-tf.ipynb |    0
 ...getting-started-with-neural-networks.ipynb | 1413 +++++++++++++
 .../chapter05_fundamentals-of-ml.ipynb        |  786 ++++++++
 .../chapter07_working-with-keras.ipynb        | 1439 ++++++++++++++
 ...er08_intro-to-dl-for-computer-vision.ipynb |    0
 .../chapter09_part01_image-segmentation.ipynb |    0
 ...modern-convnet-architecture-patterns.ipynb |    0
 ...t03_interpreting-what-convnets-learn.ipynb |    0
 .../chapter10_dl-for-timeseries.ipynb         |    0
 .../chapter11_part01_introduction.ipynb       |    0
 .../chapter11_part02_sequence-models.ipynb    |    0
 .../chapter11_part03_transformer.ipynb        |    0
 ...part04_sequence-to-sequence-learning.ipynb |    0
 .../chapter12_part01_text-generation.ipynb    |    0
 .../chapter12_part02_deep-dream.ipynb         |    0
 ...apter12_part03_neural-style-transfer.ipynb |    0
 ...er12_part04_variational-autoencoders.ipynb |    0
 .../chapter12_part05_gans.ipynb               |    0
 ...13_best-practices-for-the-real-world.ipynb |    0
 .../chapter14_conclusions.ipynb               |    0
 37 files changed, 18461 insertions(+), 1332 deletions(-)
 create mode 100644 chapter03_introduction-to-ml-frameworks.ipynb
 create mode 100644 chapter08_image-classification.ipynb
 create mode 100644 chapter09_convnet-architecture-best-practices.ipynb
 create mode 100644 chapter10_interpreting-what-vision-models-learn.ipynb
 create mode 100644 chapter11_image-segmentation.ipynb
 create mode 100644 chapter12_object-detection.ipynb
 create mode 100644 chapter13_timeseries-forecasting.ipynb
 create mode 100644 chapter14_text-classification.ipynb
 create mode 100644 chapter15_language-models-and-the-transformer.ipynb
 create mode 100644 chapter16_generative-large-language-models.ipynb
 create mode 100644 chapter17_image-generation.ipynb
 create mode 100644 chapter18_best-practices-for-the-real-world.ipynb
 create mode 100644 second_edition/chapter02_mathematical-building-blocks.ipynb
 rename chapter03_introduction-to-keras-and-tf.ipynb => second_edition/chapter03_introduction-to-keras-and-tf.ipynb (100%)
 create mode 100644 second_edition/chapter04_getting-started-with-neural-networks.ipynb
 create mode 100644 second_edition/chapter05_fundamentals-of-ml.ipynb
 create mode 100644 second_edition/chapter07_working-with-keras.ipynb
 rename chapter08_intro-to-dl-for-computer-vision.ipynb => second_edition/chapter08_intro-to-dl-for-computer-vision.ipynb (100%)
 rename chapter09_part01_image-segmentation.ipynb => second_edition/chapter09_part01_image-segmentation.ipynb (100%)
 rename chapter09_part02_modern-convnet-architecture-patterns.ipynb => second_edition/chapter09_part02_modern-convnet-architecture-patterns.ipynb (100%)
 rename chapter09_part03_interpreting-what-convnets-learn.ipynb => second_edition/chapter09_part03_interpreting-what-convnets-learn.ipynb (100%)
 rename chapter10_dl-for-timeseries.ipynb => second_edition/chapter10_dl-for-timeseries.ipynb (100%)
 rename chapter11_part01_introduction.ipynb => second_edition/chapter11_part01_introduction.ipynb (100%)
 rename chapter11_part02_sequence-models.ipynb => second_edition/chapter11_part02_sequence-models.ipynb (100%)
 rename chapter11_part03_transformer.ipynb => second_edition/chapter11_part03_transformer.ipynb (100%)
 rename chapter11_part04_sequence-to-sequence-learning.ipynb => second_edition/chapter11_part04_sequence-to-sequence-learning.ipynb (100%)
 rename chapter12_part01_text-generation.ipynb => second_edition/chapter12_part01_text-generation.ipynb (100%)
 rename chapter12_part02_deep-dream.ipynb => second_edition/chapter12_part02_deep-dream.ipynb (100%)
 rename chapter12_part03_neural-style-transfer.ipynb => second_edition/chapter12_part03_neural-style-transfer.ipynb (100%)
 rename chapter12_part04_variational-autoencoders.ipynb => second_edition/chapter12_part04_variational-autoencoders.ipynb (100%)
 rename chapter12_part05_gans.ipynb => second_edition/chapter12_part05_gans.ipynb (100%)
 rename chapter13_best-practices-for-the-real-world.ipynb => second_edition/chapter13_best-practices-for-the-real-world.ipynb (100%)
 rename chapter14_conclusions.ipynb => second_edition/chapter14_conclusions.ipynb (100%)

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index 01edc9becc..0a507c0a49 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -6,25 +6,30 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "# The mathematical building blocks of neural networks"
+    "!pip install keras-nightly --upgrade -q"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "## A first look at a neural network"
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
   {
@@ -33,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Loading the MNIST dataset in Keras**"
+    "### A first look at a neural network"
    ]
   },
   {
@@ -44,7 +49,8 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import mnist\n",
+    "from keras.datasets import mnist\n",
+    "\n",
     "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()"
    ]
   },
@@ -114,15 +120,6 @@
     "test_labels"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**The network architecture**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -131,21 +128,15 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow import keras\n",
-    "from tensorflow.keras import layers\n",
-    "model = keras.Sequential([\n",
-    "    layers.Dense(512, activation=\"relu\"),\n",
-    "    layers.Dense(10, activation=\"softmax\")\n",
-    "])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**The compilation step**"
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")"
    ]
   },
   {
@@ -156,18 +147,11 @@
    },
    "outputs": [],
    "source": [
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Preparing the image data**"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
    ]
   },
   {
@@ -184,15 +168,6 @@
     "test_images = test_images.astype(\"float32\") / 255"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**\"Fitting\" the model**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -204,15 +179,6 @@
     "model.fit(train_images, train_labels, epochs=5, batch_size=128)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Using the model to make predictions**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -259,15 +225,6 @@
     "test_labels[0]"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Evaluating the model on new data**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -286,7 +243,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Data representations for neural networks"
+    "### Data representations for neural networks"
    ]
   },
   {
@@ -295,7 +252,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Scalars (rank-0 tensors)"
+    "#### Scalars (rank-0 tensors)"
    ]
   },
   {
@@ -328,7 +285,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Vectors (rank-1 tensors)"
+    "#### Vectors (rank-1 tensors)"
    ]
   },
   {
@@ -360,7 +317,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Matrices (rank-2 tensors)"
+    "#### Matrices (rank-2 tensors)"
    ]
   },
   {
@@ -383,7 +340,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Rank-3 and higher-rank tensors"
+    "#### Rank-3 tensors and higher-rank tensors"
    ]
   },
   {
@@ -412,7 +369,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Key attributes"
+    "#### Key attributes"
    ]
   },
   {
@@ -423,7 +380,8 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import mnist\n",
+    "from keras.datasets import mnist\n",
+    "\n",
     "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()"
    ]
   },
@@ -460,15 +418,6 @@
     "train_images.dtype"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Displaying the fourth digit**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -478,9 +427,10 @@
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
+    "\n",
     "digit = train_images[4]\n",
     "plt.imshow(digit, cmap=plt.cm.binary)\n",
-    "plt.show()"
+    "plt.savefig(\"The-fourth-sample-in-our-dataset.png\", dpi=300)"
    ]
   },
   {
@@ -500,7 +450,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Manipulating tensors in NumPy"
+    "#### Manipulating tensors in NumPy"
    ]
   },
   {
@@ -567,7 +517,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The notion of data batches"
+    "#### The notion of data batches"
    ]
   },
   {
@@ -601,7 +551,7 @@
    "outputs": [],
    "source": [
     "n = 3\n",
-    "batch = train_images[128 * n:128 * (n + 1)]"
+    "batch = train_images[128 * n : 128 * (n + 1)]"
    ]
   },
   {
@@ -610,7 +560,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Real-world examples of data tensors"
+    "#### Real-world examples of data tensors"
    ]
   },
   {
@@ -619,7 +569,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Vector data"
+    "##### Vector data"
    ]
   },
   {
@@ -628,7 +578,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Timeseries data or sequence data"
+    "##### Timeseries data or sequence data"
    ]
   },
   {
@@ -637,7 +587,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Image data"
+    "##### Image data"
    ]
   },
   {
@@ -646,7 +596,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Video data"
+    "##### Video data"
    ]
   },
   {
@@ -655,7 +605,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## The gears of neural networks: tensor operations"
+    "### The gears of neural networks: tensor operations"
    ]
   },
   {
@@ -664,7 +614,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Element-wise operations"
+    "#### Element-wise operations"
    ]
   },
   {
@@ -718,7 +668,7 @@
     "t0 = time.time()\n",
     "for _ in range(1000):\n",
     "    z = x + y\n",
-    "    z = np.maximum(z, 0.)\n",
+    "    z = np.maximum(z, 0.0)\n",
     "print(\"Took: {0:.2f} s\".format(time.time() - t0))"
    ]
   },
@@ -743,7 +693,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Broadcasting"
+    "#### Broadcasting"
    ]
   },
   {
@@ -755,6 +705,7 @@
    "outputs": [],
    "source": [
     "import numpy as np\n",
+    "\n",
     "X = np.random.random((32, 10))\n",
     "y = np.random.random((10,))"
    ]
@@ -778,7 +729,7 @@
    },
    "outputs": [],
    "source": [
-    "Y = np.concatenate([y] * 32, axis=0)"
+    "Y = np.tile(y, (32, 1))"
    ]
   },
   {
@@ -809,6 +760,7 @@
    "outputs": [],
    "source": [
     "import numpy as np\n",
+    "\n",
     "x = np.random.random((64, 3, 32, 10))\n",
     "y = np.random.random((32, 10))\n",
     "z = np.maximum(x, y)"
@@ -820,7 +772,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Tensor product"
+    "#### Tensor product"
    ]
   },
   {
@@ -833,7 +785,9 @@
    "source": [
     "x = np.random.random((32,))\n",
     "y = np.random.random((32,))\n",
-    "z = np.dot(x, y)"
+    "\n",
+    "z = np.matmul(x, y)\n",
+    "z = x @ y"
    ]
   },
   {
@@ -844,11 +798,11 @@
    },
    "outputs": [],
    "source": [
-    "def naive_vector_dot(x, y):\n",
+    "def naive_vector_product(x, y):\n",
     "    assert len(x.shape) == 1\n",
     "    assert len(y.shape) == 1\n",
     "    assert x.shape[0] == y.shape[0]\n",
-    "    z = 0.\n",
+    "    z = 0.0\n",
     "    for i in range(x.shape[0]):\n",
     "        z += x[i] * y[i]\n",
     "    return z"
@@ -862,7 +816,7 @@
    },
    "outputs": [],
    "source": [
-    "def naive_matrix_vector_dot(x, y):\n",
+    "def naive_matrix_vector_product(x, y):\n",
     "    assert len(x.shape) == 2\n",
     "    assert len(y.shape) == 1\n",
     "    assert x.shape[1] == y.shape[0]\n",
@@ -881,10 +835,10 @@
    },
    "outputs": [],
    "source": [
-    "def naive_matrix_vector_dot(x, y):\n",
+    "def naive_matrix_vector_product(x, y):\n",
     "    z = np.zeros(x.shape[0])\n",
     "    for i in range(x.shape[0]):\n",
-    "        z[i] = naive_vector_dot(x[i, :], y)\n",
+    "        z[i] = naive_vector_product(x[i, :], y)\n",
     "    return z"
    ]
   },
@@ -896,7 +850,7 @@
    },
    "outputs": [],
    "source": [
-    "def naive_matrix_dot(x, y):\n",
+    "def naive_matrix_product(x, y):\n",
     "    assert len(x.shape) == 2\n",
     "    assert len(y.shape) == 2\n",
     "    assert x.shape[1] == y.shape[0]\n",
@@ -905,7 +859,7 @@
     "        for j in range(y.shape[1]):\n",
     "            row_x = x[i, :]\n",
     "            column_y = y[:, j]\n",
-    "            z[i, j] = naive_vector_dot(row_x, column_y)\n",
+    "            z[i, j] = naive_vector_product(row_x, column_y)\n",
     "    return z"
    ]
   },
@@ -915,7 +869,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Tensor reshaping"
+    "#### Tensor reshaping"
    ]
   },
   {
@@ -938,8 +892,8 @@
    "outputs": [],
    "source": [
     "x = np.array([[0., 1.],\n",
-    "             [2., 3.],\n",
-    "             [4., 5.]])\n",
+    "              [2., 3.],\n",
+    "              [4., 5.]])\n",
     "x.shape"
    ]
   },
@@ -963,18 +917,21 @@
    },
    "outputs": [],
    "source": [
-    "x = np.zeros((300, 20))\n",
-    "x = np.transpose(x)\n",
-    "x.shape"
+    "x = x.reshape((2, 3))\n",
+    "x"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "### Geometric interpretation of tensor operations"
+    "x = np.zeros((300, 20))\n",
+    "x = np.transpose(x)\n",
+    "x.shape"
    ]
   },
   {
@@ -983,7 +940,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### A geometric interpretation of deep learning"
+    "#### Geometric interpretation of tensor operations"
    ]
   },
   {
@@ -992,7 +949,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## The engine of neural networks: gradient-based optimization"
+    "#### A geometric interpretation of deep learning"
    ]
   },
   {
@@ -1001,7 +958,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### What's a derivative?"
+    "### The engine of neural networks: gradient-based optimization"
    ]
   },
   {
@@ -1010,7 +967,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Derivative of a tensor operation: the gradient"
+    "#### What's a derivative?"
    ]
   },
   {
@@ -1019,7 +976,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Stochastic gradient descent"
+    "#### Derivative of a tensor operation: the gradient"
    ]
   },
   {
@@ -1028,7 +985,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chaining derivatives: The Backpropagation algorithm"
+    "#### Stochastic gradient descent"
    ]
   },
   {
@@ -1037,7 +994,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### The chain rule"
+    "#### Chaining derivatives: the Backpropagation algorithm"
    ]
   },
   {
@@ -1046,7 +1003,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Automatic differentiation with computation graphs"
+    "##### The chain rule"
    ]
   },
   {
@@ -1055,52 +1012,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### The gradient tape in TensorFlow"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "import tensorflow as tf\n",
-    "x = tf.Variable(0.)\n",
-    "with tf.GradientTape() as tape:\n",
-    "    y = 2 * x + 3\n",
-    "grad_of_y_wrt_x = tape.gradient(y, x)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "x = tf.Variable(tf.random.uniform((2, 2)))\n",
-    "with tf.GradientTape() as tape:\n",
-    "    y = 2 * x + 3\n",
-    "grad_of_y_wrt_x = tape.gradient(y, x)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "W = tf.Variable(tf.random.uniform((2, 2)))\n",
-    "b = tf.Variable(tf.zeros((2,)))\n",
-    "x = tf.random.uniform((2, 2))\n",
-    "with tf.GradientTape() as tape:\n",
-    "    y = tf.matmul(x, W) + b\n",
-    "grad_of_y_wrt_W_and_b = tape.gradient(y, [W, b])"
+    "##### Automatic differentiation with computation graphs"
    ]
   },
   {
@@ -1109,7 +1021,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Looking back at our first example"
+    "### Looking back at our first example"
    ]
   },
   {
@@ -1135,10 +1047,12 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(512, activation=\"relu\"),\n",
-    "    layers.Dense(10, activation=\"softmax\")\n",
-    "])"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")"
    ]
   },
   {
@@ -1149,9 +1063,11 @@
    },
    "outputs": [],
    "source": [
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
    ]
   },
   {
@@ -1162,7 +1078,12 @@
    },
    "outputs": [],
    "source": [
-    "model.fit(train_images, train_labels, epochs=5, batch_size=128)"
+    "model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
+    "    epochs=5,\n",
+    "    batch_size=128,\n",
+    ")"
    ]
   },
   {
@@ -1171,7 +1092,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Reimplementing our first example from scratch in TensorFlow"
+    "#### Reimplementing our first example from scratch"
    ]
   },
   {
@@ -1180,7 +1101,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### A simple Dense class"
+    "##### A simple Dense class"
    ]
   },
   {
@@ -1191,22 +1112,34 @@
    },
    "outputs": [],
    "source": [
-    "import tensorflow as tf\n",
+    "keras.config.set_backend(\"tensorflow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import ops\n",
     "\n",
     "class NaiveDense:\n",
-    "    def __init__(self, input_size, output_size, activation):\n",
+    "    def __init__(self, input_size, output_size, activation=None):\n",
     "        self.activation = activation\n",
-    "\n",
-    "        w_shape = (input_size, output_size)\n",
-    "        w_initial_value = tf.random.uniform(w_shape, minval=0, maxval=1e-1)\n",
-    "        self.W = tf.Variable(w_initial_value)\n",
-    "\n",
-    "        b_shape = (output_size,)\n",
-    "        b_initial_value = tf.zeros(b_shape)\n",
-    "        self.b = tf.Variable(b_initial_value)\n",
+    "        self.W = keras.Variable(\n",
+    "            shape=(input_size, output_size), initializer=\"uniform\"\n",
+    "        )\n",
+    "        self.b = keras.Variable(shape=(output_size,), initializer=\"zeros\")\n",
     "\n",
     "    def __call__(self, inputs):\n",
-    "        return self.activation(tf.matmul(inputs, self.W) + self.b)\n",
+    "        x = ops.matmul(inputs, self.W)\n",
+    "        x = x + self.b\n",
+    "        if self.activation is not None:\n",
+    "            x = self.activation(x)\n",
+    "        return x\n",
     "\n",
     "    @property\n",
     "    def weights(self):\n",
@@ -1219,7 +1152,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### A simple Sequential class"
+    "##### A simple Sequential class"
    ]
   },
   {
@@ -1237,15 +1170,15 @@
     "    def __call__(self, inputs):\n",
     "        x = inputs\n",
     "        for layer in self.layers:\n",
-    "           x = layer(x)\n",
+    "            x = layer(x)\n",
     "        return x\n",
     "\n",
     "    @property\n",
     "    def weights(self):\n",
-    "       weights = []\n",
-    "       for layer in self.layers:\n",
-    "           weights += layer.weights\n",
-    "       return weights"
+    "        weights = []\n",
+    "        for layer in self.layers:\n",
+    "            weights += layer.weights\n",
+    "        return weights"
    ]
   },
   {
@@ -1256,10 +1189,12 @@
    },
    "outputs": [],
    "source": [
-    "model = NaiveSequential([\n",
-    "    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),\n",
-    "    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)\n",
-    "])\n",
+    "model = NaiveSequential(\n",
+    "    [\n",
+    "        NaiveDense(input_size=28 * 28, output_size=512, activation=ops.relu),\n",
+    "        NaiveDense(input_size=512, output_size=10, activation=ops.softmax),\n",
+    "    ]\n",
+    ")\n",
     "assert len(model.weights) == 4"
    ]
   },
@@ -1269,7 +1204,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### A batch generator"
+    "##### A batch generator"
    ]
   },
   {
@@ -1304,26 +1239,16 @@
     "colab_type": "text"
    },
    "source": [
-    "### Running one training step"
+    "#### Running one training step"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 0,
+   "cell_type": "markdown",
    "metadata": {
-    "colab_type": "code"
+    "colab_type": "text"
    },
-   "outputs": [],
    "source": [
-    "def one_training_step(model, images_batch, labels_batch):\n",
-    "    with tf.GradientTape() as tape:\n",
-    "        predictions = model(images_batch)\n",
-    "        per_sample_losses = tf.keras.losses.sparse_categorical_crossentropy(\n",
-    "            labels_batch, predictions)\n",
-    "        average_loss = tf.reduce_mean(per_sample_losses)\n",
-    "    gradients = tape.gradient(average_loss, model.weights)\n",
-    "    update_weights(gradients, model.weights)\n",
-    "    return average_loss"
+    "##### The weight update step"
    ]
   },
   {
@@ -1338,7 +1263,7 @@
     "\n",
     "def update_weights(gradients, weights):\n",
     "    for g, w in zip(gradients, weights):\n",
-    "        w.assign_sub(g * learning_rate)"
+    "        w.assign(w - g * learning_rate)"
    ]
   },
   {
@@ -1349,7 +1274,7 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras import optimizers\n",
+    "from keras import optimizers\n",
     "\n",
     "optimizer = optimizers.SGD(learning_rate=1e-3)\n",
     "\n",
@@ -1363,7 +1288,50 @@
     "colab_type": "text"
    },
    "source": [
-    "### The full training loop"
+    "##### Gradient computation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "x = tf.zeros(shape=())\n",
+    "with tf.GradientTape() as tape:\n",
+    "    y = 2 * x + 3\n",
+    "grad_of_y_wrt_x = tape.gradient(y, x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def one_training_step(model, images_batch, labels_batch):\n",
+    "    with tf.GradientTape() as tape:\n",
+    "        predictions = model(images_batch)\n",
+    "        loss = ops.sparse_categorical_crossentropy(labels_batch, predictions)\n",
+    "        average_loss = ops.mean(loss)\n",
+    "    gradients = tape.gradient(average_loss, model.weights)\n",
+    "    update_weights(gradients, model.weights)\n",
+    "    return average_loss"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The full training loop"
    ]
   },
   {
@@ -1393,7 +1361,8 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import mnist\n",
+    "from keras.datasets import mnist\n",
+    "\n",
     "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n",
     "\n",
     "train_images = train_images.reshape((60000, 28 * 28))\n",
@@ -1410,7 +1379,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Evaluating the model"
+    "#### Evaluating the model"
    ]
   },
   {
@@ -1422,10 +1391,9 @@
    "outputs": [],
    "source": [
     "predictions = model(test_images)\n",
-    "predictions = predictions.numpy()\n",
-    "predicted_labels = np.argmax(predictions, axis=1)\n",
+    "predicted_labels = ops.argmax(predictions, axis=1)\n",
     "matches = predicted_labels == test_labels\n",
-    "print(f\"accuracy: {matches.mean():.2f}\")"
+    "f\"accuracy: {ops.mean(matches):.2f}\""
    ]
   },
   {
@@ -1434,14 +1402,15 @@
     "colab_type": "text"
    },
    "source": [
-    "## Summary"
+    "### Chapter summary"
    ]
   }
  ],
  "metadata": {
+  "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter02_mathematical-building-blocks.i",
+   "name": "chapter02_mathematical-building-blocks",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
@@ -1461,7 +1430,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.0"
+   "version": "3.10.0"
   }
  },
  "nbformat": 4,
diff --git a/chapter03_introduction-to-ml-frameworks.ipynb b/chapter03_introduction-to-ml-frameworks.ipynb
new file mode 100644
index 0000000000..4eea8270c9
--- /dev/null
+++ b/chapter03_introduction-to-ml-frameworks.ipynb
@@ -0,0 +1,1761 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A brief history of deep learning frameworks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### How these frameworks relate to each other"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Introduction to TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### First steps with TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Tensors and Variables in TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Constant tensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "tf.ones(shape=(2, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tf.zeros(shape=(2, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tf.constant([1, 2, 3], dtype=\"float32\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Random tensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = tf.random.normal(shape=(3, 1), mean=0., stddev=1.)\n",
+    "print(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = tf.random.uniform(shape=(3, 1), minval=0., maxval=1.)\n",
+    "print(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Tensor assignment and the Variable class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "x = np.ones(shape=(2, 2))\n",
+    "x[0, 0] = 0.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "v = tf.Variable(initial_value=tf.random.normal(shape=(3, 1)))\n",
+    "print(v)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "v.assign(tf.ones((3, 1)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "v[0, 0].assign(3.)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "v.assign_add(tf.ones((3, 1)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Tensor operations: doing math in TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "a = tf.ones((2, 2))\n",
+    "b = tf.square(a)\n",
+    "c = tf.sqrt(a)\n",
+    "d = b + c\n",
+    "e = tf.matmul(a, b)\n",
+    "f = tf.concat((a, b), axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def dense(inputs, W, b):\n",
+    "    return tf.nn.relu(tf.matmul(inputs, W) + b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Gradients in TensorFlow: a second look at the `GradientTape` API"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_var = tf.Variable(initial_value=3.0)\n",
+    "with tf.GradientTape() as tape:\n",
+    "    result = tf.square(input_var)\n",
+    "gradient = tape.gradient(result, input_var)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_const = tf.constant(3.0)\n",
+    "with tf.GradientTape() as tape:\n",
+    "    tape.watch(input_const)\n",
+    "    result = tf.square(input_const)\n",
+    "gradient = tape.gradient(result, input_const)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "time = tf.Variable(0.0)\n",
+    "with tf.GradientTape() as outer_tape:\n",
+    "    with tf.GradientTape() as inner_tape:\n",
+    "        position = 4.9 * time**2\n",
+    "    speed = inner_tape.gradient(position, time)\n",
+    "acceleration = outer_tape.gradient(speed, time)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Making TensorFlow functions fast using compilation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "@tf.function\n",
+    "def dense(inputs, W, b):\n",
+    "    return tf.nn.relu(tf.matmul(inputs, W) + b)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "@tf.function(jit_compile=True)\n",
+    "def dense(inputs, W, b):\n",
+    "    return tf.nn.relu(tf.matmul(inputs, W) + b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### An end-to-end example: a linear classifier in pure TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "num_samples_per_class = 1000\n",
+    "negative_samples = np.random.multivariate_normal(\n",
+    "    mean=[0, 3], cov=[[1, 0.5], [0.5, 1]], size=num_samples_per_class\n",
+    ")\n",
+    "positive_samples = np.random.multivariate_normal(\n",
+    "    mean=[3, 0], cov=[[1, 0.5], [0.5, 1]], size=num_samples_per_class\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = np.vstack((negative_samples, positive_samples)).astype(np.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "targets = np.vstack(\n",
+    "    (\n",
+    "        np.zeros((num_samples_per_class, 1), dtype=\"float32\"),\n",
+    "        np.ones((num_samples_per_class, 1), dtype=\"float32\"),\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.scatter(inputs[:, 0], inputs[:, 1], c=targets[:, 0])\n",
+    "plt.savefig(\"linear_model_inputs.png\", dpi=300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_dim = 2\n",
+    "output_dim = 1\n",
+    "W = tf.Variable(initial_value=tf.random.uniform(shape=(input_dim, output_dim)))\n",
+    "b = tf.Variable(initial_value=tf.zeros(shape=(output_dim,)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def model(inputs, W, b):\n",
+    "    return tf.matmul(inputs, W) + b"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def mean_squared_error(targets, predictions):\n",
+    "    per_sample_losses = tf.square(targets - predictions)\n",
+    "    return tf.reduce_mean(per_sample_losses)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "learning_rate = 0.1\n",
+    "\n",
+    "@tf.function(jit_compile=True)\n",
+    "def training_step(inputs, targets, W, b):\n",
+    "    with tf.GradientTape() as tape:\n",
+    "        predictions = model(inputs, W, b)\n",
+    "        loss = mean_squared_error(targets, predictions)\n",
+    "    grad_loss_wrt_W, grad_loss_wrt_b = tape.gradient(loss, [W, b])\n",
+    "    W.assign_sub(grad_loss_wrt_W * learning_rate)\n",
+    "    b.assign_sub(grad_loss_wrt_b * learning_rate)\n",
+    "    return loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "for step in range(40):\n",
+    "    loss = training_step(inputs, targets, W, b)\n",
+    "    print(f\"Loss at step {step}: {loss:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions = model(inputs, W, b)\n",
+    "plt.scatter(inputs[:, 0], inputs[:, 1], c=predictions[:, 0] > 0.5)\n",
+    "plt.savefig(\"linear_model_predictions.png\", dpi=300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = np.linspace(-1, 4, 100)\n",
+    "y = -W[0] / W[1] * x + (0.5 - b) / W[1]\n",
+    "plt.plot(x, y, \"-r\")\n",
+    "plt.scatter(inputs[:, 0], inputs[:, 1], c=predictions[:, 0] > 0.5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### What makes the TensorFlow approach unique"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Introduction to PyTorch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### First steps with PyTorch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Tensors and Parameters in PyTorch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Constant tensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "torch.ones(size=(2, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "torch.zeros(size=(2, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "torch.tensor([1, 2, 3], dtype=torch.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Random tensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "torch.normal(\n",
+    "    mean=torch.zeros(size=(3, 1)),\n",
+    "    std=torch.ones(size=(3, 1)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "torch.rand(3, 1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Tensor assignment and the Parameter class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = torch.zeros(size=(2, 1))\n",
+    "x[0, 0] = 1.\n",
+    "x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = torch.zeros(size=(2, 1))\n",
+    "p = torch.nn.parameter.Parameter(data=x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Tensor operations: doing math in PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "a = torch.ones((2, 2))\n",
+    "b = torch.square(a)\n",
+    "c = torch.sqrt(a)\n",
+    "d = b + c\n",
+    "e = torch.matmul(a, b)\n",
+    "f = torch.cat((a, b), axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def dense(inputs, W, b):\n",
+    "    return torch.nn.functional.relu(torch.matmul(inputs, W) + b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Computing gradients with PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_var = torch.tensor(3.0, requires_grad=True)\n",
+    "result = torch.square(input_var)\n",
+    "result.backward()\n",
+    "gradient = input_var.grad\n",
+    "gradient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "result = torch.square(input_var)\n",
+    "result.backward()\n",
+    "input_var.grad"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_var.grad = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### An end-to-end example: a linear classifier in pure PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_dim = 2\n",
+    "output_dim = 1\n",
+    "\n",
+    "W = torch.rand(input_dim, output_dim, requires_grad=True)\n",
+    "b = torch.zeros(output_dim, requires_grad=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def model(inputs, W, b):\n",
+    "    return torch.matmul(inputs, W) + b"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def mean_squared_error(targets, predictions):\n",
+    "    per_sample_losses = torch.square(targets - predictions)\n",
+    "    return torch.mean(per_sample_losses)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "learning_rate = 0.1\n",
+    "\n",
+    "def training_step(inputs, targets, W, b):\n",
+    "    predictions = model(inputs, W, b)\n",
+    "    loss = mean_squared_error(targets, predictions)\n",
+    "    loss.backward()\n",
+    "    grad_loss_wrt_W, grad_loss_wrt_b = W.grad, b.grad\n",
+    "    with torch.no_grad():\n",
+    "        W -= grad_loss_wrt_W * learning_rate\n",
+    "        b -= grad_loss_wrt_b * learning_rate\n",
+    "    W.grad = None\n",
+    "    b.grad = None\n",
+    "    return loss"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Packaging state and computation with Modules"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class LinearModel(torch.nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.W = torch.nn.Parameter(torch.rand(input_dim, output_dim))\n",
+    "        self.b = torch.nn.Parameter(torch.zeros(output_dim))\n",
+    "\n",
+    "    def forward(self, inputs):\n",
+    "        return torch.matmul(inputs, self.W) + self.b"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = LinearModel()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "torch_inputs = torch.tensor(inputs)\n",
+    "output = model(torch_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def training_step(inputs, targets):\n",
+    "    predictions = model(inputs)\n",
+    "    loss = mean_squared_error(targets, predictions)\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "    model.zero_grad()\n",
+    "    return loss"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Making PyTorch modules fast using compilation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "compiled_model = torch.compile(model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "@torch.compile\n",
+    "def dense(inputs, W, b):\n",
+    "    return torch.nn.functional.relu(torch.matmul(inputs, W) + b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### What makes the PyTorch approach unique"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Introduction to JAX"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### First steps with JAX"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Tensors in JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from jax import numpy as jnp\n",
+    "jnp.ones(shape=(2, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "jnp.zeros(shape=(2, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "jnp.array([1, 2, 3], dtype=\"float32\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Random tensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "np.random.normal(size=(3,))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "np.random.normal(size=(3,))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def apply_noise(x, seed):\n",
+    "    np.random.seed(seed)\n",
+    "    x = x * np.random.normal(size=(3,))\n",
+    "    return x\n",
+    "\n",
+    "seed = 1337\n",
+    "y = apply_noise(x, seed)\n",
+    "seed += 1\n",
+    "z = apply_noise(x, seed)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import jax\n",
+    "\n",
+    "seed_key = jax.random.PRNGKey(1337)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "seed_key = jax.random.PRNGKey(0)\n",
+    "jax.random.normal(seed_key, shape=(3,))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "seed_key = jax.random.PRNGKey(123)\n",
+    "jax.random.normal(seed_key, shape=(3,))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "jax.random.normal(seed_key, shape=(3,))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "seed_key = jax.random.PRNGKey(123)\n",
+    "jax.random.normal(seed_key, shape=(3,))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "new_seed_key = jax.random.split(seed_key, num=1)[0]\n",
+    "jax.random.normal(new_seed_key, shape=(3,))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Tensor assignment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = jnp.array([1, 2, 3], dtype=\"float32\")\n",
+    "new_x = x.at[0].set(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Tensor operations: doing math in JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "a = jnp.ones((2, 2))\n",
+    "b = jnp.square(a)\n",
+    "c = jnp.sqrt(a)\n",
+    "d = b + c\n",
+    "e = jnp.matmul(a, b)\n",
+    "e *= d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def dense(inputs, W, b):\n",
+    "    return jax.nn.relu(jnp.matmul(inputs, W) + b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Computing gradients with JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def compute_loss(input_var):\n",
+    "    return jnp.square(input_var)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "grad_fn = jax.grad(compute_loss)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_var = jnp.array(3.0)\n",
+    "grad_of_loss_wrt_input_var = grad_fn(input_var)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### JAX gradient-computation best practices"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Returning the loss value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "grad_fn = jax.value_and_grad(compute_loss)\n",
+    "output, grad_of_loss_wrt_input_var = grad_fn(input_var)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Getting gradients for a complex function"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Returning auxiliary outputs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Making JAX functions fast with `@jax.jit`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "@jax.jit\n",
+    "def dense(inputs, W, b):\n",
+    "    return jax.nn.relu(jnp.matmul(inputs, W) + b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### An end-to-end example: a linear classifier in pure JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def model(inputs, W, b):\n",
+    "    return jnp.matmul(inputs, W) + b\n",
+    "\n",
+    "def mean_squared_error(targets, predictions):\n",
+    "    per_sample_losses = jnp.square(targets - predictions)\n",
+    "    return jnp.mean(per_sample_losses)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def compute_loss(state, inputs, targets):\n",
+    "    W, b = state\n",
+    "    predictions = model(inputs, W, b)\n",
+    "    loss = mean_squared_error(targets, predictions)\n",
+    "    return loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "grad_fn = jax.value_and_grad(compute_loss)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "@jax.jit\n",
+    "def training_step(inputs, targets, W, b):\n",
+    "    loss, grads = grad_fn((W, b), inputs, targets)\n",
+    "    grad_wrt_W, grad_wrt_b = grads\n",
+    "    W = W - grad_wrt_W * learning_rate\n",
+    "    b = b - grad_wrt_b * learning_rate\n",
+    "    return loss, W, b"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_dim = 2\n",
+    "output_dim = 1\n",
+    "\n",
+    "W = jax.numpy.array(np.random.uniform(size=(input_dim, output_dim)))\n",
+    "b = jax.numpy.array(np.zeros(shape=(output_dim,)))\n",
+    "state = (W, b)\n",
+    "for step in range(40):\n",
+    "    loss, W, b = training_step(inputs, targets, W, b)\n",
+    "    print(f\"Loss at step {step}: {loss:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### What makes the JAX approach unique"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Introduction to Keras"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### First steps with Keras"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Picking a backend framework"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n",
+    "\n",
+    "import keras"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Anatomy of a neural network: understanding core Keras APIs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Layers: the building blocks of deep learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### The base `Layer` class in Keras"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "class SimpleDense(keras.Layer):\n",
+    "    def __init__(self, units, activation=None):\n",
+    "        super().__init__()\n",
+    "        self.units = units\n",
+    "        self.activation = activation\n",
+    "\n",
+    "    def build(self, input_shape):\n",
+    "        batch_dim, input_dim = input_shape\n",
+    "        self.W = self.add_weight(\n",
+    "            shape=(input_dim, self.units), initializer=\"random_normal\"\n",
+    "        )\n",
+    "        self.b = self.add_weight(shape=(self.units,), initializer=\"zeros\")\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        y = keras.ops.matmul(inputs, self.W) + self.b\n",
+    "        if self.activation is not None:\n",
+    "            y = self.activation(y)\n",
+    "        return y"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "my_dense = SimpleDense(units=32, activation=keras.ops.relu)\n",
+    "input_tensor = keras.ops.ones(shape=(2, 784))\n",
+    "output_tensor = my_dense(input_tensor)\n",
+    "print(output_tensor.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Automatic shape inference: building layers on the fly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import layers\n",
+    "\n",
+    "layer = layers.Dense(32, activation=\"relu\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import models\n",
+    "from keras import layers\n",
+    "\n",
+    "model = models.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(32, activation=\"relu\"),\n",
+    "        layers.Dense(32),\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        SimpleDense(32, activation=keras.ops.relu),\n",
+    "        SimpleDense(64, activation=keras.ops.relu),\n",
+    "        SimpleDense(32, activation=keras.ops.relu),\n",
+    "        SimpleDense(10, activation=keras.ops.softmax),\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### From layers to models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The \"compile\" step: configuring the learning process"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([keras.layers.Dense(1)])\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"mean_squared_error\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    optimizer=keras.optimizers.RMSprop(),\n",
+    "    loss=keras.losses.MeanSquaredError(),\n",
+    "    metrics=[keras.metrics.BinaryAccuracy()],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Picking a loss function"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Understanding the `fit` method"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "history = model.fit(\n",
+    "    inputs,\n",
+    "    targets,\n",
+    "    epochs=5,\n",
+    "    batch_size=128,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "history.history"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Monitoring loss & metrics on validation data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([keras.layers.Dense(1)])\n",
+    "model.compile(\n",
+    "    optimizer=keras.optimizers.RMSprop(learning_rate=0.1),\n",
+    "    loss=keras.losses.MeanSquaredError(),\n",
+    "    metrics=[keras.metrics.BinaryAccuracy()],\n",
+    ")\n",
+    "\n",
+    "indices_permutation = np.random.permutation(len(inputs))\n",
+    "shuffled_inputs = inputs[indices_permutation]\n",
+    "shuffled_targets = targets[indices_permutation]\n",
+    "\n",
+    "num_validation_samples = int(0.3 * len(inputs))\n",
+    "val_inputs = shuffled_inputs[:num_validation_samples]\n",
+    "val_targets = shuffled_targets[:num_validation_samples]\n",
+    "training_inputs = shuffled_inputs[num_validation_samples:]\n",
+    "training_targets = shuffled_targets[num_validation_samples:]\n",
+    "model.fit(\n",
+    "    training_inputs,\n",
+    "    training_targets,\n",
+    "    epochs=5,\n",
+    "    batch_size=16,\n",
+    "    validation_data=(val_inputs, val_targets),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Inference: using a model after training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions = model.predict(val_inputs, batch_size=128)\n",
+    "print(predictions[:10])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter03_introduction-to-ml-frameworks",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter04_getting-started-with-neural-networks.ipynb b/chapter04_getting-started-with-neural-networks.ipynb
index ba77a17d45..b91c31d941 100644
--- a/chapter04_getting-started-with-neural-networks.ipynb
+++ b/chapter04_getting-started-with-neural-networks.ipynb
@@ -6,25 +6,30 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "# Getting started with neural networks: Classification and regression"
+    "!pip install keras-nightly --upgrade -q"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "## Classifying movie reviews: A binary classification example"
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
   {
@@ -33,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The IMDB dataset"
+    "### Classifying movie reviews: a binary classification example"
    ]
   },
   {
@@ -42,7 +47,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Loading the IMDB dataset**"
+    "#### The IMDb dataset"
    ]
   },
   {
@@ -53,9 +58,11 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import imdb\n",
+    "from keras.datasets import imdb\n",
+    "\n",
     "(train_data, train_labels), (test_data, test_labels) = imdb.load_data(\n",
-    "    num_words=10000)"
+    "    num_words=10000\n",
+    ")"
    ]
   },
   {
@@ -91,15 +98,6 @@
     "max([max(sequence) for sequence in train_data])"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Decoding reviews back to text**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -109,19 +107,21 @@
    "outputs": [],
    "source": [
     "word_index = imdb.get_word_index()\n",
-    "reverse_word_index = dict(\n",
-    "    [(value, key) for (key, value) in word_index.items()])\n",
+    "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n",
     "decoded_review = \" \".join(\n",
-    "    [reverse_word_index.get(i - 3, \"?\") for i in train_data[0]])"
+    "    [reverse_word_index.get(i - 3, \"?\") for i in train_data[0]]\n",
+    ")"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "### Preparing the data"
+    "decoded_review[:100]"
    ]
   },
   {
@@ -130,7 +130,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Encoding the integer sequences via multi-hot encoding**"
+    "#### Preparing the data"
    ]
   },
   {
@@ -142,14 +142,15 @@
    "outputs": [],
    "source": [
     "import numpy as np\n",
-    "def vectorize_sequences(sequences, dimension=10000):\n",
-    "    results = np.zeros((len(sequences), dimension))\n",
+    "\n",
+    "def multi_hot_encode(sequences, num_classes):\n",
+    "    results = np.zeros((len(sequences), num_classes))\n",
     "    for i, sequence in enumerate(sequences):\n",
-    "        for j in sequence:\n",
-    "            results[i, j] = 1.\n",
+    "        results[i][sequence] = 1.0\n",
     "    return results\n",
-    "x_train = vectorize_sequences(train_data)\n",
-    "x_test = vectorize_sequences(test_data)"
+    "\n",
+    "x_train = multi_hot_encode(train_data, num_classes=10000)\n",
+    "x_test = multi_hot_encode(test_data, num_classes=10000)"
    ]
   },
   {
@@ -181,16 +182,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Building your model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Model definition**"
+    "#### Building your model"
    ]
   },
   {
@@ -201,23 +193,16 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow import keras\n",
-    "from tensorflow.keras import layers\n",
+    "import keras\n",
+    "from keras import layers\n",
     "\n",
-    "model = keras.Sequential([\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dense(1, activation=\"sigmoid\")\n",
-    "])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Compiling the model**"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dense(1, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")"
    ]
   },
   {
@@ -228,9 +213,11 @@
    },
    "outputs": [],
    "source": [
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"binary_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
    ]
   },
   {
@@ -239,16 +226,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Validating your approach"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Setting aside a validation set**"
+    "#### Validating your approach"
    ]
   },
   {
@@ -266,12 +244,20 @@
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Training your model**"
+    "history = model.fit(\n",
+    "    partial_x_train,\n",
+    "    partial_y_train,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_data=(x_val, y_val),\n",
+    ")"
    ]
   },
   {
@@ -282,11 +268,13 @@
    },
    "outputs": [],
    "source": [
-    "history = model.fit(partial_x_train,\n",
-    "                    partial_y_train,\n",
-    "                    epochs=20,\n",
-    "                    batch_size=512,\n",
-    "                    validation_data=(x_val, y_val))"
+    "history = model.fit(\n",
+    "    x_train,\n",
+    "    y_train,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_split=0.2,\n",
+    ")"
    ]
   },
   {
@@ -301,15 +289,6 @@
     "history_dict.keys()"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Plotting the training and validation loss**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -319,26 +298,19 @@
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
+    "\n",
     "history_dict = history.history\n",
     "loss_values = history_dict[\"loss\"]\n",
     "val_loss_values = history_dict[\"val_loss\"]\n",
     "epochs = range(1, len(loss_values) + 1)\n",
-    "plt.plot(epochs, loss_values, \"bo\", label=\"Training loss\")\n",
+    "plt.plot(epochs, loss_values, \"r--\", label=\"Training loss\")\n",
     "plt.plot(epochs, val_loss_values, \"b\", label=\"Validation loss\")\n",
-    "plt.title(\"Training and validation loss\")\n",
+    "plt.title(\"[IMDB] Training and validation loss\")\n",
     "plt.xlabel(\"Epochs\")\n",
+    "plt.xticks(epochs)\n",
     "plt.ylabel(\"Loss\")\n",
     "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Plotting the training and validation accuracy**"
+    "plt.savefig(\"imdb_loss_plot.png\", dpi=300)"
    ]
   },
   {
@@ -352,22 +324,14 @@
     "plt.clf()\n",
     "acc = history_dict[\"accuracy\"]\n",
     "val_acc = history_dict[\"val_accuracy\"]\n",
-    "plt.plot(epochs, acc, \"bo\", label=\"Training acc\")\n",
+    "plt.plot(epochs, acc, \"r--\", label=\"Training acc\")\n",
     "plt.plot(epochs, val_acc, \"b\", label=\"Validation acc\")\n",
-    "plt.title(\"Training and validation accuracy\")\n",
+    "plt.title(\"[IMDB] Training and validation accuracy\")\n",
     "plt.xlabel(\"Epochs\")\n",
+    "plt.xticks(epochs)\n",
     "plt.ylabel(\"Accuracy\")\n",
     "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Retraining a model from scratch**"
+    "plt.savefig(\"imdb_accuracy_plot.png\", dpi=300)"
    ]
   },
   {
@@ -378,14 +342,18 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dense(1, activation=\"sigmoid\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"binary_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dense(1, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
     "model.fit(x_train, y_train, epochs=4, batch_size=512)\n",
     "results = model.evaluate(x_test, y_test)"
    ]
@@ -407,7 +375,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Using a trained model to generate predictions on new data"
+    "#### Using a trained model to generate predictions on new data"
    ]
   },
   {
@@ -427,16 +395,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Further experiments"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Wrapping up"
+    "#### Further experiments"
    ]
   },
   {
@@ -445,7 +404,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Classifying newswires: A multiclass classification example"
+    "#### Wrapping up"
    ]
   },
   {
@@ -454,7 +413,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The Reuters dataset"
+    "### Classifying newswires: a multiclass classification example"
    ]
   },
   {
@@ -463,7 +422,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Loading the Reuters dataset**"
+    "#### The Reuters dataset"
    ]
   },
   {
@@ -474,9 +433,11 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import reuters\n",
+    "from keras.datasets import reuters\n",
+    "\n",
     "(train_data, train_labels), (test_data, test_labels) = reuters.load_data(\n",
-    "    num_words=10000)"
+    "    num_words=10000\n",
+    ")"
    ]
   },
   {
@@ -512,15 +473,6 @@
     "train_data[10]"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Decoding newswires back to text**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -531,8 +483,9 @@
    "source": [
     "word_index = reuters.get_word_index()\n",
     "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n",
-    "decoded_newswire = \" \".join([reverse_word_index.get(i - 3, \"?\") for i in\n",
-    "    train_data[0]])"
+    "decoded_newswire = \" \".join(\n",
+    "    [reverse_word_index.get(i - 3, \"?\") for i in train_data[10]]\n",
+    ")"
    ]
   },
   {
@@ -552,16 +505,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Preparing the data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Encoding the input data**"
+    "#### Preparing the data"
    ]
   },
   {
@@ -572,17 +516,8 @@
    },
    "outputs": [],
    "source": [
-    "x_train = vectorize_sequences(train_data)\n",
-    "x_test = vectorize_sequences(test_data)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Encoding the labels**"
+    "x_train = multi_hot_encode(train_data, num_classes=10000)\n",
+    "x_test = multi_hot_encode(test_data, num_classes=10000)"
    ]
   },
   {
@@ -593,13 +528,14 @@
    },
    "outputs": [],
    "source": [
-    "def to_one_hot(labels, dimension=46):\n",
-    "    results = np.zeros((len(labels), dimension))\n",
+    "def one_hot_encode(labels, num_classes=46):\n",
+    "    results = np.zeros((len(labels), num_classes))\n",
     "    for i, label in enumerate(labels):\n",
-    "        results[i, label] = 1.\n",
+    "        results[i, label] = 1.0\n",
     "    return results\n",
-    "y_train = to_one_hot(train_labels)\n",
-    "y_test = to_one_hot(test_labels)"
+    "\n",
+    "y_train = one_hot_encode(train_labels)\n",
+    "y_test = one_hot_encode(test_labels)"
    ]
   },
   {
@@ -610,7 +546,8 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.utils import to_categorical\n",
+    "from keras.utils import to_categorical\n",
+    "\n",
     "y_train = to_categorical(train_labels)\n",
     "y_test = to_categorical(test_labels)"
    ]
@@ -621,16 +558,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Building your model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Model definition**"
+    "#### Building your model"
    ]
   },
   {
@@ -641,20 +569,13 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(64, activation=\"relu\"),\n",
-    "    layers.Dense(64, activation=\"relu\"),\n",
-    "    layers.Dense(46, activation=\"softmax\")\n",
-    "])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Compiling the model**"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(46, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")"
    ]
   },
   {
@@ -665,9 +586,14 @@
    },
    "outputs": [],
    "source": [
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])"
+    "top_3_accuracy = keras.metrics.TopKCategoricalAccuracy(\n",
+    "    k=3, name=\"top_3_accuracy\"\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\", top_3_accuracy],\n",
+    ")"
    ]
   },
   {
@@ -676,16 +602,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Validating your approach"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Setting aside a validation set**"
+    "#### Validating your approach"
    ]
   },
   {
@@ -702,15 +619,6 @@
     "partial_y_train = y_train[1000:]"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Training the model**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -719,20 +627,13 @@
    },
    "outputs": [],
    "source": [
-    "history = model.fit(partial_x_train,\n",
-    "                    partial_y_train,\n",
-    "                    epochs=20,\n",
-    "                    batch_size=512,\n",
-    "                    validation_data=(x_val, y_val))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Plotting the training and validation loss**"
+    "history = model.fit(\n",
+    "    partial_x_train,\n",
+    "    partial_y_train,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_data=(x_val, y_val),\n",
+    ")"
    ]
   },
   {
@@ -746,22 +647,14 @@
     "loss = history.history[\"loss\"]\n",
     "val_loss = history.history[\"val_loss\"]\n",
     "epochs = range(1, len(loss) + 1)\n",
-    "plt.plot(epochs, loss, \"bo\", label=\"Training loss\")\n",
+    "plt.plot(epochs, loss, \"r--\", label=\"Training loss\")\n",
     "plt.plot(epochs, val_loss, \"b\", label=\"Validation loss\")\n",
     "plt.title(\"Training and validation loss\")\n",
     "plt.xlabel(\"Epochs\")\n",
+    "plt.xticks(epochs)\n",
     "plt.ylabel(\"Loss\")\n",
     "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Plotting the training and validation accuracy**"
+    "plt.savefig(\"reuters_loss_plot.png\", dpi=300)"
    ]
   },
   {
@@ -775,22 +668,35 @@
     "plt.clf()\n",
     "acc = history.history[\"accuracy\"]\n",
     "val_acc = history.history[\"val_accuracy\"]\n",
-    "plt.plot(epochs, acc, \"bo\", label=\"Training accuracy\")\n",
+    "plt.plot(epochs, acc, \"r--\", label=\"Training accuracy\")\n",
     "plt.plot(epochs, val_acc, \"b\", label=\"Validation accuracy\")\n",
     "plt.title(\"Training and validation accuracy\")\n",
     "plt.xlabel(\"Epochs\")\n",
+    "plt.xticks(epochs)\n",
     "plt.ylabel(\"Accuracy\")\n",
     "plt.legend()\n",
-    "plt.show()"
+    "plt.savefig(\"reuters_accuracy_plot.png\", dpi=300)"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Retraining a model from scratch**"
+    "plt.clf()\n",
+    "acc = history.history[\"top_3_accuracy\"]\n",
+    "val_acc = history.history[\"val_top_3_accuracy\"]\n",
+    "plt.plot(epochs, acc, \"r--\", label=\"Training top-3 accuracy\")\n",
+    "plt.plot(epochs, val_acc, \"b\", label=\"Validation top-3 accuracy\")\n",
+    "plt.title(\"Training and validation top-3 accuracy\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.xticks(epochs)\n",
+    "plt.ylabel(\"Top-3 accuracy\")\n",
+    "plt.legend()\n",
+    "plt.savefig(\"reuters_top_3_accuracy_plot.png\", dpi=300)"
    ]
   },
   {
@@ -801,18 +707,24 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "  layers.Dense(64, activation=\"relu\"),\n",
-    "  layers.Dense(64, activation=\"relu\"),\n",
-    "  layers.Dense(46, activation=\"softmax\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(x_train,\n",
-    "          y_train,\n",
-    "          epochs=9,\n",
-    "          batch_size=512)\n",
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(46, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    x_train,\n",
+    "    y_train,\n",
+    "    epochs=9,\n",
+    "    batch_size=512,\n",
+    ")\n",
     "results = model.evaluate(x_test, y_test)"
    ]
   },
@@ -838,7 +750,7 @@
     "import copy\n",
     "test_labels_copy = copy.copy(test_labels)\n",
     "np.random.shuffle(test_labels_copy)\n",
-    "hits_array = np.array(test_labels) == np.array(test_labels_copy)\n",
+    "hits_array = np.array(test_labels) == np.array(test_labels_copy)\n",
     "hits_array.mean()"
    ]
   },
@@ -848,7 +760,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Generating predictions on new data"
+    "#### Generating predictions on new data"
    ]
   },
   {
@@ -901,7 +813,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### A different way to handle the labels and the loss"
+    "#### A different way to handle the labels and the loss"
    ]
   },
   {
@@ -924,9 +836,11 @@
    },
    "outputs": [],
    "source": [
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
    ]
   },
   {
@@ -935,16 +849,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The importance of having sufficiently large intermediate layers"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**A model with an information bottleneck**"
+    "#### The importance of having sufficiently large intermediate layers"
    ]
   },
   {
@@ -955,28 +860,25 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(64, activation=\"relu\"),\n",
-    "    layers.Dense(4, activation=\"relu\"),\n",
-    "    layers.Dense(46, activation=\"softmax\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(partial_x_train,\n",
-    "          partial_y_train,\n",
-    "          epochs=20,\n",
-    "          batch_size=128,\n",
-    "          validation_data=(x_val, y_val))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Further experiments"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(4, activation=\"relu\"),\n",
+    "        layers.Dense(46, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    partial_x_train,\n",
+    "    partial_y_train,\n",
+    "    epochs=20,\n",
+    "    batch_size=128,\n",
+    "    validation_data=(x_val, y_val),\n",
+    ")"
    ]
   },
   {
@@ -985,7 +887,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Wrapping up"
+    "#### Further experiments"
    ]
   },
   {
@@ -994,7 +896,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Predicting house prices: A regression example"
+    "#### Wrapping up"
    ]
   },
   {
@@ -1003,7 +905,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The Boston Housing Price dataset"
+    "### Predicting house prices: a regression example"
    ]
   },
   {
@@ -1012,7 +914,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Loading the Boston housing dataset**"
+    "#### The California Housing Price dataset"
    ]
   },
   {
@@ -1023,8 +925,11 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import boston_housing\n",
-    "(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()"
+    "from keras.datasets import california_housing\n",
+    "\n",
+    "(train_data, train_targets), (test_data, test_targets) = (\n",
+    "    california_housing.load_data(version=\"small\")\n",
+    ")"
    ]
   },
   {
@@ -1066,16 +971,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Preparing the data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Normalizing the data**"
+    "#### Preparing the data"
    ]
   },
   {
@@ -1087,20 +983,21 @@
    "outputs": [],
    "source": [
     "mean = train_data.mean(axis=0)\n",
-    "train_data -= mean\n",
     "std = train_data.std(axis=0)\n",
-    "train_data /= std\n",
-    "test_data -= mean\n",
-    "test_data /= std"
+    "x_train = (train_data - mean) / std\n",
+    "x_test = (test_data - mean) / std"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "### Building your model"
+    "y_train = train_targets / 100000\n",
+    "y_test = test_targets / 100000"
    ]
   },
   {
@@ -1109,7 +1006,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Model definition**"
+    "#### Building your model"
    ]
   },
   {
@@ -1120,13 +1017,19 @@
    },
    "outputs": [],
    "source": [
-    "def build_model():\n",
-    "    model = keras.Sequential([\n",
-    "        layers.Dense(64, activation=\"relu\"),\n",
-    "        layers.Dense(64, activation=\"relu\"),\n",
-    "        layers.Dense(1)\n",
-    "    ])\n",
-    "    model.compile(optimizer=\"rmsprop\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "def get_model():\n",
+    "    model = keras.Sequential(\n",
+    "        [\n",
+    "            layers.Dense(64, activation=\"relu\"),\n",
+    "            layers.Dense(64, activation=\"relu\"),\n",
+    "            layers.Dense(1),\n",
+    "        ]\n",
+    "    )\n",
+    "    model.compile(\n",
+    "        optimizer=\"adam\",\n",
+    "        loss=\"mean_squared_error\",\n",
+    "        metrics=[\"mean_absolute_error\"],\n",
+    "    )\n",
     "    return model"
    ]
   },
@@ -1136,16 +1039,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Validating your approach using K-fold validation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**K-fold validation**"
+    "#### Validating your approach using K-fold validation"
    ]
   },
   {
@@ -1157,25 +1051,31 @@
    "outputs": [],
    "source": [
     "k = 4\n",
-    "num_val_samples = len(train_data) // k\n",
-    "num_epochs = 100\n",
+    "num_val_samples = len(x_train) // k\n",
+    "num_epochs = 50\n",
     "all_scores = []\n",
     "for i in range(k):\n",
-    "    print(f\"Processing fold #{i}\")\n",
-    "    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]\n",
-    "    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]\n",
-    "    partial_train_data = np.concatenate(\n",
-    "        [train_data[:i * num_val_samples],\n",
-    "         train_data[(i + 1) * num_val_samples:]],\n",
-    "        axis=0)\n",
-    "    partial_train_targets = np.concatenate(\n",
-    "        [train_targets[:i * num_val_samples],\n",
-    "         train_targets[(i + 1) * num_val_samples:]],\n",
-    "        axis=0)\n",
-    "    model = build_model()\n",
-    "    model.fit(partial_train_data, partial_train_targets,\n",
-    "              epochs=num_epochs, batch_size=16, verbose=0)\n",
-    "    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)\n",
+    "    print(f\"Processing fold #{i + 1}\")\n",
+    "    fold_x_val = x_train[i * num_val_samples : (i + 1) * num_val_samples]\n",
+    "    fold_y_val = y_train[i * num_val_samples : (i + 1) * num_val_samples]\n",
+    "    fold_x_train = np.concatenate(\n",
+    "        [x_train[: i * num_val_samples], x_train[(i + 1) * num_val_samples :]],\n",
+    "        axis=0,\n",
+    "    )\n",
+    "    fold_y_train = np.concatenate(\n",
+    "        [y_train[: i * num_val_samples], y_train[(i + 1) * num_val_samples :]],\n",
+    "        axis=0,\n",
+    "    )\n",
+    "    model = get_model()\n",
+    "    model.fit(\n",
+    "        fold_x_train,\n",
+    "        fold_y_train,\n",
+    "        epochs=num_epochs,\n",
+    "        batch_size=16,\n",
+    "        verbose=0,\n",
+    "    )\n",
+    "    scores = model.evaluate(fold_x_val, fold_y_val, verbose=0)\n",
+    "    val_loss, val_mae = scores\n",
     "    all_scores.append(val_mae)"
    ]
   },
@@ -1187,7 +1087,7 @@
    },
    "outputs": [],
    "source": [
-    "all_scores"
+    "[round(value, 3) for value in all_scores]"
    ]
   },
   {
@@ -1198,16 +1098,7 @@
    },
    "outputs": [],
    "source": [
-    "np.mean(all_scores)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Saving the validation logs at each fold**"
+    "round(np.mean(all_scores), 3)"
    ]
   },
   {
@@ -1218,37 +1109,35 @@
    },
    "outputs": [],
    "source": [
-    "num_epochs = 500\n",
+    "k = 4\n",
+    "num_val_samples = len(x_train) // k\n",
+    "num_epochs = 200\n",
     "all_mae_histories = []\n",
     "for i in range(k):\n",
-    "    print(f\"Processing fold #{i}\")\n",
-    "    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]\n",
-    "    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]\n",
-    "    partial_train_data = np.concatenate(\n",
-    "        [train_data[:i * num_val_samples],\n",
-    "         train_data[(i + 1) * num_val_samples:]],\n",
-    "        axis=0)\n",
-    "    partial_train_targets = np.concatenate(\n",
-    "        [train_targets[:i * num_val_samples],\n",
-    "         train_targets[(i + 1) * num_val_samples:]],\n",
-    "        axis=0)\n",
-    "    model = build_model()\n",
-    "    history = model.fit(partial_train_data, partial_train_targets,\n",
-    "                        validation_data=(val_data, val_targets),\n",
-    "                        epochs=num_epochs, batch_size=16, verbose=0)\n",
-    "    mae_history = history.history[\"val_mae\"]\n",
+    "    print(f\"Processing fold #{i + 1}\")\n",
+    "    fold_x_val = x_train[i * num_val_samples : (i + 1) * num_val_samples]\n",
+    "    fold_y_val = y_train[i * num_val_samples : (i + 1) * num_val_samples]\n",
+    "    fold_x_train = np.concatenate(\n",
+    "        [x_train[: i * num_val_samples], x_train[(i + 1) * num_val_samples :]],\n",
+    "        axis=0,\n",
+    "    )\n",
+    "    fold_y_train = np.concatenate(\n",
+    "        [y_train[: i * num_val_samples], y_train[(i + 1) * num_val_samples :]],\n",
+    "        axis=0,\n",
+    "    )\n",
+    "    model = get_model()\n",
+    "    history = model.fit(\n",
+    "        fold_x_train,\n",
+    "        fold_y_train,\n",
+    "        validation_data=(fold_x_val, fold_y_val),\n",
+    "        epochs=num_epochs,\n",
+    "        batch_size=16,\n",
+    "        verbose=0,\n",
+    "    )\n",
+    "    mae_history = history.history[\"val_mean_absolute_error\"]\n",
     "    all_mae_histories.append(mae_history)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Building the history of successive mean K-fold validation scores**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1258,16 +1147,8 @@
    "outputs": [],
    "source": [
     "average_mae_history = [\n",
-    "    np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Plotting validation scores**"
+    "    np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)\n",
+    "]"
    ]
   },
   {
@@ -1278,19 +1159,11 @@
    },
    "outputs": [],
    "source": [
-    "plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)\n",
+    "epochs = range(1, len(average_mae_history) + 1)\n",
+    "plt.plot(epochs, average_mae_history)\n",
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Validation MAE\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Plotting validation scores, excluding the first 10 data points**"
+    "plt.savefig(\"california_housing_validation_mae_plot.png\", dpi=300)"
    ]
   },
   {
@@ -1302,19 +1175,11 @@
    "outputs": [],
    "source": [
     "truncated_mae_history = average_mae_history[10:]\n",
-    "plt.plot(range(1, len(truncated_mae_history) + 1), truncated_mae_history)\n",
+    "epochs = range(11, len(truncated_mae_history) + 11)\n",
+    "plt.plot(epochs, truncated_mae_history)\n",
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Validation MAE\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Training the final model**"
+    "plt.savefig(\"california_housing_validation_mae_plot_zoomed.png\", dpi=300)"
    ]
   },
   {
@@ -1325,10 +1190,11 @@
    },
    "outputs": [],
    "source": [
-    "model = build_model()\n",
-    "model.fit(train_data, train_targets,\n",
-    "          epochs=130, batch_size=16, verbose=0)\n",
-    "test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)"
+    "model = get_model()\n",
+    "model.fit(x_train, y_train, epochs=130, batch_size=16, verbose=0)\n",
+    "test_mean_squared_error, test_mean_absolute_error = model.evaluate(\n",
+    "    x_test, y_test\n",
+    ")"
    ]
   },
   {
@@ -1339,7 +1205,7 @@
    },
    "outputs": [],
    "source": [
-    "test_mae_score"
+    "round(test_mean_absolute_error, 3)"
    ]
   },
   {
@@ -1348,7 +1214,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Generating predictions on new data"
+    "#### Generating predictions on new data"
    ]
   },
   {
@@ -1369,7 +1235,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Wrapping up"
+    "#### Wrapping up"
    ]
   },
   {
@@ -1378,14 +1244,15 @@
     "colab_type": "text"
    },
    "source": [
-    "## Summary"
+    "### Chapter summary"
    ]
   }
  ],
  "metadata": {
+  "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter04_getting-started-with-neural-networks.i",
+   "name": "chapter04_getting-started-with-neural-networks",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
@@ -1405,7 +1272,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.0"
+   "version": "3.10.0"
   }
  },
  "nbformat": 4,
diff --git a/chapter05_fundamentals-of-ml.ipynb b/chapter05_fundamentals-of-ml.ipynb
index dd61f4ead8..74114d9dda 100644
--- a/chapter05_fundamentals-of-ml.ipynb
+++ b/chapter05_fundamentals-of-ml.ipynb
@@ -6,25 +6,30 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "# Fundamentals of machine learning"
+    "!pip install keras-nightly --upgrade -q"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "## Generalization: The goal of machine learning"
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
   {
@@ -33,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Underfitting and overfitting"
+    "### Generalization: the goal of machine learning"
    ]
   },
   {
@@ -42,7 +47,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Noisy training data"
+    "#### Underfitting and overfitting"
    ]
   },
   {
@@ -51,7 +56,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Ambiguous features"
+    "##### Noisy training data"
    ]
   },
   {
@@ -60,7 +65,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Rare features and spurious correlations"
+    "##### Ambiguous features"
    ]
   },
   {
@@ -69,7 +74,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Adding white-noise channels or all-zeros channels to MNIST**"
+    "##### Rare features and spurious correlations"
    ]
   },
   {
@@ -80,7 +85,7 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import mnist\n",
+    "from keras.datasets import mnist\n",
     "import numpy as np\n",
     "\n",
     "(train_images, train_labels), _ = mnist.load_data()\n",
@@ -88,19 +93,12 @@
     "train_images = train_images.astype(\"float32\") / 255\n",
     "\n",
     "train_images_with_noise_channels = np.concatenate(\n",
-    "    [train_images, np.random.random((len(train_images), 784))], axis=1)\n",
+    "    [train_images, np.random.random((len(train_images), 784))], axis=1\n",
+    ")\n",
     "\n",
     "train_images_with_zeros_channels = np.concatenate(\n",
-    "    [train_images, np.zeros((len(train_images), 784))], axis=1)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Training the same model on MNIST data with noise channels or all-zero channels**"
+    "    [train_images, np.zeros((len(train_images), 784))], axis=1\n",
+    ")"
    ]
   },
   {
@@ -111,41 +109,40 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow import keras\n",
-    "from tensorflow.keras import layers\n",
+    "import keras\n",
+    "from keras import layers\n",
     "\n",
     "def get_model():\n",
-    "    model = keras.Sequential([\n",
-    "        layers.Dense(512, activation=\"relu\"),\n",
-    "        layers.Dense(10, activation=\"softmax\")\n",
-    "    ])\n",
-    "    model.compile(optimizer=\"rmsprop\",\n",
-    "                  loss=\"sparse_categorical_crossentropy\",\n",
-    "                  metrics=[\"accuracy\"])\n",
+    "    model = keras.Sequential(\n",
+    "        [\n",
+    "            layers.Dense(512, activation=\"relu\"),\n",
+    "            layers.Dense(10, activation=\"softmax\"),\n",
+    "        ]\n",
+    "    )\n",
+    "    model.compile(\n",
+    "        optimizer=\"adam\",\n",
+    "        loss=\"sparse_categorical_crossentropy\",\n",
+    "        metrics=[\"accuracy\"],\n",
+    "    )\n",
     "    return model\n",
     "\n",
     "model = get_model()\n",
     "history_noise = model.fit(\n",
-    "    train_images_with_noise_channels, train_labels,\n",
+    "    train_images_with_noise_channels,\n",
+    "    train_labels,\n",
     "    epochs=10,\n",
     "    batch_size=128,\n",
-    "    validation_split=0.2)\n",
+    "    validation_split=0.2,\n",
+    ")\n",
     "\n",
     "model = get_model()\n",
     "history_zeros = model.fit(\n",
-    "    train_images_with_zeros_channels, train_labels,\n",
+    "    train_images_with_zeros_channels,\n",
+    "    train_labels,\n",
     "    epochs=10,\n",
     "    batch_size=128,\n",
-    "    validation_split=0.2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Plotting a validation accuracy comparison**"
+    "    validation_split=0.2,\n",
+    ")"
    ]
   },
   {
@@ -157,17 +154,28 @@
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
+    "\n",
     "val_acc_noise = history_noise.history[\"val_accuracy\"]\n",
     "val_acc_zeros = history_zeros.history[\"val_accuracy\"]\n",
     "epochs = range(1, 11)\n",
-    "plt.plot(epochs, val_acc_noise, \"b-\",\n",
-    "         label=\"Validation accuracy with noise channels\")\n",
-    "plt.plot(epochs, val_acc_zeros, \"b--\",\n",
-    "         label=\"Validation accuracy with zeros channels\")\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    val_acc_noise,\n",
+    "    \"b-\",\n",
+    "    label=\"Validation accuracy with noise channels\",\n",
+    ")\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    val_acc_zeros,\n",
+    "    \"r--\",\n",
+    "    label=\"Validation accuracy with zeros channels\",\n",
+    ")\n",
     "plt.title(\"Effect of noise channels on validation accuracy\")\n",
     "plt.xlabel(\"Epochs\")\n",
+    "plt.xticks(epochs)\n",
     "plt.ylabel(\"Accuracy\")\n",
-    "plt.legend()"
+    "plt.legend()\n",
+    "plt.savefig(\"mnist_with_added_noise_channels_or_zeros_channels.png\", dpi=300)"
    ]
   },
   {
@@ -176,16 +184,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The nature of generalization in deep learning"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Fitting a MNIST model with randomly shuffled labels**"
+    "#### The nature of generalization in deep learning"
    ]
   },
   {
@@ -203,26 +202,24 @@
     "random_train_labels = train_labels[:]\n",
     "np.random.shuffle(random_train_labels)\n",
     "\n",
-    "model = keras.Sequential([\n",
-    "    layers.Dense(512, activation=\"relu\"),\n",
-    "    layers.Dense(10, activation=\"softmax\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(train_images, random_train_labels,\n",
-    "          epochs=100,\n",
-    "          batch_size=128,\n",
-    "          validation_split=0.2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### The manifold hypothesis"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_images,\n",
+    "    random_train_labels,\n",
+    "    epochs=100,\n",
+    "    batch_size=128,\n",
+    "    validation_split=0.2,\n",
+    ")"
    ]
   },
   {
@@ -231,7 +228,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Interpolation as a source of generalization"
+    "##### The manifold hypothesis"
    ]
   },
   {
@@ -240,7 +237,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Why deep learning works"
+    "##### Interpolation as a source of generalization"
    ]
   },
   {
@@ -249,7 +246,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Training data is paramount"
+    "##### Why deep learning works"
    ]
   },
   {
@@ -258,7 +255,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Evaluating machine-learning models"
+    "##### Training data is paramount"
    ]
   },
   {
@@ -267,7 +264,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Training, validation, and test sets"
+    "### Evaluating machine-learning models"
    ]
   },
   {
@@ -276,7 +273,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Simple hold-out validation"
+    "#### Training, validation, and test sets"
    ]
   },
   {
@@ -285,7 +282,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### K-fold validation"
+    "##### Simple hold-out validation"
    ]
   },
   {
@@ -294,7 +291,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Iterated K-fold validation with shuffling"
+    "##### K-fold validation"
    ]
   },
   {
@@ -303,7 +300,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Beating a common-sense baseline"
+    "##### Iterated K-fold validation with shuffling"
    ]
   },
   {
@@ -312,7 +309,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Things to keep in mind about model evaluation"
+    "#### Beating a common-sense baseline"
    ]
   },
   {
@@ -321,7 +318,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Improving model fit"
+    "#### Things to keep in mind about model evaluation"
    ]
   },
   {
@@ -330,7 +327,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Tuning key gradient descent parameters"
+    "### Improving model fit"
    ]
   },
   {
@@ -339,7 +336,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Training a MNIST model with an incorrectly high learning rate**"
+    "#### Tuning key gradient descent parameters"
    ]
   },
   {
@@ -354,26 +351,20 @@
     "train_images = train_images.reshape((60000, 28 * 28))\n",
     "train_images = train_images.astype(\"float32\") / 255\n",
     "\n",
-    "model = keras.Sequential([\n",
-    "    layers.Dense(512, activation=\"relu\"),\n",
-    "    layers.Dense(10, activation=\"softmax\")\n",
-    "])\n",
-    "model.compile(optimizer=keras.optimizers.RMSprop(1.),\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(train_images, train_labels,\n",
-    "          epochs=10,\n",
-    "          batch_size=128,\n",
-    "          validation_split=0.2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**The same model with a more appropriate learning rate**"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=keras.optimizers.RMSprop(learning_rate=1.0),\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2\n",
+    ")"
    ]
   },
   {
@@ -384,17 +375,20 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(512, activation=\"relu\"),\n",
-    "    layers.Dense(10, activation=\"softmax\")\n",
-    "])\n",
-    "model.compile(optimizer=keras.optimizers.RMSprop(1e-2),\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(train_images, train_labels,\n",
-    "          epochs=10,\n",
-    "          batch_size=128,\n",
-    "          validation_split=0.2)"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=keras.optimizers.RMSprop(learning_rate=1e-2),\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2\n",
+    ")"
    ]
   },
   {
@@ -403,7 +397,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Leveraging better architecture priors"
+    "#### Leveraging better architecture priors"
    ]
   },
   {
@@ -412,16 +406,26 @@
     "colab_type": "text"
    },
    "source": [
-    "### Increasing model capacity"
+    "#### Increasing model capacity"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**A simple logistic regression on MNIST**"
+    "model = keras.Sequential([layers.Dense(10, activation=\"softmax\")])\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "history_small_model = model.fit(\n",
+    "    train_images, train_labels, epochs=20, batch_size=128, validation_split=0.2\n",
+    ")"
    ]
   },
   {
@@ -432,15 +436,45 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([layers.Dense(10, activation=\"softmax\")])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "history_small_model = model.fit(\n",
-    "    train_images, train_labels,\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "val_loss = history_small_model.history[\"val_loss\"]\n",
+    "epochs = range(1, 21)\n",
+    "plt.plot(epochs, val_loss, \"b-\", label=\"Validation loss\")\n",
+    "plt.title(\"Validation loss for a model with insufficient capacity\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.legend()\n",
+    "plt.savefig(\"effect_of_insufficient_model_capacity_on_val_loss.png\", dpi=300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(128, activation=\"relu\"),\n",
+    "        layers.Dense(128, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "history_large_model = model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
     "    epochs=20,\n",
     "    batch_size=128,\n",
-    "    validation_split=0.2)"
+    "    validation_split=0.2,\n",
+    ")"
    ]
   },
   {
@@ -451,15 +485,14 @@
    },
    "outputs": [],
    "source": [
-    "import matplotlib.pyplot as plt\n",
-    "val_loss = history_small_model.history[\"val_loss\"]\n",
+    "val_loss = history_large_model.history[\"val_loss\"]\n",
     "epochs = range(1, 21)\n",
-    "plt.plot(epochs, val_loss, \"b--\",\n",
-    "         label=\"Validation loss\")\n",
-    "plt.title(\"Effect of insufficient model capacity on validation loss\")\n",
+    "plt.plot(epochs, val_loss, \"b-\", label=\"Validation loss\")\n",
+    "plt.title(\"Validation loss for a model with appropriate capacity\")\n",
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Loss\")\n",
-    "plt.legend()"
+    "plt.legend()\n",
+    "plt.savefig(\"effect_of_correct_model_capacity_on_val_loss.png\", dpi=300)"
    ]
   },
   {
@@ -470,28 +503,43 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(96, activation=\"relu\"),\n",
-    "    layers.Dense(96, activation=\"relu\"),\n",
-    "    layers.Dense(10, activation=\"softmax\"),\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "history_large_model = model.fit(\n",
-    "    train_images, train_labels,\n",
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(2048, activation=\"relu\"),\n",
+    "        layers.Dense(2048, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "history_very_large_model = model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
     "    epochs=20,\n",
     "    batch_size=128,\n",
-    "    validation_split=0.2)"
+    "    validation_split=0.2,\n",
+    ")"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "## Improving generalization"
+    "val_loss = history_very_large_model.history[\"val_loss\"]\n",
+    "epochs = range(1, 21)\n",
+    "plt.plot(epochs, val_loss, \"b-\", label=\"Validation loss\")\n",
+    "plt.title(\"Validation loss for a model with too much capacity\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.legend()\n",
+    "plt.savefig(\"effect_of_excessive_model_capacity_on_val_loss.png\", dpi=300)"
    ]
   },
   {
@@ -500,7 +548,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Dataset curation"
+    "### Improving generalization"
    ]
   },
   {
@@ -509,7 +557,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Feature engineering"
+    "#### Dataset curation"
    ]
   },
   {
@@ -518,7 +566,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Using early stopping"
+    "#### Feature engineering"
    ]
   },
   {
@@ -527,7 +575,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Regularizing your model"
+    "#### Using early stopping"
    ]
   },
   {
@@ -536,7 +584,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Reducing the network's size"
+    "#### Regularizing your model"
    ]
   },
   {
@@ -545,7 +593,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Original model**"
+    "##### Reducing the network's size"
    ]
   },
   {
@@ -556,35 +604,37 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import imdb\n",
+    "from keras.datasets import imdb\n",
+    "\n",
     "(train_data, train_labels), _ = imdb.load_data(num_words=10000)\n",
     "\n",
     "def vectorize_sequences(sequences, dimension=10000):\n",
     "    results = np.zeros((len(sequences), dimension))\n",
     "    for i, sequence in enumerate(sequences):\n",
-    "        results[i, sequence] = 1.\n",
+    "        results[i, sequence] = 1.0\n",
     "    return results\n",
+    "\n",
     "train_data = vectorize_sequences(train_data)\n",
     "\n",
-    "model = keras.Sequential([\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dense(1, activation=\"sigmoid\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"binary_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "history_original = model.fit(train_data, train_labels,\n",
-    "                             epochs=20, batch_size=512, validation_split=0.4)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Version of the model with lower capacity**"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dense(1, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "history_original = model.fit(\n",
+    "    train_data,\n",
+    "    train_labels,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_split=0.4,\n",
+    ")"
    ]
   },
   {
@@ -595,26 +645,56 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(4, activation=\"relu\"),\n",
-    "    layers.Dense(4, activation=\"relu\"),\n",
-    "    layers.Dense(1, activation=\"sigmoid\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"binary_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(4, activation=\"relu\"),\n",
+    "        layers.Dense(4, activation=\"relu\"),\n",
+    "        layers.Dense(1, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
     "history_smaller_model = model.fit(\n",
-    "    train_data, train_labels,\n",
-    "    epochs=20, batch_size=512, validation_split=0.4)"
+    "    train_data,\n",
+    "    train_labels,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_split=0.4,\n",
+    ")"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Version of the model with higher capacity**"
+    "original_val_loss = history_original.history[\"val_loss\"]\n",
+    "smaller_model_val_loss = history_smaller_model.history[\"val_loss\"]\n",
+    "epochs = range(1, 21)\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    original_val_loss,\n",
+    "    \"r--\",\n",
+    "    label=\"Validation loss of original model\",\n",
+    ")\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    smaller_model_val_loss,\n",
+    "    \"b-\",\n",
+    "    label=\"Validation loss of smaller model\",\n",
+    ")\n",
+    "plt.title(\"Original model vs. smaller model (IMDB review classification)\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.xticks(epochs)\n",
+    "plt.legend()\n",
+    "plt.savefig(\"original_model_vs_smaller_model_imdb.png\", dpi=300)"
    ]
   },
   {
@@ -625,26 +705,56 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(512, activation=\"relu\"),\n",
-    "    layers.Dense(512, activation=\"relu\"),\n",
-    "    layers.Dense(1, activation=\"sigmoid\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"binary_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(1, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
     "history_larger_model = model.fit(\n",
-    "    train_data, train_labels,\n",
-    "    epochs=20, batch_size=512, validation_split=0.4)"
+    "    train_data,\n",
+    "    train_labels,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_split=0.4,\n",
+    ")"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "#### Adding weight regularization"
+    "original_val_loss = history_original.history[\"val_loss\"]\n",
+    "larger_model_val_loss = history_larger_model.history[\"val_loss\"]\n",
+    "epochs = range(1, 21)\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    original_val_loss,\n",
+    "    \"r--\",\n",
+    "    label=\"Validation loss of original model\",\n",
+    ")\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    larger_model_val_loss,\n",
+    "    \"b-\",\n",
+    "    label=\"Validation loss of larger model\",\n",
+    ")\n",
+    "plt.title(\"Original model vs. larger model (IMDB review classification)\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.xticks(epochs)\n",
+    "plt.legend()\n",
+    "plt.savefig(\"original_model_vs_larger_model_imdb.png\", dpi=300)"
    ]
   },
   {
@@ -653,7 +763,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Adding L2 weight regularization to the model**"
+    "##### Adding weight regularization"
    ]
   },
   {
@@ -664,31 +774,60 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras import regularizers\n",
-    "model = keras.Sequential([\n",
-    "    layers.Dense(16,\n",
-    "                 kernel_regularizer=regularizers.l2(0.002),\n",
-    "                 activation=\"relu\"),\n",
-    "    layers.Dense(16,\n",
-    "                 kernel_regularizer=regularizers.l2(0.002),\n",
-    "                 activation=\"relu\"),\n",
-    "    layers.Dense(1, activation=\"sigmoid\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"binary_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
+    "from keras.regularizers import l2\n",
+    "\n",
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(16, kernel_regularizer=l2(0.002), activation=\"relu\"),\n",
+    "        layers.Dense(16, kernel_regularizer=l2(0.002), activation=\"relu\"),\n",
+    "        layers.Dense(1, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
     "history_l2_reg = model.fit(\n",
-    "    train_data, train_labels,\n",
-    "    epochs=20, batch_size=512, validation_split=0.4)"
+    "    train_data,\n",
+    "    train_labels,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_split=0.4,\n",
+    ")"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Different weight regularizers available in Keras**"
+    "original_val_loss = history_original.history[\"val_loss\"]\n",
+    "l2_val_loss = history_l2_reg.history[\"val_loss\"]\n",
+    "epochs = range(1, 21)\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    original_val_loss,\n",
+    "    \"r--\",\n",
+    "    label=\"Validation loss of original model\",\n",
+    ")\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    l2_val_loss,\n",
+    "    \"b-\",\n",
+    "    label=\"Validation loss of L2-regularized model\",\n",
+    ")\n",
+    "plt.title(\n",
+    "    \"Original model vs. L2-regularized model (IMDB review classification)\"\n",
+    ")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.xticks(epochs)\n",
+    "plt.legend()\n",
+    "plt.savefig(\"original_model_vs_l2_regularized_model_imdb.png\", dpi=300)"
    ]
   },
   {
@@ -699,7 +838,8 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras import regularizers\n",
+    "from keras import regularizers\n",
+    "\n",
     "regularizers.l1(0.001)\n",
     "regularizers.l1_l2(l1=0.001, l2=0.001)"
    ]
@@ -710,16 +850,38 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Adding dropout"
+    "##### Adding dropout"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Adding dropout to the IMDB model**"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dropout(0.5),\n",
+    "        layers.Dense(16, activation=\"relu\"),\n",
+    "        layers.Dropout(0.5),\n",
+    "        layers.Dense(1, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")\n",
+    "model.compile(\n",
+    "    optimizer=\"rmsprop\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "history_dropout = model.fit(\n",
+    "    train_data,\n",
+    "    train_labels,\n",
+    "    epochs=20,\n",
+    "    batch_size=512,\n",
+    "    validation_split=0.4,\n",
+    ")"
    ]
   },
   {
@@ -730,19 +892,29 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.Sequential([\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dropout(0.5),\n",
-    "    layers.Dense(16, activation=\"relu\"),\n",
-    "    layers.Dropout(0.5),\n",
-    "    layers.Dense(1, activation=\"sigmoid\")\n",
-    "])\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"binary_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "history_dropout = model.fit(\n",
-    "    train_data, train_labels,\n",
-    "    epochs=20, batch_size=512, validation_split=0.4)"
+    "original_val_loss = history_original.history[\"val_loss\"]\n",
+    "dropout_val_loss = history_dropout.history[\"val_loss\"]\n",
+    "epochs = range(1, 21)\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    original_val_loss,\n",
+    "    \"r--\",\n",
+    "    label=\"Validation loss of original model\",\n",
+    ")\n",
+    "plt.plot(\n",
+    "    epochs,\n",
+    "    dropout_val_loss,\n",
+    "    \"b-\",\n",
+    "    label=\"Validation loss of dropout-regularized model\",\n",
+    ")\n",
+    "plt.title(\n",
+    "    \"Original model vs. dropout-regularized model (IMDB review classification)\"\n",
+    ")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.xticks(epochs)\n",
+    "plt.legend()\n",
+    "plt.savefig(\"original_model_vs_dropout_regularized_model_imdb.png\", dpi=300)"
    ]
   },
   {
@@ -751,14 +923,15 @@
     "colab_type": "text"
    },
    "source": [
-    "## Summary"
+    "### Chapter summary"
    ]
   }
  ],
  "metadata": {
+  "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter05_fundamentals-of-ml.i",
+   "name": "chapter05_fundamentals-of-ml",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
@@ -778,7 +951,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.0"
+   "version": "3.10.0"
   }
  },
  "nbformat": 4,
diff --git a/chapter07_working-with-keras.ipynb b/chapter07_working-with-keras.ipynb
index 632d7c7e99..dd9f0393c1 100644
--- a/chapter07_working-with-keras.ipynb
+++ b/chapter07_working-with-keras.ipynb
@@ -6,25 +6,30 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "# Working with Keras: A deep dive"
+    "!pip install keras-nightly --upgrade -q"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "## A spectrum of workflows"
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
   {
@@ -33,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Different ways to build Keras models"
+    "### A spectrum of workflows"
    ]
   },
   {
@@ -42,7 +47,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The Sequential model"
+    "### Different ways to build Keras models"
    ]
   },
   {
@@ -51,7 +56,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**The `Sequential` class**"
+    "#### The Sequential model"
    ]
   },
   {
@@ -62,22 +67,15 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow import keras\n",
-    "from tensorflow.keras import layers\n",
+    "import keras\n",
+    "from keras import layers\n",
     "\n",
-    "model = keras.Sequential([\n",
-    "    layers.Dense(64, activation=\"relu\"),\n",
-    "    layers.Dense(10, activation=\"softmax\")\n",
-    "])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Incrementally building a Sequential model**"
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\"),\n",
+    "    ]\n",
+    ")"
    ]
   },
   {
@@ -94,12 +92,14 @@
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Calling a model for the first time to build it**"
+    "model.weights"
    ]
   },
   {
@@ -114,15 +114,6 @@
     "model.weights"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**The summary method**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -131,16 +122,7 @@
    },
    "outputs": [],
    "source": [
-    "model.summary()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Naming models and layers with the `name` argument**"
+    "model.summary(line_length=80)"
    ]
   },
   {
@@ -155,16 +137,7 @@
     "model.add(layers.Dense(64, activation=\"relu\", name=\"my_first_layer\"))\n",
     "model.add(layers.Dense(10, activation=\"softmax\", name=\"my_last_layer\"))\n",
     "model.build((None, 3))\n",
-    "model.summary()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Specifying the input shape of your model in advance**"
+    "model.summary(line_length=80)"
    ]
   },
   {
@@ -188,7 +161,7 @@
    },
    "outputs": [],
    "source": [
-    "model.summary()"
+    "model.summary(line_length=80)"
    ]
   },
   {
@@ -200,16 +173,7 @@
    "outputs": [],
    "source": [
     "model.add(layers.Dense(10, activation=\"softmax\"))\n",
-    "model.summary()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### The Functional API"
+    "model.summary(line_length=80)"
    ]
   },
   {
@@ -218,7 +182,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### A simple example"
+    "#### The Functional API"
    ]
   },
   {
@@ -227,7 +191,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**A simple Functional model with two `Dense` layers**"
+    "##### A simple example"
    ]
   },
   {
@@ -241,7 +205,7 @@
     "inputs = keras.Input(shape=(3,), name=\"my_input\")\n",
     "features = layers.Dense(64, activation=\"relu\")(inputs)\n",
     "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
-    "model = keras.Model(inputs=inputs, outputs=outputs)"
+    "model = keras.Model(inputs=inputs, outputs=outputs, name=\"my_functional_model\")"
    ]
   },
   {
@@ -308,7 +272,7 @@
    "outputs": [],
    "source": [
     "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
-    "model = keras.Model(inputs=inputs, outputs=outputs)"
+    "model = keras.Model(inputs=inputs, outputs=outputs, name=\"my_functional_model\")"
    ]
   },
   {
@@ -319,16 +283,7 @@
    },
    "outputs": [],
    "source": [
-    "model.summary()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### Multi-input, multi-output models"
+    "model.summary(line_length=80)"
    ]
   },
   {
@@ -337,7 +292,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**A multi-input, multi-output Functional model**"
+    "##### Multi-input, multi-output models"
    ]
   },
   {
@@ -357,22 +312,17 @@
     "tags = keras.Input(shape=(num_tags,), name=\"tags\")\n",
     "\n",
     "features = layers.Concatenate()([title, text_body, tags])\n",
-    "features = layers.Dense(64, activation=\"relu\")(features)\n",
+    "features = layers.Dense(64, activation=\"relu\", name=\"dense_features\")(features)\n",
     "\n",
     "priority = layers.Dense(1, activation=\"sigmoid\", name=\"priority\")(features)\n",
     "department = layers.Dense(\n",
-    "    num_departments, activation=\"softmax\", name=\"department\")(features)\n",
+    "    num_departments, activation=\"softmax\", name=\"department\"\n",
+    ")(features)\n",
     "\n",
-    "model = keras.Model(inputs=[title, text_body, tags], outputs=[priority, department])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### Training a multi-input, multi-output model"
+    "model = keras.Model(\n",
+    "    inputs=[title, text_body, tags],\n",
+    "    outputs=[priority, department],\n",
+    ")"
    ]
   },
   {
@@ -381,7 +331,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Training a model by providing lists of input & target arrays**"
+    "##### Training a multi-input, multi-output model"
    ]
   },
   {
@@ -403,44 +353,64 @@
     "priority_data = np.random.random(size=(num_samples, 1))\n",
     "department_data = np.random.randint(0, 2, size=(num_samples, num_departments))\n",
     "\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
-    "              metrics=[[\"mean_absolute_error\"], [\"accuracy\"]])\n",
-    "model.fit([title_data, text_body_data, tags_data],\n",
-    "          [priority_data, department_data],\n",
-    "          epochs=1)\n",
-    "model.evaluate([title_data, text_body_data, tags_data],\n",
-    "               [priority_data, department_data])\n",
-    "priority_preds, department_preds = model.predict([title_data, text_body_data, tags_data])"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
+    "    metrics=[[\"mean_absolute_error\"], [\"accuracy\"]],\n",
+    ")\n",
+    "model.fit(\n",
+    "    [title_data, text_body_data, tags_data],\n",
+    "    [priority_data, department_data],\n",
+    "    epochs=1,\n",
+    ")\n",
+    "model.evaluate(\n",
+    "    [title_data, text_body_data, tags_data], [priority_data, department_data]\n",
+    ")\n",
+    "priority_preds, department_preds = model.predict(\n",
+    "    [title_data, text_body_data, tags_data]\n",
+    ")"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Training a model by providing dicts of input & target arrays**"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss={\n",
+    "        \"priority\": \"mean_squared_error\",\n",
+    "        \"department\": \"categorical_crossentropy\",\n",
+    "    },\n",
+    "    metrics={\n",
+    "        \"priority\": [\"mean_absolute_error\"],\n",
+    "        \"department\": [\"accuracy\"],\n",
+    "    },\n",
+    ")\n",
+    "model.fit(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
+    "    {\"priority\": priority_data, \"department\": department_data},\n",
+    "    epochs=1,\n",
+    ")\n",
+    "model.evaluate(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
+    "    {\"priority\": priority_data, \"department\": department_data},\n",
+    ")\n",
+    "priority_preds, department_preds = model.predict(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data}\n",
+    ")"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 0,
+   "cell_type": "markdown",
    "metadata": {
-    "colab_type": "code"
+    "colab_type": "text"
    },
-   "outputs": [],
    "source": [
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss={\"priority\": \"mean_squared_error\", \"department\": \"categorical_crossentropy\"},\n",
-    "              metrics={\"priority\": [\"mean_absolute_error\"], \"department\": [\"accuracy\"]})\n",
-    "model.fit({\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
-    "          {\"priority\": priority_data, \"department\": department_data},\n",
-    "          epochs=1)\n",
-    "model.evaluate({\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
-    "               {\"priority\": priority_data, \"department\": department_data})\n",
-    "priority_preds, department_preds = model.predict(\n",
-    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data})"
+    "##### The power of the Functional API: access to layer connectivity"
    ]
   },
   {
@@ -449,7 +419,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### The power of the Functional API: Access to layer connectivity"
+    "###### Plotting layer connectivity"
    ]
   },
   {
@@ -471,7 +441,12 @@
    },
    "outputs": [],
    "source": [
-    "keras.utils.plot_model(model, \"ticket_classifier_with_shape_info.png\", show_shapes=True)"
+    "keras.utils.plot_model(\n",
+    "    model,\n",
+    "    \"ticket_classifier_with_shape_info.png\",\n",
+    "    show_shapes=True,\n",
+    "    show_layer_names=True,\n",
+    ")"
    ]
   },
   {
@@ -480,7 +455,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Retrieving the inputs or outputs of a layer in a Functional model**"
+    "###### Feature extraction with a Functional model"
    ]
   },
   {
@@ -516,15 +491,6 @@
     "model.layers[3].output"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Creating a new model by reusing intermediate layer outputs**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -537,8 +503,8 @@
     "difficulty = layers.Dense(3, activation=\"softmax\", name=\"difficulty\")(features)\n",
     "\n",
     "new_model = keras.Model(\n",
-    "    inputs=[title, text_body, tags],\n",
-    "    outputs=[priority, department, difficulty])"
+    "    inputs=[title, text_body, tags], outputs=[priority, department, difficulty]\n",
+    ")"
    ]
   },
   {
@@ -549,16 +515,12 @@
    },
    "outputs": [],
    "source": [
-    "keras.utils.plot_model(new_model, \"updated_ticket_classifier.png\", show_shapes=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Subclassing the Model class"
+    "keras.utils.plot_model(\n",
+    "    new_model,\n",
+    "    \"updated_ticket_classifier.png\",\n",
+    "    show_shapes=True,\n",
+    "    show_layer_names=True,\n",
+    ")"
    ]
   },
   {
@@ -567,7 +529,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Rewriting our previous example as a subclassed model"
+    "#### Subclassing the `Model` class"
    ]
   },
   {
@@ -576,7 +538,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**A simple subclassed model**"
+    "##### Rewriting our previous example as a subclassed model"
    ]
   },
   {
@@ -588,14 +550,14 @@
    "outputs": [],
    "source": [
     "class CustomerTicketModel(keras.Model):\n",
-    "\n",
     "    def __init__(self, num_departments):\n",
     "        super().__init__()\n",
     "        self.concat_layer = layers.Concatenate()\n",
     "        self.mixing_layer = layers.Dense(64, activation=\"relu\")\n",
     "        self.priority_scorer = layers.Dense(1, activation=\"sigmoid\")\n",
     "        self.department_classifier = layers.Dense(\n",
-    "            num_departments, activation=\"softmax\")\n",
+    "            num_departments, activation=\"softmax\"\n",
+    "        )\n",
     "\n",
     "    def call(self, inputs):\n",
     "        title = inputs[\"title\"]\n",
@@ -620,7 +582,8 @@
     "model = CustomerTicketModel(num_departments=4)\n",
     "\n",
     "priority, department = model(\n",
-    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data})"
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data}\n",
+    ")"
    ]
   },
   {
@@ -631,30 +594,23 @@
    },
    "outputs": [],
    "source": [
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
-    "              metrics=[[\"mean_absolute_error\"], [\"accuracy\"]])\n",
-    "model.fit({\"title\": title_data,\n",
-    "           \"text_body\": text_body_data,\n",
-    "           \"tags\": tags_data},\n",
-    "          [priority_data, department_data],\n",
-    "          epochs=1)\n",
-    "model.evaluate({\"title\": title_data,\n",
-    "                \"text_body\": text_body_data,\n",
-    "                \"tags\": tags_data},\n",
-    "               [priority_data, department_data])\n",
-    "priority_preds, department_preds = model.predict({\"title\": title_data,\n",
-    "                                                  \"text_body\": text_body_data,\n",
-    "                                                  \"tags\": tags_data})"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### Beware: What subclassed models don't support"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
+    "    metrics=[[\"mean_absolute_error\"], [\"accuracy\"]],\n",
+    ")\n",
+    "model.fit(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
+    "    [priority_data, department_data],\n",
+    "    epochs=1,\n",
+    ")\n",
+    "model.evaluate(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
+    "    [priority_data, department_data],\n",
+    ")\n",
+    "priority_preds, department_preds = model.predict(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data}\n",
+    ")"
    ]
   },
   {
@@ -663,7 +619,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Mixing and matching different components"
+    "##### Beware: what subclassed models don't support"
    ]
   },
   {
@@ -672,7 +628,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Creating a Functional model that includes a subclassed model**"
+    "#### Mixing and matching different components"
    ]
   },
   {
@@ -684,7 +640,6 @@
    "outputs": [],
    "source": [
     "class Classifier(keras.Model):\n",
-    "\n",
     "    def __init__(self, num_classes=2):\n",
     "        super().__init__()\n",
     "        if num_classes == 2:\n",
@@ -704,15 +659,6 @@
     "model = keras.Model(inputs=inputs, outputs=outputs)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Creating a subclassed model that includes a Functional model**"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -726,7 +672,6 @@
     "binary_classifier = keras.Model(inputs=inputs, outputs=outputs)\n",
     "\n",
     "class MyModel(keras.Model):\n",
-    "\n",
     "    def __init__(self, num_classes=2):\n",
     "        super().__init__()\n",
     "        self.dense = layers.Dense(64, activation=\"relu\")\n",
@@ -745,16 +690,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Remember: Use the right tool for the job"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Using built-in training and evaluation loops"
+    "#### Remember: use the right tool for the job"
    ]
   },
   {
@@ -763,7 +699,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**The standard workflow: `compile()`, `fit()`, `evaluate()`, `predict()`**"
+    "### Using built-in training and evaluation loops"
    ]
   },
   {
@@ -774,7 +710,7 @@
    },
    "outputs": [],
    "source": [
-    "from tensorflow.keras.datasets import mnist\n",
+    "from keras.datasets import mnist\n",
     "\n",
     "def get_mnist_model():\n",
     "    inputs = keras.Input(shape=(28 * 28,))\n",
@@ -791,12 +727,17 @@
     "train_labels, val_labels = labels[10000:], labels[:10000]\n",
     "\n",
     "model = get_mnist_model()\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(train_images, train_labels,\n",
-    "          epochs=3,\n",
-    "          validation_data=(val_images, val_labels))\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
+    "    epochs=3,\n",
+    "    validation_data=(val_images, val_labels),\n",
+    ")\n",
     "test_metrics = model.evaluate(test_images, test_labels)\n",
     "predictions = model.predict(test_images)"
    ]
@@ -807,16 +748,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Writing your own metrics"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Implementing a custom metric by subclassing the `Metric` class**"
+    "#### Writing your own metrics"
    ]
   },
   {
@@ -827,25 +759,25 @@
    },
    "outputs": [],
    "source": [
-    "import tensorflow as tf\n",
+    "from keras import ops\n",
     "\n",
     "class RootMeanSquaredError(keras.metrics.Metric):\n",
-    "\n",
     "    def __init__(self, name=\"rmse\", **kwargs):\n",
     "        super().__init__(name=name, **kwargs)\n",
     "        self.mse_sum = self.add_weight(name=\"mse_sum\", initializer=\"zeros\")\n",
     "        self.total_samples = self.add_weight(\n",
-    "            name=\"total_samples\", initializer=\"zeros\", dtype=\"int32\")\n",
+    "            name=\"total_samples\", initializer=\"zeros\"\n",
+    "        )\n",
     "\n",
     "    def update_state(self, y_true, y_pred, sample_weight=None):\n",
-    "        y_true = tf.one_hot(y_true, depth=tf.shape(y_pred)[1])\n",
-    "        mse = tf.reduce_sum(tf.square(y_true - y_pred))\n",
+    "        y_true = ops.one_hot(y_true, num_classes=ops.shape(y_pred)[1])\n",
+    "        mse = ops.sum(ops.square(y_true - y_pred))\n",
     "        self.mse_sum.assign_add(mse)\n",
-    "        num_samples = tf.shape(y_pred)[0]\n",
+    "        num_samples = ops.shape(y_pred)[0]\n",
     "        self.total_samples.assign_add(num_samples)\n",
     "\n",
     "    def result(self):\n",
-    "        return tf.sqrt(self.mse_sum / tf.cast(self.total_samples, tf.float32))\n",
+    "        return ops.sqrt(self.mse_sum / self.total_samples)\n",
     "\n",
     "    def reset_state(self):\n",
     "        self.mse_sum.assign(0.)\n",
@@ -861,12 +793,17 @@
    "outputs": [],
    "source": [
     "model = get_mnist_model()\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\", RootMeanSquaredError()])\n",
-    "model.fit(train_images, train_labels,\n",
-    "          epochs=3,\n",
-    "          validation_data=(val_images, val_labels))\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\", RootMeanSquaredError()],\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
+    "    epochs=3,\n",
+    "    validation_data=(val_images, val_labels),\n",
+    ")\n",
     "test_metrics = model.evaluate(test_images, test_labels)"
    ]
   },
@@ -876,16 +813,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Using callbacks"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### The EarlyStopping and ModelCheckpoint callbacks"
+    "#### Using callbacks"
    ]
   },
   {
@@ -894,7 +822,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Using the `callbacks` argument in the `fit()` method**"
+    "##### The `EarlyStopping` and `ModelCheckpoint` callbacks"
    ]
   },
   {
@@ -907,23 +835,28 @@
    "source": [
     "callbacks_list = [\n",
     "    keras.callbacks.EarlyStopping(\n",
-    "        monitor=\"val_accuracy\",\n",
-    "        patience=2,\n",
+    "        monitor=\"accuracy\",\n",
+    "        patience=1,\n",
     "    ),\n",
     "    keras.callbacks.ModelCheckpoint(\n",
     "        filepath=\"checkpoint_path.keras\",\n",
     "        monitor=\"val_loss\",\n",
     "        save_best_only=True,\n",
-    "    )\n",
+    "    ),\n",
     "]\n",
     "model = get_mnist_model()\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(train_images, train_labels,\n",
-    "          epochs=10,\n",
-    "          callbacks=callbacks_list,\n",
-    "          validation_data=(val_images, val_labels))"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
+    "    epochs=10,\n",
+    "    callbacks=callbacks_list,\n",
+    "    validation_data=(val_images, val_labels),\n",
+    ")"
    ]
   },
   {
@@ -943,16 +876,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Writing your own callbacks"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Creating a custom callback by subclassing the `Callback` class**"
+    "#### Writing your own callbacks"
    ]
   },
   {
@@ -974,12 +898,15 @@
     "\n",
     "    def on_epoch_end(self, epoch, logs):\n",
     "        plt.clf()\n",
-    "        plt.plot(range(len(self.per_batch_losses)), self.per_batch_losses,\n",
-    "                 label=\"Training loss for each batch\")\n",
+    "        plt.plot(\n",
+    "            range(len(self.per_batch_losses)),\n",
+    "            self.per_batch_losses,\n",
+    "            label=\"Training loss for each batch\",\n",
+    "        )\n",
     "        plt.xlabel(f\"Batch (epoch {epoch})\")\n",
     "        plt.ylabel(\"Loss\")\n",
     "        plt.legend()\n",
-    "        plt.savefig(f\"plot_at_epoch_{epoch}\")\n",
+    "        plt.savefig(f\"plot_at_epoch_{epoch}\", dpi=300)\n",
     "        self.per_batch_losses = []"
    ]
   },
@@ -992,13 +919,18 @@
    "outputs": [],
    "source": [
     "model = get_mnist_model()\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
-    "model.fit(train_images, train_labels,\n",
-    "          epochs=10,\n",
-    "          callbacks=[LossHistory()],\n",
-    "          validation_data=(val_images, val_labels))"
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
+    "    epochs=10,\n",
+    "    callbacks=[LossHistory()],\n",
+    "    validation_data=(val_images, val_labels),\n",
+    ")"
    ]
   },
   {
@@ -1007,7 +939,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Monitoring and visualization with TensorBoard"
+    "#### Monitoring and visualization with TensorBoard"
    ]
   },
   {
@@ -1019,17 +951,22 @@
    "outputs": [],
    "source": [
     "model = get_mnist_model()\n",
-    "model.compile(optimizer=\"rmsprop\",\n",
-    "              loss=\"sparse_categorical_crossentropy\",\n",
-    "              metrics=[\"accuracy\"])\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
     "\n",
     "tensorboard = keras.callbacks.TensorBoard(\n",
     "    log_dir=\"/full_path_to_your_log_dir\",\n",
     ")\n",
-    "model.fit(train_images, train_labels,\n",
-    "          epochs=10,\n",
-    "          validation_data=(val_images, val_labels),\n",
-    "          callbacks=[tensorboard])"
+    "model.fit(\n",
+    "    train_images,\n",
+    "    train_labels,\n",
+    "    epochs=10,\n",
+    "    validation_data=(val_images, val_labels),\n",
+    "    callbacks=[tensorboard],\n",
+    ")"
    ]
   },
   {
@@ -1050,7 +987,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Writing your own training and evaluation loops"
+    "### Writing your own training and evaluation loops"
    ]
   },
   {
@@ -1059,7 +996,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Training versus inference"
+    "#### Training versus inference"
    ]
   },
   {
@@ -1068,23 +1005,16 @@
     "colab_type": "text"
    },
    "source": [
-    "### Low-level usage of metrics"
+    "#### Writing custom training step functions"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 0,
+   "cell_type": "markdown",
    "metadata": {
-    "colab_type": "code"
+    "colab_type": "text"
    },
-   "outputs": [],
    "source": [
-    "metric = keras.metrics.SparseCategoricalAccuracy()\n",
-    "targets = [0, 1, 2]\n",
-    "predictions = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]\n",
-    "metric.update_state(targets, predictions)\n",
-    "current_result = metric.result()\n",
-    "print(f\"result: {current_result:.2f}\")"
+    "##### A TensorFlow training step function"
    ]
   },
   {
@@ -1095,29 +1025,18 @@
    },
    "outputs": [],
    "source": [
-    "values = [0, 1, 2, 3, 4]\n",
-    "mean_tracker = keras.metrics.Mean()\n",
-    "for value in values:\n",
-    "    mean_tracker.update_state(value)\n",
-    "print(f\"Mean of values: {mean_tracker.result():.2f}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### A complete training and evaluation loop"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Writing a step-by-step training loop: the training step function**"
+    "keras.config.set_backend(\"tensorflow\")\n",
+    "\n",
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "def get_mnist_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = keras.Model(inputs, outputs)\n",
+    "    return model"
    ]
   },
   {
@@ -1128,28 +1047,33 @@
    },
    "outputs": [],
    "source": [
-    "model = get_mnist_model()\n",
+    "import tensorflow as tf\n",
     "\n",
+    "model = get_mnist_model()\n",
     "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
-    "optimizer = keras.optimizers.RMSprop()\n",
-    "metrics = [keras.metrics.SparseCategoricalAccuracy()]\n",
-    "loss_tracking_metric = keras.metrics.Mean()\n",
+    "optimizer = keras.optimizers.Adam()\n",
     "\n",
     "def train_step(inputs, targets):\n",
     "    with tf.GradientTape() as tape:\n",
     "        predictions = model(inputs, training=True)\n",
     "        loss = loss_fn(targets, predictions)\n",
     "    gradients = tape.gradient(loss, model.trainable_weights)\n",
-    "    optimizer.apply_gradients(zip(gradients, model.trainable_weights))\n",
-    "\n",
-    "    logs = {}\n",
-    "    for metric in metrics:\n",
-    "        metric.update_state(targets, predictions)\n",
-    "        logs[metric.name] = metric.result()\n",
-    "\n",
-    "    loss_tracking_metric.update_state(loss)\n",
-    "    logs[\"loss\"] = loss_tracking_metric.result()\n",
-    "    return logs"
+    "    optimizer.apply(gradients, model.trainable_weights)\n",
+    "    return loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "inputs = train_images[:batch_size]\n",
+    "targets = train_labels[:batch_size]\n",
+    "loss = train_step(inputs, targets)"
    ]
   },
   {
@@ -1158,7 +1082,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Writing a step-by-step training loop: resetting the metrics**"
+    "##### A PyTorch training step function"
    ]
   },
   {
@@ -1169,19 +1093,43 @@
    },
    "outputs": [],
    "source": [
-    "def reset_metrics():\n",
-    "    for metric in metrics:\n",
-    "        metric.reset_state()\n",
-    "    loss_tracking_metric.reset_state()"
+    "keras.config.set_backend(\"torch\")\n",
+    "\n",
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "def get_mnist_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = keras.Model(inputs, outputs)\n",
+    "    return model"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "**Writing a step-by-step training loop: the loop itself**"
+    "import torch\n",
+    "\n",
+    "model = get_mnist_model()\n",
+    "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
+    "optimizer = keras.optimizers.Adam()\n",
+    "\n",
+    "def train_step(inputs, targets):\n",
+    "    predictions = model(inputs, training=True)\n",
+    "    loss = loss_fn(targets, predictions)\n",
+    "    loss.backward()\n",
+    "    gradients = [weight.value.grad for weight in model.trainable_weights]\n",
+    "    with torch.no_grad():\n",
+    "        optimizer.apply(gradients, model.trainable_weights)\n",
+    "    model.zero_grad()\n",
+    "    return loss"
    ]
   },
   {
@@ -1192,16 +1140,10 @@
    },
    "outputs": [],
    "source": [
-    "training_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))\n",
-    "training_dataset = training_dataset.batch(32)\n",
-    "epochs = 3\n",
-    "for epoch in range(epochs):\n",
-    "    reset_metrics()\n",
-    "    for inputs_batch, targets_batch in training_dataset:\n",
-    "        logs = train_step(inputs_batch, targets_batch)\n",
-    "    print(f\"Results at the end of epoch {epoch}\")\n",
-    "    for key, value in logs.items():\n",
-    "        print(f\"...{key}: {value:.4f}\")"
+    "batch_size = 32\n",
+    "inputs = train_images[:batch_size]\n",
+    "targets = train_labels[:batch_size]\n",
+    "loss = train_step(inputs, targets)"
    ]
   },
   {
@@ -1210,7 +1152,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Writing a step-by-step evaluation loop**"
+    "##### A JAX training step function"
    ]
   },
   {
@@ -1221,36 +1163,98 @@
    },
    "outputs": [],
    "source": [
-    "def test_step(inputs, targets):\n",
-    "    predictions = model(inputs, training=False)\n",
-    "    loss = loss_fn(targets, predictions)\n",
+    "keras.config.set_backend(\"jax\")\n",
     "\n",
-    "    logs = {}\n",
-    "    for metric in metrics:\n",
-    "        metric.update_state(targets, predictions)\n",
-    "        logs[\"val_\" + metric.name] = metric.result()\n",
+    "import keras\n",
+    "from keras import layers\n",
     "\n",
-    "    loss_tracking_metric.update_state(loss)\n",
-    "    logs[\"val_loss\"] = loss_tracking_metric.result()\n",
-    "    return logs\n",
+    "def get_mnist_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = keras.Model(inputs, outputs)\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_mnist_model()\n",
+    "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
     "\n",
-    "val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))\n",
-    "val_dataset = val_dataset.batch(32)\n",
-    "reset_metrics()\n",
-    "for inputs_batch, targets_batch in val_dataset:\n",
-    "    logs = test_step(inputs_batch, targets_batch)\n",
-    "print(\"Evaluation results:\")\n",
-    "for key, value in logs.items():\n",
-    "    print(f\"...{key}: {value:.4f}\")"
+    "def compute_loss_and_updates(\n",
+    "    trainable_variables, non_trainable_variables, inputs, targets\n",
+    "):\n",
+    "    outputs, non_trainable_variables = model.stateless_call(\n",
+    "        trainable_variables, non_trainable_variables, inputs\n",
+    "    )\n",
+    "    loss = loss_fn(targets, outputs)\n",
+    "    return loss, non_trainable_variables"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "### Make it fast with tf.function"
+    "import jax\n",
+    "\n",
+    "grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "optimizer = keras.optimizers.Adam()\n",
+    "optimizer.build(model.trainable_variables)\n",
+    "\n",
+    "def train_step(state, inputs, targets):\n",
+    "    (trainable_variables, non_trainable_variables, optimizer_variables) = state\n",
+    "    (loss, non_trainable_variables), grads = grad_fn(\n",
+    "        trainable_variables, non_trainable_variables, inputs, targets\n",
+    "    )\n",
+    "    trainable_variables, optimizer_variables = optimizer.stateless_apply(\n",
+    "        optimizer_variables, grads, trainable_variables\n",
+    "    )\n",
+    "    return loss, (\n",
+    "        trainable_variables,\n",
+    "        non_trainable_variables,\n",
+    "        optimizer_variables,\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "inputs = train_images[:batch_size]\n",
+    "targets = train_labels[:batch_size]\n",
+    "\n",
+    "trainable_variables = [v.value for v in model.trainable_variables]\n",
+    "non_trainable_variables = [v.value for v in model.non_trainable_variables]\n",
+    "optimizer_variables = [v.value for v in optimizer.variables]\n",
+    "\n",
+    "state = (trainable_variables, non_trainable_variables, optimizer_variables)\n",
+    "loss, state = train_step(state, inputs, targets)"
    ]
   },
   {
@@ -1259,7 +1263,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Adding a `tf.function` decorator to our evaluation-step function**"
+    "#### Low-level usage of metrics"
    ]
   },
   {
@@ -1270,28 +1274,51 @@
    },
    "outputs": [],
    "source": [
-    "@tf.function\n",
-    "def test_step(inputs, targets):\n",
-    "    predictions = model(inputs, training=False)\n",
-    "    loss = loss_fn(targets, predictions)\n",
+    "from keras import ops\n",
     "\n",
-    "    logs = {}\n",
-    "    for metric in metrics:\n",
-    "        metric.update_state(targets, predictions)\n",
-    "        logs[\"val_\" + metric.name] = metric.result()\n",
+    "metric = keras.metrics.SparseCategoricalAccuracy()\n",
+    "targets = ops.array([0, 1, 2])\n",
+    "predictions = ops.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])\n",
+    "metric.update_state(targets, predictions)\n",
+    "current_result = metric.result()\n",
+    "print(f\"result: {current_result:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "values = ops.array([0, 1, 2, 3, 4])\n",
+    "mean_tracker = keras.metrics.Mean()\n",
+    "for value in values:\n",
+    "    mean_tracker.update_state(value)\n",
+    "print(f\"Mean of values: {mean_tracker.result():.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "metric = keras.metrics.SparseCategoricalAccuracy()\n",
+    "targets = ops.array([0, 1, 2])\n",
+    "predictions = ops.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])\n",
     "\n",
-    "    loss_tracking_metric.update_state(loss)\n",
-    "    logs[\"val_loss\"] = loss_tracking_metric.result()\n",
-    "    return logs\n",
+    "metric_variables = metric.variables\n",
+    "metric_variables = metric.stateless_update_state(\n",
+    "    metric_variables, targets, predictions\n",
+    ")\n",
+    "current_result = metric.stateless_result(metric_variables)\n",
+    "print(f\"result: {current_result:.2f}\")\n",
     "\n",
-    "val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))\n",
-    "val_dataset = val_dataset.batch(32)\n",
-    "reset_metrics()\n",
-    "for inputs_batch, targets_batch in val_dataset:\n",
-    "    logs = test_step(inputs_batch, targets_batch)\n",
-    "print(\"Evaluation results:\")\n",
-    "for key, value in logs.items():\n",
-    "    print(f\"...{key}: {value:.4f}\")"
+    "metric_variables = metric.stateless_reset_state()"
    ]
   },
   {
@@ -1300,7 +1327,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Leveraging fit() with a custom training loop"
+    "#### Leveraging `fit()` with a custom training loop"
    ]
   },
   {
@@ -1309,7 +1336,7 @@
     "colab_type": "text"
    },
    "source": [
-    "**Implementing a custom training step to use with `fit()`**"
+    "##### Customizing `fit()` with TensorFlow"
    ]
   },
   {
@@ -1320,6 +1347,20 @@
    },
    "outputs": [],
    "source": [
+    "keras.config.set_backend(\"tensorflow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
     "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
     "loss_tracker = keras.metrics.Mean(name=\"loss\")\n",
     "\n",
@@ -1329,8 +1370,8 @@
     "        with tf.GradientTape() as tape:\n",
     "            predictions = self(inputs, training=True)\n",
     "            loss = loss_fn(targets, predictions)\n",
-    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
-    "        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))\n",
+    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
+    "        self.optimizer.apply(gradients, self.trainable_weights)\n",
     "\n",
     "        loss_tracker.update_state(loss)\n",
     "        return {\"loss\": loss_tracker.result()}\n",
@@ -1348,16 +1389,225 @@
    },
    "outputs": [],
    "source": [
-    "inputs = keras.Input(shape=(28 * 28,))\n",
-    "features = layers.Dense(512, activation=\"relu\")(inputs)\n",
-    "features = layers.Dropout(0.5)(features)\n",
-    "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
-    "model = CustomModel(inputs, outputs)\n",
+    "def get_custom_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = CustomModel(inputs, outputs)\n",
+    "    model.compile(optimizer=keras.optimizers.Adam())\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_custom_model()\n",
+    "model.fit(train_images, train_labels, epochs=3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Customizing `fit()` with PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"torch\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
+    "loss_tracker = keras.metrics.Mean(name=\"loss\")\n",
+    "\n",
+    "class CustomModel(keras.Model):\n",
+    "    def train_step(self, data):\n",
+    "        inputs, targets = data\n",
+    "        predictions = self(inputs, training=True)\n",
+    "        loss = loss_fn(targets, predictions)\n",
+    "        self.zero_grad()\n",
+    "        loss.backward()\n",
+    "        trainable_weights = [v for v in self.trainable_weights]\n",
+    "        gradients = [v.value.grad for v in trainable_weights]\n",
+    "\n",
+    "        with torch.no_grad():\n",
+    "            self.optimizer.apply(gradients, trainable_weights)\n",
+    "\n",
+    "        loss_tracker.update_state(loss)\n",
+    "        return {\"loss\": loss_tracker.result()}\n",
+    "\n",
+    "    @property\n",
+    "    def metrics(self):\n",
+    "        return [loss_tracker]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_custom_model()\n",
+    "model.fit(train_images, train_labels, epochs=3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Customizing `fit()` with JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"jax\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
     "\n",
-    "model.compile(optimizer=keras.optimizers.RMSprop())\n",
+    "class CustomModel(keras.Model):\n",
+    "    def compute_loss_and_updates(\n",
+    "        self,\n",
+    "        trainable_variables,\n",
+    "        non_trainable_variables,\n",
+    "        inputs,\n",
+    "        targets,\n",
+    "        training=False,\n",
+    "    ):\n",
+    "        predictions, non_trainable_variables = self.stateless_call(\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            inputs,\n",
+    "            training=training,\n",
+    "        )\n",
+    "        loss = loss_fn(targets, predictions)\n",
+    "        return loss, non_trainable_variables\n",
+    "\n",
+    "    def train_step(self, state, data):\n",
+    "        (\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            optimizer_variables,\n",
+    "            metrics_variables,\n",
+    "        ) = state\n",
+    "        inputs, targets = data\n",
+    "\n",
+    "        grad_fn = jax.value_and_grad(\n",
+    "            self.compute_loss_and_updates, has_aux=True\n",
+    "        )\n",
+    "\n",
+    "        (loss, non_trainable_variables), grads = grad_fn(\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            inputs,\n",
+    "            targets,\n",
+    "            training=True,\n",
+    "        )\n",
+    "\n",
+    "        (\n",
+    "            trainable_variables,\n",
+    "            optimizer_variables,\n",
+    "        ) = self.optimizer.stateless_apply(\n",
+    "            optimizer_variables, grads, trainable_variables\n",
+    "        )\n",
+    "\n",
+    "        logs = {\"loss\": loss}\n",
+    "        state = (\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            optimizer_variables,\n",
+    "            metrics_variables,\n",
+    "        )\n",
+    "        return logs, state"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_custom_model()\n",
     "model.fit(train_images, train_labels, epochs=3)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Handling metrics in a custom `train_step()`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### `train_step()` metrics handling with TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"tensorflow\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1366,15 +1616,25 @@
    },
    "outputs": [],
    "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
     "class CustomModel(keras.Model):\n",
     "    def train_step(self, data):\n",
     "        inputs, targets = data\n",
     "        with tf.GradientTape() as tape:\n",
     "            predictions = self(inputs, training=True)\n",
-    "            loss = self.compiled_loss(targets, predictions)\n",
-    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
-    "        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))\n",
-    "        self.compiled_metrics.update_state(targets, predictions)\n",
+    "            loss = self.compute_loss(y=targets, y_pred=predictions)\n",
+    "\n",
+    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
+    "        self.optimizer.apply(gradients, self.trainable_weights)\n",
+    "\n",
+    "        for metric in self.metrics:\n",
+    "            if metric.name == \"loss\":\n",
+    "                metric.update_state(loss)\n",
+    "            else:\n",
+    "                metric.update_state(targets, predictions)\n",
+    "\n",
     "        return {m.name: m.result() for m in self.metrics}"
    ]
   },
@@ -1386,15 +1646,98 @@
    },
    "outputs": [],
    "source": [
-    "inputs = keras.Input(shape=(28 * 28,))\n",
-    "features = layers.Dense(512, activation=\"relu\")(inputs)\n",
-    "features = layers.Dropout(0.5)(features)\n",
-    "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
-    "model = CustomModel(inputs, outputs)\n",
+    "def get_custom_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = CustomModel(inputs, outputs)\n",
+    "    model.compile(\n",
+    "        optimizer=keras.optimizers.Adam(),\n",
+    "        loss=keras.losses.SparseCategoricalCrossentropy(),\n",
+    "        metrics=[keras.metrics.SparseCategoricalAccuracy()],\n",
+    "    )\n",
+    "    return model\n",
+    "\n",
+    "model = get_custom_model()\n",
+    "model.fit(train_images, train_labels, epochs=3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### `train_step()` metrics handling with PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"torch\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "class CustomModel(keras.Model):\n",
+    "    def train_step(self, data):\n",
+    "        inputs, targets = data\n",
+    "        predictions = self(inputs, training=True)\n",
+    "        loss = self.compute_loss(y=targets, y_pred=predictions)\n",
+    "\n",
+    "        self.zero_grad()  # clear gradients left over from the previous step\n",
+    "        loss.backward()\n",
+    "        trainable_weights = list(self.trainable_weights)\n",
+    "        gradients = [v.value.grad for v in trainable_weights]\n",
+    "        with torch.no_grad():\n",
+    "            self.optimizer.apply(gradients, trainable_weights)\n",
+    "\n",
+    "        for metric in self.metrics:\n",
+    "            if metric.name == \"loss\":\n",
+    "                metric.update_state(loss)\n",
+    "            else:\n",
+    "                metric.update_state(targets, predictions)\n",
+    "\n",
+    "        return {m.name: m.result() for m in self.metrics}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_custom_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = CustomModel(inputs, outputs)\n",
+    "    model.compile(\n",
+    "        optimizer=keras.optimizers.Adam(),\n",
+    "        loss=keras.losses.SparseCategoricalCrossentropy(),\n",
+    "        metrics=[keras.metrics.SparseCategoricalAccuracy()],\n",
+    "    )\n",
+    "    return model\n",
     "\n",
-    "model.compile(optimizer=keras.optimizers.RMSprop(),\n",
-    "              loss=keras.losses.SparseCategoricalCrossentropy(),\n",
-    "              metrics=[keras.metrics.SparseCategoricalAccuracy()])\n",
+    "model = get_custom_model()\n",
     "model.fit(train_images, train_labels, epochs=3)"
    ]
   },
@@ -1404,14 +1747,101 @@
     "colab_type": "text"
    },
    "source": [
-    "## Summary"
+    "##### `train_step()` metrics handling with JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class CustomModel(keras.Model):\n",
+    "    def compute_loss_and_updates(\n",
+    "        self,\n",
+    "        trainable_variables,\n",
+    "        non_trainable_variables,\n",
+    "        inputs,\n",
+    "        targets,\n",
+    "        training=False,\n",
+    "    ):\n",
+    "        predictions, non_trainable_variables = self.stateless_call(\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            inputs,\n",
+    "            training=training,\n",
+    "        )\n",
+    "        loss = self.compute_loss(y=targets, y_pred=predictions)\n",
+    "        return loss, (predictions, non_trainable_variables)\n",
+    "\n",
+    "    def train_step(self, state, data):\n",
+    "        (\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            optimizer_variables,\n",
+    "            metrics_variables,\n",
+    "        ) = state\n",
+    "        inputs, targets = data\n",
+    "\n",
+    "        grad_fn = jax.value_and_grad(\n",
+    "            self.compute_loss_and_updates, has_aux=True\n",
+    "        )\n",
+    "\n",
+    "        (loss, (predictions, non_trainable_variables)), grads = grad_fn(\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            inputs,\n",
+    "            targets,\n",
+    "            training=True,\n",
+    "        )\n",
+    "        (\n",
+    "            trainable_variables,\n",
+    "            optimizer_variables,\n",
+    "        ) = self.optimizer.stateless_apply(\n",
+    "            optimizer_variables, grads, trainable_variables\n",
+    "        )\n",
+    "\n",
+    "        new_metrics_vars = []\n",
+    "        logs = {}\n",
+    "        for metric in self.metrics:\n",
+    "            num_prev = len(new_metrics_vars)\n",
+    "            num_current = len(metric.variables)\n",
+    "            current_vars = metrics_variables[num_prev : num_prev + num_current]\n",
+    "            if metric.name == \"loss\":\n",
+    "                current_vars = metric.stateless_update_state(current_vars, loss)\n",
+    "            else:\n",
+    "                current_vars = metric.stateless_update_state(\n",
+    "                    current_vars, targets, predictions\n",
+    "                )\n",
+    "            logs[metric.name] = metric.stateless_result(current_vars)\n",
+    "            new_metrics_vars += current_vars\n",
+    "\n",
+    "        state = (\n",
+    "            trainable_variables,\n",
+    "            non_trainable_variables,\n",
+    "            optimizer_variables,\n",
+    "            new_metrics_vars,\n",
+    "        )\n",
+    "        return logs, state"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
    ]
   }
  ],
  "metadata": {
+  "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter07_working-with-keras.i",
+   "name": "chapter07_working-with-keras",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
@@ -1431,7 +1861,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.0"
+   "version": "3.10.0"
   }
  },
  "nbformat": 4,
diff --git a/chapter08_image-classification.ipynb b/chapter08_image-classification.ipynb
new file mode 100644
index 0000000000..fa8e8ff058
--- /dev/null
+++ b/chapter08_image-classification.ipynb
@@ -0,0 +1,1004 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Introduction to convnets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "inputs = keras.Input(shape=(28, 28, 1))\n",
+    "x = layers.Conv2D(filters=64, kernel_size=3, activation=\"relu\")(inputs)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=128, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=256, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.GlobalAveragePooling2D()(x)\n",
+    "outputs = layers.Dense(10, activation=\"softmax\")(x)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras.datasets import mnist\n",
+    "\n",
+    "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n",
+    "train_images = train_images.reshape((60000, 28, 28, 1))\n",
+    "train_images = train_images.astype(\"float32\") / 255\n",
+    "test_images = test_images.reshape((10000, 28, 28, 1))\n",
+    "test_images = test_images.astype(\"float32\") / 255\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(train_images, train_labels, epochs=5, batch_size=64)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_loss, test_acc = model.evaluate(test_images, test_labels)\n",
+    "print(f\"Test accuracy: {test_acc:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The convolution operation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Understanding border effects and padding"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Understanding convolution strides"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The max-pooling operation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(28, 28, 1))\n",
+    "x = layers.Conv2D(filters=64, kernel_size=3, activation=\"relu\")(inputs)\n",
+    "x = layers.Conv2D(filters=128, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.Conv2D(filters=256, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.GlobalAveragePooling2D()(x)\n",
+    "outputs = layers.Dense(10, activation=\"softmax\")(x)\n",
+    "model_no_max_pool = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model_no_max_pool.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Training a convnet from scratch on a small dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The relevance of deep learning for small-data problems"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Downloading the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import kagglehub\n",
+    "\n",
+    "kagglehub.login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "download_path = kagglehub.competition_download(\"dogs-vs-cats\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import zipfile\n",
+    "\n",
+    "with zipfile.ZipFile(download_path + \"/train.zip\", \"r\") as zip_ref:\n",
+    "    zip_ref.extractall(\".\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os, shutil, pathlib\n",
+    "\n",
+    "original_dir = pathlib.Path(\"train\")\n",
+    "new_base_dir = pathlib.Path(\"dogs_vs_cats_small\")\n",
+    "\n",
+    "def make_subset(subset_name, start_index, end_index):\n",
+    "    for category in (\"cat\", \"dog\"):\n",
+    "        subset_dir = new_base_dir / subset_name / category\n",
+    "        os.makedirs(subset_dir, exist_ok=True)  # safe to re-run the cell\n",
+    "        fnames = [f\"{category}.{i}.jpg\" for i in range(start_index, end_index)]\n",
+    "        for fname in fnames:\n",
+    "            shutil.copyfile(src=original_dir / fname, dst=subset_dir / fname)\n",
+    "\n",
+    "make_subset(\"train\", start_index=0, end_index=1000)\n",
+    "make_subset(\"validation\", start_index=1000, end_index=1500)\n",
+    "make_subset(\"test\", start_index=1500, end_index=2500)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Building your model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "inputs = keras.Input(shape=(180, 180, 3))\n",
+    "x = layers.Rescaling(1.0 / 255)(inputs)\n",
+    "x = layers.Conv2D(filters=32, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=64, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=128, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=256, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=512, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.GlobalAveragePooling2D()(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    optimizer=\"adam\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Data preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras.utils import image_dataset_from_directory\n",
+    "\n",
+    "batch_size = 64\n",
+    "image_size = (180, 180)\n",
+    "train_dataset = image_dataset_from_directory(\n",
+    "    new_base_dir / \"train\", image_size=image_size, batch_size=batch_size\n",
+    ")\n",
+    "validation_dataset = image_dataset_from_directory(\n",
+    "    new_base_dir / \"validation\", image_size=image_size, batch_size=batch_size\n",
+    ")\n",
+    "test_dataset = image_dataset_from_directory(\n",
+    "    new_base_dir / \"test\", image_size=image_size, batch_size=batch_size\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Understanding TensorFlow Dataset objects"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "random_numbers = np.random.normal(size=(1000, 16))\n",
+    "dataset = tf.data.Dataset.from_tensor_slices(random_numbers)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "for i, element in enumerate(dataset):\n",
+    "    print(element.shape)\n",
+    "    if i >= 2:\n",
+    "        break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batched_dataset = dataset.batch(32)\n",
+    "for i, element in enumerate(batched_dataset):\n",
+    "    print(element.shape)\n",
+    "    if i >= 2:\n",
+    "        break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "reshaped_dataset = dataset.map(\n",
+    "    lambda x: tf.reshape(x, (4, 4)),\n",
+    "    num_parallel_calls=8)\n",
+    "for i, element in enumerate(reshaped_dataset):\n",
+    "    print(element.shape)\n",
+    "    if i >= 2:\n",
+    "        break"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Fitting the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "for data_batch, labels_batch in train_dataset:\n",
+    "    print(\"data batch shape:\", data_batch.shape)\n",
+    "    print(\"labels batch shape:\", labels_batch.shape)\n",
+    "    break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        filepath=\"convnet_from_scratch.keras\",\n",
+    "        save_best_only=True,\n",
+    "        monitor=\"val_loss\",\n",
+    "    )\n",
+    "]\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=50,\n",
+    "    validation_data=validation_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "accuracy = history.history[\"accuracy\"]\n",
+    "val_accuracy = history.history[\"val_accuracy\"]\n",
+    "loss = history.history[\"loss\"]\n",
+    "val_loss = history.history[\"val_loss\"]\n",
+    "epochs = range(1, len(accuracy) + 1)\n",
+    "\n",
+    "plt.plot(epochs, accuracy, \"r--\", label=\"Training accuracy\")\n",
+    "plt.plot(epochs, val_accuracy, \"b\", label=\"Validation accuracy\")\n",
+    "plt.title(\"Training and validation accuracy\")\n",
+    "plt.legend()\n",
+    "plt.figure()\n",
+    "\n",
+    "plt.plot(epochs, loss, \"r--\", label=\"Training loss\")\n",
+    "plt.plot(epochs, val_loss, \"b\", label=\"Validation loss\")\n",
+    "plt.title(\"Training and validation loss\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_model = keras.models.load_model(\"convnet_from_scratch.keras\")\n",
+    "test_loss, test_acc = test_model.evaluate(test_dataset)\n",
+    "print(f\"Test accuracy: {test_acc:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Using data augmentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "data_augmentation_layers = [\n",
+    "    layers.RandomFlip(\"horizontal\"),\n",
+    "    layers.RandomRotation(0.1),\n",
+    "    layers.RandomZoom(0.2),\n",
+    "]\n",
+    "\n",
+    "def data_augmentation(images, targets):\n",
+    "    for layer in data_augmentation_layers:\n",
+    "        images = layer(images)\n",
+    "    return images, targets\n",
+    "\n",
+    "augmented_train_dataset = train_dataset.map(\n",
+    "    data_augmentation, num_parallel_calls=8\n",
+    ")\n",
+    "augmented_train_dataset = augmented_train_dataset.prefetch(tf.data.AUTOTUNE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10, 10))\n",
+    "for image_batch, _ in train_dataset.take(1):\n",
+    "    image = image_batch[0]\n",
+    "    for i in range(9):\n",
+    "        ax = plt.subplot(3, 3, i + 1)\n",
+    "        augmented_image, _ = data_augmentation(image, None)\n",
+    "        plt.imshow(np.array(augmented_image).astype(\"uint8\"))\n",
+    "        plt.axis(\"off\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(180, 180, 3))\n",
+    "x = layers.Rescaling(1.0 / 255)(inputs)\n",
+    "x = layers.Conv2D(filters=32, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=64, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=128, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=256, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling2D(pool_size=2)(x)\n",
+    "x = layers.Conv2D(filters=512, kernel_size=3, activation=\"relu\")(x)\n",
+    "x = layers.GlobalAveragePooling2D()(x)\n",
+    "x = layers.Dropout(0.25)(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)\n",
+    "\n",
+    "model.compile(\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    optimizer=\"adam\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        filepath=\"convnet_from_scratch_with_augmentation.keras\",\n",
+    "        save_best_only=True,\n",
+    "        monitor=\"val_loss\",\n",
+    "    )\n",
+    "]\n",
+    "history = model.fit(\n",
+    "    augmented_train_dataset,\n",
+    "    epochs=100,\n",
+    "    validation_data=validation_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_model = keras.models.load_model(\n",
+    "    \"convnet_from_scratch_with_augmentation.keras\"\n",
+    ")\n",
+    "test_loss, test_acc = test_model.evaluate(test_dataset)\n",
+    "print(f\"Test accuracy: {test_acc:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Leveraging a pretrained model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Feature extraction with a pretrained model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "conv_base = keras.applications.Xception(\n",
+    "    weights=\"imagenet\",\n",
+    "    include_top=False,\n",
+    "    input_shape=(180, 180, 3),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Fast feature extraction without data augmentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from keras.applications.xception import preprocess_input\n",
+    "\n",
+    "def get_features_and_labels(dataset):\n",
+    "    all_features = []\n",
+    "    all_labels = []\n",
+    "    for images, labels in dataset:\n",
+    "        preprocessed_images = preprocess_input(images)\n",
+    "        features = conv_base.predict(preprocessed_images, verbose=0)\n",
+    "        all_features.append(features)\n",
+    "        all_labels.append(labels)\n",
+    "    return np.concatenate(all_features), np.concatenate(all_labels)\n",
+    "\n",
+    "train_features, train_labels = get_features_and_labels(train_dataset)\n",
+    "val_features, val_labels = get_features_and_labels(validation_dataset)\n",
+    "test_features, test_labels = get_features_and_labels(test_dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_features.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(6, 6, 2048))\n",
+    "x = layers.GlobalAveragePooling2D()(inputs)\n",
+    "x = layers.Dense(256, activation=\"relu\")(x)\n",
+    "x = layers.Dropout(0.25)(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "model.compile(\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    optimizer=\"adam\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        filepath=\"feature_extraction.keras\",\n",
+    "        save_best_only=True,\n",
+    "        monitor=\"val_loss\",\n",
+    "    )\n",
+    "]\n",
+    "history = model.fit(\n",
+    "    train_features,\n",
+    "    train_labels,\n",
+    "    epochs=10,\n",
+    "    validation_data=(val_features, val_labels),\n",
+    "    callbacks=callbacks,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "acc = history.history[\"accuracy\"]\n",
+    "val_acc = history.history[\"val_accuracy\"]\n",
+    "loss = history.history[\"loss\"]\n",
+    "val_loss = history.history[\"val_loss\"]\n",
+    "epochs = range(1, len(acc) + 1)\n",
+    "plt.plot(epochs, acc, \"r--\", label=\"Training accuracy\")\n",
+    "plt.plot(epochs, val_acc, \"b\", label=\"Validation accuracy\")\n",
+    "plt.title(\"Training and validation accuracy\")\n",
+    "plt.legend()\n",
+    "plt.figure()\n",
+    "plt.plot(epochs, loss, \"r--\", label=\"Training loss\")\n",
+    "plt.plot(epochs, val_loss, \"b\", label=\"Validation loss\")\n",
+    "plt.title(\"Training and validation loss\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_model = keras.models.load_model(\"feature_extraction.keras\")\n",
+    "test_loss, test_acc = test_model.evaluate(test_features, test_labels)\n",
+    "print(f\"Test accuracy: {test_acc:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Feature extraction together with data augmentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "conv_base = keras.applications.Xception(weights=\"imagenet\", include_top=False)\n",
+    "conv_base.trainable = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "conv_base.trainable = True\n",
+    "len(conv_base.trainable_weights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "conv_base.trainable = False\n",
+    "len(conv_base.trainable_weights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(180, 180, 3))\n",
+    "x = preprocess_input(inputs)\n",
+    "x = conv_base(x)\n",
+    "x = layers.GlobalAveragePooling2D()(x)\n",
+    "x = layers.Dense(256)(x)\n",
+    "x = layers.Dropout(0.25)(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "model.compile(\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    optimizer=\"adam\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        filepath=\"feature_extraction_with_data_augmentation.keras\",\n",
+    "        save_best_only=True,\n",
+    "        monitor=\"val_loss\",\n",
+    "    )\n",
+    "]\n",
+    "history = model.fit(\n",
+    "    augmented_train_dataset,\n",
+    "    epochs=30,\n",
+    "    validation_data=validation_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_model = keras.models.load_model(\n",
+    "    \"feature_extraction_with_data_augmentation.keras\"\n",
+    ")\n",
+    "test_loss, test_acc = test_model.evaluate(test_dataset)\n",
+    "print(f\"Test accuracy: {test_acc:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Fine-tuning a pretrained model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "conv_base.trainable = True\n",
+    "for layer in conv_base.layers[:-4]:\n",
+    "    layer.trainable = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    optimizer=keras.optimizers.Adam(learning_rate=1e-5),\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        filepath=\"fine_tuning.keras\",\n",
+    "        save_best_only=True,\n",
+    "        monitor=\"val_loss\",\n",
+    "    )\n",
+    "]\n",
+    "history = model.fit(\n",
+    "    augmented_train_dataset,\n",
+    "    epochs=30,\n",
+    "    validation_data=validation_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.models.load_model(\"fine_tuning.keras\")\n",
+    "test_loss, test_acc = model.evaluate(test_dataset)\n",
+    "print(f\"Test accuracy: {test_acc:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter08_image-classification",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter09_convnet-architecture-best-practices.ipynb b/chapter09_convnet-architecture-best-practices.ipynb
new file mode 100644
index 0000000000..7f114a4d20
--- /dev/null
+++ b/chapter09_convnet-architecture-best-practices.ipynb
@@ -0,0 +1,356 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Modularity, hierarchy, and reuse"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Residual connections"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "inputs = keras.Input(shape=(32, 32, 3))\n",
+    "x = layers.Conv2D(32, 3, activation=\"relu\")(inputs)\n",
+    "residual = x\n",
+    "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "residual = layers.Conv2D(64, 1)(residual)\n",
+    "x = layers.add([x, residual])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(32, 32, 3))\n",
+    "x = layers.Conv2D(32, 3, activation=\"relu\")(inputs)\n",
+    "residual = x\n",
+    "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "x = layers.MaxPooling2D(2, padding=\"same\")(x)\n",
+    "residual = layers.Conv2D(64, 1, strides=2)(residual)\n",
+    "x = layers.add([x, residual])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(32, 32, 3))\n",
+    "x = layers.Rescaling(1.0 / 255)(inputs)\n",
+    "\n",
+    "def residual_block(x, filters, pooling=False):\n",
+    "    residual = x\n",
+    "    x = layers.Conv2D(filters, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = layers.Conv2D(filters, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "    if pooling:\n",
+    "        x = layers.MaxPooling2D(2, padding=\"same\")(x)\n",
+    "        residual = layers.Conv2D(filters, 1, strides=2)(residual)\n",
+    "    elif filters != residual.shape[-1]:\n",
+    "        residual = layers.Conv2D(filters, 1)(residual)\n",
+    "    x = layers.add([x, residual])\n",
+    "    return x\n",
+    "\n",
+    "x = residual_block(x, filters=32, pooling=True)\n",
+    "x = residual_block(x, filters=64, pooling=True)\n",
+    "x = residual_block(x, filters=128, pooling=False)\n",
+    "\n",
+    "x = layers.GlobalAveragePooling2D()(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Batch normalization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Depthwise separable convolutions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Putting it together: a mini Xception-like model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import kagglehub\n",
+    "\n",
+    "kagglehub.login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import zipfile\n",
+    "\n",
+    "download_path = kagglehub.competition_download(\"dogs-vs-cats\")\n",
+    "\n",
+    "with zipfile.ZipFile(download_path + \"/train.zip\", \"r\") as zip_ref:\n",
+    "    zip_ref.extractall(\".\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os, shutil, pathlib\n",
+    "from keras.utils import image_dataset_from_directory\n",
+    "\n",
+    "original_dir = pathlib.Path(\"train\")\n",
+    "new_base_dir = pathlib.Path(\"dogs_vs_cats_small\")\n",
+    "\n",
+    "def make_subset(subset_name, start_index, end_index):\n",
+    "    for category in (\"cat\", \"dog\"):\n",
+    "        dir = new_base_dir / subset_name / category\n",
+    "        os.makedirs(dir)\n",
+    "        fnames = [f\"{category}.{i}.jpg\" for i in range(start_index, end_index)]\n",
+    "        for fname in fnames:\n",
+    "            shutil.copyfile(src=original_dir / fname, dst=dir / fname)\n",
+    "\n",
+    "make_subset(\"train\", start_index=0, end_index=1000)\n",
+    "make_subset(\"validation\", start_index=1000, end_index=1500)\n",
+    "make_subset(\"test\", start_index=1500, end_index=2500)\n",
+    "\n",
+    "batch_size = 64\n",
+    "image_size = (180, 180)\n",
+    "train_dataset = image_dataset_from_directory(\n",
+    "    new_base_dir / \"train\",\n",
+    "    image_size=image_size,\n",
+    "    batch_size=batch_size,\n",
+    ")\n",
+    "validation_dataset = image_dataset_from_directory(\n",
+    "    new_base_dir / \"validation\",\n",
+    "    image_size=image_size,\n",
+    "    batch_size=batch_size,\n",
+    ")\n",
+    "test_dataset = image_dataset_from_directory(\n",
+    "    new_base_dir / \"test\",\n",
+    "    image_size=image_size,\n",
+    "    batch_size=batch_size,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "from keras import layers\n",
+    "\n",
+    "data_augmentation_layers = [\n",
+    "    layers.RandomFlip(\"horizontal\"),\n",
+    "    layers.RandomRotation(0.1),\n",
+    "    layers.RandomZoom(0.2),\n",
+    "]\n",
+    "\n",
+    "def data_augmentation(images, targets):\n",
+    "    for layer in data_augmentation_layers:\n",
+    "        images = layer(images)\n",
+    "    return images, targets\n",
+    "\n",
+    "augmented_train_dataset = train_dataset.map(\n",
+    "    data_augmentation, num_parallel_calls=8\n",
+    ")\n",
+    "augmented_train_dataset = augmented_train_dataset.prefetch(tf.data.AUTOTUNE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "inputs = keras.Input(shape=(180, 180, 3))\n",
+    "x = layers.Rescaling(1.0 / 255)(inputs)\n",
+    "x = layers.Conv2D(filters=32, kernel_size=5, use_bias=False)(x)\n",
+    "\n",
+    "for size in [32, 64, 128, 256, 512]:\n",
+    "    residual = x\n",
+    "\n",
+    "    x = layers.BatchNormalization()(x)\n",
+    "    x = layers.Activation(\"relu\")(x)\n",
+    "    x = layers.SeparableConv2D(size, 3, padding=\"same\", use_bias=False)(x)\n",
+    "\n",
+    "    x = layers.BatchNormalization()(x)\n",
+    "    x = layers.Activation(\"relu\")(x)\n",
+    "    x = layers.SeparableConv2D(size, 3, padding=\"same\", use_bias=False)(x)\n",
+    "\n",
+    "    x = layers.MaxPooling2D(3, strides=2, padding=\"same\")(x)\n",
+    "\n",
+    "    residual = layers.Conv2D(\n",
+    "        size, 1, strides=2, padding=\"same\", use_bias=False\n",
+    "    )(residual)\n",
+    "    x = layers.add([x, residual])\n",
+    "\n",
+    "x = layers.GlobalAveragePooling2D()(x)\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    optimizer=\"adam\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "history = model.fit(\n",
+    "    augmented_train_dataset,\n",
+    "    epochs=100,\n",
+    "    validation_data=validation_dataset,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Beyond convolution: Vision Transformers"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter09_convnet-architecture-best-practices",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter10_interpreting-what-vision-models-learn.ipynb b/chapter10_interpreting-what-vision-models-learn.ipynb
new file mode 100644
index 0000000000..4cc5cb308e
--- /dev/null
+++ b/chapter10_interpreting-what-vision-models-learn.ipynb
@@ -0,0 +1,962 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Visualizing intermediate activations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "You can use this to load the file"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "\"convnet_from_scratch_with_augmentation.keras\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "you obtained in the last chapter."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# You can use this to load the file\n",
+    "# \"convnet_from_scratch_with_augmentation.keras\"\n",
+    "# you obtained in the last chapter.\n",
+    "from google.colab import files\n",
+    "\n",
+    "files.upload()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "model = keras.models.load_model(\n",
+    "    \"convnet_from_scratch_with_augmentation.keras\"\n",
+    ")\n",
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "import numpy as np\n",
+    "\n",
+    "img_path = keras.utils.get_file(\n",
+    "    fname=\"cat.jpg\", origin=\"https://img-datasets.s3.amazonaws.com/cat.jpg\"\n",
+    ")\n",
+    "\n",
+    "def get_img_array(img_path, target_size):\n",
+    "    img = keras.utils.load_img(img_path, target_size=target_size)\n",
+    "    array = keras.utils.img_to_array(img)\n",
+    "    array = np.expand_dims(array, axis=0)\n",
+    "    return array\n",
+    "\n",
+    "img_tensor = get_img_array(img_path, target_size=(180, 180))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.axis(\"off\")\n",
+    "plt.imshow(img_tensor[0].astype(\"uint8\"))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import layers\n",
+    "\n",
+    "layer_outputs = []\n",
+    "layer_names = []\n",
+    "for layer in model.layers:\n",
+    "    if isinstance(layer, (layers.Conv2D, layers.MaxPooling2D)):\n",
+    "        layer_outputs.append(layer.output)\n",
+    "        layer_names.append(layer.name)\n",
+    "activation_model = keras.Model(inputs=model.input, outputs=layer_outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "activations = activation_model.predict(img_tensor)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "first_layer_activation = activations[0]\n",
+    "print(first_layer_activation.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.matshow(first_layer_activation[0, :, :, 5], cmap=\"viridis\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "images_per_row = 16\n",
+    "for layer_name, layer_activation in zip(layer_names, activations):\n",
+    "    n_features = layer_activation.shape[-1]\n",
+    "    size = layer_activation.shape[1]\n",
+    "    n_cols = n_features // images_per_row\n",
+    "    display_grid = np.zeros(\n",
+    "        ((size + 1) * n_cols - 1, images_per_row * (size + 1) - 1)\n",
+    "    )\n",
+    "    for col in range(n_cols):\n",
+    "        for row in range(images_per_row):\n",
+    "            channel_index = col * images_per_row + row\n",
+    "            channel_image = layer_activation[0, :, :, channel_index].copy()\n",
+    "            if channel_image.sum() != 0:\n",
+    "                channel_image -= channel_image.mean()\n",
+    "                channel_image /= channel_image.std()\n",
+    "                channel_image *= 64\n",
+    "                channel_image += 128\n",
+    "            channel_image = np.clip(channel_image, 0, 255).astype(\"uint8\")\n",
+    "            display_grid[\n",
+    "                col * (size + 1) : (col + 1) * size + col,\n",
+    "                row * (size + 1) : (row + 1) * size + row,\n",
+    "            ] = channel_image\n",
+    "    scale = 1.0 / size\n",
+    "    plt.figure(\n",
+    "        figsize=(scale * display_grid.shape[1], scale * display_grid.shape[0])\n",
+    "    )\n",
+    "    plt.title(layer_name)\n",
+    "    plt.grid(False)\n",
+    "    plt.axis(\"off\")\n",
+    "    plt.imshow(display_grid, aspect=\"auto\", cmap=\"viridis\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Visualizing convnet filters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.applications.xception.Xception(\n",
+    "    weights=\"imagenet\",\n",
+    "    include_top=False,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "for layer in model.layers:\n",
+    "    if isinstance(layer, (keras.layers.Conv2D, keras.layers.SeparableConv2D)):\n",
+    "        print(layer.name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "layer_name = \"block3_sepconv1\"\n",
+    "layer = model.get_layer(name=layer_name)\n",
+    "feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "activation = feature_extractor(\n",
+    "    keras.applications.xception.preprocess_input(img_tensor)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "def compute_loss(image, filter_index):\n",
+    "    activation = feature_extractor(image)\n",
+    "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
+    "    return ops.mean(filter_activation)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Gradient ascent in TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"tensorflow\")\n",
+    "\n",
+    "import keras\n",
+    "from keras import ops\n",
+    "\n",
+    "model = keras.applications.Xception(weights=\"imagenet\", include_top=False)\n",
+    "\n",
+    "layer_name = \"block3_sepconv1\"\n",
+    "layer = model.get_layer(name=layer_name)\n",
+    "feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)\n",
+    "\n",
+    "def compute_loss(image, filter_index):\n",
+    "    activation = feature_extractor(image)\n",
+    "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
+    "    return ops.mean(filter_activation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "@tf.function\n",
+    "def gradient_ascent_step(image, filter_index, learning_rate):\n",
+    "    with tf.GradientTape() as tape:\n",
+    "        tape.watch(image)\n",
+    "        loss = compute_loss(image, filter_index)\n",
+    "    grads = tape.gradient(loss, image)\n",
+    "    grads = ops.normalize(grads)\n",
+    "    image += learning_rate * grads\n",
+    "    return image"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Gradient ascent in PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"torch\")\n",
+    "\n",
+    "from keras import ops\n",
+    "\n",
+    "def compute_loss(image, filter_index):\n",
+    "    activation = feature_extractor(image)\n",
+    "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
+    "    return ops.mean(filter_activation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "def gradient_ascent_step(image, filter_index, learning_rate):\n",
+    "    image = image.clone().detach().requires_grad_(True)\n",
+    "    loss = compute_loss(image, filter_index)\n",
+    "    loss.backward()\n",
+    "    grads = image.grad\n",
+    "    grads = ops.normalize(grads)\n",
+    "    image = image + learning_rate * grads\n",
+    "    return image"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Gradient ascent in JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"jax\")\n",
+    "\n",
+    "import keras\n",
+    "from keras import ops\n",
+    "\n",
+    "model = keras.applications.Xception(weights=\"imagenet\", include_top=False)\n",
+    "\n",
+    "layer_name = \"block3_sepconv1\"\n",
+    "layer = model.get_layer(name=layer_name)\n",
+    "feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)\n",
+    "\n",
+    "def compute_loss(image, filter_index):\n",
+    "    activation = feature_extractor(image)\n",
+    "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
+    "    return ops.mean(filter_activation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import jax\n",
+    "\n",
+    "grad_fn = jax.grad(compute_loss)\n",
+    "\n",
+    "@jax.jit\n",
+    "def gradient_ascent_step(image, filter_index, learning_rate):\n",
+    "    grads = grad_fn(image, filter_index)\n",
+    "    grads = ops.normalize(grads)\n",
+    "    image += learning_rate * grads\n",
+    "    return image"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The filter visualization loop"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "img_width = 200\n",
+    "img_height = 200\n",
+    "\n",
+    "def generate_filter_pattern(filter_index):\n",
+    "    iterations = 30\n",
+    "    learning_rate = 10.0\n",
+    "    image = keras.random.uniform(\n",
+    "        minval=0.4, maxval=0.6, shape=(1, img_width, img_height, 3)\n",
+    "    )\n",
+    "    for i in range(iterations):\n",
+    "        image = gradient_ascent_step(image, filter_index, learning_rate)\n",
+    "    return image[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def deprocess_image(image):\n",
+    "    image -= ops.mean(image)\n",
+    "    image /= ops.std(image)\n",
+    "    image *= 64\n",
+    "    image += 128\n",
+    "    image = ops.clip(image, 0, 255)\n",
+    "    image = image[25:-25, 25:-25, :]\n",
+    "    image = ops.cast(image, dtype=\"uint8\")\n",
+    "    return ops.convert_to_numpy(image)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "plt.axis(\"off\")\n",
+    "plt.imshow(deprocess_image(generate_filter_pattern(filter_index=2)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "all_images = []\n",
+    "for filter_index in range(64):\n",
+    "    print(f\"Processing filter {filter_index}\")\n",
+    "    image = deprocess_image(generate_filter_pattern(filter_index))\n",
+    "    all_images.append(image)\n",
+    "\n",
+    "margin = 5\n",
+    "n = 8\n",
+    "box_width = img_width - 25 * 2\n",
+    "box_height = img_height - 25 * 2\n",
+    "full_width = n * box_width + (n - 1) * margin\n",
+    "full_height = n * box_height + (n - 1) * margin\n",
+    "stitched_filters = np.zeros((full_width, full_height, 3))\n",
+    "\n",
+    "for i in range(n):\n",
+    "    for j in range(n):\n",
+    "        image = all_images[i * n + j]\n",
+    "        stitched_filters[\n",
+    "            (box_width + margin) * i : (box_width + margin) * i + box_width,\n",
+    "            (box_height + margin) * j : (box_height + margin) * j + box_height,\n",
+    "            :,\n",
+    "        ] = image\n",
+    "\n",
+    "keras.utils.save_img(f\"filters_for_layer_{layer_name}.png\", stitched_filters)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Visualizing heatmaps of class activation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "img_path = keras.utils.get_file(\n",
+    "    fname=\"elephant.jpg\",\n",
+    "    origin=\"https://img-datasets.s3.amazonaws.com/elephant.jpg\",\n",
+    ")\n",
+    "\n",
+    "def get_img_array(img_path, target_size):\n",
+    "    img = keras.utils.load_img(img_path, target_size=target_size)\n",
+    "    array = keras.utils.img_to_array(img)\n",
+    "    array = np.expand_dims(array, axis=0)\n",
+    "    array = keras.applications.xception.preprocess_input(array)\n",
+    "    return array\n",
+    "\n",
+    "img_array = get_img_array(img_path, target_size=(299, 299))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "preds = model.predict(img_array)\n",
+    "print(keras.applications.xception.decode_predictions(preds, top=3)[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "np.argmax(preds[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "last_conv_layer_name = \"block14_sepconv2_act\"\n",
+    "classifier_layer_names = [\n",
+    "    \"avg_pool\",\n",
+    "    \"predictions\",\n",
+    "]\n",
+    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "x = classifier_input\n",
+    "for layer_name in classifier_layer_names:\n",
+    "    x = model.get_layer(layer_name)(x)\n",
+    "classifier_model = keras.Model(classifier_input, x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Getting the gradient of the top class: TensorFlow version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"tensorflow\")\n",
+    "import keras\n",
+    "from keras import ops\n",
+    "\n",
+    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "\n",
+    "last_conv_layer_name = \"block14_sepconv2_act\"\n",
+    "classifier_layer_names = [\n",
+    "    \"avg_pool\",\n",
+    "    \"predictions\",\n",
+    "]\n",
+    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
+    "\n",
+    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "x = classifier_input\n",
+    "for layer_name in classifier_layer_names:\n",
+    "    x = model.get_layer(layer_name)(x)\n",
+    "classifier_model = keras.Model(classifier_input, x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "def get_top_class_gradients(img_array):\n",
+    "    last_conv_layer_output = last_conv_layer_model(img_array)\n",
+    "    with tf.GradientTape() as tape:\n",
+    "        tape.watch(last_conv_layer_output)\n",
+    "        preds = classifier_model(last_conv_layer_output)\n",
+    "        top_pred_index = ops.argmax(preds[0])\n",
+    "        top_class_channel = preds[:, top_pred_index]\n",
+    "\n",
+    "    grads = tape.gradient(top_class_channel, last_conv_layer_output)\n",
+    "    return grads, last_conv_layer_output\n",
+    "\n",
+    "grads, last_conv_layer_output = get_top_class_gradients(img_array)\n",
+    "grads = ops.convert_to_numpy(grads)\n",
+    "last_conv_layer_output = ops.convert_to_numpy(last_conv_layer_output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Getting the gradient of the top class: PyTorch version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"torch\")\n",
+    "import keras\n",
+    "from keras import ops\n",
+    "\n",
+    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "\n",
+    "last_conv_layer_name = \"block14_sepconv2_act\"\n",
+    "classifier_layer_names = [\n",
+    "    \"avg_pool\",\n",
+    "    \"predictions\",\n",
+    "]\n",
+    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
+    "\n",
+    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "x = classifier_input\n",
+    "for layer_name in classifier_layer_names:\n",
+    "    x = model.get_layer(layer_name)(x)\n",
+    "classifier_model = keras.Model(classifier_input, x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_top_class_gradients(img_array):\n",
+    "    last_conv_layer_output = last_conv_layer_model(img_array)\n",
+    "    last_conv_layer_output = (\n",
+    "        last_conv_layer_output.clone().detach().requires_grad_(True)\n",
+    "    )\n",
+    "    preds = classifier_model(last_conv_layer_output)\n",
+    "    top_pred_index = ops.argmax(preds[0])\n",
+    "    top_class_channel = preds[:, top_pred_index]\n",
+    "    top_class_channel.backward()\n",
+    "    grads = last_conv_layer_output.grad\n",
+    "    return grads, last_conv_layer_output\n",
+    "\n",
+    "grads, last_conv_layer_output = get_top_class_gradients(img_array)\n",
+    "grads = ops.convert_to_numpy(grads)\n",
+    "last_conv_layer_output = ops.convert_to_numpy(last_conv_layer_output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Getting the gradient of the top class: JAX version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"jax\")\n",
+    "import keras\n",
+    "from keras import ops\n",
+    "\n",
+    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "\n",
+    "last_conv_layer_name = \"block14_sepconv2_act\"\n",
+    "classifier_layer_names = [\n",
+    "    \"avg_pool\",\n",
+    "    \"predictions\",\n",
+    "]\n",
+    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
+    "\n",
+    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "x = classifier_input\n",
+    "for layer_name in classifier_layer_names:\n",
+    "    x = model.get_layer(layer_name)(x)\n",
+    "classifier_model = keras.Model(classifier_input, x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import jax\n",
+    "\n",
+    "def loss_fn(last_conv_layer_output):\n",
+    "    preds = classifier_model(last_conv_layer_output)\n",
+    "    top_pred_index = ops.argmax(preds[0])\n",
+    "    top_class_channel = preds[:, top_pred_index]\n",
+    "    return top_class_channel[0]\n",
+    "\n",
+    "grad_fn = jax.grad(loss_fn)\n",
+    "\n",
+    "def get_top_class_gradients(img_array):\n",
+    "    last_conv_layer_output = last_conv_layer_model(img_array)\n",
+    "    grads = grad_fn(last_conv_layer_output)\n",
+    "    return grads, last_conv_layer_output\n",
+    "\n",
+    "grads, last_conv_layer_output = get_top_class_gradients(img_array)\n",
+    "grads = ops.convert_to_numpy(grads)\n",
+    "last_conv_layer_output = ops.convert_to_numpy(last_conv_layer_output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Displaying the class activation heatmap"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "pooled_grads = np.mean(grads, axis=(0, 1, 2))\n",
+    "last_conv_layer_output = last_conv_layer_output[0].copy()\n",
+    "for i in range(pooled_grads.shape[-1]):\n",
+    "    last_conv_layer_output[:, :, i] *= pooled_grads[i]\n",
+    "heatmap = np.mean(last_conv_layer_output, axis=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "heatmap = np.maximum(heatmap, 0)\n",
+    "heatmap /= np.max(heatmap)\n",
+    "plt.matshow(heatmap)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "img = keras.utils.load_img(img_path)\n",
+    "img = keras.utils.img_to_array(img)\n",
+    "\n",
+    "heatmap = np.uint8(255 * heatmap)\n",
+    "\n",
+    "jet = plt.get_cmap(\"jet\")  # cm.get_cmap was removed in Matplotlib 3.9\n",
+    "jet_colors = jet(np.arange(256))[:, :3]\n",
+    "jet_heatmap = jet_colors[heatmap]\n",
+    "\n",
+    "jet_heatmap = keras.utils.array_to_img(jet_heatmap)\n",
+    "jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))\n",
+    "jet_heatmap = keras.utils.img_to_array(jet_heatmap)\n",
+    "\n",
+    "superimposed_img = jet_heatmap * 0.4 + img\n",
+    "superimposed_img = keras.utils.array_to_img(superimposed_img)\n",
+    "\n",
+    "save_path = \"elephant_cam.jpg\"\n",
+    "superimposed_img.save(save_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Visualizing the latent space of a convnet"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter10_interpreting-what-vision-models-learn",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
new file mode 100644
index 0000000000..315c7b5f5c
--- /dev/null
+++ b/chapter11_image-segmentation.ipynb
@@ -0,0 +1,659 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A universe of computer vision tasks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Image segmentation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training a small semantic segmentation model from scratch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz\n",
+    "!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz\n",
+    "!tar -xf images.tar.gz\n",
+    "!tar -xf annotations.tar.gz"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "input_dir = \"images/\"\n",
+    "target_dir = \"annotations/trimaps/\"\n",
+    "\n",
+    "input_img_paths = sorted(\n",
+    "    [\n",
+    "        os.path.join(input_dir, fname)\n",
+    "        for fname in os.listdir(input_dir)\n",
+    "        if fname.endswith(\".jpg\")\n",
+    "    ]\n",
+    ")\n",
+    "target_paths = sorted(\n",
+    "    [\n",
+    "        os.path.join(target_dir, fname)\n",
+    "        for fname in os.listdir(target_dir)\n",
+    "        if fname.endswith(\".png\") and not fname.startswith(\".\")\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from keras.utils import load_img, img_to_array\n",
+    "\n",
+    "plt.axis(\"off\")\n",
+    "plt.imshow(load_img(input_img_paths[9]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def display_target(target_array):\n",
+    "    normalized_array = (target_array.astype(\"uint8\") - 1) * 127\n",
+    "    plt.axis(\"off\")\n",
+    "    plt.imshow(normalized_array[:, :, 0])\n",
+    "\n",
+    "img = img_to_array(load_img(target_paths[9], color_mode=\"grayscale\"))\n",
+    "display_target(img)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import random\n",
+    "\n",
+    "img_size = (200, 200)\n",
+    "num_imgs = len(input_img_paths)\n",
+    "\n",
+    "random.Random(1337).shuffle(input_img_paths)\n",
+    "random.Random(1337).shuffle(target_paths)\n",
+    "\n",
+    "def path_to_input_image(path):\n",
+    "    return img_to_array(load_img(path, target_size=img_size))\n",
+    "\n",
+    "def path_to_target(path):\n",
+    "    img = img_to_array(\n",
+    "        load_img(path, target_size=img_size, color_mode=\"grayscale\")\n",
+    "    )\n",
+    "    img = img.astype(\"uint8\") - 1\n",
+    "    return img\n",
+    "\n",
+    "input_imgs = np.zeros((num_imgs,) + img_size + (3,), dtype=\"float32\")\n",
+    "targets = np.zeros((num_imgs,) + img_size + (1,), dtype=\"uint8\")\n",
+    "for i in range(num_imgs):\n",
+    "    input_imgs[i] = path_to_input_image(input_img_paths[i])\n",
+    "    targets[i] = path_to_target(target_paths[i])\n",
+    "\n",
+    "num_val_samples = 1000\n",
+    "train_input_imgs = input_imgs[:-num_val_samples]\n",
+    "train_targets = targets[:-num_val_samples]\n",
+    "val_input_imgs = input_imgs[-num_val_samples:]\n",
+    "val_targets = targets[-num_val_samples:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras.layers import Rescaling, Conv2D, Conv2DTranspose\n",
+    "\n",
+    "def get_model(img_size, num_classes):\n",
+    "    inputs = keras.Input(shape=img_size + (3,))\n",
+    "    x = Rescaling(1.0 / 255)(inputs)\n",
+    "\n",
+    "    x = Conv2D(64, 3, strides=2, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2D(64, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2D(128, 3, strides=2, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2D(128, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2D(256, 3, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+    "    x = Conv2D(256, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "\n",
+    "    x = Conv2DTranspose(256, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2DTranspose(256, 3, strides=2, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2DTranspose(128, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2DTranspose(128, 3, strides=2, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2DTranspose(64, 3, activation=\"relu\", padding=\"same\")(x)\n",
+    "    x = Conv2DTranspose(64, 3, strides=2, activation=\"relu\", padding=\"same\")(x)\n",
+    "\n",
+    "    outputs = Conv2D(num_classes, 3, activation=\"softmax\", padding=\"same\")(x)\n",
+    "\n",
+    "    return keras.Model(inputs, outputs)\n",
+    "\n",
+    "model = get_model(img_size=img_size, num_classes=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "foreground_iou = keras.metrics.IoU(\n",
+    "    num_classes=3,\n",
+    "    target_class_ids=(0,),\n",
+    "    name=\"foreground_iou\",\n",
+    "    sparse_y_true=True,\n",
+    "    sparse_y_pred=False,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[foreground_iou],\n",
+    ")\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        \"oxford_segmentation.keras\",\n",
+    "        save_best_only=True,\n",
+    "    ),\n",
+    "]\n",
+    "history = model.fit(\n",
+    "    train_input_imgs,\n",
+    "    train_targets,\n",
+    "    epochs=50,\n",
+    "    callbacks=callbacks,\n",
+    "    batch_size=64,\n",
+    "    validation_data=(val_input_imgs, val_targets),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "epochs = range(1, len(history.history[\"loss\"]) + 1)\n",
+    "loss = history.history[\"loss\"]\n",
+    "val_loss = history.history[\"val_loss\"]\n",
+    "plt.figure()\n",
+    "plt.plot(epochs, loss, \"r--\", label=\"Training loss\")\n",
+    "plt.plot(epochs, val_loss, \"b\", label=\"Validation loss\")\n",
+    "plt.title(\"Training and validation loss\")\n",
+    "plt.legend()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras.utils import array_to_img\n",
+    "\n",
+    "model = keras.models.load_model(\"oxford_segmentation.keras\")\n",
+    "\n",
+    "i = 4\n",
+    "test_image = val_input_imgs[i]\n",
+    "plt.axis(\"off\")\n",
+    "plt.imshow(array_to_img(test_image))\n",
+    "\n",
+    "mask = model.predict(np.expand_dims(test_image, 0))[0]\n",
+    "\n",
+    "def display_mask(pred):\n",
+    "    mask = np.argmax(pred, axis=-1)\n",
+    "    mask *= 127\n",
+    "    plt.axis(\"off\")\n",
+    "    plt.imshow(mask)\n",
+    "\n",
+    "display_mask(mask)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Image segmentation with a pretrained model: Segment Anything"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Prepare a test image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "path = keras.utils.get_file(\n",
+    "    origin=\"https://s3.amazonaws.com/keras.io/img/book/fruits.jpg\"\n",
+    ")\n",
+    "pil_image = keras.utils.load_img(path)\n",
+    "image_array = keras.utils.img_to_array(pil_image)\n",
+    "\n",
+    "plt.imshow(image_array.astype(\"uint8\"))\n",
+    "plt.axis(\"off\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "image_size = (1024, 1024)\n",
+    "\n",
+    "def resize_and_pad(x):\n",
+    "    return ops.image.resize(x, image_size, pad_to_aspect_ratio=True)\n",
+    "\n",
+    "image = resize_and_pad(image_array)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Get the pretrained SAM model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras_hub\n",
+    "\n",
+    "model = keras_hub.models.SAMImageSegmenter.from_preset(\"sam_huge_sa1b\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from keras import ops\n",
+    "\n",
+    "def show_image(image, ax):\n",
+    "    ax.imshow(ops.convert_to_numpy(image).astype(\"uint8\"))\n",
+    "\n",
+    "def show_mask(mask, ax):\n",
+    "    color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])\n",
+    "    h, w, _ = mask.shape\n",
+    "    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)\n",
+    "    ax.imshow(mask_image)\n",
+    "\n",
+    "def show_points(coords, labels, ax, marker_size=375):\n",
+    "    pos_points = coords[labels == 1]\n",
+    "    neg_points = coords[labels == 0]\n",
+    "    ax.scatter(\n",
+    "        pos_points[:, 0],\n",
+    "        pos_points[:, 1],\n",
+    "        color=\"green\",\n",
+    "        marker=\"*\",\n",
+    "        s=marker_size,\n",
+    "        edgecolor=\"white\",\n",
+    "        linewidth=1.25,\n",
+    "    )\n",
+    "    ax.scatter(\n",
+    "        neg_points[:, 0],\n",
+    "        neg_points[:, 1],\n",
+    "        color=\"red\",\n",
+    "        marker=\"*\",\n",
+    "        s=marker_size,\n",
+    "        edgecolor=\"white\",\n",
+    "        linewidth=1.25,\n",
+    "    )\n",
+    "\n",
+    "def show_box(box, ax):\n",
+    "    box = box.reshape(-1)\n",
+    "    x0, y0 = box[0], box[1]\n",
+    "    w, h = box[2] - box[0], box[3] - box[1]\n",
+    "    ax.add_patch(plt.Rectangle((x0, y0), w, h, ec=\"green\", fc=\"none\", lw=2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Prompting SAM with a target point"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "input_point = np.array([[580, 450]])\n",
+    "input_label = np.array([1])\n",
+    "\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "show_image(image, plt.gca())\n",
+    "show_points(input_point, input_label, plt.gca())\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "outputs = model.predict(\n",
+    "    {\n",
+    "        \"images\": ops.expand_dims(image, axis=0),\n",
+    "        \"points\": ops.expand_dims(input_point, axis=0),\n",
+    "        \"labels\": ops.expand_dims(input_label, axis=0),\n",
+    "    }\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "outputs[\"masks\"].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_mask(sam_outputs, index=0):\n",
+    "    mask = sam_outputs[\"masks\"][0][index]  # use the argument, not a global\n",
+    "    mask = np.expand_dims(mask, axis=-1)\n",
+    "    mask = resize_and_pad(mask)\n",
+    "    return ops.convert_to_numpy(mask) > 0.0\n",
+    "\n",
+    "mask = get_mask(outputs, index=0)\n",
+    "\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "show_image(image, plt.gca())\n",
+    "show_mask(mask, plt.gca())\n",
+    "show_points(input_point, input_label, plt.gca())\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_point = np.array([[300, 550]])\n",
+    "input_label = np.array([1])\n",
+    "\n",
+    "outputs = model.predict(\n",
+    "    {\n",
+    "        \"images\": ops.expand_dims(image, axis=0),\n",
+    "        \"points\": ops.expand_dims(input_point, axis=0),\n",
+    "        \"labels\": ops.expand_dims(input_label, axis=0),\n",
+    "    }\n",
+    ")\n",
+    "mask = get_mask(outputs, index=0)\n",
+    "\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "show_image(image, plt.gca())\n",
+    "show_mask(mask, plt.gca())\n",
+    "show_points(input_point, input_label, plt.gca())\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "fig, axes = plt.subplots(1, 3, figsize=(20, 60))\n",
+    "masks = outputs[\"masks\"][0][1:]\n",
+    "for i, mask in enumerate(masks):\n",
+    "    show_image(image, axes[i])\n",
+    "    show_points(input_point, input_label, axes[i])\n",
+    "    mask = get_mask(outputs, index=i + 1)\n",
+    "    show_mask(mask, axes[i])\n",
+    "    axes[i].set_title(f\"Mask {i + 1}\", fontsize=16)\n",
+    "    axes[i].axis(\"off\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Prompting SAM with a target box"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_box = np.array(\n",
+    "    [\n",
+    "        [520, 180],\n",
+    "        [770, 420],\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "show_image(image, plt.gca())\n",
+    "show_box(input_box, plt.gca())\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "outputs = model.predict(\n",
+    "    {\n",
+    "        \"images\": ops.expand_dims(image, axis=0),\n",
+    "        \"boxes\": ops.expand_dims(input_box, axis=(0, 1)),\n",
+    "    }\n",
+    ")\n",
+    "mask = get_mask(outputs, 0)\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "show_image(image, plt.gca())\n",
+    "show_mask(mask, plt.gca())\n",
+    "show_box(input_box, plt.gca())\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter11_image-segmentation",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
new file mode 100644
index 0000000000..c97b75f870
--- /dev/null
+++ b/chapter12_object-detection.ipynb
@@ -0,0 +1,506 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Two families of object detection models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### The R-CNN architecture"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Single-stage detectors"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Object detection with a pretrained model: RetinaNet"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Introducing the dataset: Pascal VOC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar\n",
+    "!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar\n",
+    "\n",
+    "!tar -xf VOCtrainval_06-Nov-2007.tar\n",
+    "!tar -xf VOCtest_06-Nov-2007.tar"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import numpy as np\n",
+    "import xml.etree.ElementTree as ET\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "BASE_DIR = os.path.join(os.getcwd(), \"VOCdevkit\", \"VOC2007\")\n",
+    "IMAGE_DIR = os.path.join(BASE_DIR, \"JPEGImages\")\n",
+    "ANNOTATION_DIR = os.path.join(BASE_DIR, \"Annotations\")\n",
+    "IMAGESET_DIR = os.path.join(BASE_DIR, \"ImageSets\", \"Main\")\n",
+    "CLASSES = {\n",
+    "    0: \"aeroplane\",\n",
+    "    1: \"bicycle\",\n",
+    "    2: \"bird\",\n",
+    "    3: \"boat\",\n",
+    "    4: \"bottle\",\n",
+    "    5: \"bus\",\n",
+    "    6: \"car\",\n",
+    "    7: \"cat\",\n",
+    "    8: \"chair\",\n",
+    "    9: \"cow\",\n",
+    "    10: \"diningtable\",\n",
+    "    11: \"dog\",\n",
+    "    12: \"horse\",\n",
+    "    13: \"motorbike\",\n",
+    "    14: \"person\",\n",
+    "    15: \"pottedplant\",\n",
+    "    16: \"sheep\",\n",
+    "    17: \"sofa\",\n",
+    "    18: \"train\",\n",
+    "    19: \"tvmonitor\",\n",
+    "}\n",
+    "\n",
+    "def parse_annotation(path):\n",
+    "    tree = ET.parse(path)\n",
+    "    root = tree.getroot()\n",
+    "    bboxes = []\n",
+    "    labels = []\n",
+    "\n",
+    "    for obj in root.findall(\"object\"):\n",
+    "        name = obj.find(\"name\").text\n",
+    "        difficult = int(obj.find(\"difficult\").text)\n",
+    "        if difficult:\n",
+    "            continue\n",
+    "\n",
+    "        bbox = obj.find(\"bndbox\")\n",
+    "        size = root.find(\"size\")\n",
+    "        width = float(size.find(\"width\").text)\n",
+    "        height = float(size.find(\"height\").text)\n",
+    "\n",
+    "        xmin = float(bbox.find(\"xmin\").text) / width\n",
+    "        ymin = float(bbox.find(\"ymin\").text) / height\n",
+    "        xmax = float(bbox.find(\"xmax\").text) / width\n",
+    "        ymax = float(bbox.find(\"ymax\").text) / height\n",
+    "        bboxes.append([ymin, xmin, ymax, xmax])\n",
+    "\n",
+    "        class_idx = [k for k, v in CLASSES.items() if v == name][0]\n",
+    "        labels.append(class_idx)\n",
+    "    bboxes = tf.reshape(tf.constant(bboxes, dtype=tf.float32), (-1, 4))\n",
+    "    labels = tf.constant(labels, dtype=tf.int64)  # matches Tout in get_dataset\n",
+    "    return bboxes, labels\n",
+    "\n",
+    "def process_example(image_id):\n",
+    "    image_id = tf.compat.as_str_any(image_id.numpy())\n",
+    "    image_path = os.path.join(IMAGE_DIR, f\"{image_id.rstrip()}.jpg\")\n",
+    "    image_data = tf.io.read_file(image_path)\n",
+    "    image = tf.io.decode_jpeg(image_data, channels=3)\n",
+    "    path = os.path.join(ANNOTATION_DIR, f\"{image_id.rstrip()}.xml\")\n",
+    "    bboxes, labels = parse_annotation(path)\n",
+    "    return image, bboxes, labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_dataset(split, shuffle_files=True, shuffle_buffer_size=1000):\n",
+    "    split_file = os.path.join(IMAGESET_DIR, f\"{split}.txt\")\n",
+    "    with open(split_file, \"r\") as f:\n",
+    "        image_ids = [x.strip() for x in f.readlines()]\n",
+    "\n",
+    "    ds = tf.data.Dataset.from_tensor_slices(image_ids)\n",
+    "\n",
+    "    if shuffle_files:\n",
+    "        ds = ds.shuffle(shuffle_buffer_size)\n",
+    "\n",
+    "    ds = ds.map(\n",
+    "        lambda x: tf.py_function(\n",
+    "            func=process_example, inp=[x], Tout=[tf.uint8, tf.float32, tf.int64]\n",
+    "        ),\n",
+    "        num_parallel_calls=tf.data.AUTOTUNE,\n",
+    "    )\n",
+    "    ds = ds.map(\n",
+    "        lambda image, bbox, label: {\n",
+    "            \"image\": tf.ensure_shape(image, [None, None, 3]),\n",
+    "            \"objects\": {\n",
+    "                \"bbox\": tf.ensure_shape(bbox, [None, 4]),\n",
+    "                \"label\": tf.ensure_shape(label, [None]),\n",
+    "            },\n",
+    "        }\n",
+    "    )\n",
+    "    return ds.prefetch(tf.data.AUTOTUNE)\n",
+    "\n",
+    "train_ds = get_dataset(\"trainval\", shuffle_files=True)\n",
+    "eval_ds = get_dataset(\"test\", shuffle_files=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras.visualization import plot_bounding_box_gallery\n",
+    "example = next(iter(train_ds))\n",
+    "plot_bounding_box_gallery(\n",
+    "    np.array([example[\"image\"]]),\n",
+    "    bounding_box_format=\"rel_yxyx\",\n",
+    "    y_true={\n",
+    "        \"boxes\": np.array([example[\"objects\"][\"bbox\"]]),\n",
+    "        \"labels\": np.array([example[\"objects\"][\"label\"]]),\n",
+    "    },\n",
+    "    scale=8,\n",
+    "    class_mapping=CLASSES,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Bounding box formats"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Setting up an image preprocessing and augmentation pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "BBOX_FORMAT = \"yxyx\"\n",
+    "\n",
+    "def parse_record(record):\n",
+    "    image = record[\"image\"]\n",
+    "    h, w = tf.shape(image)[0], tf.shape(image)[1]\n",
+    "    rel_boxes = record[\"objects\"][\"bbox\"]\n",
+    "    abs_boxes = keras.utils.bounding_boxes.convert_format(\n",
+    "        rel_boxes,\n",
+    "        source=\"rel_yxyx\",\n",
+    "        target=BBOX_FORMAT,\n",
+    "        height=h,\n",
+    "        width=w,\n",
+    "    )\n",
+    "    labels = tf.cast(record[\"objects\"][\"label\"], dtype=tf.int32)\n",
+    "    return {\n",
+    "        \"images\": image,\n",
+    "        \"bounding_boxes\": {\n",
+    "            \"boxes\": abs_boxes,\n",
+    "            \"labels\": labels,\n",
+    "        },\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras.visualization import plot_bounding_box_gallery\n",
+    "\n",
+    "IMAGE_SIZE = (640, 640)\n",
+    "BATCH_SIZE = 4\n",
+    "\n",
+    "resizing = keras.layers.Resizing(\n",
+    "    height=IMAGE_SIZE[0],\n",
+    "    width=IMAGE_SIZE[1],\n",
+    "    interpolation=\"bilinear\",\n",
+    "    pad_to_aspect_ratio=True,\n",
+    "    bounding_box_format=BBOX_FORMAT,\n",
+    ")\n",
+    "\n",
+    "max_box_layer = keras.layers.MaxNumBoundingBoxes(\n",
+    "    max_number=100,\n",
+    "    bounding_box_format=BBOX_FORMAT,\n",
+    ")\n",
+    "\n",
+    "data_augmentation_layers = [\n",
+    "    keras.layers.RandomFlip(mode=\"horizontal\", bounding_box_format=BBOX_FORMAT),\n",
+    "]\n",
+    "\n",
+    "def prepare_dataset(ds, batch_size=4):\n",
+    "    ds = ds.map(parse_record)\n",
+    "    ds = ds.map(lambda x: resizing(x))\n",
+    "    for layer in data_augmentation_layers:\n",
+    "        ds = ds.map(lambda x: layer(x))\n",
+    "    ds = ds.map(max_box_layer)\n",
+    "    ds = ds.batch(batch_size, drop_remainder=True)\n",
+    "    return ds.prefetch(tf.data.AUTOTUNE)\n",
+    "\n",
+    "train_ds_prepared = prepare_dataset(train_ds, batch_size=BATCH_SIZE)\n",
+    "eval_ds_prepared = prepare_dataset(eval_ds, batch_size=BATCH_SIZE)\n",
+    "\n",
+    "first_images_unprepared = next(iter(train_ds.take(1)))\n",
+    "\n",
+    "plot_bounding_box_gallery(\n",
+    "    np.array([first_images_unprepared[\"image\"]]),\n",
+    "    bounding_box_format=\"rel_yxyx\",\n",
+    "    y_true={\n",
+    "        \"boxes\": np.array([first_images_unprepared[\"objects\"][\"bbox\"]]),\n",
+    "        \"labels\": np.array([first_images_unprepared[\"objects\"][\"label\"]]),\n",
+    "    },\n",
+    "    scale=4,\n",
+    "    class_mapping=CLASSES,\n",
+    ")\n",
+    "\n",
+    "first_images_prepared = next(iter(train_ds_prepared.unbatch().take(1)))\n",
+    "\n",
+    "plot_bounding_box_gallery(\n",
+    "    np.array([first_images_prepared[\"images\"]]),\n",
+    "    bounding_box_format=\"yxyx\",\n",
+    "    y_true={\n",
+    "        \"boxes\": np.array([first_images_prepared[\"bounding_boxes\"][\"boxes\"]]),\n",
+    "        \"labels\": np.array([first_images_prepared[\"bounding_boxes\"][\"labels\"]]),\n",
+    "    },\n",
+    "    scale=4,\n",
+    "    class_mapping=CLASSES,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Fine-tuning the RetinaNet object detection model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras_hub\n",
+    "\n",
+    "model = keras_hub.models.ImageObjectDetector.from_preset(\n",
+    "    \"retinanet_resnet50_fpn_coco\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model_with_random_head = keras_hub.models.ImageObjectDetector.from_preset(\n",
+    "    \"retinanet_resnet50_fpn_coco\",\n",
+    "    num_classes=len(CLASSES),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def split_labels(x):\n",
+    "    return (\n",
+    "        x[\"images\"],\n",
+    "        {\n",
+    "            \"boxes\": x[\"bounding_boxes\"][\"boxes\"],\n",
+    "            \"classes\": x[\"bounding_boxes\"][\"labels\"],\n",
+    "        },\n",
+    "    )\n",
+    "\n",
+    "train_ds_prepared = train_ds_prepared.map(split_labels)\n",
+    "eval_ds_prepared = eval_ds_prepared.map(split_labels)\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        \"pascal_voc_detection.keras\",\n",
+    "        save_best_only=True,\n",
+    "        monitor=\"val_loss\",\n",
+    "    )\n",
+    "]\n",
+    "history = model.fit(\n",
+    "    train_ds_prepared,\n",
+    "    validation_data=eval_ds_prepared,\n",
+    "    epochs=10,\n",
+    "    callbacks=callbacks,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Metrics, evaluation, and inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "model = keras.models.load_model(\"pascal_voc_detection.keras\")\n",
+    "images, gt_boxes = next(iter(eval_ds_prepared))\n",
+    "predictions = model.predict(images)\n",
+    "\n",
+    "plot_bounding_box_gallery(\n",
+    "    images,\n",
+    "    bounding_box_format=BBOX_FORMAT,\n",
+    "    y_true={\n",
+    "        \"boxes\": gt_boxes[\"boxes\"],\n",
+    "        \"labels\": gt_boxes[\"classes\"],\n",
+    "    },\n",
+    "    y_pred={\n",
+    "        \"boxes\": predictions[\"boxes\"],\n",
+    "        \"labels\": predictions[\"classes\"],\n",
+    "    },\n",
+    "    scale=8,\n",
+    "    class_mapping=CLASSES,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter12_object-detection",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
new file mode 100644
index 0000000000..d62f3b635e
--- /dev/null
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -0,0 +1,689 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Different kinds of timeseries tasks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A temperature forecasting example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!wget https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip\n",
+    "!unzip jena_climate_2009_2016.csv.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "fname = os.path.join(\"jena_climate_2009_2016.csv\")\n",
+    "\n",
+    "with open(fname) as f:\n",
+    "    data = f.read()\n",
+    "\n",
+    "lines = data.split(\"\\n\")\n",
+    "header = lines[0].split(\",\")\n",
+    "lines = lines[1:]\n",
+    "print(header)\n",
+    "print(len(lines))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "temperature = np.zeros((len(lines),))\n",
+    "raw_data = np.zeros((len(lines), len(header) - 1))\n",
+    "\n",
+    "for i, line in enumerate(lines):\n",
+    "    values = [float(x) for x in line.split(\",\")[1:]]\n",
+    "    temperature[i] = values[1]\n",
+    "    raw_data[i, :] = values[:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "\n",
+    "plt.plot(range(len(temperature)), temperature)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "plt.plot(range(1440), temperature[:1440])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_train_samples = int(0.5 * len(raw_data))\n",
+    "num_val_samples = int(0.25 * len(raw_data))\n",
+    "num_test_samples = len(raw_data) - num_train_samples - num_val_samples\n",
+    "print(\"num_train_samples:\", num_train_samples)\n",
+    "print(\"num_val_samples:\", num_val_samples)\n",
+    "print(\"num_test_samples:\", num_test_samples)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Preparing the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "mean = raw_data[:num_train_samples].mean(axis=0)\n",
+    "raw_data -= mean\n",
+    "std = raw_data[:num_train_samples].std(axis=0)\n",
+    "raw_data /= std"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import keras\n",
+    "\n",
+    "int_sequence = np.arange(10)\n",
+    "dummy_dataset = keras.utils.timeseries_dataset_from_array(\n",
+    "    data=int_sequence[:-3],\n",
+    "    targets=int_sequence[3:],\n",
+    "    sequence_length=3,\n",
+    "    batch_size=2,\n",
+    ")\n",
+    "\n",
+    "for inputs, targets in dummy_dataset:\n",
+    "    for i in range(inputs.shape[0]):\n",
+    "        print([int(x) for x in inputs[i]], int(targets[i]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "sampling_rate = 6\n",
+    "sequence_length = 120\n",
+    "delay = sampling_rate * (sequence_length + 24 - 1)\n",
+    "batch_size = 256\n",
+    "\n",
+    "train_dataset = keras.utils.timeseries_dataset_from_array(\n",
+    "    raw_data[:-delay],\n",
+    "    targets=temperature[delay:],\n",
+    "    sampling_rate=sampling_rate,\n",
+    "    sequence_length=sequence_length,\n",
+    "    shuffle=True,\n",
+    "    batch_size=batch_size,\n",
+    "    start_index=0,\n",
+    "    end_index=num_train_samples,\n",
+    ")\n",
+    "\n",
+    "val_dataset = keras.utils.timeseries_dataset_from_array(\n",
+    "    raw_data[:-delay],\n",
+    "    targets=temperature[delay:],\n",
+    "    sampling_rate=sampling_rate,\n",
+    "    sequence_length=sequence_length,\n",
+    "    shuffle=True,\n",
+    "    batch_size=batch_size,\n",
+    "    start_index=num_train_samples,\n",
+    "    end_index=num_train_samples + num_val_samples,\n",
+    ")\n",
+    "\n",
+    "test_dataset = keras.utils.timeseries_dataset_from_array(\n",
+    "    raw_data[:-delay],\n",
+    "    targets=temperature[delay:],\n",
+    "    sampling_rate=sampling_rate,\n",
+    "    sequence_length=sequence_length,\n",
+    "    shuffle=True,\n",
+    "    batch_size=batch_size,\n",
+    "    start_index=num_train_samples + num_val_samples,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "for samples, targets in train_dataset:\n",
+    "    print(\"samples shape:\", samples.shape)\n",
+    "    print(\"targets shape:\", targets.shape)\n",
+    "    break"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A common-sense, non-machine-learning baseline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def evaluate_naive_method(dataset):\n",
+    "    total_abs_err = 0.0\n",
+    "    samples_seen = 0\n",
+    "    for samples, targets in dataset:\n",
+    "        preds = samples[:, -1, 1] * std[1] + mean[1]\n",
+    "        total_abs_err += np.sum(np.abs(preds - targets))\n",
+    "        samples_seen += samples.shape[0]\n",
+    "    return total_abs_err / samples_seen\n",
+    "\n",
+    "print(f\"Validation MAE: {evaluate_naive_method(val_dataset):.2f}\")\n",
+    "print(f\"Test MAE: {evaluate_naive_method(test_dataset):.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Let's try a basic machine learning model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))\n",
+    "x = layers.Flatten()(inputs)\n",
+    "x = layers.Dense(16, activation=\"relu\")(x)\n",
+    "outputs = layers.Dense(1)(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\"jena_dense.keras\", save_best_only=True)\n",
+    "]\n",
+    "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=10,\n",
+    "    validation_data=val_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")\n",
+    "\n",
+    "model = keras.models.load_model(\"jena_dense.keras\")\n",
+    "print(f\"Test MAE: {model.evaluate(test_dataset)[1]:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "loss = history.history[\"mae\"]\n",
+    "val_loss = history.history[\"val_mae\"]\n",
+    "epochs = range(1, len(loss) + 1)\n",
+    "plt.figure()\n",
+    "plt.plot(epochs, loss, \"r--\", label=\"Training MAE\")\n",
+    "plt.plot(epochs, val_loss, \"b\", label=\"Validation MAE\")\n",
+    "plt.title(\"Training and validation MAE\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Let's try a 1D convolutional model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))\n",
+    "x = layers.Conv1D(8, 24, activation=\"relu\")(inputs)\n",
+    "x = layers.MaxPooling1D(2)(x)\n",
+    "x = layers.Conv1D(8, 12, activation=\"relu\")(x)\n",
+    "x = layers.MaxPooling1D(2)(x)\n",
+    "x = layers.Conv1D(8, 6, activation=\"relu\")(x)\n",
+    "x = layers.GlobalAveragePooling1D()(x)\n",
+    "outputs = layers.Dense(1)(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\"jena_conv.keras\", save_best_only=True)\n",
+    "]\n",
+    "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=10,\n",
+    "    validation_data=val_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")\n",
+    "\n",
+    "model = keras.models.load_model(\"jena_conv.keras\")\n",
+    "print(f\"Test MAE: {model.evaluate(test_dataset)[1]:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A first recurrent baseline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))\n",
+    "x = layers.LSTM(16)(inputs)\n",
+    "outputs = layers.Dense(1)(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\"jena_lstm.keras\", save_best_only=True)\n",
+    "]\n",
+    "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=10,\n",
+    "    validation_data=val_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")\n",
+    "\n",
+    "model = keras.models.load_model(\"jena_lstm.keras\")\n",
+    "print(f\"Test MAE: {model.evaluate(test_dataset)[1]:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Understanding recurrent neural networks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "timesteps = 100\n",
+    "input_features = 32\n",
+    "output_features = 64\n",
+    "inputs = np.random.random((timesteps, input_features))\n",
+    "state_t = np.zeros((output_features,))\n",
+    "W = np.random.random((output_features, input_features))\n",
+    "U = np.random.random((output_features, output_features))\n",
+    "b = np.random.random((output_features,))\n",
+    "successive_outputs = []\n",
+    "for input_t in inputs:\n",
+    "    output_t = np.tanh(np.dot(W, input_t) + np.dot(U, state_t) + b)\n",
+    "    successive_outputs.append(output_t)\n",
+    "    state_t = output_t\n",
+    "final_output_sequence = np.stack(successive_outputs, axis=0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A recurrent layer in Keras"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_features = 14\n",
+    "inputs = keras.Input(shape=(None, num_features))\n",
+    "outputs = layers.SimpleRNN(16)(inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_features = 14\n",
+    "steps = 120\n",
+    "inputs = keras.Input(shape=(steps, num_features))\n",
+    "outputs = layers.SimpleRNN(16, return_sequences=False)(inputs)\n",
+    "print(outputs.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_features = 14\n",
+    "steps = 120\n",
+    "inputs = keras.Input(shape=(steps, num_features))\n",
+    "outputs = layers.SimpleRNN(16, return_sequences=True)(inputs)\n",
+    "print(outputs.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(steps, num_features))\n",
+    "x = layers.SimpleRNN(16, return_sequences=True)(inputs)\n",
+    "x = layers.SimpleRNN(16, return_sequences=True)(x)\n",
+    "outputs = layers.SimpleRNN(16)(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Getting the most out of recurrent neural networks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Using recurrent dropout to fight overfitting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))\n",
+    "x = layers.LSTM(32, recurrent_dropout=0.25)(inputs)\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "outputs = layers.Dense(1)(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        \"jena_lstm_dropout.keras\", save_best_only=True\n",
+    "    )\n",
+    "]\n",
+    "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=50,\n",
+    "    validation_data=val_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Stacking recurrent layers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))\n",
+    "x = layers.GRU(32, recurrent_dropout=0.5, return_sequences=True)(inputs)\n",
+    "x = layers.GRU(32, recurrent_dropout=0.5)(x)\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "outputs = layers.Dense(1)(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "\n",
+    "callbacks = [\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        \"jena_stacked_gru_dropout.keras\", save_best_only=True\n",
+    "    )\n",
+    "]\n",
+    "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=50,\n",
+    "    validation_data=val_dataset,\n",
+    "    callbacks=callbacks,\n",
+    ")\n",
+    "model = keras.models.load_model(\"jena_stacked_gru_dropout.keras\")\n",
+    "print(f\"Test MAE: {model.evaluate(test_dataset)[1]:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Using bidirectional RNNs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))\n",
+    "x = layers.Bidirectional(layers.LSTM(16))(inputs)\n",
+    "outputs = layers.Dense(1)(x)\n",
+    "model = keras.Model(inputs, outputs)\n",
+    "\n",
+    "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=10,\n",
+    "    validation_data=val_dataset,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Going even further"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter13_timeseries-forecasting",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
new file mode 100644
index 0000000000..5950f1c4df
--- /dev/null
+++ b/chapter14_text-classification.ipynb
@@ -0,0 +1,1400 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A brief history of Natural Language Processing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Preparing text data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import regex as re\n",
+    "\n",
+    "def split_chars(text):\n",
+    "    return re.findall(r\".\", text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "chars = split_chars(\"The quick brown fox jumped over the lazy dog.\")\n",
+    "chars[:12]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def split_words(text):\n",
+    "    return re.findall(r\"[\\w]+|[.,!?;]\", text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "split_words(\"The quick brown fox jumped over the dog.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "vocabulary = {\n",
+    "    \"[UNK]\": 0,\n",
+    "    \"the\": 1,\n",
+    "    \"quick\": 2,\n",
+    "    \"brown\": 3,\n",
+    "    \"fox\": 4,\n",
+    "    \"jumped\": 5,\n",
+    "    \"over\": 6,\n",
+    "    \"dog\": 7,\n",
+    "    \".\": 8,\n",
+    "}\n",
+    "words = split_words(\"The quick brown fox jumped over the lazy dog.\")\n",
+    "indices = [vocabulary.get(word, 0) for word in words]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Character and word tokenization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class CharTokenizer:\n",
+    "    def __init__(self, vocabulary):\n",
+    "        self.vocabulary = vocabulary\n",
+    "        self.unk_id = vocabulary[\"[UNK]\"]\n",
+    "\n",
+    "    def standardize(self, inputs):\n",
+    "        return inputs.lower()\n",
+    "\n",
+    "    def split(self, inputs):\n",
+    "        return re.findall(r\".\", inputs)\n",
+    "\n",
+    "    def index(self, tokens):\n",
+    "        return [self.vocabulary.get(t, self.unk_id) for t in tokens]\n",
+    "\n",
+    "    def __call__(self, inputs):\n",
+    "        inputs = self.standardize(inputs)\n",
+    "        tokens = self.split(inputs)\n",
+    "        indices = self.index(tokens)\n",
+    "        return indices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import collections\n",
+    "\n",
+    "def compute_char_vocabulary(inputs, max_size):\n",
+    "    char_counts = collections.Counter()\n",
+    "    for x in inputs:\n",
+    "        x = x.lower()\n",
+    "        tokens = re.findall(r\".\", x)\n",
+    "        char_counts.update(tokens)\n",
+    "    vocabulary = [\"[UNK]\"]\n",
+    "    most_common = char_counts.most_common(max_size - len(vocabulary))\n",
+    "    for token, count in most_common:\n",
+    "        vocabulary.append(token)\n",
+    "    return dict((token, i) for i, token in enumerate(vocabulary))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class WordTokenizer:\n",
+    "    def __init__(self, vocabulary):\n",
+    "        self.vocabulary = vocabulary\n",
+    "        self.unk_id = vocabulary[\"[UNK]\"]\n",
+    "\n",
+    "    def standardize(self, inputs):\n",
+    "        return inputs.lower()\n",
+    "\n",
+    "    def split(self, inputs):\n",
+    "        return re.findall(r\"[\\w]+|[.,!?;]\", inputs)\n",
+    "\n",
+    "    def index(self, tokens):\n",
+    "        return [self.vocabulary.get(t, self.unk_id) for t in tokens]\n",
+    "\n",
+    "    def __call__(self, inputs):\n",
+    "        inputs = self.standardize(inputs)\n",
+    "        tokens = self.split(inputs)\n",
+    "        indices = self.index(tokens)\n",
+    "        return indices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def compute_word_vocabulary(inputs, max_size):\n",
+    "    word_counts = collections.Counter()\n",
+    "    for x in inputs:\n",
+    "        x = x.lower()\n",
+    "        tokens = re.findall(r\"[\\w]+|[.,!?;]\", x)\n",
+    "        word_counts.update(tokens)\n",
+    "    vocabulary = [\"[UNK]\"]\n",
+    "    most_common = word_counts.most_common(max_size - len(vocabulary))\n",
+    "    for token, count in most_common:\n",
+    "        vocabulary.append(token)\n",
+    "    return dict((token, i) for i, token in enumerate(vocabulary))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "filename = keras.utils.get_file(\n",
+    "    origin=\"https://www.gutenberg.org/files/2701/old/moby10b.txt\",\n",
+    ")\n",
+    "moby_dick = list(open(filename, \"r\"))\n",
+    "\n",
+    "vocabulary = compute_char_vocabulary(moby_dick, max_size=100)\n",
+    "char_tokenizer = CharTokenizer(vocabulary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary length:\", len(vocabulary))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary start:\", list(vocabulary.keys())[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary end:\", list(vocabulary.keys())[-10:])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Line length:\", len(char_tokenizer(\n",
+    "   \"Call me Ishmael. Some years ago--never mind how long precisely.\"\n",
+    ")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "vocabulary = compute_word_vocabulary(moby_dick, max_size=2_000)\n",
+    "word_tokenizer = WordTokenizer(vocabulary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary length:\", len(vocabulary))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary start:\", list(vocabulary.keys())[:5])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary end:\", list(vocabulary.keys())[-5:])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Line length:\", len(word_tokenizer(\n",
+    "   \"Call me Ishmael. Some years ago--never mind how long precisely.\"\n",
+    ")))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Subword tokenization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "data = [\n",
+    "    \"the quick brown fox\",\n",
+    "    \"the slow brown fox\",\n",
+    "    \"the quick brown foxhound\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def count_and_split_words(data):\n",
+    "    counts = collections.Counter()\n",
+    "    for line in data:\n",
+    "        line = line.lower()\n",
+    "        for word in re.findall(r\"[\\w]+|[.,!?;]\", line):\n",
+    "            chars = re.findall(r\".\", word)\n",
+    "            split_word = \" \".join(chars)\n",
+    "            counts[split_word] += 1\n",
+    "    return dict(counts)\n",
+    "\n",
+    "counts = count_and_split_words(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def count_pairs(counts):\n",
+    "    pairs = collections.Counter()\n",
+    "    for word, freq in counts.items():\n",
+    "        symbols = word.split()\n",
+    "        for pair in zip(symbols[:-1], symbols[1:]):\n",
+    "            pairs[pair] += freq\n",
+    "    return pairs\n",
+    "\n",
+    "def merge_pair(counts, first, second):  # fuse each adjacent first/second pair\n",
+    "    split = re.compile(rf\"(?<!\\S){re.escape(first)} {re.escape(second)}(?!\\S)\")\n",
+    "    merged = f\"{first}{second}\"  # the two symbols become a single symbol\n",
+    "    return {split.sub(merged, word): count for word, count in counts.items()}\n",
+    "\n",
+    "for i in range(10):\n",
+    "    pairs = count_pairs(counts)\n",
+    "    first, second = max(pairs, key=pairs.get)\n",
+    "    counts = merge_pair(counts, first, second)\n",
+    "    print(list(counts.keys()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def compute_sub_word_vocabulary(dataset, vocab_size):\n",
+    "    counts = count_and_split_words(dataset)\n",
+    "\n",
+    "    char_counts = collections.Counter()\n",
+    "    for word in counts:\n",
+    "        for char in word.split():\n",
+    "            char_counts[char] += counts[word]\n",
+    "    most_common = char_counts.most_common()\n",
+    "    vocab = [\"[UNK]\"] + [char for char, freq in most_common]\n",
+    "    merges = []\n",
+    "\n",
+    "    while len(vocab) < vocab_size:\n",
+    "        pairs = count_pairs(counts)\n",
+    "        if not pairs:\n",
+    "            break\n",
+    "        first, second = max(pairs, key=pairs.get)\n",
+    "        counts = merge_pair(counts, first, second)\n",
+    "        vocab.append(f\"{first}{second}\")\n",
+    "        merges.append(f\"{first} {second}\")\n",
+    "\n",
+    "    vocab = dict((token, index) for index, token in enumerate(vocab))\n",
+    "    merges = dict((token, rank) for rank, token in enumerate(merges))\n",
+    "    return vocab, merges"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class SubWordTokenizer:\n",
+    "    def __init__(self, vocabulary, merges):\n",
+    "        self.vocabulary = vocabulary\n",
+    "        self.merges = merges\n",
+    "        self.unk_id = vocabulary[\"[UNK]\"]\n",
+    "\n",
+    "    def standardize(self, inputs):\n",
+    "        return inputs.lower()\n",
+    "\n",
+    "    def bpe_merge(self, word):\n",
+    "        while True:\n",
+    "            pairs = re.findall(r\"(?<!\\S)\\S+ \\S+(?!\\S)\", word, overlapped=True)\n",
+    "            if not pairs:\n",
+    "                break\n",
+    "            best = min(pairs, key=lambda pair: self.merges.get(pair, 1e9))\n",
+    "            if best not in self.merges:\n",
+    "                break\n",
+    "            first, second = best.split()\n",
+    "            split = re.compile(rf\"(?<!\\S){re.escape(first)} {re.escape(second)}(?!\\S)\")\n",
+    "            merged = f\"{first}{second}\"\n",
+    "            word = split.sub(merged, word)\n",
+    "        return word\n",
+    "\n",
+    "    def split(self, inputs):\n",
+    "        tokens = []\n",
+    "        for word in re.findall(r\"[\\w]+|[.,!?;]\", inputs):\n",
+    "            word = \" \".join(re.findall(r\".\", word))\n",
+    "            word = self.bpe_merge(word)\n",
+    "            tokens.extend(word.split())\n",
+    "        return tokens\n",
+    "\n",
+    "    def index(self, tokens):\n",
+    "        return [self.vocabulary.get(t, self.unk_id) for t in tokens]\n",
+    "\n",
+    "    def __call__(self, inputs):\n",
+    "        inputs = self.standardize(inputs)\n",
+    "        tokens = self.split(inputs)\n",
+    "        indices = self.index(tokens)\n",
+    "        return indices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "vocabulary, merges = compute_sub_word_vocabulary(moby_dick, 2_000)\n",
+    "sub_word_tokenizer = SubWordTokenizer(vocabulary, merges)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary length:\", len(vocabulary))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary start:\", list(vocabulary.keys())[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Vocabulary end:\", list(vocabulary.keys())[-7:])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(\"Line length:\", len(sub_word_tokenizer(\n",
+    "   \"Call me Ishmael. Some years ago--never mind how long precisely.\"\n",
+    ")))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Sets vs. Sequences"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Loading the IMDb Classification Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os, pathlib, shutil, random\n",
+    "\n",
+    "zip_path = keras.utils.get_file(\n",
+    "    origin=\"https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
+    "    fname=\"imdb\",\n",
+    "    extract=True,\n",
+    ")\n",
+    "\n",
+    "imdb_extract_dir = pathlib.Path(zip_path) / \"aclImdb\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "for path in imdb_extract_dir.glob(\"*/*\"):\n",
+    "    if path.is_dir():\n",
+    "        print(path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print((imdb_extract_dir / \"train\" / \"pos\" / \"4077_10.txt\").read_text())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_dir = pathlib.Path(\"imdb_train\")\n",
+    "test_dir = pathlib.Path(\"imdb_test\")\n",
+    "val_dir = pathlib.Path(\"imdb_val\")\n",
+    "\n",
+    "shutil.copytree(imdb_extract_dir / \"test\", test_dir)\n",
+    "\n",
+    "val_percentage = 0.2\n",
+    "for category in (\"neg\", \"pos\"):\n",
+    "    src_dir = imdb_extract_dir / \"train\" / category\n",
+    "    src_files = sorted(os.listdir(src_dir))  # listdir order is arbitrary\n",
+    "    random.Random(1337).shuffle(src_files)\n",
+    "    num_val_samples = int(len(src_files) * val_percentage)\n",
+    "\n",
+    "    os.makedirs(val_dir / category)\n",
+    "    for file in src_files[:num_val_samples]:\n",
+    "        shutil.copy(src_dir / file, val_dir / category / file)\n",
+    "    os.makedirs(train_dir / category)\n",
+    "    for file in src_files[num_val_samples:]:\n",
+    "        shutil.copy(src_dir / file, train_dir / category / file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "train_ds = keras.utils.text_dataset_from_directory(\n",
+    "    train_dir, batch_size=batch_size\n",
+    ")\n",
+    "val_ds = keras.utils.text_dataset_from_directory(val_dir, batch_size=batch_size)\n",
+    "test_ds = keras.utils.text_dataset_from_directory(\n",
+    "    test_dir, batch_size=batch_size\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Set models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training a bag-of-words model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import layers\n",
+    "\n",
+    "max_tokens = 20_000\n",
+    "text_vectorization = layers.TextVectorization(\n",
+    "    max_tokens=max_tokens,\n",
+    "    split=\"whitespace\",\n",
+    "    output_mode=\"multi_hot\",\n",
+    ")\n",
+    "train_ds_no_labels = train_ds.map(lambda x, y: x)\n",
+    "text_vectorization.adapt(train_ds_no_labels)\n",
+    "\n",
+    "bag_of_words_train_ds = train_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")\n",
+    "bag_of_words_val_ds = val_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")\n",
+    "bag_of_words_test_ds = test_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x, y = next(bag_of_words_train_ds.as_numpy_iterator())\n",
+    "x.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "y.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def build_linear_classifier(max_tokens, name):\n",
+    "    inputs = keras.Input(shape=(max_tokens,))\n",
+    "    outputs = layers.Dense(1, activation=\"sigmoid\")(inputs)\n",
+    "    model = keras.Model(inputs, outputs, name=name)\n",
+    "    model.compile(\n",
+    "        optimizer=\"adam\",\n",
+    "        loss=\"binary_crossentropy\",\n",
+    "        metrics=[\"accuracy\"],\n",
+    "    )\n",
+    "    return model\n",
+    "\n",
+    "model = build_linear_classifier(max_tokens, \"bag_of_words_classifier\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "early_stopping = keras.callbacks.EarlyStopping(\n",
+    "    monitor=\"val_loss\",\n",
+    "    restore_best_weights=True,\n",
+    "    patience=2,\n",
+    ")\n",
+    "history = model.fit(\n",
+    "    bag_of_words_train_ds,\n",
+    "    validation_data=bag_of_words_val_ds,\n",
+    "    epochs=10,\n",
+    "    callbacks=[early_stopping],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "accuracy = history.history[\"accuracy\"]\n",
+    "val_accuracy = history.history[\"val_accuracy\"]\n",
+    "epochs = range(1, len(accuracy) + 1)\n",
+    "\n",
+    "plt.plot(epochs, accuracy, \"r--\", label=\"Training accuracy\")\n",
+    "plt.plot(epochs, val_accuracy, \"b\", label=\"Validation accuracy\")\n",
+    "plt.title(\"Training and validation accuracy\")\n",
+    "plt.legend()\n",
+    "plt.savefig(\"bag-of-words-acc.png\", dpi=300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_loss, test_acc = model.evaluate(bag_of_words_test_ds)\n",
+    "test_acc"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training a bigram model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "max_tokens = 30_000\n",
+    "text_vectorization = layers.TextVectorization(\n",
+    "    max_tokens=max_tokens,\n",
+    "    split=\"whitespace\",\n",
+    "    output_mode=\"multi_hot\",\n",
+    "    ngrams=2,\n",
+    ")\n",
+    "text_vectorization.adapt(train_ds_no_labels)\n",
+    "\n",
+    "bigram_train_ds = train_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")\n",
+    "bigram_val_ds = val_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")\n",
+    "bigram_test_ds = test_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x, y = next(bigram_train_ds.as_numpy_iterator())\n",
+    "x.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "text_vectorization.get_vocabulary()[100:108]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = build_linear_classifier(max_tokens, \"bigram_classifier\")\n",
+    "model.fit(\n",
+    "    bigram_train_ds,\n",
+    "    validation_data=bigram_val_ds,\n",
+    "    epochs=10,\n",
+    "    callbacks=[early_stopping],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_loss, test_acc = model.evaluate(bigram_test_ds)\n",
+    "test_acc"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Sequence models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "max_length = 600\n",
+    "max_tokens = 30_000\n",
+    "text_vectorization = layers.TextVectorization(\n",
+    "    max_tokens=max_tokens,\n",
+    "    split=\"whitespace\",\n",
+    "    output_mode=\"int\",\n",
+    "    output_sequence_length=max_length,\n",
+    ")\n",
+    "text_vectorization.adapt(train_ds_no_labels)\n",
+    "\n",
+    "sequence_train_ds = train_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")\n",
+    "sequence_val_ds = val_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")\n",
+    "sequence_test_ds = test_ds.map(\n",
+    "    lambda x, y: (text_vectorization(x), y), num_parallel_calls=8\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x, y = next(sequence_test_ds.as_numpy_iterator())\n",
+    "x.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training a recurrent model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "class OneHotEncoding(keras.Layer):\n",
+    "    def __init__(self, depth, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.depth = depth\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        flat_inputs = ops.reshape(ops.cast(inputs, \"int\"), [-1])\n",
+    "        one_hot_vectors = ops.eye(self.depth)\n",
+    "        outputs = ops.take(one_hot_vectors, flat_inputs, axis=0)\n",
+    "        return ops.reshape(outputs, ops.shape(inputs) + (self.depth,))\n",
+    "\n",
+    "one_hot_encoding = OneHotEncoding(max_tokens)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x, y = next(sequence_train_ds.as_numpy_iterator())\n",
+    "one_hot_encoding(x).shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "hidden_dim = 64\n",
+    "inputs = keras.Input(shape=(max_length,), dtype=\"int32\")\n",
+    "x = one_hot_encoding(inputs)\n",
+    "x = layers.Bidirectional(layers.LSTM(hidden_dim))(x)\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs, outputs, name=\"lstm_with_one_hot\")\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.fit(\n",
+    "    sequence_train_ds,\n",
+    "    validation_data=sequence_val_ds,\n",
+    "    epochs=10,\n",
+    "    callbacks=[early_stopping],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_loss, test_acc = model.evaluate(sequence_test_ds)\n",
+    "test_acc"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Understanding word embeddings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Using a word embedding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "hidden_dim = 64\n",
+    "inputs = keras.Input(shape=(max_length,), dtype=\"int32\")\n",
+    "x = keras.layers.Embedding(\n",
+    "    input_dim=max_tokens,\n",
+    "    output_dim=hidden_dim,\n",
+    "    mask_zero=True,\n",
+    ")(inputs)\n",
+    "x = keras.layers.Bidirectional(keras.layers.LSTM(hidden_dim))(x)\n",
+    "x = keras.layers.Dropout(0.5)(x)\n",
+    "outputs = keras.layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs, outputs, name=\"lstm_with_embedding\")\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.fit(\n",
+    "    sequence_train_ds,\n",
+    "    validation_data=sequence_val_ds,\n",
+    "    epochs=10,\n",
+    "    callbacks=[early_stopping],\n",
+    ")\n",
+    "test_loss, test_acc = model.evaluate(sequence_test_ds)\n",
+    "test_acc"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Pretraining a word embedding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "imdb_vocabulary = text_vectorization.get_vocabulary()\n",
+    "tokenize_no_padding = keras.layers.TextVectorization(\n",
+    "    vocabulary=imdb_vocabulary,\n",
+    "    split=\"whitespace\",\n",
+    "    output_mode=\"int\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "context_size = 4\n",
+    "window_size = 2 * context_size + 1  # left context + target + right context\n",
+    "\n",
+    "def window_data(token_ids):\n",
+    "    num_windows = tf.maximum(tf.size(token_ids) - context_size * 2, 0)\n",
+    "    windows = tf.range(window_size)[None, :]\n",
+    "    windows = windows + tf.range(num_windows)[:, None]\n",
+    "    windowed_tokens = tf.gather(token_ids, windows)\n",
+    "    return tf.data.Dataset.from_tensor_slices(windowed_tokens)\n",
+    "\n",
+    "def split_label(window):\n",
+    "    left = window[:context_size]\n",
+    "    right = window[context_size + 1 :]\n",
+    "    bag = tf.concat((left, right), axis=0)\n",
+    "    label = window[context_size]  # center token is the prediction target\n",
+    "    return bag, label\n",
+    "\n",
+    "dataset = keras.utils.text_dataset_from_directory(\n",
+    "    imdb_extract_dir / \"train\", batch_size=None\n",
+    ")\n",
+    "dataset = dataset.map(lambda x, y: x, num_parallel_calls=8)\n",
+    "dataset = dataset.map(tokenize_no_padding, num_parallel_calls=8)\n",
+    "dataset = dataset.interleave(window_data, cycle_length=8, num_parallel_calls=8)\n",
+    "dataset = dataset.map(split_label, num_parallel_calls=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "hidden_dim = 64\n",
+    "inputs = keras.Input(shape=(2 * context_size,))\n",
+    "cbow_embedding = layers.Embedding(\n",
+    "    max_tokens,\n",
+    "    hidden_dim,\n",
+    ")\n",
+    "x = cbow_embedding(inputs)\n",
+    "x = layers.GlobalAveragePooling1D()(x)\n",
+    "outputs = layers.Dense(max_tokens, activation=\"softmax\")(x)\n",
+    "cbow_model = keras.Model(inputs, outputs)\n",
+    "cbow_model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"sparse_categorical_accuracy\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "cbow_model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "dataset = dataset.batch(1024).cache()\n",
+    "cbow_model.fit(dataset, epochs=4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Using the pretrained embedding for classification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(max_length,))\n",
+    "lstm_embedding = layers.Embedding(\n",
+    "    input_dim=max_tokens,\n",
+    "    output_dim=hidden_dim,\n",
+    "    mask_zero=True,\n",
+    ")\n",
+    "x = lstm_embedding(inputs)\n",
+    "x = layers.Bidirectional(layers.LSTM(hidden_dim))(x)\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "model = keras.Model(inputs, outputs, name=\"lstm_with_cbow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "lstm_embedding.embeddings.assign(cbow_embedding.embeddings)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "model.fit(\n",
+    "    sequence_train_ds,\n",
+    "    validation_data=sequence_val_ds,\n",
+    "    epochs=10,\n",
+    "    callbacks=[early_stopping],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_loss, test_acc = model.evaluate(sequence_test_ds)\n",
+    "test_acc"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter Summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter14_text-classification",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
new file mode 100644
index 0000000000..c2646b76ea
--- /dev/null
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -0,0 +1,1180 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The Language Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training a Shakespeare Language Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "filename = keras.utils.get_file(\n",
+    "    origin=(\n",
+    "        \"https://storage.googleapis.com/download.tensorflow.org/\"\n",
+    "        \"data/shakespeare.txt\"\n",
+    "    ),\n",
+    ")\n",
+    "shakespeare = open(filename, \"r\").read()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "shakespeare[:250]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "sequence_length = 100\n",
+    "\n",
+    "def split_input(input, sequence_length):\n",
+    "    for i in range(0, len(input), sequence_length):\n",
+    "        yield input[i : i + sequence_length]\n",
+    "\n",
+    "features = list(split_input(shakespeare[:-1], sequence_length))\n",
+    "labels = list(split_input(shakespeare[1:], sequence_length))\n",
+    "dataset = tf.data.Dataset.from_tensor_slices((features, labels))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x, y = next(dataset.as_numpy_iterator())\n",
+    "x[:50], y[:50]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import layers\n",
+    "\n",
+    "tokenizer = layers.TextVectorization(\n",
+    "    standardize=None,\n",
+    "    split=\"character\",\n",
+    "    output_sequence_length=sequence_length,\n",
+    ")\n",
+    "tokenizer.adapt(dataset.map(lambda text, labels: text))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "vocabulary_size = tokenizer.vocabulary_size()\n",
+    "vocabulary_size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "dataset = dataset.map(\n",
+    "    lambda features, labels: (tokenizer(features), tokenizer(labels)),\n",
+    "    num_parallel_calls=8,\n",
+    ")\n",
+    "training_data = dataset.shuffle(10_000).batch(64).cache()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "embedding_dim = 256\n",
+    "hidden_dim = 1024\n",
+    "\n",
+    "inputs = layers.Input(shape=(sequence_length,), dtype=\"int\", name=\"token_ids\")\n",
+    "x = layers.Embedding(vocabulary_size, embedding_dim)(inputs)\n",
+    "x = layers.GRU(hidden_dim, return_sequences=True)(x)\n",
+    "x = layers.Dropout(0.1)(x)\n",
+    "outputs = layers.Dense(vocabulary_size, activation=\"softmax\")(x)\n",
+    "model = keras.Model(inputs, outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"sparse_categorical_accuracy\"],\n",
+    ")\n",
+    "model.fit(training_data, epochs=20)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Generating Shakespeare"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(1,), dtype=\"int\", name=\"token_ids\")\n",
+    "input_state = keras.Input(shape=(hidden_dim,), name=\"state\")\n",
+    "\n",
+    "x = layers.Embedding(vocabulary_size, embedding_dim)(inputs)\n",
+    "x, output_state = layers.GRU(hidden_dim, return_state=True)(\n",
+    "    x, initial_state=input_state\n",
+    ")\n",
+    "outputs = layers.Dense(vocabulary_size, activation=\"softmax\")(x)\n",
+    "generation_model = keras.Model(\n",
+    "    inputs=(inputs, input_state),\n",
+    "    outputs=(outputs, output_state),\n",
+    ")\n",
+    "generation_model.set_weights(model.get_weights())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tokens = tokenizer.get_vocabulary()\n",
+    "token_ids = range(vocabulary_size)\n",
+    "char_to_id = dict(zip(tokens, token_ids))\n",
+    "id_to_char = dict(zip(token_ids, tokens))\n",
+    "\n",
+    "prompt = \"\"\"\n",
+    "KING RICHARD III:\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "input_ids = [char_to_id[c] for c in prompt]\n",
+    "state = keras.ops.zeros(shape=(1, hidden_dim))\n",
+    "for token_id in input_ids:\n",
+    "    inputs = keras.ops.expand_dims([token_id], axis=0)\n",
+    "    predictions, state = generation_model((inputs, state))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "generated_ids = []\n",
+    "max_length = 250\n",
+    "for i in range(max_length):\n",
+    "    next_char = int(np.array(keras.ops.argmax(predictions, axis=-1)[0]))\n",
+    "    generated_ids.append(next_char)\n",
+    "    inputs = keras.ops.expand_dims([next_char], axis=0)\n",
+    "    predictions, state = generation_model((inputs, state))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "output = \"\".join([id_to_char[token_id] for token_id in generated_ids])\n",
+    "print(prompt + output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Sequence-to-sequence learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### English to Spanish Translation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import pathlib\n",
+    "\n",
+    "zip_path = keras.utils.get_file(\n",
+    "    origin=(\n",
+    "        \"http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip\"\n",
+    "    ),\n",
+    "    extract=True,\n",
+    ")\n",
+    "text_path = pathlib.Path(zip_path).parent / \"spa-eng\" / \"spa.txt\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "with open(text_path) as f:\n",
+    "    lines = f.read().split(\"\\n\")[:-1]\n",
+    "text_pairs = []\n",
+    "for line in lines:\n",
+    "    english, spanish = line.split(\"\\t\")\n",
+    "    spanish = \"[start] \" + spanish + \" [end]\"\n",
+    "    text_pairs.append((english, spanish))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "random.choice(text_pairs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "random.shuffle(text_pairs)\n",
+    "val_samples = int(0.15 * len(text_pairs))\n",
+    "train_samples = len(text_pairs) - 2 * val_samples\n",
+    "train_pairs = text_pairs[:train_samples]\n",
+    "val_pairs = text_pairs[train_samples : train_samples + val_samples]\n",
+    "test_pairs = text_pairs[train_samples + val_samples :]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import string\n",
+    "import re\n",
+    "\n",
+    "strip_chars = string.punctuation + \"\u00bf\"\n",
+    "strip_chars = strip_chars.replace(\"[\", \"\")\n",
+    "strip_chars = strip_chars.replace(\"]\", \"\")\n",
+    "\n",
+    "def custom_standardization(input_string):\n",
+    "    lowercase = tf.strings.lower(input_string)\n",
+    "    return tf.strings.regex_replace(\n",
+    "        lowercase, f\"[{re.escape(strip_chars)}]\", \"\"\n",
+    "    )\n",
+    "\n",
+    "vocab_size = 15000\n",
+    "sequence_length = 20\n",
+    "\n",
+    "english_tokenizer = layers.TextVectorization(\n",
+    "    max_tokens=vocab_size,\n",
+    "    output_mode=\"int\",\n",
+    "    output_sequence_length=sequence_length,\n",
+    ")\n",
+    "spanish_tokenizer = layers.TextVectorization(\n",
+    "    max_tokens=vocab_size,\n",
+    "    output_mode=\"int\",\n",
+    "    output_sequence_length=sequence_length + 1,\n",
+    "    standardize=custom_standardization,\n",
+    ")\n",
+    "train_english_texts = [pair[0] for pair in train_pairs]\n",
+    "train_spanish_texts = [pair[1] for pair in train_pairs]\n",
+    "english_tokenizer.adapt(train_english_texts)\n",
+    "spanish_tokenizer.adapt(train_spanish_texts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 64\n",
+    "\n",
+    "def format_dataset(eng, spa):\n",
+    "    eng = english_tokenizer(eng)\n",
+    "    spa = spanish_tokenizer(spa)\n",
+    "    features = {\"english\": eng, \"spanish\": spa[:, :-1]}\n",
+    "    labels = spa[:, 1:]\n",
+    "    sample_weights = labels != 0\n",
+    "    return features, labels, sample_weights\n",
+    "\n",
+    "def make_dataset(pairs):\n",
+    "    eng_texts, spa_texts = zip(*pairs)\n",
+    "    eng_texts = list(eng_texts)\n",
+    "    spa_texts = list(spa_texts)\n",
+    "    dataset = tf.data.Dataset.from_tensor_slices((eng_texts, spa_texts))\n",
+    "    dataset = dataset.batch(batch_size)\n",
+    "    dataset = dataset.map(format_dataset, num_parallel_calls=4)\n",
+    "    return dataset.shuffle(2048).cache()\n",
+    "\n",
+    "train_ds = make_dataset(train_pairs)\n",
+    "val_ds = make_dataset(val_pairs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs, targets, sample_weights = next(iter(train_ds))\n",
+    "print(inputs['english'].shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(inputs['spanish'].shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(targets.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "print(sample_weights.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Sequence-to-sequence learning with RNNs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "embed_dim = 256\n",
+    "hidden_dim = 1024\n",
+    "\n",
+    "source = keras.Input(shape=(None,), dtype=\"int32\", name=\"english\")\n",
+    "x = layers.Embedding(vocab_size, embed_dim, mask_zero=True)(source)\n",
+    "rnn_layer = layers.GRU(hidden_dim)\n",
+    "rnn_layer = layers.Bidirectional(rnn_layer, merge_mode=\"sum\")\n",
+    "encoder_output = rnn_layer(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "target = keras.Input(shape=(None,), dtype=\"int32\", name=\"spanish\")\n",
+    "x = layers.Embedding(vocab_size, embed_dim, mask_zero=True)(target)\n",
+    "rnn_layer = layers.GRU(hidden_dim, return_sequences=True)\n",
+    "x = rnn_layer(x, initial_state=encoder_output)\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "target_predictions = layers.Dense(vocab_size, activation=\"softmax\")(x)\n",
+    "seq2seq_rnn = keras.Model([source, target], target_predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "seq2seq_rnn.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "seq2seq_rnn.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    weighted_metrics=[\"accuracy\"],\n",
+    ")\n",
+    "seq2seq_rnn.fit(train_ds, epochs=15, validation_data=val_ds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "spa_vocab = spanish_tokenizer.get_vocabulary()\n",
+    "spa_index_lookup = dict(zip(range(len(spa_vocab)), spa_vocab))\n",
+    "\n",
+    "def generate_translation(input_sentence):\n",
+    "    tokenized_input_sentence = english_tokenizer([input_sentence])\n",
+    "    decoded_sentence = \"[start]\"\n",
+    "    for i in range(sequence_length):\n",
+    "        tokenized_target_sentence = spanish_tokenizer([decoded_sentence])\n",
+    "        inputs = [tokenized_input_sentence, tokenized_target_sentence]\n",
+    "        next_token_predictions = seq2seq_rnn.predict(inputs, verbose=0)\n",
+    "        sampled_token_index = np.argmax(next_token_predictions[0, i, :])\n",
+    "        sampled_token = spa_index_lookup[sampled_token_index]\n",
+    "        decoded_sentence += \" \" + sampled_token\n",
+    "        if sampled_token == \"[end]\":\n",
+    "            break\n",
+    "    return decoded_sentence\n",
+    "\n",
+    "test_eng_texts = [pair[0] for pair in test_pairs]\n",
+    "for _ in range(5):\n",
+    "    input_sentence = random.choice(test_eng_texts)\n",
+    "    print(\"-\")\n",
+    "    print(input_sentence)\n",
+    "    print(generate_translation(input_sentence))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The Transformer architecture"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Dot-product attention"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Transformer Encoder block"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class TransformerEncoder(keras.Layer):\n",
+    "    def __init__(self, hidden_dim, intermediate_dim, num_heads):\n",
+    "        super().__init__()\n",
+    "        key_dim = hidden_dim // num_heads\n",
+    "        self.self_attention = layers.MultiHeadAttention(num_heads, key_dim)\n",
+    "        self.self_attention_layernorm = layers.LayerNormalization()\n",
+    "        self.feed_forward_1 = layers.Dense(intermediate_dim, activation=\"relu\")\n",
+    "        self.feed_forward_2 = layers.Dense(hidden_dim)\n",
+    "        self.feed_forward_layernorm = layers.LayerNormalization()\n",
+    "\n",
+    "    def call(self, source, source_mask):\n",
+    "        residual = x = source\n",
+    "        mask = source_mask[:, None, :]\n",
+    "        x = self.self_attention(query=x, key=x, value=x, attention_mask=mask)\n",
+    "        x = x + residual\n",
+    "        x = self.self_attention_layernorm(x)\n",
+    "        residual = x\n",
+    "        x = self.feed_forward_1(x)\n",
+    "        x = self.feed_forward_2(x)\n",
+    "        x = x + residual\n",
+    "        x = self.feed_forward_layernorm(x)\n",
+    "        return x"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Transformer Decoder block"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class TransformerDecoder(keras.Layer):\n",
+    "    def __init__(self, hidden_dim, intermediate_dim, num_heads):\n",
+    "        super().__init__()\n",
+    "        key_dim = hidden_dim // num_heads\n",
+    "        self.self_attention = layers.MultiHeadAttention(num_heads, key_dim)\n",
+    "        self.self_attention_layernorm = layers.LayerNormalization()\n",
+    "        self.cross_attention = layers.MultiHeadAttention(num_heads, key_dim)\n",
+    "        self.cross_attention_layernorm = layers.LayerNormalization()\n",
+    "        self.feed_forward_1 = layers.Dense(intermediate_dim, activation=\"relu\")\n",
+    "        self.feed_forward_2 = layers.Dense(hidden_dim)\n",
+    "        self.feed_forward_layernorm = layers.LayerNormalization()\n",
+    "\n",
+    "    def call(self, target, source, source_mask):\n",
+    "        residual = x = target\n",
+    "        x = self.self_attention(query=x, key=x, value=x, use_causal_mask=True)\n",
+    "        x = x + residual\n",
+    "        x = self.self_attention_layernorm(x)\n",
+    "        residual = x\n",
+    "        mask = source_mask[:, None, :]\n",
+    "        x = self.cross_attention(\n",
+    "            query=x, key=source, value=source, attention_mask=mask\n",
+    "        )\n",
+    "        x = x + residual\n",
+    "        x = self.cross_attention_layernorm(x)\n",
+    "        residual = x\n",
+    "        x = self.feed_forward_1(x)\n",
+    "        x = self.feed_forward_2(x)\n",
+    "        x = x + residual\n",
+    "        x = self.feed_forward_layernorm(x)\n",
+    "        return x"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Sequence-to-sequence learning with a Transformer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "hidden_dim = 256\n",
+    "intermediate_dim = 2048\n",
+    "num_heads = 8\n",
+    "\n",
+    "source = keras.Input(shape=(None,), dtype=\"int32\", name=\"english\")\n",
+    "x = layers.Embedding(vocab_size, hidden_dim)(source)\n",
+    "encoder_output = TransformerEncoder(hidden_dim, intermediate_dim, num_heads)(\n",
+    "    source=x,\n",
+    "    source_mask=source != 0,\n",
+    ")\n",
+    "\n",
+    "target = keras.Input(shape=(None,), dtype=\"int32\", name=\"spanish\")\n",
+    "x = layers.Embedding(vocab_size, hidden_dim)(target)\n",
+    "x = TransformerDecoder(hidden_dim, intermediate_dim, num_heads)(\n",
+    "    target=x,\n",
+    "    source=encoder_output,\n",
+    "    source_mask=source != 0,\n",
+    ")\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "target_predictions = layers.Dense(vocab_size, activation=\"softmax\")(x)\n",
+    "transformer = keras.Model([source, target], target_predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "transformer.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "transformer.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    weighted_metrics=[\"accuracy\"],\n",
+    ")\n",
+    "transformer.fit(train_ds, epochs=15, validation_data=val_ds)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Embedding positional information"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "class PositionalEmbedding(keras.Layer):\n",
+    "    def __init__(self, sequence_length, input_dim, output_dim):\n",
+    "        super().__init__()\n",
+    "        self.token_embeddings = layers.Embedding(input_dim, output_dim)\n",
+    "        self.position_embeddings = layers.Embedding(sequence_length, output_dim)\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        positions = ops.cumsum(ops.ones_like(inputs), axis=-1) - 1\n",
+    "        embedded_tokens = self.token_embeddings(inputs)\n",
+    "        embedded_positions = self.position_embeddings(positions)\n",
+    "        return embedded_tokens + embedded_positions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "hidden_dim = 256\n",
+    "intermediate_dim = 2056\n",
+    "num_heads = 8\n",
+    "\n",
+    "source = keras.Input(shape=(None,), dtype=\"int32\", name=\"english\")\n",
+    "x = PositionalEmbedding(sequence_length, vocab_size, hidden_dim)(source)\n",
+    "encoder_output = TransformerEncoder(hidden_dim, intermediate_dim, num_heads)(\n",
+    "    source=x,\n",
+    "    source_mask=source != 0,\n",
+    ")\n",
+    "\n",
+    "target = keras.Input(shape=(None,), dtype=\"int32\", name=\"spanish\")\n",
+    "x = PositionalEmbedding(sequence_length, vocab_size, hidden_dim)(target)\n",
+    "x = TransformerDecoder(hidden_dim, intermediate_dim, num_heads)(\n",
+    "    target=x,\n",
+    "    source=encoder_output,\n",
+    "    source_mask=source != 0,\n",
+    ")\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "target_predictions = layers.Dense(vocab_size, activation=\"softmax\")(x)\n",
+    "transformer = keras.Model([source, target], target_predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "transformer.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    weighted_metrics=[\"accuracy\"],\n",
+    ")\n",
+    "transformer.fit(train_ds, epochs=30, validation_data=val_ds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "spa_vocab = spanish_tokenizer.get_vocabulary()\n",
+    "spa_index_lookup = dict(zip(range(len(spa_vocab)), spa_vocab))\n",
+    "\n",
+    "def generate_translation(input_sentence):\n",
+    "    tokenized_input_sentence = english_tokenizer([input_sentence])\n",
+    "    decoded_sentence = \"[start]\"\n",
+    "    for i in range(sequence_length):\n",
+    "        tokenized_target_sentence = spanish_tokenizer([decoded_sentence])\n",
+    "        tokenized_target_sentence = tokenized_target_sentence[:, :-1]\n",
+    "        inputs = [tokenized_input_sentence, tokenized_target_sentence]\n",
+    "        next_token_predictions = transformer.predict(inputs, verbose=0)\n",
+    "        sampled_token_index = np.argmax(next_token_predictions[0, i, :])\n",
+    "        sampled_token = spa_index_lookup[sampled_token_index]\n",
+    "        decoded_sentence += \" \" + sampled_token\n",
+    "        if sampled_token == \"[end]\":\n",
+    "            break\n",
+    "    return decoded_sentence\n",
+    "\n",
+    "test_eng_texts = [pair[0] for pair in test_pairs]\n",
+    "for _ in range(5):\n",
+    "    input_sentence = random.choice(test_eng_texts)\n",
+    "    print(\"-\")\n",
+    "    print(input_sentence)\n",
+    "    print(generate_translation(input_sentence))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Classification with a pretrained Transformer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Pretraining a Transformer encoder"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Loading a pretrained Transformer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras_hub\n",
+    "\n",
+    "tokenizer = keras_hub.models.Tokenizer.from_preset(\"roberta_base_en\")\n",
+    "backbone = keras_hub.models.Backbone.from_preset(\"roberta_base_en\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tokenizer(\"The quick brown fox\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "backbone.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Preprocessing IMDb Movie reviews"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os, pathlib, shutil, random\n",
+    "\n",
+    "zip_path = keras.utils.get_file(\n",
+    "    origin=\"https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
+    "    fname=\"imdb\",\n",
+    "    extract=True,\n",
+    ")\n",
+    "\n",
+    "imdb_extract_dir = pathlib.Path(zip_path) / \"aclImdb\"\n",
+    "train_dir = pathlib.Path(\"imdb_train\")\n",
+    "test_dir = pathlib.Path(\"imdb_test\")\n",
+    "val_dir = pathlib.Path(\"imdb_val\")\n",
+    "\n",
+    "shutil.copytree(imdb_extract_dir / \"test\", test_dir, dirs_exist_ok=True)\n",
+    "\n",
+    "val_percentage = 0.2\n",
+    "for category in (\"neg\", \"pos\"):\n",
+    "    src_dir = imdb_extract_dir / \"train\" / category\n",
+    "    src_files = os.listdir(src_dir)\n",
+    "    random.Random(1337).shuffle(src_files)\n",
+    "    num_val_samples = int(len(src_files) * val_percentage)\n",
+    "\n",
+    "    os.makedirs(train_dir / category, exist_ok=True)\n",
+    "    os.makedirs(val_dir / category, exist_ok=True)\n",
+    "    for index, file in enumerate(src_files):\n",
+    "        if index < num_val_samples:\n",
+    "            shutil.copy(src_dir / file, val_dir / category / file)\n",
+    "        else:\n",
+    "            shutil.copy(src_dir / file, train_dir / category / file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 16\n",
+    "train_ds = keras.utils.text_dataset_from_directory(\n",
+    "    train_dir, batch_size=batch_size\n",
+    ")\n",
+    "val_ds = keras.utils.text_dataset_from_directory(val_dir, batch_size=batch_size)\n",
+    "test_ds = keras.utils.text_dataset_from_directory(\n",
+    "    test_dir, batch_size=batch_size\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def preprocess(text, label):\n",
+    "    packer = keras_hub.layers.StartEndPacker(\n",
+    "        sequence_length=512,\n",
+    "        start_value=tokenizer.start_token_id,\n",
+    "        end_value=tokenizer.end_token_id,\n",
+    "        pad_value=tokenizer.pad_token_id,\n",
+    "        return_padding_mask=True,\n",
+    "    )\n",
+    "    token_ids, padding_mask = packer(tokenizer(text))\n",
+    "    return {\"token_ids\": token_ids, \"padding_mask\": padding_mask}, label\n",
+    "\n",
+    "preprocessed_train_ds = train_ds.map(preprocess)\n",
+    "preprocessed_val_ds = val_ds.map(preprocess)\n",
+    "preprocessed_test_ds = test_ds.map(preprocess)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "next(iter(preprocessed_train_ds))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Fine-tuning a pretrained Transformer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = backbone.input\n",
+    "x = backbone(inputs)\n",
+    "x = x[:, 0, :]\n",
+    "x = layers.Dropout(0.1)(x)\n",
+    "x = layers.Dense(768, activation=\"relu\")(x)\n",
+    "x = layers.Dropout(0.1)(x)\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
+    "classifier = keras.Model(inputs, outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "classifier.compile(\n",
+    "    optimizer=keras.optimizers.Adam(5e-5),\n",
+    "    loss=\"binary_crossentropy\",\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "classifier.fit(\n",
+    "    preprocessed_train_ds,\n",
+    "    validation_data=preprocessed_val_ds,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "classifier.evaluate(preprocessed_test_ds)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### What makes the Transformer effective?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter Summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter15_language-models-and-the-transformer",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter16_generative-large-language-models.ipynb b/chapter16_generative-large-language-models.ipynb
new file mode 100644
index 0000000000..efb8fc4de0
--- /dev/null
+++ b/chapter16_generative-large-language-models.ipynb
@@ -0,0 +1,1120 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The potential of generative modeling"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A brief history of sequence generation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Training a miniature GPT"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "import pathlib\n",
+    "\n",
+    "extract_dir = keras.utils.get_file(\n",
+    "    fname=\"mini-c4\",\n",
+    "    origin=(\n",
+    "        \"https://hf.co/datasets/mattdangerw/mini-c4/resolve/main/mini-c4.zip\"\n",
+    "    ),\n",
+    "    extract=True,\n",
+    ")\n",
+    "extract_dir = pathlib.Path(extract_dir) / \"mini-c4\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "os.listdir(extract_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "with open(extract_dir / \"shard0.txt\", \"r\") as f:\n",
+    "    print(f.readline().replace(\"\\\\n\", \"\\n\")[:100])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras_hub\n",
+    "import numpy as np\n",
+    "\n",
+    "vocabulary_file = keras.utils.get_file(\n",
+    "    origin=\"https://hf.co/mattdangerw/spiece/resolve/main/vocabulary.proto\",\n",
+    ")\n",
+    "tokenizer = keras_hub.tokenizers.SentencePieceTokenizer(vocabulary_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tokenizer.tokenize(\"The quick brown fox.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tokenizer.detokenize([450, 4996, 17354, 1701, 29916, 29889])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "batch_size = 128\n",
+    "sequence_length = 256\n",
+    "suffix = np.array([tokenizer.token_to_id(\"<|endoftext|>\")])\n",
+    "\n",
+    "files = [extract_dir / file for file in os.listdir(extract_dir)]\n",
+    "ds = tf.data.TextLineDataset(files, num_parallel_reads=32)\n",
+    "ds = ds.map(\n",
+    "    lambda x: tf.strings.regex_replace(x, r\"\\\\n\", \"\\n\"),\n",
+    "    num_parallel_calls=32,\n",
+    ")\n",
+    "ds = ds.map(tokenizer, num_parallel_calls=32)\n",
+    "ds = ds.map(lambda x: tf.concat([x, suffix], -1), num_parallel_calls=32)\n",
+    "ds = ds.rebatch(sequence_length + 1, drop_remainder=True)\n",
+    "ds = ds.map(lambda x: (x[:-1], x[1:]), num_parallel_calls=32)\n",
+    "ds = ds.batch(batch_size, num_parallel_calls=32).cache()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_batches = ds.reduce(0, lambda count, input: count + 1).numpy()\n",
+    "num_batches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_val_batches = 500\n",
+    "num_train_batches = num_batches - num_val_batches\n",
+    "val_ds = ds.take(500)\n",
+    "train_ds = ds.skip(500).repeat()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Building the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import layers\n",
+    "\n",
+    "class TransformerDecoder(keras.Layer):\n",
+    "    def __init__(self, hidden_dim, intermediate_dim, num_heads):\n",
+    "        super().__init__()\n",
+    "        key_dim = hidden_dim // num_heads\n",
+    "        self.self_attention = layers.MultiHeadAttention(\n",
+    "            num_heads, key_dim, dropout=0.1\n",
+    "        )\n",
+    "        self.self_attention_layernorm = layers.LayerNormalization()\n",
+    "        self.feed_forward_1 = layers.Dense(intermediate_dim, activation=\"relu\")\n",
+    "        self.feed_forward_2 = layers.Dense(hidden_dim)\n",
+    "        self.feed_forward_layernorm = layers.LayerNormalization()\n",
+    "        self.dropout = layers.Dropout(0.1)\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        residual = x = inputs\n",
+    "        x = self.self_attention(query=x, key=x, value=x, use_causal_mask=True)\n",
+    "        x = self.dropout(x)\n",
+    "        x = x + residual\n",
+    "        x = self.self_attention_layernorm(x)\n",
+    "        residual = x\n",
+    "        x = self.feed_forward_1(x)\n",
+    "        x = self.feed_forward_2(x)\n",
+    "        x = self.dropout(x)\n",
+    "        x = x + residual\n",
+    "        x = self.feed_forward_layernorm(x)\n",
+    "        return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "class PositionalEmbedding(keras.Layer):\n",
+    "    def __init__(self, sequence_length, input_dim, output_dim):\n",
+    "        super().__init__()\n",
+    "        self.token_embeddings = layers.Embedding(input_dim, output_dim)\n",
+    "        self.position_embeddings = layers.Embedding(sequence_length, output_dim)\n",
+    "\n",
+    "    def call(self, inputs, reverse=False):\n",
+    "        if reverse:\n",
+    "            token_embeddings = self.token_embeddings.embeddings\n",
+    "            return ops.matmul(inputs, ops.transpose(token_embeddings))\n",
+    "        positions = ops.cumsum(ops.ones_like(inputs), axis=-1) - 1\n",
+    "        embedded_tokens = self.token_embeddings(inputs)\n",
+    "        embedded_positions = self.position_embeddings(positions)\n",
+    "        return embedded_tokens + embedded_positions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "vocab_size = tokenizer.vocabulary_size()\n",
+    "hidden_dim = 512\n",
+    "intermediate_dim = 2056\n",
+    "num_heads = 8\n",
+    "num_layers = 8\n",
+    "\n",
+    "inputs = keras.Input(shape=(None,), dtype=\"int32\", name=\"inputs\")\n",
+    "embedding = PositionalEmbedding(sequence_length, vocab_size, hidden_dim)\n",
+    "x = embedding(inputs)\n",
+    "x = layers.LayerNormalization()(x)\n",
+    "for i in range(num_layers):\n",
+    "    x = TransformerDecoder(hidden_dim, intermediate_dim, num_heads)(x)\n",
+    "outputs = embedding(x, reverse=True)\n",
+    "mini_gpt = keras.Model(inputs, outputs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Pretraining the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class WarmupSchedule(keras.optimizers.schedules.LearningRateSchedule):\n",
+    "    def __init__(self):\n",
+    "        self.rate = 1e-4\n",
+    "        self.warmup_steps = 1_000.0\n",
+    "\n",
+    "    def __call__(self, step):\n",
+    "        step = ops.cast(step, dtype=\"float32\")\n",
+    "        scale = ops.minimum(step / self.warmup_steps, 1.0)\n",
+    "        return self.rate * scale"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "schedule = WarmupSchedule()\n",
+    "x = range(0, 5_000, 100)\n",
+    "y = [schedule(step) for step in x]\n",
+    "plt.plot(x, y)\n",
+    "plt.xlabel(\"Train Step\")\n",
+    "plt.ylabel(\"Learning Rate\")\n",
+    "plt.savefig(\"learning-rate-warmup.png\", dpi=300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_passes = 2\n",
+    "num_epochs = 16\n",
+    "steps_per_epoch = num_train_batches * num_passes // num_epochs\n",
+    "\n",
+    "mini_gpt.compile(\n",
+    "    optimizer=keras.optimizers.Adam(schedule),\n",
+    "    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+    "    metrics=[\"accuracy\"],\n",
+    ")\n",
+    "mini_gpt.fit(\n",
+    "    train_ds,\n",
+    "    validation_data=val_ds,\n",
+    "    epochs=num_epochs,\n",
+    "    steps_per_epoch=steps_per_epoch,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Generative decoding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def generate(prompt, max_length=64):\n",
+    "    tokens = list(tokenizer(prompt))\n",
+    "    prompt_length = len(tokens)\n",
+    "    for _ in range(max_length - prompt_length):\n",
+    "        prediction = mini_gpt(np.array([tokens]))\n",
+    "        prediction = prediction[0, -1]\n",
+    "        tokens.append(np.argmax(prediction).item())\n",
+    "    return tokenizer.detokenize(tokens)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "prompt = \"A piece of advice\"\n",
+    "generate(prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def compiled_generate(prompt, max_length=64):\n",
+    "    tokens = list(tokenizer(prompt))\n",
+    "    prompt_length = len(tokens)\n",
+    "    tokens = tokens + [0] * (max_length - prompt_length)\n",
+    "    for i in range(prompt_length, max_length):\n",
+    "        prediction = mini_gpt.predict(np.array([tokens]), verbose=0)\n",
+    "        prediction = prediction[0, i - 1]\n",
+    "        tokens[i] = np.argmax(prediction).item()\n",
+    "    return tokenizer.detokenize(tokens)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import timeit\n",
+    "tries = 10\n",
+    "timeit.timeit(lambda: compiled_generate(prompt), number=tries) / tries"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Sampling strategies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def compiled_generate(prompt, sample_fn, max_length=64):\n",
+    "    tokens = list(tokenizer(prompt))\n",
+    "    prompt_length = len(tokens)\n",
+    "    tokens = tokens + [0] * (max_length - prompt_length)\n",
+    "    for i in range(prompt_length, max_length):\n",
+    "        prediction = mini_gpt.predict(np.array([tokens]), verbose=0)\n",
+    "        prediction = prediction[0, i - 1]\n",
+    "        next_token = sample_fn(prediction)\n",
+    "        tokens[i] = np.array(next_token).item()\n",
+    "    return tokenizer.detokenize(tokens)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def greedy_search(preds):\n",
+    "    return ops.argmax(preds)\n",
+    "\n",
+    "compiled_generate(prompt, greedy_search)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def random_sample(preds, temperature=1.0):\n",
+    "    preds = preds / temperature\n",
+    "    return keras.random.categorical(preds[None, :], num_samples=1)[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "compiled_generate(prompt, random_sample)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from functools import partial\n",
+    "compiled_generate(prompt, partial(random_sample, temperature=2.0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "compiled_generate(prompt, partial(random_sample, temperature=0.8))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "compiled_generate(prompt, partial(random_sample, temperature=0.2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def top_k(preds, k=5, temperature=1.0):\n",
+    "    preds = preds / temperature\n",
+    "    top_preds, top_indices = ops.top_k(preds, k=k, sorted=False)\n",
+    "    choice = keras.random.categorical(top_preds[None, :], num_samples=1)[0]\n",
+    "    return ops.take_along_axis(top_indices, choice, axis=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "compiled_generate(prompt, partial(top_k, k=5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "compiled_generate(prompt, partial(top_k, k=20))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "compiled_generate(prompt, partial(top_k, k=5, temperature=0.5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Using a pretrained LLM"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Prompting LLMs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm = keras_hub.models.CausalLM.from_preset(\"gemma_2b_en\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.compile(sampler=\"greedy\")\n",
+    "gemma_lm.generate(\"A piece of advice\", max_length=64)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.generate(\"How can I make brownies?\", max_length=64)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.generate(\n",
+    "    \"The following brownie recipe is easy to make in just a few \"\n",
+    "    \"steps.\\n\\nYou can start by\",\n",
+    "    max_length=64,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.generate(\n",
+    "    \"Tell me about the 61st president of the United States.\",\n",
+    "    max_length=64,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Instruction fine-tuning an LLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "TEMPLATE = \"\"\"[instruction]\n",
+    "{instruction}[end]\n",
+    "[response]\n",
+    "{response}[end]\"\"\"\n",
+    "\n",
+    "dataset_path = keras.utils.get_file(\n",
+    "    origin=(\n",
+    "        \"https://hf.co/datasets/databricks/databricks-dolly-15k/\"\n",
+    "        \"resolve/main/databricks-dolly-15k.jsonl\"\n",
+    "    ),\n",
+    ")\n",
+    "data = []\n",
+    "with open(dataset_path) as file:\n",
+    "    for line in file:\n",
+    "        features = json.loads(line)\n",
+    "        if features[\"context\"]:\n",
+    "            continue\n",
+    "        data.append(TEMPLATE.format(**features))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "data[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "ds = tf.data.Dataset.from_tensor_slices(data).shuffle(2000).batch(8)\n",
+    "val_ds = ds.take(100)\n",
+    "train_ds = ds.skip(100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "preprocessor = gemma_lm.preprocessor\n",
+    "preprocessor.sequence_length = 512\n",
+    "batch = next(iter(train_ds))\n",
+    "x, y, sample_weight = preprocessor(batch)\n",
+    "x[\"token_ids\"].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x[\"padding_mask\"].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "y.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "sample_weight.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x[\"token_ids\"][0, :5], y[0, :5]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Low-Rank Adaptation (LoRA) fine-tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.backbone.enable_lora(rank=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.compile(\n",
+    "    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+    "    optimizer=keras.optimizers.Adam(5e-5),\n",
+    "    weighted_metrics=[keras.metrics.SparseCategoricalAccuracy()],\n",
+    ")\n",
+    "gemma_lm.fit(train_ds, validation_data=val_ds, epochs=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.generate(\n",
+    "    \"[instruction]\\nHow can I make brownies?[end]\\n\"\n",
+    "    \"[response]\\n\",\n",
+    "    max_length=512,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.generate(\n",
+    "    \"[instruction]\\nWho is the 44th president of the United States?[end]\\n\"\n",
+    "    \"[response]\\n\",\n",
+    "    max_length=512,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm.generate(\n",
+    "    \"[instruction]\\nWho is the 61st president of the United States?[end]\\n\"\n",
+    "    \"[response]\\n\",\n",
+    "    max_length=512,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Reinforcement Learning with Human Feedback"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Reinforcement Learning with Chain of Thought Reasoning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Beyond text data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Extending an LLM for image input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "image_url = (\n",
+    "    \"https://github.com/mattdangerw/keras-nlp-scripts/\"\n",
+    "    \"blob/main/learned-python.png?raw=true\"\n",
+    ")\n",
+    "image_path = keras.utils.get_file(origin=image_url)\n",
+    "\n",
+    "image = keras.utils.load_img(image_path)\n",
+    "plt.axis(\"off\")\n",
+    "plt.imshow(image)\n",
+    "plt.savefig(\"pali-gemma-test-image.png\", dpi=300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "pali_gemma_lm = keras_hub.models.CausalLM.from_preset(\"pali_gemma_3b_mix_448\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "pali_gemma_lm.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "pali_gemma_lm.generate({\n",
+    "    \"images\": image,\n",
+    "    \"prompts\": \"cap en\\n\",\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "pali_gemma_lm.generate({\n",
+    "    \"images\": image,\n",
+    "    \"prompts\": \"answer en what is the snake doing?\\n\",\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "pali_gemma_lm.generate({\n",
+    "    \"images\": image,\n",
+    "    \"prompts\": \"detect glasses\\n\",\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "import matplotlib.patches as patches\n",
+    "\n",
+    "response = \"<loc0280><loc0371><loc0380><loc0685> glasses\"\n",
+    "box = [int(d) for d in re.findall(r\"\\d+\", response)]\n",
+    "scale = image.height / 1024.0\n",
+    "y1, x1, y2, x2 = (c * scale for c in box)\n",
+    "width, height = x2 - x1, y2 - y1\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.imshow(image)\n",
+    "ax.add_patch(\n",
+    "    patches.Rectangle(\n",
+    "        (x1, y1), width, height, linewidth=1, edgecolor=\"r\", facecolor=\"none\"\n",
+    "    )\n",
+    ")\n",
+    "plt.axis(\"off\")\n",
+    "plt.savefig(\"pali-gemma-detect-box.png\", dpi=300, bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Retrieval Augmented Generation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Foundation models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Where are LLMs heading next?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter Summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter16_generative-large-language-models",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
new file mode 100644
index 0000000000..a9b6559e68
--- /dev/null
+++ b/chapter17_image-generation.ipynb
@@ -0,0 +1,1145 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Deep learning for image generation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Sampling from latent spaces of images"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Variational autoencoders"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Implementing a VAE with Keras"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "latent_dim = 2\n",
+    "\n",
+    "image_inputs = keras.Input(shape=(28, 28, 1))\n",
+    "x = layers.Conv2D(32, 3, activation=\"relu\", strides=2, padding=\"same\")(\n",
+    "    image_inputs\n",
+    ")\n",
+    "x = layers.Conv2D(64, 3, activation=\"relu\", strides=2, padding=\"same\")(x)\n",
+    "x = layers.Flatten()(x)\n",
+    "x = layers.Dense(16, activation=\"relu\")(x)\n",
+    "z_mean = layers.Dense(latent_dim, name=\"z_mean\")(x)\n",
+    "z_log_var = layers.Dense(latent_dim, name=\"z_log_var\")(x)\n",
+    "encoder = keras.Model(image_inputs, [z_mean, z_log_var], name=\"encoder\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "encoder.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "class Sampler(keras.Layer):\n",
+    "    def __init__(self, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.seed_generator = keras.random.SeedGenerator()\n",
+    "        self.built = True\n",
+    "\n",
+    "    def call(self, z_mean, z_log_var):\n",
+    "        batch_size = ops.shape(z_mean)[0]\n",
+    "        z_size = ops.shape(z_mean)[1]\n",
+    "        epsilon = keras.random.normal(\n",
+    "            (batch_size, z_size), seed=self.seed_generator\n",
+    "        )\n",
+    "        return z_mean + ops.exp(0.5 * z_log_var) * epsilon"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "latent_inputs = keras.Input(shape=(latent_dim,))\n",
+    "x = layers.Dense(7 * 7 * 64, activation=\"relu\")(latent_inputs)\n",
+    "x = layers.Reshape((7, 7, 64))(x)\n",
+    "x = layers.Conv2DTranspose(64, 3, activation=\"relu\", strides=2, padding=\"same\")(\n",
+    "    x\n",
+    ")\n",
+    "x = layers.Conv2DTranspose(32, 3, activation=\"relu\", strides=2, padding=\"same\")(\n",
+    "    x\n",
+    ")\n",
+    "decoder_outputs = layers.Conv2D(1, 3, activation=\"sigmoid\", padding=\"same\")(x)\n",
+    "decoder = keras.Model(latent_inputs, decoder_outputs, name=\"decoder\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "decoder.summary(line_length=80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class VAE(keras.Model):\n",
+    "    def __init__(self, encoder, decoder, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.encoder = encoder\n",
+    "        self.decoder = decoder\n",
+    "        self.sampler = Sampler()\n",
+    "        self.reconstruction_loss_tracker = keras.metrics.Mean(\n",
+    "            name=\"reconstruction_loss\"\n",
+    "        )\n",
+    "        self.kl_loss_tracker = keras.metrics.Mean(name=\"kl_loss\")\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        return self.encoder(inputs)\n",
+    "\n",
+    "    def compute_loss(self, x, y, y_pred, sample_weight=None, training=True):\n",
+    "        original = x\n",
+    "        z_mean, z_log_var = y_pred\n",
+    "        reconstruction = self.decoder(self.sampler(z_mean, z_log_var))\n",
+    "\n",
+    "        reconstruction_loss = ops.mean(\n",
+    "            ops.sum(\n",
+    "                keras.losses.binary_crossentropy(x, reconstruction), axis=(1, 2)\n",
+    "            )\n",
+    "        )\n",
+    "        kl_loss = -0.5 * (\n",
+    "            1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var)\n",
+    "        )\n",
+    "        total_loss = reconstruction_loss + ops.mean(kl_loss)\n",
+    "\n",
+    "        self.reconstruction_loss_tracker.update_state(reconstruction_loss)\n",
+    "        self.kl_loss_tracker.update_state(kl_loss)\n",
+    "        return total_loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()\n",
+    "mnist_digits = np.concatenate([x_train, x_test], axis=0)\n",
+    "mnist_digits = np.expand_dims(mnist_digits, -1).astype(\"float32\") / 255\n",
+    "\n",
+    "vae = VAE(encoder, decoder)\n",
+    "vae.compile(optimizer=keras.optimizers.Adam())\n",
+    "vae.fit(mnist_digits, epochs=30, batch_size=128)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "n = 30\n",
+    "digit_size = 28\n",
+    "figure = np.zeros((digit_size * n, digit_size * n))\n",
+    "\n",
+    "grid_x = np.linspace(-1, 1, n)\n",
+    "grid_y = np.linspace(-1, 1, n)[::-1]\n",
+    "\n",
+    "for i, yi in enumerate(grid_y):\n",
+    "    for j, xi in enumerate(grid_x):\n",
+    "        z_sample = np.array([[xi, yi]])\n",
+    "        x_decoded = vae.decoder.predict(z_sample)\n",
+    "        digit = x_decoded[0].reshape(digit_size, digit_size)\n",
+    "        figure[\n",
+    "            i * digit_size : (i + 1) * digit_size,\n",
+    "            j * digit_size : (j + 1) * digit_size,\n",
+    "        ] = digit\n",
+    "\n",
+    "plt.figure(figsize=(15, 15))\n",
+    "start_range = digit_size // 2\n",
+    "end_range = n * digit_size + start_range\n",
+    "pixel_range = np.arange(start_range, end_range, digit_size)\n",
+    "sample_range_x = np.round(grid_x, 1)\n",
+    "sample_range_y = np.round(grid_y, 1)\n",
+    "plt.xticks(pixel_range, sample_range_x)\n",
+    "plt.yticks(pixel_range, sample_range_y)\n",
+    "plt.xlabel(\"z[0]\")\n",
+    "plt.ylabel(\"z[1]\")\n",
+    "plt.axis(\"off\")\n",
+    "plt.imshow(figure, cmap=\"Greys_r\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Diffusion models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The Oxford Flowers dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "fpath = keras.utils.get_file(\n",
+    "    origin=\"https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz\",\n",
+    "    extract=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "image_size = 128\n",
+    "images_dir = os.path.join(fpath, \"jpg\")\n",
+    "dataset = keras.utils.image_dataset_from_directory(\n",
+    "    images_dir,\n",
+    "    labels=None,\n",
+    "    image_size=(image_size, image_size),\n",
+    "    crop_to_aspect_ratio=True,\n",
+    ")\n",
+    "dataset = dataset.rebatch(\n",
+    "    batch_size,\n",
+    "    drop_remainder=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "\n",
+    "for batch in dataset:\n",
+    "    img = batch.numpy()[0]\n",
+    "    break\n",
+    "plt.imshow(img)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### A U-Net denoising autoencoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def residual_block(x, width):\n",
+    "    input_width = x.shape[3]\n",
+    "    if input_width == width:\n",
+    "        residual = x\n",
+    "    else:\n",
+    "        residual = layers.Conv2D(width, kernel_size=1)(x)\n",
+    "    x = layers.BatchNormalization(center=False, scale=False)(x)\n",
+    "    x = layers.Conv2D(width, kernel_size=3, padding=\"same\", activation=\"swish\")(\n",
+    "        x\n",
+    "    )\n",
+    "    x = layers.Conv2D(width, kernel_size=3, padding=\"same\")(x)\n",
+    "    x = x + residual\n",
+    "    return x\n",
+    "\n",
+    "def get_model(image_size, widths, block_depth):\n",
+    "    noisy_images = keras.Input(shape=(image_size, image_size, 3))\n",
+    "    noise_rates = keras.Input(shape=(1, 1, 1))\n",
+    "\n",
+    "    x = layers.Conv2D(widths[0], kernel_size=1)(noisy_images)\n",
+    "    n = layers.UpSampling2D(size=image_size, interpolation=\"nearest\")(\n",
+    "        noise_rates\n",
+    "    )\n",
+    "    x = layers.Concatenate()([x, n])\n",
+    "\n",
+    "    skips = []\n",
+    "    for width in widths[:-1]:\n",
+    "        for _ in range(block_depth):\n",
+    "            x = residual_block(x, width)\n",
+    "            skips.append(x)\n",
+    "        x = layers.AveragePooling2D(pool_size=2)(x)\n",
+    "\n",
+    "    for _ in range(block_depth):\n",
+    "        x = residual_block(x, widths[-1])\n",
+    "\n",
+    "    for width in reversed(widths[:-1]):\n",
+    "        x = layers.UpSampling2D(size=2, interpolation=\"bilinear\")(x)\n",
+    "        for _ in range(block_depth):\n",
+    "            x = layers.Concatenate()([x, skips.pop()])\n",
+    "            x = residual_block(x, width)\n",
+    "\n",
+    "    pred_noise_masks = layers.Conv2D(\n",
+    "        3, kernel_size=1, kernel_initializer=\"zeros\"\n",
+    "    )(x)\n",
+    "\n",
+    "    return keras.Model([noisy_images, noise_rates], pred_noise_masks)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The concept of \"diffusion time\" and \"diffusion schedule\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def diffusion_schedule(\n",
+    "    diffusion_times,\n",
+    "    min_signal_rate=0.02,\n",
+    "    max_signal_rate=0.95,\n",
+    "):\n",
+    "    start_angle = ops.cast(ops.arccos(max_signal_rate), \"float32\")\n",
+    "    end_angle = ops.cast(ops.arccos(min_signal_rate), \"float32\")\n",
+    "    diffusion_angles = start_angle + diffusion_times * (end_angle - start_angle)\n",
+    "    signal_rates = ops.cos(diffusion_angles)\n",
+    "    noise_rates = ops.sin(diffusion_angles)\n",
+    "    return noise_rates, signal_rates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "diffusion_times = ops.arange(0.0, 1.0, 0.01)\n",
+    "noise_rates, signal_rates = diffusion_schedule(diffusion_times)\n",
+    "\n",
+    "diffusion_times = ops.convert_to_numpy(diffusion_times)\n",
+    "noise_rates = ops.convert_to_numpy(noise_rates)\n",
+    "signal_rates = ops.convert_to_numpy(signal_rates)\n",
+    "\n",
+    "plt.plot(diffusion_times, noise_rates, label=\"Noise rate\")\n",
+    "plt.plot(diffusion_times, signal_rates, label=\"Signal rate\")\n",
+    "\n",
+    "plt.xlabel(\"Diffusion time\")\n",
+    "plt.legend()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The training process"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class DiffusionModel(keras.Model):\n",
+    "    def __init__(self, image_size, widths, block_depth, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.image_size = image_size\n",
+    "        self.denoising_model = get_model(image_size, widths, block_depth)\n",
+    "        self.seed_generator = keras.random.SeedGenerator()\n",
+    "        self.loss = keras.losses.MeanAbsoluteError()\n",
+    "        self.normalizer = keras.layers.Normalization()\n",
+    "\n",
+    "    def denoise(self, noisy_images, noise_rates, signal_rates):\n",
+    "        pred_noise_masks = self.denoising_model([noisy_images, noise_rates])\n",
+    "        pred_images = (\n",
+    "            noisy_images - noise_rates * pred_noise_masks\n",
+    "        ) / signal_rates\n",
+    "        return pred_images, pred_noise_masks\n",
+    "\n",
+    "    def call(self, images):\n",
+    "        images = self.normalizer(images)\n",
+    "        noise_masks = keras.random.normal(\n",
+    "            (batch_size, self.image_size, self.image_size, 3),\n",
+    "            seed=self.seed_generator,\n",
+    "        )\n",
+    "        diffusion_times = keras.random.uniform(\n",
+    "            (batch_size, 1, 1, 1),\n",
+    "            minval=0.0,\n",
+    "            maxval=1.0,\n",
+    "            seed=self.seed_generator,\n",
+    "        )\n",
+    "        noise_rates, signal_rates = diffusion_schedule(diffusion_times)\n",
+    "        noisy_images = signal_rates * images + noise_rates * noise_masks\n",
+    "        pred_images, pred_noise_masks = self.denoise(\n",
+    "            noisy_images, noise_rates, signal_rates\n",
+    "        )\n",
+    "        return pred_images, pred_noise_masks, noise_masks\n",
+    "\n",
+    "    def compute_loss(self, x, y, y_pred, sample_weight=None, training=True):\n",
+    "        _, pred_noise_masks, noise_masks = y_pred\n",
+    "        return self.loss(noise_masks, pred_noise_masks)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The generation process\n",
+    "\n",
+    "    def generate(self, num_images, diffusion_steps):\n",
+    "        noisy_images = keras.random.normal(\n",
+    "            (num_images, self.image_size, self.image_size, 3),\n",
+    "            seed=self.seed_generator,\n",
+    "        )\n",
+    "        step_size = 1.0 / diffusion_steps\n",
+    "        for step in range(diffusion_steps):\n",
+    "            diffusion_times = ops.ones((num_images, 1, 1, 1)) - step * step_size\n",
+    "            noise_rates, signal_rates = diffusion_schedule(diffusion_times)\n",
+    "            pred_images, pred_noises = self.denoise(\n",
+    "                noisy_images, noise_rates, signal_rates\n",
+    "            )\n",
+    "            next_diffusion_times = diffusion_times - step_size\n",
+    "            next_noise_rates, next_signal_rates = diffusion_schedule(\n",
+    "                next_diffusion_times\n",
+    "            )\n",
+    "            noisy_images = (\n",
+    "                next_signal_rates * pred_images + next_noise_rates * pred_noises\n",
+    "            )\n",
+    "        images = (\n",
+    "            self.normalizer.mean + pred_images * self.normalizer.variance**0.5\n",
+    "        )\n",
+    "        return ops.clip(images, 0.0, 255.0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Visualizing results with a custom callback"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class VisualizationCallback(keras.callbacks.Callback):\n",
+    "    def __init__(self, diffusion_steps=20, num_rows=3, num_cols=6):\n",
+    "        self.diffusion_steps = diffusion_steps\n",
+    "        self.num_rows = num_rows\n",
+    "        self.num_cols = num_cols\n",
+    "\n",
+    "    def on_epoch_end(self, epoch=None, logs=None):\n",
+    "        generated_images = self.model.generate(\n",
+    "            num_images=self.num_rows * self.num_cols,\n",
+    "            diffusion_steps=self.diffusion_steps,\n",
+    "        )\n",
+    "\n",
+    "        plt.figure(figsize=(self.num_cols * 2.0, self.num_rows * 2.0))\n",
+    "        for row in range(self.num_rows):\n",
+    "            for col in range(self.num_cols):\n",
+    "                i = row * self.num_cols + col\n",
+    "                plt.subplot(self.num_rows, self.num_cols, i + 1)\n",
+    "                img = ops.convert_to_numpy(generated_images[i]).astype(\"uint8\")\n",
+    "                plt.imshow(img)\n",
+    "                plt.axis(\"off\")\n",
+    "        plt.tight_layout()\n",
+    "        plt.show()\n",
+    "        plt.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### It's go time!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = DiffusionModel(image_size, widths=[32, 64, 96, 128], block_depth=2)\n",
+    "model.normalizer.adapt(dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(\n",
+    "    optimizer=keras.optimizers.AdamW(\n",
+    "        learning_rate=keras.optimizers.schedules.InverseTimeDecay(\n",
+    "            initial_learning_rate=1e-3,\n",
+    "            decay_steps=1000,\n",
+    "            decay_rate=0.1,\n",
+    "        ),\n",
+    "        use_ema=True,\n",
+    "        ema_overwrite_frequency=100,\n",
+    "    ),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.fit(\n",
+    "    dataset,\n",
+    "    epochs=100,\n",
+    "    callbacks=[\n",
+    "        VisualizationCallback(),\n",
+    "        keras.callbacks.ModelCheckpoint(\n",
+    "            filepath=\"diffusion_model.weights.h5\",\n",
+    "            save_weights_only=True,\n",
+    "            save_best_only=True,\n",
+    "        ),\n",
+    "    ],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Text-to-image models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras_hub\n",
+    "\n",
+    "model = keras_hub.models.TextToImage.from_preset(\n",
+    "    \"stable_diffusion_3_medium\",\n",
+    "    height=1024,\n",
+    "    width=1024,\n",
+    "    dtype=\"float16\",\n",
+    ")\n",
+    "image = model.generate(\n",
+    "    \"photograph of an astronaut riding a horse, detailed, 8k\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### The TextToImage class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from PIL import Image\n",
+    "\n",
+    "model = keras_hub.models.TextToImage.from_preset(\n",
+    "    \"stable_diffusion_3_medium\",\n",
+    "    image_shape=(512, 512, 3),\n",
+    "    dtype=\"float16\",\n",
+    ")\n",
+    "\n",
+    "prompts = [\n",
+    "    \"A photograph of a cat wearing a top hat, photorealistic\",\n",
+    "    \"A neon sci-fi skyline at night, illustration\",\n",
+    "]\n",
+    "images = model.generate(\n",
+    "    prompts,\n",
+    "    num_steps=25,\n",
+    "    guidance_scale=7.5,\n",
+    ")\n",
+    "\n",
+    "for i, img in enumerate(images):\n",
+    "    pil_image = Image.fromarray(img)\n",
+    "    pil_image.save(f\"generated_image_{i}.png\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Exploring the latent space of a text-to-image model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Latent manifold continuity and latent space walking"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Generating a GIF showing prompt interpolation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "\n",
+    "height, width = 512, 512\n",
+    "num_steps = 28\n",
+    "guidance_scale = 7.0\n",
+    "dtype = \"float16\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "backbone = keras_hub.models.Backbone.from_preset(\n",
+    "    \"stable_diffusion_3_medium\", image_shape=(height, width, 3), dtype=dtype\n",
+    ")\n",
+    "preprocessor = (\n",
+    "    keras_hub.models.StableDiffusion3TextToImagePreprocessor.from_preset(\n",
+    "        \"stable_diffusion_3_medium\"\n",
+    "    )\n",
+    ")  # TODO: make this TextToImagePreprocessor or just use tokenizers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_text_embeddings(prompt):\n",
+    "    token_ids = preprocessor.generate_preprocess([prompt])\n",
+    "    negative_token_ids = preprocessor.generate_preprocess([\"\"])\n",
+    "    (\n",
+    "        positive_embeddings,\n",
+    "        negative_embeddings,\n",
+    "        positive_pooled_embeddings,\n",
+    "        negative_pooled_embeddings,\n",
+    "    ) = backbone.encode_text_step(token_ids, negative_token_ids)\n",
+    "    return (\n",
+    "        positive_embeddings,\n",
+    "        negative_embeddings,\n",
+    "        positive_pooled_embeddings,\n",
+    "        negative_pooled_embeddings,\n",
+    "    )\n",
+    "\n",
+    "def decode_to_images(x, height, width):\n",
+    "    x = ops.concatenate(x, axis=0)\n",
+    "    x = ops.reshape(x, (-1, height, width, 3))\n",
+    "    x = ops.clip((x + 1.0) / 2.0, 0.0, 1.0)\n",
+    "    return ops.cast(ops.round(x * 255.0), \"uint8\")\n",
+    "\n",
+    "def generate_with_latents_and_embeddings(\n",
+    "    latents, embeddings, num_steps, guidance_scale\n",
+    "):\n",
+    "    def body_fun(step, latents):\n",
+    "        return backbone.denoise_step(\n",
+    "            latents, embeddings, step, num_steps, guidance_scale\n",
+    "        )\n",
+    "\n",
+    "    latents = ops.fori_loop(0, num_steps, body_fun, latents)\n",
+    "    return backbone.decode_step(latents)\n",
+    "\n",
+    "def export_as_gif(filename, images, frames_per_second=10, no_rubber_band=False):\n",
+    "    if not no_rubber_band:\n",
+    "        images += images[2:-1][::-1]\n",
+    "    images[0].save(\n",
+    "        filename,\n",
+    "        save_all=True,\n",
+    "        append_images=images[1:],\n",
+    "        duration=1000 // frames_per_second,\n",
+    "        loop=0,\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.config.set_backend(\"jax\")\n",
+    "import itertools\n",
+    "import jax\n",
+    "\n",
+    "@jax.jit\n",
+    "def compiled_function(state, *args, **kwargs):\n",
+    "    (trainable_variables, non_trainable_variables) = state\n",
+    "    mapping = itertools.chain(\n",
+    "        zip(backbone.trainable_variables, trainable_variables),\n",
+    "        zip(backbone.non_trainable_variables, non_trainable_variables),\n",
+    "    )\n",
+    "    with keras.StatelessScope(state_mapping=mapping):\n",
+    "        return generate_with_latents_and_embeddings(*args, **kwargs)\n",
+    "\n",
+    "def wrapped_jax_generate_function(*args, **kwargs):\n",
+    "    state = (\n",
+    "        [v.value for v in backbone.trainable_variables],\n",
+    "        [v.value for v in backbone.non_trainable_variables],\n",
+    "    )\n",
+    "    return compiled_function(state, *args, **kwargs)\n",
+    "\n",
+    "generate_function = wrapped_jax_generate_function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def slerp(v1, v2, num):\n",
+    "    ori_dtype = v1.dtype\n",
+    "    v1 = ops.cast(v1, \"float32\")\n",
+    "    v2 = ops.cast(v2, \"float32\")\n",
+    "\n",
+    "    def interpolation(t, v1, v2, dot_threshold=0.9995):\n",
+    "        norm = ops.linalg.norm(ops.ravel(v1)) * ops.linalg.norm(ops.ravel(v2))\n",
+    "        dot = ops.sum(v1 * v2 / norm)\n",
+    "        if ops.abs(dot) > dot_threshold:\n",
+    "            v2 = (1 - t) * v1 + t * v2\n",
+    "        else:\n",
+    "            theta_0 = ops.arccos(dot)\n",
+    "            sin_theta_0 = ops.sin(theta_0)\n",
+    "            theta_t = theta_0 * t\n",
+    "            sin_theta_t = ops.sin(theta_t)\n",
+    "            s0 = ops.sin(theta_0 - theta_t) / sin_theta_0\n",
+    "            s1 = sin_theta_t / sin_theta_0\n",
+    "            v2 = s0 * v1 + s1 * v2\n",
+    "        return v2\n",
+    "\n",
+    "    t = ops.linspace(0, 1, num)\n",
+    "    interpolated = [interpolation(t[i], v1, v2) for i in range(num)]\n",
+    "    return ops.cast(ops.stack(interpolated), ori_dtype)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "prompt_1 = (\n",
+    "    \"Victorian mechanical butterfly made of brass and clockwork, gears visible \"\n",
+    "    \"through delicate filigree wings\"\n",
+    ")\n",
+    "prompt_2 = (\n",
+    "    \"Bioluminescent butterfly made of flowing plasma and starlight, trailing \"\n",
+    "    \"cosmic dust\"\n",
+    ")\n",
+    "\n",
+    "encoding_1 = get_text_embeddings(prompt_1)\n",
+    "encoding_2 = get_text_embeddings(prompt_2)\n",
+    "pos_emb_1, neg_emb_1, pos_pool_1, neg_pool_1 = encoding_1\n",
+    "pos_emb_2, neg_emb_2, pos_pool_2, neg_pool_2 = encoding_2\n",
+    "\n",
+    "interpolation_steps = 64\n",
+    "batch_size = 4\n",
+    "batches = interpolation_steps // batch_size\n",
+    "\n",
+    "interpolated_pos_emb = slerp(pos_emb_1, pos_emb_2, interpolation_steps)\n",
+    "interpolated_pos_pool = slerp(pos_pool_1, pos_pool_2, interpolation_steps)\n",
+    "\n",
+    "pos_emb_shape = ops.shape(pos_emb_1)\n",
+    "pos_pool_shape = ops.shape(pos_pool_1)\n",
+    "\n",
+    "interpolated_pos_emb = ops.reshape(\n",
+    "    interpolated_pos_emb,\n",
+    "    (batches, batch_size, pos_emb_shape[-2], pos_emb_shape[-1]),\n",
+    ")\n",
+    "interpolated_pos_pool = ops.reshape(\n",
+    "    interpolated_pos_pool, (batches, batch_size, pos_pool_shape[-1])\n",
+    ")\n",
+    "\n",
+    "negative_embeddings = ops.tile(neg_emb_1, (batch_size, 1, 1))\n",
+    "negative_pooled_embeddings = ops.tile(neg_pool_1, (batch_size, 1))\n",
+    "\n",
+    "latents = keras.random.normal((1, height // 8, width // 8, 16), seed=42)\n",
+    "latents = ops.tile(latents, (batch_size, 1, 1, 1))\n",
+    "\n",
+    "images = []\n",
+    "progbar = keras.utils.Progbar(batches)\n",
+    "for i in range(batches):\n",
+    "    images.append(\n",
+    "        generate_function(\n",
+    "            latents,\n",
+    "            (\n",
+    "                interpolated_pos_emb[i],\n",
+    "                negative_embeddings,\n",
+    "                interpolated_pos_pool[i],\n",
+    "                negative_pooled_embeddings,\n",
+    "            ),\n",
+    "            ops.convert_to_tensor(num_steps),\n",
+    "            ops.convert_to_tensor(guidance_scale),\n",
+    "        )\n",
+    "    )\n",
+    "    progbar.update(i + 1, finalize=(i == batches - 1))\n",
+    "\n",
+    "images = ops.convert_to_numpy(decode_to_images(images, height, width))\n",
+    "export_as_gif(\n",
+    "    \"dog_to_cat_64.gif\",\n",
+    "    [Image.fromarray(image) for image in images],\n",
+    "    frames_per_second=2,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### 4-way interpolation to make image grids"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "prompt_1 = \"Pearl white seashell, spiral form, smooth iridescent surface\"\n",
+    "prompt_2 = \"Porcelain teacup, curved handle, glossy white ceramic\"\n",
+    "prompt_3 = \"White orchid flower, delicate petals, curved organic form\"\n",
+    "prompt_4 = \"Paper origami crane, crisp folds, pure white surface\"\n",
+    "\n",
+    "interpolation_steps = 8\n",
+    "batch_size = 4\n",
+    "batches = (interpolation_steps**2) // batch_size\n",
+    "\n",
+    "enc1 = get_text_embeddings(prompt_1)\n",
+    "enc2 = get_text_embeddings(prompt_2)\n",
+    "enc3 = get_text_embeddings(prompt_3)\n",
+    "enc4 = get_text_embeddings(prompt_4)\n",
+    "\n",
+    "pos_emb_1, neg_emb_1, pos_pool_1, neg_pool_1 = enc1\n",
+    "pos_emb_2, neg_emb_2, pos_pool_2, neg_pool_2 = enc2\n",
+    "pos_emb_3, neg_emb_3, pos_pool_3, neg_pool_3 = enc3\n",
+    "pos_emb_4, neg_emb_4, pos_pool_4, neg_pool_4 = enc4\n",
+    "\n",
+    "pos_emb_shape = ops.shape(pos_emb_1)\n",
+    "pos_pool_shape = ops.shape(pos_pool_1)\n",
+    "\n",
+    "interpolated_12_emb = slerp(pos_emb_1, pos_emb_2, interpolation_steps)\n",
+    "interpolated_34_emb = slerp(pos_emb_3, pos_emb_4, interpolation_steps)\n",
+    "interpolated_12_pool = slerp(pos_pool_1, pos_pool_2, interpolation_steps)\n",
+    "interpolated_34_pool = slerp(pos_pool_3, pos_pool_4, interpolation_steps)\n",
+    "\n",
+    "interpolated_pos_emb = slerp(\n",
+    "    interpolated_12_emb, interpolated_34_emb, interpolation_steps\n",
+    ")\n",
+    "interpolated_pos_pool = slerp(\n",
+    "    interpolated_12_pool, interpolated_34_pool, interpolation_steps\n",
+    ")\n",
+    "\n",
+    "interpolated_pos_emb = ops.reshape(\n",
+    "    interpolated_pos_emb,\n",
+    "    (batches, batch_size, pos_emb_shape[-2], pos_emb_shape[-1]),\n",
+    ")\n",
+    "interpolated_pos_pool = ops.reshape(\n",
+    "    interpolated_pos_pool, (batches, batch_size, pos_pool_shape[-1])\n",
+    ")\n",
+    "\n",
+    "negative_embeddings = ops.tile(neg_emb_1, (batch_size, 1, 1))\n",
+    "negative_pooled_embeddings = ops.tile(neg_pool_1, (batch_size, 1))\n",
+    "\n",
+    "latents = keras.random.normal((1, height // 8, width // 8, 16), seed=42)\n",
+    "latents = ops.tile(latents, (batch_size, 1, 1, 1))\n",
+    "\n",
+    "images = []\n",
+    "progbar = keras.utils.Progbar(batches)\n",
+    "for i in range(batches):\n",
+    "    images.append(\n",
+    "        generate_function(\n",
+    "            latents,\n",
+    "            (\n",
+    "                interpolated_pos_emb[i],\n",
+    "                negative_embeddings,\n",
+    "                interpolated_pos_pool[i],\n",
+    "                negative_pooled_embeddings,\n",
+    "            ),\n",
+    "            ops.convert_to_tensor(num_steps),\n",
+    "            ops.convert_to_tensor(guidance_scale),\n",
+    "        )\n",
+    "    )\n",
+    "    progbar.update(i + 1, finalize=(i == batches - 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def plot_grid(images, path, grid_size, scale=2):\n",
+    "    fig, axs = plt.subplots(\n",
+    "        grid_size, grid_size, figsize=(grid_size * scale, grid_size * scale)\n",
+    "    )\n",
+    "    fig.tight_layout()\n",
+    "    plt.subplots_adjust(wspace=0, hspace=0)\n",
+    "    plt.axis(\"off\")\n",
+    "    for ax in axs.flat:\n",
+    "        ax.axis(\"off\")\n",
+    "\n",
+    "    for i in range(min(grid_size * grid_size, len(images))):\n",
+    "        ax = axs.flat[i]\n",
+    "        ax.imshow(images[i])\n",
+    "        ax.axis(\"off\")\n",
+    "\n",
+    "    for i in range(len(images), grid_size * grid_size):\n",
+    "        axs.flat[i].axis(\"off\")\n",
+    "        axs.flat[i].remove()\n",
+    "\n",
+    "    plt.savefig(\n",
+    "        fname=path,\n",
+    "        pad_inches=0,\n",
+    "        bbox_inches=\"tight\",\n",
+    "        transparent=False,\n",
+    "        dpi=60,\n",
+    "    )\n",
+    "\n",
+    "images = ops.convert_to_numpy(decode_to_images(images, height, width))\n",
+    "plot_grid(images, \"4-way-interpolation.jpg\", interpolation_steps)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Wrapping up"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter17_image-generation",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter18_best-practices-for-the-real-world.ipynb b/chapter18_best-practices-for-the-real-world.ipynb
new file mode 100644
index 0000000000..6207d849ee
--- /dev/null
+++ b/chapter18_best-practices-for-the-real-world.ipynb
@@ -0,0 +1,801 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-nightly --upgrade -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Getting the most out of your models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Hyperparameter optimization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Using KerasTuner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install keras-tuner -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
+    "def build_model(hp):\n",
+    "    units = hp.Int(name=\"units\", min_value=16, max_value=64, step=16)\n",
+    "    model = keras.Sequential(\n",
+    "        [\n",
+    "            layers.Dense(units, activation=\"relu\"),\n",
+    "            layers.Dense(10, activation=\"softmax\"),\n",
+    "        ]\n",
+    "    )\n",
+    "    optimizer = hp.Choice(name=\"optimizer\", values=[\"rmsprop\", \"adam\"])\n",
+    "    model.compile(\n",
+    "        optimizer=optimizer,\n",
+    "        loss=\"sparse_categorical_crossentropy\",\n",
+    "        metrics=[\"accuracy\"],\n",
+    "    )\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras_tuner as kt\n",
+    "\n",
+    "class SimpleMLP(kt.HyperModel):\n",
+    "    def __init__(self, num_classes):\n",
+    "        self.num_classes = num_classes\n",
+    "\n",
+    "    def build(self, hp):\n",
+    "        units = hp.Int(name=\"units\", min_value=16, max_value=64, step=16)\n",
+    "        model = keras.Sequential(\n",
+    "            [\n",
+    "                layers.Dense(units, activation=\"relu\"),\n",
+    "                layers.Dense(self.num_classes, activation=\"softmax\"),\n",
+    "            ]\n",
+    "        )\n",
+    "        optimizer = hp.Choice(name=\"optimizer\", values=[\"rmsprop\", \"adam\"])\n",
+    "        model.compile(\n",
+    "            optimizer=optimizer,\n",
+    "            loss=\"sparse_categorical_crossentropy\",\n",
+    "            metrics=[\"accuracy\"],\n",
+    "        )\n",
+    "        return model\n",
+    "\n",
+    "hypermodel = SimpleMLP(num_classes=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tuner = kt.BayesianOptimization(\n",
+    "    build_model,\n",
+    "    objective=\"val_accuracy\",\n",
+    "    max_trials=100,\n",
+    "    executions_per_trial=2,\n",
+    "    directory=\"mnist_kt_test\",\n",
+    "    overwrite=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "tuner.search_space_summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n",
+    "x_train = x_train.reshape((-1, 28 * 28)).astype(\"float32\") / 255\n",
+    "x_test = x_test.reshape((-1, 28 * 28)).astype(\"float32\") / 255\n",
+    "x_train_full = x_train[:]\n",
+    "y_train_full = y_train[:]\n",
+    "num_val_samples = 10000\n",
+    "x_train, x_val = x_train[:-num_val_samples], x_train[-num_val_samples:]\n",
+    "y_train, y_val = y_train[:-num_val_samples], y_train[-num_val_samples:]\n",
+    "callbacks = [\n",
+    "    keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=5),\n",
+    "]\n",
+    "tuner.search(\n",
+    "    x_train,\n",
+    "    y_train,\n",
+    "    batch_size=128,\n",
+    "    epochs=100,\n",
+    "    validation_data=(x_val, y_val),\n",
+    "    callbacks=callbacks,\n",
+    "    verbose=2,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "top_n = 4\n",
+    "best_hps = tuner.get_best_hyperparameters(top_n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_best_epoch(hp):\n",
+    "    model = build_model(hp)\n",
+    "    callbacks = [\n",
+    "        keras.callbacks.EarlyStopping(\n",
+    "            monitor=\"val_loss\", mode=\"min\", patience=10\n",
+    "        )\n",
+    "    ]\n",
+    "    history = model.fit(\n",
+    "        x_train,\n",
+    "        y_train,\n",
+    "        validation_data=(x_val, y_val),\n",
+    "        epochs=100,\n",
+    "        batch_size=128,\n",
+    "        callbacks=callbacks,\n",
+    "    )\n",
+    "    val_loss_per_epoch = history.history[\"val_loss\"]\n",
+    "    best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1\n",
+    "    print(f\"Best epoch: {best_epoch}\")\n",
+    "    return best_epoch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_best_trained_model(hp):\n",
+    "    best_epoch = get_best_epoch(hp)\n    model = build_model(hp)\n",
+    "    model.fit(\n",
+    "        x_train_full, y_train_full, batch_size=128, epochs=int(best_epoch * 1.2)\n",
+    "    )\n",
+    "    return model\n",
+    "\n",
+    "best_models = []\n",
+    "for hp in best_hps:\n",
+    "    model = get_best_trained_model(hp)\n",
+    "    model.evaluate(x_test, y_test)\n",
+    "    best_models.append(model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "best_models = tuner.get_best_models(top_n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### The art of crafting the right search space"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### The future of hyperparameter tuning: automated machine learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Model ensembling"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Scaling up model training with multiple devices"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Multi-GPU training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Data parallelism: replicate your model on each GPU"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Model parallelism: split your model across multiple GPUs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential(\n",
+    "    [\n",
+    "        keras.layers.Input(shape=(16000,)),\n",
+    "        keras.layers.Dense(64000, activation=\"relu\"),\n",
+    "        keras.layers.Dense(8000, activation=\"sigmoid\"),\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "half_kernel_0 = kernel[:, :32000]\n",
+    "half_bias_0 = bias[:32000]\n",
+    "\n",
+    "half_kernel_1 = kernel[:, 32000:]\n",
+    "half_bias_1 = bias[32000:]\n",
+    "\n",
+    "with keras.device(\"gpu:0\"):\n",
+    "    half_output_0 = keras.ops.matmul(inputs, half_kernel_0) + half_bias_0\n",
+    "\n",
+    "with keras.device(\"gpu:1\"):\n",
+    "    half_output_1 = keras.ops.matmul(inputs, half_kernel_1) + half_bias_1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Distributed training in practice"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Getting your hands on two or more GPUs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Using data parallelism with JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.distribution.set_distribution(keras.distribution.DataParallel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.distribution.list_devices()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.distribution.set_distribution(\n",
+    "    keras.distribution.DataParallel([\"gpu:0\", \"gpu:1\"])\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "###### Using model parallelism with JAX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "mesh = keras.distribution.DeviceMesh(\n",
+    "    shape=(2, 4),\n",
+    "    axis_names=[\"data\", \"model\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "devices = [f\"gpu:{i}\" for i in range(8)]\n",
+    "mesh = keras.distribution.DeviceMesh(\n",
+    "    shape=(2, 4),\n",
+    "    axis_names=[\"data\", \"model\"],\n",
+    "    devices=devices,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "for v in model.variables:\n",
+    "    print(v.path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "sequential/dense/kernel\n",
+    "sequential/dense/bias\n",
+    "sequential/dense_1/kernel\n",
+    "sequential/dense_1/bias"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "layout_map = keras.distribution.LayoutMap(mesh)\n",
+    "layout_map[\"sequential/dense/kernel\"] = (None, \"model\")\n",
+    "layout_map[\"sequential/dense/bias\"] = (\"model\",)\n",
+    "layout_map[\"sequential/dense_1/kernel\"] = (None, \"model\")\n",
+    "layout_map[\"sequential/dense_1/bias\"] = (\"model\",)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model_parallel = keras.distribution.ModelParallel(\n",
+    "    layout_map=layout_map,\n",
+    "    batch_dim_name=\"data\",\n",
+    ")\n",
+    "keras.distribution.set_distribution(model_parallel)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.layers[0].kernel.value.sharding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import jax\n",
+    "\n",
+    "value = model.layers[0].kernel.value\n",
+    "jax.debug.visualize_sharding(value.shape, value.sharding)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### TPU training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Leveraging step fusing to improve TPU utilization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Speeding up training and inference with lower-precision computation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Understanding floating-point precision"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Float16 inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "keras.config.set_dtype_policy(\"float16\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Mixed-precision training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "\n",
+    "keras.config.set_dtype_policy(\"mixed_float16\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Using loss scaling with mixed precision"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "optimizer = keras.optimizers.Adam(learning_rate=1e-3, loss_scale_factor=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "optimizer = keras.optimizers.LossScaleOptimizer(\n",
+    "    keras.optimizers.Adam(learning_rate=1e-3)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Beyond mixed precision: float8 training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Faster inference with quantization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "x = ops.array([[0.1, 0.9], [1.2, -0.8]])\n",
+    "kernel = ops.array([[-0.1, -2.2], [1.1, 0.7]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def abs_max_quantize(value):\n",
+    "    abs_max = ops.max(ops.abs(value), keepdims=True)\n",
+    "    scale = ops.divide(127, abs_max + 1e-7)\n",
+    "    scaled_value = value * scale\n",
+    "    scaled_value = ops.clip(ops.round(scaled_value), -127, 127)\n",
+    "    scaled_value = ops.cast(scaled_value, dtype=\"int8\")\n",
+    "    return scaled_value, scale\n",
+    "\n",
+    "int_x, x_scale = abs_max_quantize(x)\n",
+    "int_kernel, kernel_scale = abs_max_quantize(kernel)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "int_y = ops.matmul(int_x, int_kernel)\n",
+    "y = ops.cast(int_y, dtype=\"float32\") / (x_scale * kernel_scale)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "y"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "ops.matmul(x, kernel)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = ...\n",
+    "model.quantize(\"int8\")\n",
+    "predictions = model.predict(...)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chapter summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter18_best-practices-for-the-real-world",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/second_edition/chapter02_mathematical-building-blocks.ipynb b/second_edition/chapter02_mathematical-building-blocks.ipynb
new file mode 100644
index 0000000000..01edc9becc
--- /dev/null
+++ b/second_edition/chapter02_mathematical-building-blocks.ipynb
@@ -0,0 +1,1469 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "# The mathematical building blocks of neural networks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## A first look at a neural network"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Loading the MNIST dataset in Keras**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import mnist\n",
+    "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_images.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "len(train_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_images.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "len(test_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_labels"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**The network architecture**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "model = keras.Sequential([\n",
+    "    layers.Dense(512, activation=\"relu\"),\n",
+    "    layers.Dense(10, activation=\"softmax\")\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**The compilation step**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Preparing the image data**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_images = train_images.reshape((60000, 28 * 28))\n",
+    "train_images = train_images.astype(\"float32\") / 255\n",
+    "test_images = test_images.reshape((10000, 28 * 28))\n",
+    "test_images = test_images.astype(\"float32\") / 255"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**\"Fitting\" the model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.fit(train_images, train_labels, epochs=5, batch_size=128)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Using the model to make predictions**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_digits = test_images[0:10]\n",
+    "predictions = model.predict(test_digits)\n",
+    "predictions[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions[0].argmax()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions[0][7]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_labels[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Evaluating the model on new data**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_loss, test_acc = model.evaluate(test_images, test_labels)\n",
+    "print(f\"test_acc: {test_acc}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Data representations for neural networks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Scalars (rank-0 tensors)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "x = np.array(12)\n",
+    "x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x.ndim"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Vectors (rank-1 tensors)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = np.array([12, 3, 6, 14, 7])\n",
+    "x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x.ndim"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Matrices (rank-2 tensors)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = np.array([[5, 78, 2, 34, 0],\n",
+    "              [6, 79, 3, 35, 1],\n",
+    "              [7, 80, 4, 36, 2]])\n",
+    "x.ndim"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Rank-3 and higher-rank tensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = np.array([[[5, 78, 2, 34, 0],\n",
+    "               [6, 79, 3, 35, 1],\n",
+    "               [7, 80, 4, 36, 2]],\n",
+    "              [[5, 78, 2, 34, 0],\n",
+    "               [6, 79, 3, 35, 1],\n",
+    "               [7, 80, 4, 36, 2]],\n",
+    "              [[5, 78, 2, 34, 0],\n",
+    "               [6, 79, 3, 35, 1],\n",
+    "               [7, 80, 4, 36, 2]]])\n",
+    "x.ndim"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Key attributes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import mnist\n",
+    "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_images.ndim"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_images.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_images.dtype"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Displaying the fourth digit**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "digit = train_images[4]\n",
+    "plt.imshow(digit, cmap=plt.cm.binary)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_labels[4]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Manipulating tensors in NumPy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "my_slice = train_images[10:100]\n",
+    "my_slice.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "my_slice = train_images[10:100, :, :]\n",
+    "my_slice.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "my_slice = train_images[10:100, 0:28, 0:28]\n",
+    "my_slice.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "my_slice = train_images[:, 14:, 14:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "my_slice = train_images[:, 7:-7, 7:-7]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The notion of data batches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch = train_images[:128]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "batch = train_images[128:256]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "n = 3\n",
+    "batch = train_images[128 * n:128 * (n + 1)]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Real-world examples of data tensors"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Vector data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Timeseries data or sequence data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Image data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Video data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## The gears of neural networks: tensor operations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Element-wise operations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def naive_relu(x):\n",
+    "    assert len(x.shape) == 2\n",
+    "    x = x.copy()\n",
+    "    for i in range(x.shape[0]):\n",
+    "        for j in range(x.shape[1]):\n",
+    "            x[i, j] = max(x[i, j], 0)\n",
+    "    return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def naive_add(x, y):\n",
+    "    assert len(x.shape) == 2\n",
+    "    assert x.shape == y.shape\n",
+    "    x = x.copy()\n",
+    "    for i in range(x.shape[0]):\n",
+    "        for j in range(x.shape[1]):\n",
+    "            x[i, j] += y[i, j]\n",
+    "    return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "x = np.random.random((20, 100))\n",
+    "y = np.random.random((20, 100))\n",
+    "\n",
+    "t0 = time.time()\n",
+    "for _ in range(1000):\n",
+    "    z = x + y\n",
+    "    z = np.maximum(z, 0.)\n",
+    "print(\"Took: {0:.2f} s\".format(time.time() - t0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "t0 = time.time()\n",
+    "for _ in range(1000):\n",
+    "    z = naive_add(x, y)\n",
+    "    z = naive_relu(z)\n",
+    "print(\"Took: {0:.2f} s\".format(time.time() - t0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Broadcasting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "X = np.random.random((32, 10))\n",
+    "y = np.random.random((10,))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "y = np.expand_dims(y, axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "Y = np.concatenate([y] * 32, axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def naive_add_matrix_and_vector(x, y):\n",
+    "    assert len(x.shape) == 2\n",
+    "    assert len(y.shape) == 1\n",
+    "    assert x.shape[1] == y.shape[0]\n",
+    "    x = x.copy()\n",
+    "    for i in range(x.shape[0]):\n",
+    "        for j in range(x.shape[1]):\n",
+    "            x[i, j] += y[j]\n",
+    "    return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "x = np.random.random((64, 3, 32, 10))\n",
+    "y = np.random.random((32, 10))\n",
+    "z = np.maximum(x, y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Tensor product"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = np.random.random((32,))\n",
+    "y = np.random.random((32,))\n",
+    "z = np.dot(x, y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def naive_vector_dot(x, y):\n",
+    "    assert len(x.shape) == 1\n",
+    "    assert len(y.shape) == 1\n",
+    "    assert x.shape[0] == y.shape[0]\n",
+    "    z = 0.\n",
+    "    for i in range(x.shape[0]):\n",
+    "        z += x[i] * y[i]\n",
+    "    return z"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def naive_matrix_vector_dot(x, y):\n",
+    "    assert len(x.shape) == 2\n",
+    "    assert len(y.shape) == 1\n",
+    "    assert x.shape[1] == y.shape[0]\n",
+    "    z = np.zeros(x.shape[0])\n",
+    "    for i in range(x.shape[0]):\n",
+    "        for j in range(x.shape[1]):\n",
+    "            z[i] += x[i, j] * y[j]\n",
+    "    return z"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def naive_matrix_vector_dot(x, y):\n",
+    "    z = np.zeros(x.shape[0])\n",
+    "    for i in range(x.shape[0]):\n",
+    "        z[i] = naive_vector_dot(x[i, :], y)\n",
+    "    return z"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def naive_matrix_dot(x, y):\n",
+    "    assert len(x.shape) == 2\n",
+    "    assert len(y.shape) == 2\n",
+    "    assert x.shape[1] == y.shape[0]\n",
+    "    z = np.zeros((x.shape[0], y.shape[1]))\n",
+    "    for i in range(x.shape[0]):\n",
+    "        for j in range(y.shape[1]):\n",
+    "            row_x = x[i, :]\n",
+    "            column_y = y[:, j]\n",
+    "            z[i, j] = naive_vector_dot(row_x, column_y)\n",
+    "    return z"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Tensor reshaping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_images = train_images.reshape((60000, 28 * 28))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = np.array([[0., 1.],\n",
+    "             [2., 3.],\n",
+    "             [4., 5.]])\n",
+    "x.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = x.reshape((6, 1))\n",
+    "x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = np.zeros((300, 20))\n",
+    "x = np.transpose(x)\n",
+    "x.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Geometric interpretation of tensor operations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A geometric interpretation of deep learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## The engine of neural networks: gradient-based optimization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### What's a derivative?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Derivative of a tensor operation: the gradient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Stochastic gradient descent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Chaining derivatives: The Backpropagation algorithm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The chain rule"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Automatic differentiation with computation graphs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The gradient tape in TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "x = tf.Variable(0.)\n",
+    "with tf.GradientTape() as tape:\n",
+    "    y = 2 * x + 3\n",
+    "grad_of_y_wrt_x = tape.gradient(y, x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x = tf.Variable(tf.random.uniform((2, 2)))\n",
+    "with tf.GradientTape() as tape:\n",
+    "    y = 2 * x + 3\n",
+    "grad_of_y_wrt_x = tape.gradient(y, x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "W = tf.Variable(tf.random.uniform((2, 2)))\n",
+    "b = tf.Variable(tf.zeros((2,)))\n",
+    "x = tf.random.uniform((2, 2))\n",
+    "with tf.GradientTape() as tape:\n",
+    "    y = tf.matmul(x, W) + b\n",
+    "grad_of_y_wrt_W_and_b = tape.gradient(y, [W, b])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Looking back at our first example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n",
+    "train_images = train_images.reshape((60000, 28 * 28))\n",
+    "train_images = train_images.astype(\"float32\") / 255\n",
+    "test_images = test_images.reshape((10000, 28 * 28))\n",
+    "test_images = test_images.astype(\"float32\") / 255"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(512, activation=\"relu\"),\n",
+    "    layers.Dense(10, activation=\"softmax\")\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.fit(train_images, train_labels, epochs=5, batch_size=128)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Reimplementing our first example from scratch in TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### A simple Dense class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "class NaiveDense:\n",
+    "    def __init__(self, input_size, output_size, activation):\n",
+    "        self.activation = activation\n",
+    "\n",
+    "        w_shape = (input_size, output_size)\n",
+    "        w_initial_value = tf.random.uniform(w_shape, minval=0, maxval=1e-1)\n",
+    "        self.W = tf.Variable(w_initial_value)\n",
+    "\n",
+    "        b_shape = (output_size,)\n",
+    "        b_initial_value = tf.zeros(b_shape)\n",
+    "        self.b = tf.Variable(b_initial_value)\n",
+    "\n",
+    "    def __call__(self, inputs):\n",
+    "        return self.activation(tf.matmul(inputs, self.W) + self.b)\n",
+    "\n",
+    "    @property\n",
+    "    def weights(self):\n",
+    "        return [self.W, self.b]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### A simple Sequential class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class NaiveSequential:\n",
+    "    def __init__(self, layers):\n",
+    "        self.layers = layers\n",
+    "\n",
+    "    def __call__(self, inputs):\n",
+    "        x = inputs\n",
+    "        for layer in self.layers:\n",
+    "            x = layer(x)\n",
+    "        return x\n",
+    "\n",
+    "    @property\n",
+    "    def weights(self):\n",
+    "        weights = []\n",
+    "        for layer in self.layers:\n",
+    "            weights += layer.weights\n",
+    "        return weights"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = NaiveSequential([\n",
+    "    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),\n",
+    "    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)\n",
+    "])\n",
+    "assert len(model.weights) == 4"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### A batch generator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "\n",
+    "class BatchGenerator:\n",
+    "    def __init__(self, images, labels, batch_size=128):\n",
+    "        assert len(images) == len(labels)\n",
+    "        self.index = 0\n",
+    "        self.images = images\n",
+    "        self.labels = labels\n",
+    "        self.batch_size = batch_size\n",
+    "        self.num_batches = math.ceil(len(images) / batch_size)\n",
+    "\n",
+    "    def next(self):\n",
+    "        images = self.images[self.index : self.index + self.batch_size]\n",
+    "        labels = self.labels[self.index : self.index + self.batch_size]\n",
+    "        self.index += self.batch_size\n",
+    "        return images, labels"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Running one training step"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def one_training_step(model, images_batch, labels_batch):\n",
+    "    with tf.GradientTape() as tape:\n",
+    "        predictions = model(images_batch)\n",
+    "        per_sample_losses = tf.keras.losses.sparse_categorical_crossentropy(\n",
+    "            labels_batch, predictions)\n",
+    "        average_loss = tf.reduce_mean(per_sample_losses)\n",
+    "    gradients = tape.gradient(average_loss, model.weights)\n",
+    "    update_weights(gradients, model.weights)\n",
+    "    return average_loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "learning_rate = 1e-3\n",
+    "\n",
+    "def update_weights(gradients, weights):\n",
+    "    for g, w in zip(gradients, weights):\n",
+    "        w.assign_sub(g * learning_rate)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras import optimizers\n",
+    "\n",
+    "optimizer = optimizers.SGD(learning_rate=1e-3)\n",
+    "\n",
+    "def update_weights(gradients, weights):\n",
+    "    optimizer.apply_gradients(zip(gradients, weights))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The full training loop"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def fit(model, images, labels, epochs, batch_size=128):\n",
+    "    for epoch_counter in range(epochs):\n",
+    "        print(f\"Epoch {epoch_counter}\")\n",
+    "        batch_generator = BatchGenerator(images, labels, batch_size)\n",
+    "        for batch_counter in range(batch_generator.num_batches):\n",
+    "            images_batch, labels_batch = batch_generator.next()\n",
+    "            loss = one_training_step(model, images_batch, labels_batch)\n",
+    "            if batch_counter % 100 == 0:\n",
+    "                print(f\"loss at batch {batch_counter}: {loss:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import mnist\n",
+    "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n",
+    "\n",
+    "train_images = train_images.reshape((60000, 28 * 28))\n",
+    "train_images = train_images.astype(\"float32\") / 255\n",
+    "test_images = test_images.reshape((10000, 28 * 28))\n",
+    "test_images = test_images.astype(\"float32\") / 255\n",
+    "\n",
+    "fit(model, train_images, train_labels, epochs=10, batch_size=128)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Evaluating the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions = model(test_images)\n",
+    "predictions = predictions.numpy()\n",
+    "predicted_labels = np.argmax(predictions, axis=1)\n",
+    "matches = predicted_labels == test_labels\n",
+    "print(f\"accuracy: {matches.mean():.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter02_mathematical-building-blocks.ipynb",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter03_introduction-to-keras-and-tf.ipynb b/second_edition/chapter03_introduction-to-keras-and-tf.ipynb
similarity index 100%
rename from chapter03_introduction-to-keras-and-tf.ipynb
rename to second_edition/chapter03_introduction-to-keras-and-tf.ipynb
diff --git a/second_edition/chapter04_getting-started-with-neural-networks.ipynb b/second_edition/chapter04_getting-started-with-neural-networks.ipynb
new file mode 100644
index 0000000000..ba77a17d45
--- /dev/null
+++ b/second_edition/chapter04_getting-started-with-neural-networks.ipynb
@@ -0,0 +1,1413 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "# Getting started with neural networks: Classification and regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Classifying movie reviews: A binary classification example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The IMDB dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Loading the IMDB dataset**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import imdb\n",
+    "(train_data, train_labels), (test_data, test_labels) = imdb.load_data(\n",
+    "    num_words=10000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_data[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_labels[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "max([max(sequence) for sequence in train_data])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Decoding reviews back to text**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "word_index = imdb.get_word_index()\n",
+    "reverse_word_index = dict(\n",
+    "    [(value, key) for (key, value) in word_index.items()])\n",
+    "decoded_review = \" \".join(\n",
+    "    [reverse_word_index.get(i - 3, \"?\") for i in train_data[0]])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Preparing the data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Encoding the integer sequences via multi-hot encoding**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "def vectorize_sequences(sequences, dimension=10000):\n",
+    "    results = np.zeros((len(sequences), dimension))\n",
+    "    for i, sequence in enumerate(sequences):\n",
+    "        for j in sequence:\n",
+    "            results[i, j] = 1.\n",
+    "    return results\n",
+    "x_train = vectorize_sequences(train_data)\n",
+    "x_test = vectorize_sequences(test_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x_train[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "y_train = np.asarray(train_labels).astype(\"float32\")\n",
+    "y_test = np.asarray(test_labels).astype(\"float32\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Building your model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Model definition**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "\n",
+    "model = keras.Sequential([\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dense(1, activation=\"sigmoid\")\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Compiling the model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"binary_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Validating your approach"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Setting aside a validation set**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x_val = x_train[:10000]\n",
+    "partial_x_train = x_train[10000:]\n",
+    "y_val = y_train[:10000]\n",
+    "partial_y_train = y_train[10000:]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Training your model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "history = model.fit(partial_x_train,\n",
+    "                    partial_y_train,\n",
+    "                    epochs=20,\n",
+    "                    batch_size=512,\n",
+    "                    validation_data=(x_val, y_val))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "history_dict = history.history\n",
+    "history_dict.keys()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Plotting the training and validation loss**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "history_dict = history.history\n",
+    "loss_values = history_dict[\"loss\"]\n",
+    "val_loss_values = history_dict[\"val_loss\"]\n",
+    "epochs = range(1, len(loss_values) + 1)\n",
+    "plt.plot(epochs, loss_values, \"bo\", label=\"Training loss\")\n",
+    "plt.plot(epochs, val_loss_values, \"b\", label=\"Validation loss\")\n",
+    "plt.title(\"Training and validation loss\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Plotting the training and validation accuracy**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "plt.clf()\n",
+    "acc = history_dict[\"accuracy\"]\n",
+    "val_acc = history_dict[\"val_accuracy\"]\n",
+    "plt.plot(epochs, acc, \"bo\", label=\"Training acc\")\n",
+    "plt.plot(epochs, val_acc, \"b\", label=\"Validation acc\")\n",
+    "plt.title(\"Training and validation accuracy\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Accuracy\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Retraining a model from scratch**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"binary_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(x_train, y_train, epochs=4, batch_size=512)\n",
+    "results = model.evaluate(x_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Using a trained model to generate predictions on new data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.predict(x_test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Further experiments"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Wrapping up"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Classifying newswires: A multiclass classification example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The Reuters dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Loading the Reuters dataset**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import reuters\n",
+    "(train_data, train_labels), (test_data, test_labels) = reuters.load_data(\n",
+    "    num_words=10000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "len(train_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "len(test_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_data[10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Decoding newswires back to text**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "word_index = reuters.get_word_index()\n",
+    "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n",
+    "decoded_newswire = \" \".join([reverse_word_index.get(i - 3, \"?\") for i in\n",
+    "    train_data[0]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_labels[10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Preparing the data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Encoding the input data**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x_train = vectorize_sequences(train_data)\n",
+    "x_test = vectorize_sequences(test_data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Encoding the labels**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def to_one_hot(labels, dimension=46):\n",
+    "    results = np.zeros((len(labels), dimension))\n",
+    "    for i, label in enumerate(labels):\n",
+    "        results[i, label] = 1.\n",
+    "    return results\n",
+    "y_train = to_one_hot(train_labels)\n",
+    "y_test = to_one_hot(test_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.utils import to_categorical\n",
+    "y_train = to_categorical(train_labels)\n",
+    "y_test = to_categorical(test_labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Building your model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Model definition**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(64, activation=\"relu\"),\n",
+    "    layers.Dense(64, activation=\"relu\"),\n",
+    "    layers.Dense(46, activation=\"softmax\")\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Compiling the model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Validating your approach"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Setting aside a validation set**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x_val = x_train[:1000]\n",
+    "partial_x_train = x_train[1000:]\n",
+    "y_val = y_train[:1000]\n",
+    "partial_y_train = y_train[1000:]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Training the model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "history = model.fit(partial_x_train,\n",
+    "                    partial_y_train,\n",
+    "                    epochs=20,\n",
+    "                    batch_size=512,\n",
+    "                    validation_data=(x_val, y_val))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Plotting the training and validation loss**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "loss = history.history[\"loss\"]\n",
+    "val_loss = history.history[\"val_loss\"]\n",
+    "epochs = range(1, len(loss) + 1)\n",
+    "plt.plot(epochs, loss, \"bo\", label=\"Training loss\")\n",
+    "plt.plot(epochs, val_loss, \"b\", label=\"Validation loss\")\n",
+    "plt.title(\"Training and validation loss\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Plotting the training and validation accuracy**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "plt.clf()\n",
+    "acc = history.history[\"accuracy\"]\n",
+    "val_acc = history.history[\"val_accuracy\"]\n",
+    "plt.plot(epochs, acc, \"bo\", label=\"Training accuracy\")\n",
+    "plt.plot(epochs, val_acc, \"b\", label=\"Validation accuracy\")\n",
+    "plt.title(\"Training and validation accuracy\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Accuracy\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Retraining a model from scratch**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "  layers.Dense(64, activation=\"relu\"),\n",
+    "  layers.Dense(64, activation=\"relu\"),\n",
+    "  layers.Dense(46, activation=\"softmax\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(x_train,\n",
+    "          y_train,\n",
+    "          epochs=9,\n",
+    "          batch_size=512)\n",
+    "results = model.evaluate(x_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import copy\n",
+    "test_labels_copy = copy.copy(test_labels)\n",
+    "np.random.shuffle(test_labels_copy)\n",
+    "hits_array = np.array(test_labels) == np.array(test_labels_copy)\n",
+    "hits_array.mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Generating predictions on new data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions = model.predict(x_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions[0].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "np.sum(predictions[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "np.argmax(predictions[0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A different way to handle the labels and the loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "y_train = np.array(train_labels)\n",
+    "y_test = np.array(test_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The importance of having sufficiently large intermediate layers"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**A model with an information bottleneck**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(64, activation=\"relu\"),\n",
+    "    layers.Dense(4, activation=\"relu\"),\n",
+    "    layers.Dense(46, activation=\"softmax\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(partial_x_train,\n",
+    "          partial_y_train,\n",
+    "          epochs=20,\n",
+    "          batch_size=128,\n",
+    "          validation_data=(x_val, y_val))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Further experiments"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Wrapping up"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Predicting house prices: A regression example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The Boston Housing Price dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Loading the Boston housing dataset**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import boston_housing\n",
+    "(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "train_targets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Preparing the data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Normalizing the data**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "mean = train_data.mean(axis=0)\n",
+    "train_data -= mean\n",
+    "std = train_data.std(axis=0)\n",
+    "train_data /= std\n",
+    "test_data -= mean\n",
+    "test_data /= std"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Building your model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Model definition**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def build_model():\n",
+    "    model = keras.Sequential([\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(64, activation=\"relu\"),\n",
+    "        layers.Dense(1)\n",
+    "    ])\n",
+    "    model.compile(optimizer=\"rmsprop\", loss=\"mse\", metrics=[\"mae\"])\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Validating your approach using K-fold validation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**K-fold validation**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "k = 4\n",
+    "num_val_samples = len(train_data) // k\n",
+    "num_epochs = 100\n",
+    "all_scores = []\n",
+    "for i in range(k):\n",
+    "    print(f\"Processing fold #{i}\")\n",
+    "    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]\n",
+    "    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]\n",
+    "    partial_train_data = np.concatenate(\n",
+    "        [train_data[:i * num_val_samples],\n",
+    "         train_data[(i + 1) * num_val_samples:]],\n",
+    "        axis=0)\n",
+    "    partial_train_targets = np.concatenate(\n",
+    "        [train_targets[:i * num_val_samples],\n",
+    "         train_targets[(i + 1) * num_val_samples:]],\n",
+    "        axis=0)\n",
+    "    model = build_model()\n",
+    "    model.fit(partial_train_data, partial_train_targets,\n",
+    "              epochs=num_epochs, batch_size=16, verbose=0)\n",
+    "    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)\n",
+    "    all_scores.append(val_mae)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "all_scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "np.mean(all_scores)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Saving the validation logs at each fold**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "num_epochs = 500\n",
+    "all_mae_histories = []\n",
+    "for i in range(k):\n",
+    "    print(f\"Processing fold #{i}\")\n",
+    "    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]\n",
+    "    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]\n",
+    "    partial_train_data = np.concatenate(\n",
+    "        [train_data[:i * num_val_samples],\n",
+    "         train_data[(i + 1) * num_val_samples:]],\n",
+    "        axis=0)\n",
+    "    partial_train_targets = np.concatenate(\n",
+    "        [train_targets[:i * num_val_samples],\n",
+    "         train_targets[(i + 1) * num_val_samples:]],\n",
+    "        axis=0)\n",
+    "    model = build_model()\n",
+    "    history = model.fit(partial_train_data, partial_train_targets,\n",
+    "                        validation_data=(val_data, val_targets),\n",
+    "                        epochs=num_epochs, batch_size=16, verbose=0)\n",
+    "    mae_history = history.history[\"val_mae\"]\n",
+    "    all_mae_histories.append(mae_history)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Building the history of successive mean K-fold validation scores**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "average_mae_history = [\n",
+    "    np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Plotting validation scores**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Validation MAE\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Plotting validation scores, excluding the first 10 data points**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "truncated_mae_history = average_mae_history[10:]\n",
+    "plt.plot(range(1, len(truncated_mae_history) + 1), truncated_mae_history)\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Validation MAE\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Training the final model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = build_model()\n",
+    "model.fit(train_data, train_targets,\n",
+    "          epochs=130, batch_size=16, verbose=0)\n",
+    "test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "test_mae_score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Generating predictions on new data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions = model.predict(test_data)\n",
+    "predictions[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Wrapping up"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter04_getting-started-with-neural-networks.ipynb",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/second_edition/chapter05_fundamentals-of-ml.ipynb b/second_edition/chapter05_fundamentals-of-ml.ipynb
new file mode 100644
index 0000000000..dd61f4ead8
--- /dev/null
+++ b/second_edition/chapter05_fundamentals-of-ml.ipynb
@@ -0,0 +1,786 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "# Fundamentals of machine learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Generalization: The goal of machine learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Underfitting and overfitting"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Noisy training data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Ambiguous features"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Rare features and spurious correlations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Adding white-noise channels or all-zeros channels to MNIST**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import mnist\n",
+    "import numpy as np\n",
+    "\n",
+    "(train_images, train_labels), _ = mnist.load_data()\n",
+    "train_images = train_images.reshape((60000, 28 * 28))\n",
+    "train_images = train_images.astype(\"float32\") / 255\n",
+    "\n",
+    "train_images_with_noise_channels = np.concatenate(\n",
+    "    [train_images, np.random.random((len(train_images), 784))], axis=1)\n",
+    "\n",
+    "train_images_with_zeros_channels = np.concatenate(\n",
+    "    [train_images, np.zeros((len(train_images), 784))], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Training the same model on MNIST data with noise channels or all-zeros channels**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "\n",
+    "def get_model():\n",
+    "    model = keras.Sequential([\n",
+    "        layers.Dense(512, activation=\"relu\"),\n",
+    "        layers.Dense(10, activation=\"softmax\")\n",
+    "    ])\n",
+    "    model.compile(optimizer=\"rmsprop\",\n",
+    "                  loss=\"sparse_categorical_crossentropy\",\n",
+    "                  metrics=[\"accuracy\"])\n",
+    "    return model\n",
+    "\n",
+    "model = get_model()\n",
+    "history_noise = model.fit(\n",
+    "    train_images_with_noise_channels, train_labels,\n",
+    "    epochs=10,\n",
+    "    batch_size=128,\n",
+    "    validation_split=0.2)\n",
+    "\n",
+    "model = get_model()\n",
+    "history_zeros = model.fit(\n",
+    "    train_images_with_zeros_channels, train_labels,\n",
+    "    epochs=10,\n",
+    "    batch_size=128,\n",
+    "    validation_split=0.2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Plotting a validation accuracy comparison**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "val_acc_noise = history_noise.history[\"val_accuracy\"]\n",
+    "val_acc_zeros = history_zeros.history[\"val_accuracy\"]\n",
+    "epochs = range(1, 11)\n",
+    "plt.plot(epochs, val_acc_noise, \"b-\",\n",
+    "         label=\"Validation accuracy with noise channels\")\n",
+    "plt.plot(epochs, val_acc_zeros, \"b--\",\n",
+    "         label=\"Validation accuracy with zeros channels\")\n",
+    "plt.title(\"Effect of noise channels on validation accuracy\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Accuracy\")\n",
+    "plt.legend()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The nature of generalization in deep learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Fitting an MNIST model with randomly shuffled labels**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "(train_images, train_labels), _ = mnist.load_data()\n",
+    "train_images = train_images.reshape((60000, 28 * 28))\n",
+    "train_images = train_images.astype(\"float32\") / 255\n",
+    "\n",
+    "random_train_labels = train_labels[:]\n",
+    "np.random.shuffle(random_train_labels)\n",
+    "\n",
+    "model = keras.Sequential([\n",
+    "    layers.Dense(512, activation=\"relu\"),\n",
+    "    layers.Dense(10, activation=\"softmax\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(train_images, random_train_labels,\n",
+    "          epochs=100,\n",
+    "          batch_size=128,\n",
+    "          validation_split=0.2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The manifold hypothesis"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Interpolation as a source of generalization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Why deep learning works"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training data is paramount"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Evaluating machine-learning models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Training, validation, and test sets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Simple hold-out validation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### K-fold validation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Iterated K-fold validation with shuffling"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Beating a common-sense baseline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Things to keep in mind about model evaluation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Improving model fit"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Tuning key gradient descent parameters"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Training an MNIST model with an incorrectly high learning rate**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "(train_images, train_labels), _ = mnist.load_data()\n",
+    "train_images = train_images.reshape((60000, 28 * 28))\n",
+    "train_images = train_images.astype(\"float32\") / 255\n",
+    "\n",
+    "model = keras.Sequential([\n",
+    "    layers.Dense(512, activation=\"relu\"),\n",
+    "    layers.Dense(10, activation=\"softmax\")\n",
+    "])\n",
+    "model.compile(optimizer=keras.optimizers.RMSprop(1.),\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(train_images, train_labels,\n",
+    "          epochs=10,\n",
+    "          batch_size=128,\n",
+    "          validation_split=0.2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**The same model with a more appropriate learning rate**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(512, activation=\"relu\"),\n",
+    "    layers.Dense(10, activation=\"softmax\")\n",
+    "])\n",
+    "model.compile(optimizer=keras.optimizers.RMSprop(1e-2),\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(train_images, train_labels,\n",
+    "          epochs=10,\n",
+    "          batch_size=128,\n",
+    "          validation_split=0.2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Leveraging better architecture priors"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Increasing model capacity"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**A simple logistic regression on MNIST**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([layers.Dense(10, activation=\"softmax\")])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "history_small_model = model.fit(\n",
+    "    train_images, train_labels,\n",
+    "    epochs=20,\n",
+    "    batch_size=128,\n",
+    "    validation_split=0.2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "val_loss = history_small_model.history[\"val_loss\"]\n",
+    "epochs = range(1, 21)\n",
+    "plt.plot(epochs, val_loss, \"b--\",\n",
+    "         label=\"Validation loss\")\n",
+    "plt.title(\"Effect of insufficient model capacity on validation loss\")\n",
+    "plt.xlabel(\"Epochs\")\n",
+    "plt.ylabel(\"Loss\")\n",
+    "plt.legend()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(96, activation=\"relu\"),\n",
+    "    layers.Dense(96, activation=\"relu\"),\n",
+    "    layers.Dense(10, activation=\"softmax\"),\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "history_large_model = model.fit(\n",
+    "    train_images, train_labels,\n",
+    "    epochs=20,\n",
+    "    batch_size=128,\n",
+    "    validation_split=0.2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Improving generalization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Dataset curation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Feature engineering"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Using early stopping"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Regularizing your model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Reducing the network's size"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Original model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import imdb\n",
+    "(train_data, train_labels), _ = imdb.load_data(num_words=10000)\n",
+    "\n",
+    "def vectorize_sequences(sequences, dimension=10000):\n",
+    "    results = np.zeros((len(sequences), dimension))\n",
+    "    for i, sequence in enumerate(sequences):\n",
+    "        results[i, sequence] = 1.\n",
+    "    return results\n",
+    "train_data = vectorize_sequences(train_data)\n",
+    "\n",
+    "model = keras.Sequential([\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"binary_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "history_original = model.fit(train_data, train_labels,\n",
+    "                             epochs=20, batch_size=512, validation_split=0.4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Version of the model with lower capacity**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(4, activation=\"relu\"),\n",
+    "    layers.Dense(4, activation=\"relu\"),\n",
+    "    layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"binary_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "history_smaller_model = model.fit(\n",
+    "    train_data, train_labels,\n",
+    "    epochs=20, batch_size=512, validation_split=0.4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Version of the model with higher capacity**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(512, activation=\"relu\"),\n",
+    "    layers.Dense(512, activation=\"relu\"),\n",
+    "    layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"binary_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "history_larger_model = model.fit(\n",
+    "    train_data, train_labels,\n",
+    "    epochs=20, batch_size=512, validation_split=0.4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Adding weight regularization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Adding L2 weight regularization to the model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras import regularizers\n",
+    "model = keras.Sequential([\n",
+    "    layers.Dense(16,\n",
+    "                 kernel_regularizer=regularizers.l2(0.002),\n",
+    "                 activation=\"relu\"),\n",
+    "    layers.Dense(16,\n",
+    "                 kernel_regularizer=regularizers.l2(0.002),\n",
+    "                 activation=\"relu\"),\n",
+    "    layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"binary_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "history_l2_reg = model.fit(\n",
+    "    train_data, train_labels,\n",
+    "    epochs=20, batch_size=512, validation_split=0.4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Different weight regularizers available in Keras**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras import regularizers\n",
+    "regularizers.l1(0.001)\n",
+    "regularizers.l1_l2(l1=0.001, l2=0.001)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Adding dropout"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Adding dropout to the IMDB model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential([\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dropout(0.5),\n",
+    "    layers.Dense(16, activation=\"relu\"),\n",
+    "    layers.Dropout(0.5),\n",
+    "    layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"binary_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "history_dropout = model.fit(\n",
+    "    train_data, train_labels,\n",
+    "    epochs=20, batch_size=512, validation_split=0.4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter05_fundamentals-of-ml.ipynb",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/second_edition/chapter07_working-with-keras.ipynb b/second_edition/chapter07_working-with-keras.ipynb
new file mode 100644
index 0000000000..632d7c7e99
--- /dev/null
+++ b/second_edition/chapter07_working-with-keras.ipynb
@@ -0,0 +1,1439 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "# Working with Keras: A deep dive"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## A spectrum of workflows"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Different ways to build Keras models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The Sequential model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**The `Sequential` class**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "\n",
+    "model = keras.Sequential([\n",
+    "    layers.Dense(64, activation=\"relu\"),\n",
+    "    layers.Dense(10, activation=\"softmax\")\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Incrementally building a Sequential model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential()\n",
+    "model.add(layers.Dense(64, activation=\"relu\"))\n",
+    "model.add(layers.Dense(10, activation=\"softmax\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Calling a model for the first time to build it**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.build(input_shape=(None, 3))\n",
+    "model.weights"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**The summary method**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Naming models and layers with the `name` argument**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential(name=\"my_example_model\")\n",
+    "model.add(layers.Dense(64, activation=\"relu\", name=\"my_first_layer\"))\n",
+    "model.add(layers.Dense(10, activation=\"softmax\", name=\"my_last_layer\"))\n",
+    "model.build((None, 3))\n",
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Specifying the input shape of your model in advance**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.Sequential()\n",
+    "model.add(keras.Input(shape=(3,)))\n",
+    "model.add(layers.Dense(64, activation=\"relu\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.add(layers.Dense(10, activation=\"softmax\"))\n",
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### The Functional API"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### A simple example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**A simple Functional model with two `Dense` layers**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(3,), name=\"my_input\")\n",
+    "features = layers.Dense(64, activation=\"relu\")(inputs)\n",
+    "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(3,), name=\"my_input\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs.dtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "features = layers.Dense(64, activation=\"relu\")(inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "features.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Multi-input, multi-output models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**A multi-input, multi-output Functional model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "vocabulary_size = 10000\n",
+    "num_tags = 100\n",
+    "num_departments = 4\n",
+    "\n",
+    "title = keras.Input(shape=(vocabulary_size,), name=\"title\")\n",
+    "text_body = keras.Input(shape=(vocabulary_size,), name=\"text_body\")\n",
+    "tags = keras.Input(shape=(num_tags,), name=\"tags\")\n",
+    "\n",
+    "features = layers.Concatenate()([title, text_body, tags])\n",
+    "features = layers.Dense(64, activation=\"relu\")(features)\n",
+    "\n",
+    "priority = layers.Dense(1, activation=\"sigmoid\", name=\"priority\")(features)\n",
+    "department = layers.Dense(\n",
+    "    num_departments, activation=\"softmax\", name=\"department\")(features)\n",
+    "\n",
+    "model = keras.Model(inputs=[title, text_body, tags], outputs=[priority, department])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training a multi-input, multi-output model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Training a model by providing lists of input & target arrays**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "num_samples = 1280\n",
+    "\n",
+    "title_data = np.random.randint(0, 2, size=(num_samples, vocabulary_size))\n",
+    "text_body_data = np.random.randint(0, 2, size=(num_samples, vocabulary_size))\n",
+    "tags_data = np.random.randint(0, 2, size=(num_samples, num_tags))\n",
+    "\n",
+    "priority_data = np.random.random(size=(num_samples, 1))\n",
+    "department_data = np.random.randint(0, 2, size=(num_samples, num_departments))\n",
+    "\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
+    "              metrics=[[\"mean_absolute_error\"], [\"accuracy\"]])\n",
+    "model.fit([title_data, text_body_data, tags_data],\n",
+    "          [priority_data, department_data],\n",
+    "          epochs=1)\n",
+    "model.evaluate([title_data, text_body_data, tags_data],\n",
+    "               [priority_data, department_data])\n",
+    "priority_preds, department_preds = model.predict([title_data, text_body_data, tags_data])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Training a model by providing dicts of input & target arrays**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss={\"priority\": \"mean_squared_error\", \"department\": \"categorical_crossentropy\"},\n",
+    "              metrics={\"priority\": [\"mean_absolute_error\"], \"department\": [\"accuracy\"]})\n",
+    "model.fit({\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
+    "          {\"priority\": priority_data, \"department\": department_data},\n",
+    "          epochs=1)\n",
+    "model.evaluate({\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n",
+    "               {\"priority\": priority_data, \"department\": department_data})\n",
+    "priority_preds, department_preds = model.predict(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The power of the Functional API: Access to layer connectivity"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.utils.plot_model(model, \"ticket_classifier.png\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.utils.plot_model(model, \"ticket_classifier_with_shape_info.png\", show_shapes=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Retrieving the inputs or outputs of a layer in a Functional model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.layers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.layers[3].input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.layers[3].output"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Creating a new model by reusing intermediate layer outputs**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "features = model.layers[4].output\n",
+    "difficulty = layers.Dense(3, activation=\"softmax\", name=\"difficulty\")(features)\n",
+    "\n",
+    "new_model = keras.Model(\n",
+    "    inputs=[title, text_body, tags],\n",
+    "    outputs=[priority, department, difficulty])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras.utils.plot_model(new_model, \"updated_ticket_classifier.png\", show_shapes=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Subclassing the Model class"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Rewriting our previous example as a subclassed model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**A simple subclassed model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class CustomerTicketModel(keras.Model):\n",
+    "\n",
+    "    def __init__(self, num_departments):\n",
+    "        super().__init__()\n",
+    "        self.concat_layer = layers.Concatenate()\n",
+    "        self.mixing_layer = layers.Dense(64, activation=\"relu\")\n",
+    "        self.priority_scorer = layers.Dense(1, activation=\"sigmoid\")\n",
+    "        self.department_classifier = layers.Dense(\n",
+    "            num_departments, activation=\"softmax\")\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        title = inputs[\"title\"]\n",
+    "        text_body = inputs[\"text_body\"]\n",
+    "        tags = inputs[\"tags\"]\n",
+    "\n",
+    "        features = self.concat_layer([title, text_body, tags])\n",
+    "        features = self.mixing_layer(features)\n",
+    "        priority = self.priority_scorer(features)\n",
+    "        department = self.department_classifier(features)\n",
+    "        return priority, department"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = CustomerTicketModel(num_departments=4)\n",
+    "\n",
+    "priority, department = model(\n",
+    "    {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
+    "              metrics=[[\"mean_absolute_error\"], [\"accuracy\"]])\n",
+    "model.fit({\"title\": title_data,\n",
+    "           \"text_body\": text_body_data,\n",
+    "           \"tags\": tags_data},\n",
+    "          [priority_data, department_data],\n",
+    "          epochs=1)\n",
+    "model.evaluate({\"title\": title_data,\n",
+    "                \"text_body\": text_body_data,\n",
+    "                \"tags\": tags_data},\n",
+    "               [priority_data, department_data])\n",
+    "priority_preds, department_preds = model.predict({\"title\": title_data,\n",
+    "                                                  \"text_body\": text_body_data,\n",
+    "                                                  \"tags\": tags_data})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Beware: What subclassed models don't support"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Mixing and matching different components"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Creating a Functional model that includes a subclassed model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class Classifier(keras.Model):\n",
+    "\n",
+    "    def __init__(self, num_classes=2):\n",
+    "        super().__init__()\n",
+    "        if num_classes == 2:\n",
+    "            num_units = 1\n",
+    "            activation = \"sigmoid\"\n",
+    "        else:\n",
+    "            num_units = num_classes\n",
+    "            activation = \"softmax\"\n",
+    "        self.dense = layers.Dense(num_units, activation=activation)\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        return self.dense(inputs)\n",
+    "\n",
+    "inputs = keras.Input(shape=(3,))\n",
+    "features = layers.Dense(64, activation=\"relu\")(inputs)\n",
+    "outputs = Classifier(num_classes=10)(features)\n",
+    "model = keras.Model(inputs=inputs, outputs=outputs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Creating a subclassed model that includes a Functional model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(64,))\n",
+    "outputs = layers.Dense(1, activation=\"sigmoid\")(inputs)\n",
+    "binary_classifier = keras.Model(inputs=inputs, outputs=outputs)\n",
+    "\n",
+    "class MyModel(keras.Model):\n",
+    "\n",
+    "    def __init__(self, num_classes=2):\n",
+    "        super().__init__()\n",
+    "        self.dense = layers.Dense(64, activation=\"relu\")\n",
+    "        self.classifier = binary_classifier\n",
+    "\n",
+    "    def call(self, inputs):\n",
+    "        features = self.dense(inputs)\n",
+    "        return self.classifier(features)\n",
+    "\n",
+    "model = MyModel()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Remember: Use the right tool for the job"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Using built-in training and evaluation loops"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**The standard workflow: `compile()`, `fit()`, `evaluate()`, `predict()`**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.keras.datasets import mnist\n",
+    "\n",
+    "def get_mnist_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = keras.Model(inputs, outputs)\n",
+    "    return model\n",
+    "\n",
+    "(images, labels), (test_images, test_labels) = mnist.load_data()\n",
+    "images = images.reshape((60000, 28 * 28)).astype(\"float32\") / 255\n",
+    "test_images = test_images.reshape((10000, 28 * 28)).astype(\"float32\") / 255\n",
+    "train_images, val_images = images[10000:], images[:10000]\n",
+    "train_labels, val_labels = labels[10000:], labels[:10000]\n",
+    "\n",
+    "model = get_mnist_model()\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(train_images, train_labels,\n",
+    "          epochs=3,\n",
+    "          validation_data=(val_images, val_labels))\n",
+    "test_metrics = model.evaluate(test_images, test_labels)\n",
+    "predictions = model.predict(test_images)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Writing your own metrics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Implementing a custom metric by subclassing the `Metric` class**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "class RootMeanSquaredError(keras.metrics.Metric):\n",
+    "\n",
+    "    def __init__(self, name=\"rmse\", **kwargs):\n",
+    "        super().__init__(name=name, **kwargs)\n",
+    "        self.mse_sum = self.add_weight(name=\"mse_sum\", initializer=\"zeros\")\n",
+    "        self.total_samples = self.add_weight(\n",
+    "            name=\"total_samples\", initializer=\"zeros\", dtype=\"int32\")\n",
+    "\n",
+    "    def update_state(self, y_true, y_pred, sample_weight=None):\n",
+    "        y_true = tf.one_hot(y_true, depth=tf.shape(y_pred)[1])\n",
+    "        mse = tf.reduce_sum(tf.square(y_true - y_pred))\n",
+    "        self.mse_sum.assign_add(mse)\n",
+    "        num_samples = tf.shape(y_pred)[0]\n",
+    "        self.total_samples.assign_add(num_samples)\n",
+    "\n",
+    "    def result(self):\n",
+    "        return tf.sqrt(self.mse_sum / tf.cast(self.total_samples, tf.float32))\n",
+    "\n",
+    "    def reset_state(self):\n",
+    "        self.mse_sum.assign(0.)\n",
+    "        self.total_samples.assign(0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_mnist_model()\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\", RootMeanSquaredError()])\n",
+    "model.fit(train_images, train_labels,\n",
+    "          epochs=3,\n",
+    "          validation_data=(val_images, val_labels))\n",
+    "test_metrics = model.evaluate(test_images, test_labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Using callbacks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The EarlyStopping and ModelCheckpoint callbacks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Using the `callbacks` argument in the `fit()` method**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "callbacks_list = [\n",
+    "    keras.callbacks.EarlyStopping(\n",
+    "        monitor=\"val_accuracy\",\n",
+    "        patience=2,\n",
+    "    ),\n",
+    "    keras.callbacks.ModelCheckpoint(\n",
+    "        filepath=\"checkpoint_path.keras\",\n",
+    "        monitor=\"val_loss\",\n",
+    "        save_best_only=True,\n",
+    "    )\n",
+    "]\n",
+    "model = get_mnist_model()\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(train_images, train_labels,\n",
+    "          epochs=10,\n",
+    "          callbacks=callbacks_list,\n",
+    "          validation_data=(val_images, val_labels))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = keras.models.load_model(\"checkpoint_path.keras\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Writing your own callbacks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Creating a custom callback by subclassing the `Callback` class**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "\n",
+    "class LossHistory(keras.callbacks.Callback):\n",
+    "    def on_train_begin(self, logs):\n",
+    "        self.per_batch_losses = []\n",
+    "\n",
+    "    def on_batch_end(self, batch, logs):\n",
+    "        self.per_batch_losses.append(logs.get(\"loss\"))\n",
+    "\n",
+    "    def on_epoch_end(self, epoch, logs):\n",
+    "        plt.clf()\n",
+    "        plt.plot(range(len(self.per_batch_losses)), self.per_batch_losses,\n",
+    "                 label=\"Training loss for each batch\")\n",
+    "        plt.xlabel(f\"Batch (epoch {epoch})\")\n",
+    "        plt.ylabel(\"Loss\")\n",
+    "        plt.legend()\n",
+    "        plt.savefig(f\"plot_at_epoch_{epoch}\")\n",
+    "        self.per_batch_losses = []"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_mnist_model()\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "model.fit(train_images, train_labels,\n",
+    "          epochs=10,\n",
+    "          callbacks=[LossHistory()],\n",
+    "          validation_data=(val_images, val_labels))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Monitoring and visualization with TensorBoard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_mnist_model()\n",
+    "model.compile(optimizer=\"rmsprop\",\n",
+    "              loss=\"sparse_categorical_crossentropy\",\n",
+    "              metrics=[\"accuracy\"])\n",
+    "\n",
+    "tensorboard = keras.callbacks.TensorBoard(\n",
+    "    log_dir=\"/full_path_to_your_log_dir\",\n",
+    ")\n",
+    "model.fit(train_images, train_labels,\n",
+    "          epochs=10,\n",
+    "          validation_data=(val_images, val_labels),\n",
+    "          callbacks=[tensorboard])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "%load_ext tensorboard\n",
+    "%tensorboard --logdir /full_path_to_your_log_dir"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Writing your own training and evaluation loops"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Training versus inference"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Low-level usage of metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "metric = keras.metrics.SparseCategoricalAccuracy()\n",
+    "targets = [0, 1, 2]\n",
+    "predictions = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]\n",
+    "metric.update_state(targets, predictions)\n",
+    "current_result = metric.result()\n",
+    "print(f\"result: {current_result:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "values = [0, 1, 2, 3, 4]\n",
+    "mean_tracker = keras.metrics.Mean()\n",
+    "for value in values:\n",
+    "    mean_tracker.update_state(value)\n",
+    "print(f\"Mean of values: {mean_tracker.result():.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A complete training and evaluation loop"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Writing a step-by-step training loop: the training step function**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model = get_mnist_model()\n",
+    "\n",
+    "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
+    "optimizer = keras.optimizers.RMSprop()\n",
+    "metrics = [keras.metrics.SparseCategoricalAccuracy()]\n",
+    "loss_tracking_metric = keras.metrics.Mean()\n",
+    "\n",
+    "def train_step(inputs, targets):\n",
+    "    with tf.GradientTape() as tape:\n",
+    "        predictions = model(inputs, training=True)\n",
+    "        loss = loss_fn(targets, predictions)\n",
+    "    gradients = tape.gradient(loss, model.trainable_weights)\n",
+    "    optimizer.apply_gradients(zip(gradients, model.trainable_weights))\n",
+    "\n",
+    "    logs = {}\n",
+    "    for metric in metrics:\n",
+    "        metric.update_state(targets, predictions)\n",
+    "        logs[metric.name] = metric.result()\n",
+    "\n",
+    "    loss_tracking_metric.update_state(loss)\n",
+    "    logs[\"loss\"] = loss_tracking_metric.result()\n",
+    "    return logs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Writing a step-by-step training loop: resetting the metrics**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def reset_metrics():\n",
+    "    for metric in metrics:\n",
+    "        metric.reset_state()\n",
+    "    loss_tracking_metric.reset_state()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Writing a step-by-step training loop: the loop itself**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "training_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))\n",
+    "training_dataset = training_dataset.batch(32)\n",
+    "epochs = 3\n",
+    "for epoch in range(epochs):\n",
+    "    reset_metrics()\n",
+    "    for inputs_batch, targets_batch in training_dataset:\n",
+    "        logs = train_step(inputs_batch, targets_batch)\n",
+    "    print(f\"Results at the end of epoch {epoch}\")\n",
+    "    for key, value in logs.items():\n",
+    "        print(f\"...{key}: {value:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Writing a step-by-step evaluation loop**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def test_step(inputs, targets):\n",
+    "    predictions = model(inputs, training=False)\n",
+    "    loss = loss_fn(targets, predictions)\n",
+    "\n",
+    "    logs = {}\n",
+    "    for metric in metrics:\n",
+    "        metric.update_state(targets, predictions)\n",
+    "        logs[\"val_\" + metric.name] = metric.result()\n",
+    "\n",
+    "    loss_tracking_metric.update_state(loss)\n",
+    "    logs[\"val_loss\"] = loss_tracking_metric.result()\n",
+    "    return logs\n",
+    "\n",
+    "val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))\n",
+    "val_dataset = val_dataset.batch(32)\n",
+    "reset_metrics()\n",
+    "for inputs_batch, targets_batch in val_dataset:\n",
+    "    logs = test_step(inputs_batch, targets_batch)\n",
+    "print(\"Evaluation results:\")\n",
+    "for key, value in logs.items():\n",
+    "    print(f\"...{key}: {value:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Make it fast with tf.function"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Adding a `tf.function` decorator to our evaluation-step function**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "@tf.function\n",
+    "def test_step(inputs, targets):\n",
+    "    predictions = model(inputs, training=False)\n",
+    "    loss = loss_fn(targets, predictions)\n",
+    "\n",
+    "    logs = {}\n",
+    "    for metric in metrics:\n",
+    "        metric.update_state(targets, predictions)\n",
+    "        logs[\"val_\" + metric.name] = metric.result()\n",
+    "\n",
+    "    loss_tracking_metric.update_state(loss)\n",
+    "    logs[\"val_loss\"] = loss_tracking_metric.result()\n",
+    "    return logs\n",
+    "\n",
+    "val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))\n",
+    "val_dataset = val_dataset.batch(32)\n",
+    "reset_metrics()\n",
+    "for inputs_batch, targets_batch in val_dataset:\n",
+    "    logs = test_step(inputs_batch, targets_batch)\n",
+    "print(\"Evaluation results:\")\n",
+    "for key, value in logs.items():\n",
+    "    print(f\"...{key}: {value:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Leveraging fit() with a custom training loop"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "**Implementing a custom training step to use with `fit()`**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
+    "loss_tracker = keras.metrics.Mean(name=\"loss\")\n",
+    "\n",
+    "class CustomModel(keras.Model):\n",
+    "    def train_step(self, data):\n",
+    "        inputs, targets = data\n",
+    "        with tf.GradientTape() as tape:\n",
+    "            predictions = self(inputs, training=True)\n",
+    "            loss = loss_fn(targets, predictions)\n",
+    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
+    "        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))\n",
+    "\n",
+    "        loss_tracker.update_state(loss)\n",
+    "        return {\"loss\": loss_tracker.result()}\n",
+    "\n",
+    "    @property\n",
+    "    def metrics(self):\n",
+    "        return [loss_tracker]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(28 * 28,))\n",
+    "features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "features = layers.Dropout(0.5)(features)\n",
+    "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "model = CustomModel(inputs, outputs)\n",
+    "\n",
+    "model.compile(optimizer=keras.optimizers.RMSprop())\n",
+    "model.fit(train_images, train_labels, epochs=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "class CustomModel(keras.Model):\n",
+    "    def train_step(self, data):\n",
+    "        inputs, targets = data\n",
+    "        with tf.GradientTape() as tape:\n",
+    "            predictions = self(inputs, training=True)\n",
+    "            loss = self.compiled_loss(targets, predictions)\n",
+    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
+    "        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))\n",
+    "        self.compiled_metrics.update_state(targets, predictions)\n",
+    "        return {m.name: m.result() for m in self.metrics}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "inputs = keras.Input(shape=(28 * 28,))\n",
+    "features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "features = layers.Dropout(0.5)(features)\n",
+    "outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "model = CustomModel(inputs, outputs)\n",
+    "\n",
+    "model.compile(optimizer=keras.optimizers.RMSprop(),\n",
+    "              loss=keras.losses.SparseCategoricalCrossentropy(),\n",
+    "              metrics=[keras.metrics.SparseCategoricalAccuracy()])\n",
+    "model.fit(train_images, train_labels, epochs=3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Summary"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "collapsed_sections": [],
+   "name": "chapter07_working-with-keras.ipynb",
+   "private_outputs": false,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter08_intro-to-dl-for-computer-vision.ipynb b/second_edition/chapter08_intro-to-dl-for-computer-vision.ipynb
similarity index 100%
rename from chapter08_intro-to-dl-for-computer-vision.ipynb
rename to second_edition/chapter08_intro-to-dl-for-computer-vision.ipynb
diff --git a/chapter09_part01_image-segmentation.ipynb b/second_edition/chapter09_part01_image-segmentation.ipynb
similarity index 100%
rename from chapter09_part01_image-segmentation.ipynb
rename to second_edition/chapter09_part01_image-segmentation.ipynb
diff --git a/chapter09_part02_modern-convnet-architecture-patterns.ipynb b/second_edition/chapter09_part02_modern-convnet-architecture-patterns.ipynb
similarity index 100%
rename from chapter09_part02_modern-convnet-architecture-patterns.ipynb
rename to second_edition/chapter09_part02_modern-convnet-architecture-patterns.ipynb
diff --git a/chapter09_part03_interpreting-what-convnets-learn.ipynb b/second_edition/chapter09_part03_interpreting-what-convnets-learn.ipynb
similarity index 100%
rename from chapter09_part03_interpreting-what-convnets-learn.ipynb
rename to second_edition/chapter09_part03_interpreting-what-convnets-learn.ipynb
diff --git a/chapter10_dl-for-timeseries.ipynb b/second_edition/chapter10_dl-for-timeseries.ipynb
similarity index 100%
rename from chapter10_dl-for-timeseries.ipynb
rename to second_edition/chapter10_dl-for-timeseries.ipynb
diff --git a/chapter11_part01_introduction.ipynb b/second_edition/chapter11_part01_introduction.ipynb
similarity index 100%
rename from chapter11_part01_introduction.ipynb
rename to second_edition/chapter11_part01_introduction.ipynb
diff --git a/chapter11_part02_sequence-models.ipynb b/second_edition/chapter11_part02_sequence-models.ipynb
similarity index 100%
rename from chapter11_part02_sequence-models.ipynb
rename to second_edition/chapter11_part02_sequence-models.ipynb
diff --git a/chapter11_part03_transformer.ipynb b/second_edition/chapter11_part03_transformer.ipynb
similarity index 100%
rename from chapter11_part03_transformer.ipynb
rename to second_edition/chapter11_part03_transformer.ipynb
diff --git a/chapter11_part04_sequence-to-sequence-learning.ipynb b/second_edition/chapter11_part04_sequence-to-sequence-learning.ipynb
similarity index 100%
rename from chapter11_part04_sequence-to-sequence-learning.ipynb
rename to second_edition/chapter11_part04_sequence-to-sequence-learning.ipynb
diff --git a/chapter12_part01_text-generation.ipynb b/second_edition/chapter12_part01_text-generation.ipynb
similarity index 100%
rename from chapter12_part01_text-generation.ipynb
rename to second_edition/chapter12_part01_text-generation.ipynb
diff --git a/chapter12_part02_deep-dream.ipynb b/second_edition/chapter12_part02_deep-dream.ipynb
similarity index 100%
rename from chapter12_part02_deep-dream.ipynb
rename to second_edition/chapter12_part02_deep-dream.ipynb
diff --git a/chapter12_part03_neural-style-transfer.ipynb b/second_edition/chapter12_part03_neural-style-transfer.ipynb
similarity index 100%
rename from chapter12_part03_neural-style-transfer.ipynb
rename to second_edition/chapter12_part03_neural-style-transfer.ipynb
diff --git a/chapter12_part04_variational-autoencoders.ipynb b/second_edition/chapter12_part04_variational-autoencoders.ipynb
similarity index 100%
rename from chapter12_part04_variational-autoencoders.ipynb
rename to second_edition/chapter12_part04_variational-autoencoders.ipynb
diff --git a/chapter12_part05_gans.ipynb b/second_edition/chapter12_part05_gans.ipynb
similarity index 100%
rename from chapter12_part05_gans.ipynb
rename to second_edition/chapter12_part05_gans.ipynb
diff --git a/chapter13_best-practices-for-the-real-world.ipynb b/second_edition/chapter13_best-practices-for-the-real-world.ipynb
similarity index 100%
rename from chapter13_best-practices-for-the-real-world.ipynb
rename to second_edition/chapter13_best-practices-for-the-real-world.ipynb
diff --git a/chapter14_conclusions.ipynb b/second_edition/chapter14_conclusions.ipynb
similarity index 100%
rename from chapter14_conclusions.ipynb
rename to second_edition/chapter14_conclusions.ipynb

From d569dc381b65d598ddb0499a7542be334ce1f69e Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Sun, 30 Mar 2025 14:58:49 -0700
Subject: [PATCH 07/24] Rename chapters

---
 chapter02_mathematical-building-blocks.ipynb  |   2 +-
 chapter03_introduction-to-ml-frameworks.ipynb |   2 +-
 ...pter04_classification-and-regression.ipynb |   4 +-
 chapter05_fundamentals-of-ml.ipynb            |   2 +-
 ...s.ipynb => chapter07_deep-dive-keras.ipynb |   4 +-
 chapter08_image-classification.ipynb          |  53 ++---
 ...pter09_convnet-architecture-patterns.ipynb |   4 +-
 ...r10_interpreting-what-convnets-learn.ipynb | 167 ++++++++--------
 chapter11_image-segmentation.ipynb            | 188 +++++++++---------
 chapter12_object-detection.ipynb              |   2 +-
 chapter13_timeseries-forecasting.ipynb        |   2 +-
 chapter14_text-classification.ipynb           |  14 +-
 ..._language-models-and-the-transformer.ipynb |  28 +--
 ...s.ipynb => chapter16_text-generation.ipynb |  15 +-
 chapter17_image-generation.ipynb              |  39 +---
 ...18_best-practices-for-the-real-world.ipynb |   2 +-
 16 files changed, 241 insertions(+), 287 deletions(-)
 rename chapter04_getting-started-with-neural-networks.ipynb => chapter04_classification-and-regression.ipynb (99%)
 rename chapter07_working-with-keras.ipynb => chapter07_deep-dive-keras.ipynb (99%)
 rename chapter09_convnet-architecture-best-practices.ipynb => chapter09_convnet-architecture-patterns.ipynb (98%)
 rename chapter10_interpreting-what-vision-models-learn.ipynb => chapter10_interpreting-what-convnets-learn.ipynb (88%)
 rename chapter16_generative-large-language-models.ipynb => chapter16_text-generation.ipynb (98%)

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index 0a507c0a49..de84db9738 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
diff --git a/chapter03_introduction-to-ml-frameworks.ipynb b/chapter03_introduction-to-ml-frameworks.ipynb
index 4eea8270c9..3a20c3e25c 100644
--- a/chapter03_introduction-to-ml-frameworks.ipynb
+++ b/chapter03_introduction-to-ml-frameworks.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
diff --git a/chapter04_getting-started-with-neural-networks.ipynb b/chapter04_classification-and-regression.ipynb
similarity index 99%
rename from chapter04_getting-started-with-neural-networks.ipynb
rename to chapter04_classification-and-regression.ipynb
index b91c31d941..8bfd94c95b 100644
--- a/chapter04_getting-started-with-neural-networks.ipynb
+++ b/chapter04_classification-and-regression.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -1252,7 +1252,7 @@
   "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter04_getting-started-with-neural-networks",
+   "name": "chapter04_classification-and-regression",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
diff --git a/chapter05_fundamentals-of-ml.ipynb b/chapter05_fundamentals-of-ml.ipynb
index 74114d9dda..9e72d01825 100644
--- a/chapter05_fundamentals-of-ml.ipynb
+++ b/chapter05_fundamentals-of-ml.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
diff --git a/chapter07_working-with-keras.ipynb b/chapter07_deep-dive-keras.ipynb
similarity index 99%
rename from chapter07_working-with-keras.ipynb
rename to chapter07_deep-dive-keras.ipynb
index dd9f0393c1..04cd33c47c 100644
--- a/chapter07_working-with-keras.ipynb
+++ b/chapter07_deep-dive-keras.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -1841,7 +1841,7 @@
   "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter07_working-with-keras",
+   "name": "chapter07_deep-dive-keras",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
diff --git a/chapter08_image-classification.ipynb b/chapter08_image-classification.ipynb
index fa8e8ff058..a89cc072bb 100644
--- a/chapter08_image-classification.ipynb
+++ b/chapter08_image-classification.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -564,7 +564,8 @@
     "    for i in range(9):\n",
     "        ax = plt.subplot(3, 3, i + 1)\n",
     "        augmented_image, _ = data_augmentation(image, None)\n",
-    "        plt.imshow(np.array(augmented_image).astype(\"uint8\"))\n",
+    "        augmented_image = keras.ops.convert_to_numpy(augmented_image)\n",
+    "        plt.imshow(augmented_image.astype(\"uint8\"))\n",
     "        plt.axis(\"off\")"
    ]
   },
@@ -663,10 +664,22 @@
    },
    "outputs": [],
    "source": [
-    "conv_base = keras.applications.Xception(\n",
-    "    weights=\"imagenet\",\n",
-    "    include_top=False,\n",
-    "    input_shape=(180, 180, 3),\n",
+    "import keras_hub\n",
+    "\n",
+    "conv_base = keras_hub.models.Backbone.from_preset(\"xception_41_imagenet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "preprocessor = keras_hub.layers.ImageConverter.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    "    image_size=(180, 180),\n",
     ")"
    ]
   },
@@ -687,14 +700,11 @@
    },
    "outputs": [],
    "source": [
-    "import numpy as np\n",
-    "from keras.applications.xception import preprocess_input\n",
-    "\n",
     "def get_features_and_labels(dataset):\n",
     "    all_features = []\n",
     "    all_labels = []\n",
     "    for images, labels in dataset:\n",
-    "        preprocessed_images = preprocess_input(images)\n",
+    "        preprocessed_images = preprocessor(images)\n",
     "        features = conv_base.predict(preprocessed_images, verbose=0)\n",
     "        all_features.append(features)\n",
     "        all_labels.append(labels)\n",
@@ -809,8 +819,12 @@
    },
    "outputs": [],
    "source": [
-    "conv_base = keras.applications.Xception(weights=\"imagenet\", include_top=False)\n",
-    "conv_base.trainable = False"
+    "import keras_hub\n",
+    "\n",
+    "conv_base = keras_hub.models.Backbone.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    "    trainable=False,\n",
+    ")"
    ]
   },
   {
@@ -846,7 +860,7 @@
    "outputs": [],
    "source": [
     "inputs = keras.Input(shape=(180, 180, 3))\n",
-    "x = preprocess_input(inputs)\n",
+    "x = preprocessor(inputs)\n",
     "x = conv_base(x)\n",
     "x = layers.GlobalAveragePooling2D()(x)\n",
     "x = layers.Dense(256)(x)\n",
@@ -907,19 +921,6 @@
     "#### Fine-tuning a pretrained model"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "conv_base.trainable = True\n",
-    "for layer in conv_base.layers[:-4]:\n",
-    "    layer.trainable = False"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
diff --git a/chapter09_convnet-architecture-best-practices.ipynb b/chapter09_convnet-architecture-patterns.ipynb
similarity index 98%
rename from chapter09_convnet-architecture-best-practices.ipynb
rename to chapter09_convnet-architecture-patterns.ipynb
index 7f114a4d20..97656034a1 100644
--- a/chapter09_convnet-architecture-best-practices.ipynb
+++ b/chapter09_convnet-architecture-patterns.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -328,7 +328,7 @@
   "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter09_convnet-architecture-best-practices",
+   "name": "chapter09_convnet-architecture-patterns",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
diff --git a/chapter10_interpreting-what-vision-models-learn.ipynb b/chapter10_interpreting-what-convnets-learn.ipynb
similarity index 88%
rename from chapter10_interpreting-what-vision-models-learn.ipynb
rename to chapter10_interpreting-what-convnets-learn.ipynb
index 4cc5cb308e..543491650d 100644
--- a/chapter10_interpreting-what-vision-models-learn.ipynb
+++ b/chapter10_interpreting-what-convnets-learn.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -41,33 +41,6 @@
     "### Visualizing intermediate activations"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "# You can use this to load the file"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "# \"convnet_from_scratch_with_augmentation.keras\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "# you obtained in the last chapter."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -76,11 +49,11 @@
    },
    "outputs": [],
    "source": [
+    "from google.colab import files\n",
+    "\n",
     "# You can use this to load the file\n",
     "# \"convnet_from_scratch_with_augmentation.keras\"\n",
     "# you obtained in the last chapter.\n",
-    "from google.colab import files\n",
-    "\n",
     "files.upload()"
    ]
   },
@@ -250,9 +223,14 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.applications.xception.Xception(\n",
-    "    weights=\"imagenet\",\n",
-    "    include_top=False,\n",
+    "import keras_hub\n",
+    "\n",
+    "backbone = keras_hub.models.Backbone.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    ")\n",
+    "preprocessor = keras_hub.layers.ImageConverter.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    "    image_size=(180, 180),\n",
     ")"
    ]
   },
@@ -290,9 +268,7 @@
    },
    "outputs": [],
    "source": [
-    "activation = feature_extractor(\n",
-    "    keras.applications.xception.preprocess_input(img_tensor)\n",
-    ")"
+    "activation = feature_extractor(preprocessor(img_tensor))"
    ]
   },
   {
@@ -331,9 +307,10 @@
     "keras.config.set_backend(\"tensorflow\")\n",
     "\n",
     "import keras\n",
+    "import keras_hub\n",
     "from keras import ops\n",
     "\n",
-    "model = keras.applications.Xception(weights=\"imagenet\", include_top=False)\n",
+    "model = keras_hub.models.Backbone.from_preset(\"xception_41_imagenet\")\n",
     "\n",
     "layer_name = \"block3_sepconv1\"\n",
     "layer = model.get_layer(name=layer_name)\n",
@@ -383,10 +360,18 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"tensorflow\")\n",
+    "keras.config.set_backend(\"torch\")\n",
     "\n",
+    "import keras\n",
+    "import keras_hub\n",
     "from keras import ops\n",
     "\n",
+    "model = keras_hub.models.Backbone.from_preset(\"xception_41_imagenet\")\n",
+    "\n",
+    "layer_name = \"block3_sepconv1\"\n",
+    "layer = model.get_layer(name=layer_name)\n",
+    "feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)\n",
+    "\n",
     "def compute_loss(image, filter_index):\n",
     "    activation = feature_extractor(image)\n",
     "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
@@ -433,9 +418,10 @@
     "keras.config.set_backend(\"jax\")\n",
     "\n",
     "import keras\n",
+    "import keras_hub\n",
     "from keras import ops\n",
     "\n",
-    "model = keras.applications.Xception(weights=\"imagenet\", include_top=False)\n",
+    "model = keras_hub.models.Backbone.from_preset(\"xception_41_imagenet\")\n",
     "\n",
     "layer_name = \"block3_sepconv1\"\n",
     "layer = model.get_layer(name=layer_name)\n",
@@ -584,15 +570,8 @@
     "    fname=\"elephant.jpg\",\n",
     "    origin=\"https://img-datasets.s3.amazonaws.com/elephant.jpg\",\n",
     ")\n",
-    "\n",
-    "def get_img_array(img_path, target_size):\n",
-    "    img = keras.utils.load_img(img_path, target_size=target_size)\n",
-    "    array = keras.utils.img_to_array(img)\n",
-    "    array = np.expand_dims(array, axis=0)\n",
-    "    array = keras.applications.xception.preprocess_input(array)\n",
-    "    return array\n",
-    "\n",
-    "img_array = get_img_array(img_path, target_size=(299, 299))"
+    "img = keras.utils.load_img(img_path)\n",
+    "img_array = np.expand_dims(img, axis=0)"
    ]
   },
   {
@@ -603,9 +582,23 @@
    },
    "outputs": [],
    "source": [
-    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "model = keras_hub.models.ImageClassifier.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    "    activation=\"softmax\",\n",
+    ")\n",
     "preds = model.predict(img_array)\n",
-    "print(keras.applications.xception.decode_predictions(preds, top=3)[0])"
+    "preds.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "keras_hub.utils.decode_imagenet_predictions(preds[0])"
    ]
   },
   {
@@ -619,6 +612,17 @@
     "np.argmax(preds[0])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "img_array = model.preprocessor(img_array)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -628,11 +632,7 @@
    "outputs": [],
    "source": [
     "last_conv_layer_name = \"block14_sepconv2_act\"\n",
-    "classifier_layer_names = [\n",
-    "    \"avg_pool\",\n",
-    "    \"predictions\",\n",
-    "]\n",
-    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer = model.backbone.get_layer(last_conv_layer_name)\n",
     "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)"
    ]
   },
@@ -644,9 +644,9 @@
    },
    "outputs": [],
    "source": [
-    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "classifier_input = last_conv_layer.output\n",
     "x = classifier_input\n",
-    "for layer_name in classifier_layer_names:\n",
+    "for layer_name in [\"pooler\", \"predictions\"]:\n",
     "    x = model.get_layer(layer_name)(x)\n",
     "classifier_model = keras.Model(classifier_input, x)"
    ]
@@ -669,22 +669,23 @@
    "outputs": [],
    "source": [
     "keras.config.set_backend(\"tensorflow\")\n",
+    "\n",
     "import keras\n",
+    "import keras_hub\n",
     "from keras import ops\n",
     "\n",
-    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "model = keras_hub.models.ImageClassifier.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    "    activation=\"softmax\",\n",
+    ")\n",
     "\n",
     "last_conv_layer_name = \"block14_sepconv2_act\"\n",
-    "classifier_layer_names = [\n",
-    "    \"avg_pool\",\n",
-    "    \"predictions\",\n",
-    "]\n",
-    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer = model.backbone.get_layer(last_conv_layer_name)\n",
     "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
     "\n",
-    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "classifier_input = last_conv_layer.output\n",
     "x = classifier_input\n",
-    "for layer_name in classifier_layer_names:\n",
+    "for layer_name in [\"pooler\", \"predictions\"]:\n",
     "    x = model.get_layer(layer_name)(x)\n",
     "classifier_model = keras.Model(classifier_input, x)"
    ]
@@ -733,22 +734,23 @@
    "outputs": [],
    "source": [
     "keras.config.set_backend(\"torch\")\n",
+    "\n",
     "import keras\n",
+    "import keras_hub\n",
     "from keras import ops\n",
     "\n",
-    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "model = keras_hub.models.ImageClassifier.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    "    activation=\"softmax\",\n",
+    ")\n",
     "\n",
     "last_conv_layer_name = \"block14_sepconv2_act\"\n",
-    "classifier_layer_names = [\n",
-    "    \"avg_pool\",\n",
-    "    \"predictions\",\n",
-    "]\n",
-    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer = model.backbone.get_layer(last_conv_layer_name)\n",
     "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
     "\n",
-    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "classifier_input = last_conv_layer.output\n",
     "x = classifier_input\n",
-    "for layer_name in classifier_layer_names:\n",
+    "for layer_name in [\"pooler\", \"predictions\"]:\n",
     "    x = model.get_layer(layer_name)(x)\n",
     "classifier_model = keras.Model(classifier_input, x)"
    ]
@@ -796,22 +798,23 @@
    "outputs": [],
    "source": [
     "keras.config.set_backend(\"jax\")\n",
+    "\n",
     "import keras\n",
+    "import keras_hub\n",
     "from keras import ops\n",
     "\n",
-    "model = keras.applications.Xception(weights=\"imagenet\")\n",
+    "model = keras_hub.models.ImageClassifier.from_preset(\n",
+    "    \"xception_41_imagenet\",\n",
+    "    activation=\"softmax\",\n",
+    ")\n",
     "\n",
     "last_conv_layer_name = \"block14_sepconv2_act\"\n",
-    "classifier_layer_names = [\n",
-    "    \"avg_pool\",\n",
-    "    \"predictions\",\n",
-    "]\n",
-    "last_conv_layer = model.get_layer(last_conv_layer_name)\n",
+    "last_conv_layer = model.backbone.get_layer(last_conv_layer_name)\n",
     "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
     "\n",
-    "classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
+    "classifier_input = last_conv_layer.output\n",
     "x = classifier_input\n",
-    "for layer_name in classifier_layer_names:\n",
+    "for layer_name in [\"pooler\", \"predictions\"]:\n",
     "    x = model.get_layer(layer_name)(x)\n",
     "classifier_model = keras.Model(classifier_input, x)"
    ]
@@ -934,7 +937,7 @@
   "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter10_interpreting-what-vision-models-learn",
+   "name": "chapter10_interpreting-what-convnets-learn",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
index 315c7b5f5c..cf44c2f249 100644
--- a/chapter11_image-segmentation.ipynb
+++ b/chapter11_image-segmentation.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -38,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### A universe of computer vision tasks"
+    "### Computer vision tasks"
    ]
   },
   {
@@ -47,7 +47,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Image segmentation"
+    "#### Types of image segmentation"
    ]
   },
   {
@@ -56,7 +56,16 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Training a small semantic segmentation model from scratch"
+    "### Training a segmentation model from scratch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Downloading a segmentation dataset"
    ]
   },
   {
@@ -81,25 +90,13 @@
    },
    "outputs": [],
    "source": [
-    "import os\n",
+    "import pathlib\n",
     "\n",
-    "input_dir = \"images/\"\n",
-    "target_dir = \"annotations/trimaps/\"\n",
+    "input_dir = pathlib.Path(\"images\")\n",
+    "target_dir = pathlib.Path(\"annotations/trimaps\")\n",
     "\n",
-    "input_img_paths = sorted(\n",
-    "    [\n",
-    "        os.path.join(input_dir, fname)\n",
-    "        for fname in os.listdir(input_dir)\n",
-    "        if fname.endswith(\".jpg\")\n",
-    "    ]\n",
-    ")\n",
-    "target_paths = sorted(\n",
-    "    [\n",
-    "        os.path.join(target_dir, fname)\n",
-    "        for fname in os.listdir(target_dir)\n",
-    "        if fname.endswith(\".png\") and not fname.startswith(\".\")\n",
-    "    ]\n",
-    ")"
+    "input_img_paths = sorted(input_dir.glob(\"*.jpg\"))\n",
+    "target_paths = sorted(target_dir.glob(\"[!.]*.png\"))"
    ]
   },
   {
@@ -111,7 +108,7 @@
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
-    "from keras.utils import load_img, img_to_array\n",
+    "from keras.utils import load_img, img_to_array, array_to_img\n",
     "\n",
     "plt.axis(\"off\")\n",
     "plt.imshow(load_img(input_img_paths[9]))"
@@ -165,8 +162,17 @@
     "targets = np.zeros((num_imgs,) + img_size + (1,), dtype=\"uint8\")\n",
     "for i in range(num_imgs):\n",
     "    input_imgs[i] = path_to_input_image(input_img_paths[i])\n",
-    "    targets[i] = path_to_target(target_paths[i])\n",
-    "\n",
+    "    targets[i] = path_to_target(target_paths[i])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
     "num_val_samples = 1000\n",
     "train_input_imgs = input_imgs[:-num_val_samples]\n",
     "train_targets = targets[:-num_val_samples]\n",
@@ -174,6 +180,15 @@
     "val_targets = targets[-num_val_samples:]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Building and training the segmentation model"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -210,17 +225,6 @@
     "model = get_model(img_size=img_size, num_classes=3)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "model.summary(line_length=80)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -293,8 +297,6 @@
    },
    "outputs": [],
    "source": [
-    "from keras.utils import array_to_img\n",
-    "\n",
     "model = keras.models.load_model(\"oxford_segmentation.keras\")\n",
     "\n",
     "i = 4\n",
@@ -319,7 +321,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Image segmentation with a pretrained model: Segment Anything"
+    "### Using a pretrained segmentation model"
    ]
   },
   {
@@ -328,7 +330,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Prepare a test image"
+    "#### Downloading the model"
    ]
   },
   {
@@ -339,15 +341,9 @@
    },
    "outputs": [],
    "source": [
-    "path = keras.utils.get_file(\n",
-    "    origin=\"https://s3.amazonaws.com/keras.io/img/book/fruits.jpg\"\n",
-    ")\n",
-    "pil_image = keras.utils.load_img(path)\n",
-    "image_array = keras.utils.img_to_array(pil_image)\n",
+    "import keras_hub\n",
     "\n",
-    "plt.imshow(image_array.astype(\"uint8\"))\n",
-    "plt.axis(\"off\")\n",
-    "plt.show()"
+    "model = keras_hub.models.ImageSegmenter.from_preset(\"sam_huge_sa1b\")"
    ]
   },
   {
@@ -358,14 +354,7 @@
    },
    "outputs": [],
    "source": [
-    "from keras import ops\n",
-    "\n",
-    "image_size = (1024, 1024)\n",
-    "\n",
-    "def resize_and_pad(x):\n",
-    "    return ops.image.resize(x, image_size, pad_to_aspect_ratio=True)\n",
-    "\n",
-    "image = resize_and_pad(image_array)"
+    "model.count_params()"
    ]
   },
   {
@@ -374,7 +363,16 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Get the pretrained SAM model"
+    "#### How Segment Anything works"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Preparing a test image"
    ]
   },
   {
@@ -385,9 +383,33 @@
    },
    "outputs": [],
    "source": [
-    "import keras_hub\n",
+    "path = keras.utils.get_file(\n",
+    "    origin=\"https://s3.amazonaws.com/keras.io/img/book/fruits.jpg\"\n",
+    ")\n",
+    "pil_image = keras.utils.load_img(path)\n",
+    "image_array = keras.utils.img_to_array(pil_image)\n",
     "\n",
-    "model = keras_hub.models.SAMImageSegmenter.from_preset(\"sam_huge_sa1b\")"
+    "plt.imshow(image_array.astype(\"uint8\"))\n",
+    "plt.axis(\"off\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "from keras import ops\n",
+    "\n",
+    "image_size = (1024, 1024)\n",
+    "\n",
+    "def resize_and_pad(x):\n",
+    "    return ops.image.resize(x, image_size, pad_to_aspect_ratio=True)\n",
+    "\n",
+    "image = resize_and_pad(image_array)"
    ]
   },
   {
@@ -410,33 +432,15 @@
     "    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)\n",
     "    ax.imshow(mask_image)\n",
     "\n",
-    "def show_points(coords, labels, ax, marker_size=375):\n",
-    "    pos_points = coords[labels == 1]\n",
-    "    neg_points = coords[labels == 0]\n",
-    "    ax.scatter(\n",
-    "        pos_points[:, 0],\n",
-    "        pos_points[:, 1],\n",
-    "        color=\"green\",\n",
-    "        marker=\"*\",\n",
-    "        s=marker_size,\n",
-    "        edgecolor=\"white\",\n",
-    "        linewidth=1.25,\n",
-    "    )\n",
-    "    ax.scatter(\n",
-    "        neg_points[:, 0],\n",
-    "        neg_points[:, 1],\n",
-    "        color=\"red\",\n",
-    "        marker=\"*\",\n",
-    "        s=marker_size,\n",
-    "        edgecolor=\"white\",\n",
-    "        linewidth=1.25,\n",
-    "    )\n",
+    "def show_points(points, ax):\n",
+    "    x, y = points[:, 0], points[:, 1]\n",
+    "    ax.scatter(x, y, c=\"green\", marker=\"*\", s=375, ec=\"white\", lw=1.25)\n",
     "\n",
     "def show_box(box, ax):\n",
     "    box = box.reshape(-1)\n",
     "    x0, y0 = box[0], box[1]\n",
     "    w, h = box[2] - box[0], box[3] - box[1]\n",
-    "    ax.add_patch(plt.Rectangle((x0, y0), w, h, ec=\"green\", fc=\"black\", lw=2))"
+    "    ax.add_patch(plt.Rectangle((x0, y0), w, h, ec=\"red\", fc=\"none\", lw=2))"
    ]
   },
   {
@@ -445,7 +449,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Prompting SAM with a target point"
+    "#### Prompting SAM with a target point"
    ]
   },
   {
@@ -463,7 +467,7 @@
     "\n",
     "plt.figure(figsize=(10, 10))\n",
     "show_image(image, plt.gca())\n",
-    "show_points(input_point, input_label, plt.gca())\n",
+    "show_points(input_point, plt.gca())\n",
     "plt.show()"
    ]
   },
@@ -477,9 +481,9 @@
    "source": [
     "outputs = model.predict(\n",
     "    {\n",
-    "        \"images\": ops.expand_dims(image, axis=0),\n",
-    "        \"points\": ops.expand_dims(input_point, axis=0),\n",
-    "        \"labels\": ops.expand_dims(input_label, axis=0),\n",
+    "        \"images\": np.expand_dims(image, axis=0),\n",
+    "        \"points\": np.expand_dims(input_point, axis=0),\n",
+    "        \"labels\": np.expand_dims(input_label, axis=0),\n",
     "    }\n",
     ")"
    ]
@@ -514,7 +518,7 @@
     "plt.figure(figsize=(10, 10))\n",
     "show_image(image, plt.gca())\n",
     "show_mask(mask, plt.gca())\n",
-    "show_points(input_point, input_label, plt.gca())\n",
+    "show_points(input_point, plt.gca())\n",
     "plt.show()"
    ]
   },
@@ -531,9 +535,9 @@
     "\n",
     "outputs = model.predict(\n",
     "    {\n",
-    "        \"images\": ops.expand_dims(image, axis=0),\n",
-    "        \"points\": ops.expand_dims(input_point, axis=0),\n",
-    "        \"labels\": ops.expand_dims(input_label, axis=0),\n",
+    "        \"images\": np.expand_dims(image, axis=0),\n",
+    "        \"points\": np.expand_dims(input_point, axis=0),\n",
+    "        \"labels\": np.expand_dims(input_label, axis=0),\n",
     "    }\n",
     ")\n",
     "mask = get_mask(outputs, index=0)\n",
@@ -541,7 +545,7 @@
     "plt.figure(figsize=(10, 10))\n",
     "show_image(image, plt.gca())\n",
     "show_mask(mask, plt.gca())\n",
-    "show_points(input_point, input_label, plt.gca())\n",
+    "show_points(input_point, plt.gca())\n",
     "plt.show()"
    ]
   },
@@ -557,7 +561,7 @@
     "masks = outputs[\"masks\"][0][1:]\n",
     "for i, mask in enumerate(masks):\n",
     "    show_image(image, axes[i])\n",
-    "    show_points(input_point, input_label, axes[i])\n",
+    "    show_points(input_point, axes[i])\n",
     "    mask = get_mask(outputs, index=i + 1)\n",
     "    show_mask(mask, axes[i])\n",
     "    axes[i].set_title(f\"Mask {i + 1}\", fontsize=16)\n",
@@ -571,7 +575,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Prompting SAM with a target box"
+    "#### Prompting SAM with a target box"
    ]
   },
   {
diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index c97b75f870..dd46522804 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
index d62f3b635e..7678ce6ec1 100644
--- a/chapter13_timeseries-forecasting.ipynb
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
index 5950f1c4df..f9de93c622 100644
--- a/chapter14_text-classification.ipynb
+++ b/chapter14_text-classification.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -678,14 +678,12 @@
    },
    "outputs": [],
    "source": [
+    "from keras.utils import text_dataset_from_directory\n",
+    "\n",
     "batch_size = 32\n",
-    "train_ds = keras.utils.text_dataset_from_directory(\n",
-    "    train_dir, batch_size=batch_size\n",
-    ")\n",
-    "val_ds = keras.utils.text_dataset_from_directory(val_dir, batch_size=batch_size)\n",
-    "test_ds = keras.utils.text_dataset_from_directory(\n",
-    "    test_dir, batch_size=batch_size\n",
-    ")"
+    "train_ds = text_dataset_from_directory(train_dir, batch_size=batch_size)\n",
+    "val_ds = text_dataset_from_directory(val_dir, batch_size=batch_size)\n",
+    "test_ds = text_dataset_from_directory(test_dir, batch_size=batch_size)"
    ]
   },
   {
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
index c2646b76ea..b81cf93a9b 100644
--- a/chapter15_language-models-and-the-transformer.ipynb
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -332,9 +332,10 @@
     "    origin=(\n",
     "        \"http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip\"\n",
     "    ),\n",
+    "    fname=\"spa-eng\",\n",
     "    extract=True,\n",
     ")\n",
-    "text_path = pathlib.Path(zip_path).parent / \"spa-eng\" / \"spa.txt\""
+    "text_path = pathlib.Path(zip_path) / \"spa-eng\" / \"spa.txt\""
    ]
   },
   {
@@ -924,17 +925,6 @@
     "#### Loading a pretrained Transformer"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install keras-hub"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1027,14 +1017,12 @@
    },
    "outputs": [],
    "source": [
+    "from keras.utils import text_dataset_from_directory\n",
+    "\n",
     "batch_size = 16\n",
-    "train_ds = keras.utils.text_dataset_from_directory(\n",
-    "    train_dir, batch_size=batch_size\n",
-    ")\n",
-    "val_ds = keras.utils.text_dataset_from_directory(val_dir, batch_size=batch_size)\n",
-    "test_ds = keras.utils.text_dataset_from_directory(\n",
-    "    test_dir, batch_size=batch_size\n",
-    ")"
+    "train_ds = text_dataset_from_directory(train_dir, batch_size=batch_size)\n",
+    "val_ds = text_dataset_from_directory(val_dir, batch_size=batch_size)\n",
+    "test_ds = text_dataset_from_directory(test_dir, batch_size=batch_size)"
    ]
   },
   {
diff --git a/chapter16_generative-large-language-models.ipynb b/chapter16_text-generation.ipynb
similarity index 98%
rename from chapter16_generative-large-language-models.ipynb
rename to chapter16_text-generation.ipynb
index efb8fc4de0..8d48b4aebe 100644
--- a/chapter16_generative-large-language-models.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -103,17 +103,6 @@
     "   print(f.readline().replace(\"\\\\n\", \"\\n\")[:100])"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install keras-hub"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1092,7 +1081,7 @@
   "accelerator": "GPU",
   "colab": {
    "collapsed_sections": [],
-   "name": "chapter16_generative-large-language-models",
+   "name": "chapter16_text-generation",
    "private_outputs": false,
    "provenance": [],
    "toc_visible": true
diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index a9b6559e68..65fd10a079 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {
@@ -657,17 +657,6 @@
     "### Text-to-image models"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install keras-hub"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -680,8 +669,7 @@
     "\n",
     "model = keras_hub.models.TextToImage.from_preset(\n",
     "    \"stable_diffusion_3_medium\",\n",
-    "    height=1024,\n",
-    "    width=1024,\n",
+    "    image_shape=(512, 512, 3),\n",
     "    dtype=\"float16\",\n",
     ")\n",
     "image = model.generate(\n",
@@ -689,15 +677,6 @@
     ")"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "##### The TextToImage class"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -708,12 +687,6 @@
    "source": [
     "from PIL import Image\n",
     "\n",
-    "model = keras_hub.models.TextToImage.from_preset(\n",
-    "    \"stable_diffusion_3_medium\",\n",
-    "    image_shape=(512, 512, 3),\n",
-    "    dtype=\"float16\",\n",
-    ")\n",
-    "\n",
     "prompts = [\n",
     "    \"A photograph of a cat wearing a top hat, photorealistic\",\n",
     "    \"A neon sci-fi skyline at night, illustration\",\n",
@@ -783,11 +756,9 @@
     "backbone = keras_hub.models.Backbone.from_preset(\n",
     "    \"stable_diffusion_3_medium\", image_shape=(height, width, 3), dtype=dtype\n",
     ")\n",
-    "preprocessor = (\n",
-    "    keras_hub.models.StableDiffusion3TextToImagePreprocessor.from_preset(\n",
-    "        \"stable_diffusion_3_medium\"\n",
-    "    )\n",
-    ")  # TODO: make this TextToImagePreprocessor or just use tokenizers."
+    "preprocessor = keras_hub.models.TextToImagePreprocessor.from_preset(\n",
+    "    \"stable_diffusion_3_medium\"\n",
+    ")"
    ]
   },
   {
diff --git a/chapter18_best-practices-for-the-real-world.ipynb b/chapter18_best-practices-for-the-real-world.ipynb
index 6207d849ee..b6bb430d83 100644
--- a/chapter18_best-practices-for-the-real-world.ipynb
+++ b/chapter18_best-practices-for-the-real-world.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly --upgrade -q"
+    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
    ]
   },
   {

From 287b213d1dd81c22b2f2d23c69a777a894483745 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Sat, 26 Apr 2025 01:02:56 -0700
Subject: [PATCH 08/24] Update chapter 12

---
 chapter12_object-detection.ipynb | 663 ++++++++++++++++++++-----------
 1 file changed, 423 insertions(+), 240 deletions(-)

diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index dd46522804..1f3804cc43 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -38,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Two families of object detection models"
+    "### Single-stage vs two-stage object detectors"
    ]
   },
   {
@@ -47,7 +47,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### The R-CNN architecture"
+    "#### Two-stage R-CNN detectors"
    ]
   },
   {
@@ -56,7 +56,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Single-stage detectors"
+    "#### Single-stage detectors"
    ]
   },
   {
@@ -65,7 +65,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Object detection with a pretrained model: RetinaNet"
+    "### Training a YOLO model from scratch"
    ]
   },
   {
@@ -74,7 +74,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Introducing the dataset: Pascal VOC"
+    "#### Downloading the COCO dataset"
    ]
   },
   {
@@ -85,11 +85,19 @@
    },
    "outputs": [],
    "source": [
-    "!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar\n",
-    "!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar\n",
+    "import keras\n",
+    "import keras_hub\n",
     "\n",
-    "!tar -xf VOCtrainval_06-Nov-2007.tar\n",
-    "!tar -xf VOCtest_06-Nov-2007.tar"
+    "images_path = keras.utils.get_file(\n",
+    "    \"coco\",\n",
+    "    \"http://images.cocodataset.org/zips/train2017.zip\",\n",
+    "    extract=True,\n",
+    ")\n",
+    "annotations_path = keras.utils.get_file(\n",
+    "    \"annotations\",\n",
+    "    \"http://images.cocodataset.org/annotations/annotations_trainval2017.zip\",\n",
+    "    extract=True,\n",
+    ")"
    ]
   },
   {
@@ -100,75 +108,86 @@
    },
    "outputs": [],
    "source": [
-    "import os\n",
-    "import numpy as np\n",
-    "import xml.etree.ElementTree as ET\n",
-    "import tensorflow as tf\n",
-    "\n",
-    "BASE_DIR = os.path.join(os.getcwd(), \"VOCdevkit\", \"VOC2007\")\n",
-    "IMAGE_DIR = os.path.join(BASE_DIR, \"JPEGImages\")\n",
-    "ANNOTATION_DIR = os.path.join(BASE_DIR, \"Annotations\")\n",
-    "IMAGESET_DIR = os.path.join(BASE_DIR, \"ImageSets\", \"Main\")\n",
-    "CLASSES = {\n",
-    "    0: \"aeroplane\",\n",
-    "    1: \"bicycle\",\n",
-    "    2: \"bird\",\n",
-    "    3: \"boat\",\n",
-    "    4: \"bottle\",\n",
-    "    5: \"bus\",\n",
-    "    6: \"car\",\n",
-    "    7: \"cat\",\n",
-    "    8: \"chair\",\n",
-    "    9: \"cow\",\n",
-    "    10: \"diningtable\",\n",
-    "    11: \"dog\",\n",
-    "    12: \"horse\",\n",
-    "    13: \"motorbike\",\n",
-    "    14: \"person\",\n",
-    "    15: \"pottedplant\",\n",
-    "    16: \"sheep\",\n",
-    "    17: \"sofa\",\n",
-    "    18: \"train\",\n",
-    "    19: \"tvmonitor\",\n",
-    "}\n",
+    "import json\n",
     "\n",
-    "def parse_annotation(path):\n",
-    "    tree = ET.parse(path)\n",
-    "    root = tree.getroot()\n",
-    "    bboxes = []\n",
-    "    labels = []\n",
+    "with open(f\"{annotations_path}/annotations/instances_train2017.json\", \"r\") as f:\n",
+    "    annotations = json.load(f)\n",
     "\n",
-    "    for obj in root.findall(\"object\"):\n",
-    "        name = obj.find(\"name\").text\n",
-    "        difficult = int(obj.find(\"difficult\").text)\n",
-    "        if difficult:\n",
-    "            continue\n",
+    "images = {image[\"id\"]: image for image in annotations[\"images\"]}\n",
     "\n",
-    "        bbox = obj.find(\"bndbox\")\n",
-    "        size = root.find(\"size\")\n",
-    "        width = float(size.find(\"width\").text)\n",
-    "        height = float(size.find(\"height\").text)\n",
+    "def scale_box(box, width, height):\n",
+    "    scale = 1.0 / max(width, height)\n",
+    "    x, y, w, h = [v * scale for v in box]\n",
+    "    x += (height - width) * scale / 2 if height > width else 0\n",
+    "    y += (width - height) * scale / 2 if width > height else 0\n",
+    "    return [x, y, w, h]\n",
     "\n",
-    "        xmin = float(bbox.find(\"xmin\").text) / width\n",
-    "        ymin = float(bbox.find(\"ymin\").text) / height\n",
-    "        xmax = float(bbox.find(\"xmax\").text) / width\n",
-    "        ymax = float(bbox.find(\"ymax\").text) / height\n",
-    "        bboxes.append([ymin, xmin, ymax, xmax])\n",
-    "\n",
-    "        class_idx = [k for k, v in CLASSES.items() if v == name][0]\n",
-    "        labels.append(class_idx)\n",
-    "    bboxes = tf.constant(bboxes, dtype=tf.float32)\n",
-    "    labels = tf.constant(labels, dtype=tf.float32)\n",
-    "    return bboxes, labels\n",
-    "\n",
-    "def process_example(image_id):\n",
-    "    image_id = tf.compat.as_str_any(image_id.numpy())\n",
-    "    image_path = os.path.join(IMAGE_DIR, f\"{image_id.rstrip()}.jpg\")\n",
-    "    image_data = tf.io.read_file(image_path)\n",
-    "    image = tf.io.decode_jpeg(image_data, channels=3)\n",
-    "    path = os.path.join(ANNOTATION_DIR, f\"{image_id.rstrip()}.xml\")\n",
-    "    bboxes, labels = parse_annotation(path)\n",
-    "    return image, bboxes, labels"
+    "metadata = {}\n",
+    "for annotation in annotations[\"annotations\"]:\n",
+    "    id = annotation[\"image_id\"]\n",
+    "    if id not in metadata:\n",
+    "        metadata[id] = {\"boxes\": [], \"labels\": []}\n",
+    "    image = images[id]\n",
+    "    box = scale_box(annotation[\"bbox\"], image[\"width\"], image[\"height\"])\n",
+    "    metadata[id][\"boxes\"].append(box)\n",
+    "    metadata[id][\"labels\"].append(annotation[\"category_id\"])\n",
+    "    metadata[id][\"path\"] = images_path + \"/train2017/\" + image[\"file_name\"]\n",
+    "metadata = list(metadata.values())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "len(metadata)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "min([len(x[\"boxes\"]) for x in metadata])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "max([len(x[\"boxes\"]) for x in metadata])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "max(max(x[\"labels\"]) for x in metadata) + 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "metadata[435]"
    ]
   },
   {
@@ -179,35 +198,43 @@
    },
    "outputs": [],
    "source": [
-    "def get_dataset(split, shuffle_files=True, shuffle_buffer_size=1000):\n",
-    "    split_file = os.path.join(IMAGESET_DIR, f\"{split}.txt\")\n",
-    "    with open(split_file, \"r\") as f:\n",
-    "        image_ids = [x.strip() for x in f.readlines()]\n",
+    "[keras_hub.utils.coco_id_to_name(x) for x in metadata[435][\"labels\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.colors import hsv_to_rgb\n",
+    "from matplotlib.patches import Rectangle\n",
     "\n",
-    "    ds = tf.data.Dataset.from_tensor_slices(image_ids)\n",
+    "color_map = {0: \"gray\"}\n",
     "\n",
-    "    if shuffle_files:\n",
-    "        ds = ds.shuffle(shuffle_buffer_size)\n",
+    "def label_to_color(label):\n",
+    "    if label not in color_map:\n",
+    "        h, s, v = (len(color_map) * 0.618) % 1, 0.5, 0.9\n",
+    "        color_map[label] = hsv_to_rgb((h, s, v))\n",
+    "    return color_map[label]\n",
     "\n",
-    "    ds = ds.map(\n",
-    "        lambda x: tf.py_function(\n",
-    "            func=process_example, inp=[x], Tout=[tf.uint8, tf.float32, tf.int64]\n",
-    "        ),\n",
-    "        num_parallel_calls=tf.data.AUTOTUNE,\n",
-    "    )\n",
-    "    ds = ds.map(\n",
-    "        lambda image, bbox, label: {\n",
-    "            \"image\": tf.ensure_shape(image, [None, None, 3]),\n",
-    "            \"objects\": {\n",
-    "                \"bbox\": tf.ensure_shape(bbox, [None, 4]),\n",
-    "                \"label\": tf.ensure_shape(label, [None]),\n",
-    "            },\n",
-    "        }\n",
-    "    )\n",
-    "    return ds.prefetch(tf.data.AUTOTUNE)\n",
+    "def draw_box(ax, box, text, color):\n",
+    "    x, y, w, h = box\n",
+    "    ax.add_patch(Rectangle((x, y), w, h, lw=2, ec=color, fc=\"none\"))\n",
+    "    textbox = dict(fc=color, pad=1, ec=\"none\")\n",
+    "    ax.text(x, y, text, c=\"white\", size=10, va=\"bottom\", bbox=textbox)\n",
     "\n",
-    "train_ds = get_dataset(\"trainval\", shuffle_files=True)\n",
-    "eval_ds = get_dataset(\"test\", shuffle_files=True)"
+    "def draw_image(ax, image):\n",
+    "    ax.set(xlim=(0, 1), ylim=(1, 0), xticks=[], yticks=[], aspect=\"equal\")\n",
+    "    image = plt.imread(image)\n",
+    "    height, width = image.shape[:2]\n",
+    "    hpad = (1 - height / width) / 2 if width > height else 0\n",
+    "    wpad = (1 - width / height) / 2 if height > width else 0\n",
+    "    extent = [wpad, 1 - wpad, 1 - hpad, hpad]\n",
+    "    ax.imshow(image, extent=extent)"
    ]
   },
   {
@@ -218,27 +245,27 @@
    },
    "outputs": [],
    "source": [
-    "example = next(iter(train_ds))\n",
-    "\n",
-    "plot_bounding_box_gallery(\n",
-    "    np.array([example[\"image\"]]),\n",
-    "    bounding_box_format=\"rel_yxyx\",\n",
-    "    y_true={\n",
-    "        \"boxes\": np.array([example[\"objects\"][\"bbox\"]]),\n",
-    "        \"labels\": np.array([example[\"objects\"][\"label\"]]),\n",
-    "    },\n",
-    "    scale=8,\n",
-    "    class_mapping=CLASSES,\n",
-    ")"
+    "sample = metadata[435]\n",
+    "fig, ax = plt.subplots(dpi=300)\n",
+    "draw_image(ax, sample[\"path\"])\n",
+    "for box, label in zip(sample[\"boxes\"], sample[\"labels\"]):\n",
+    "    label_name = keras_hub.utils.coco_id_to_name(label)\n",
+    "    draw_box(ax, box, label_name, label_to_color(label))\n",
+    "plt.show()"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "##### Bounding box formats"
+    "import random\n",
+    "\n",
+    "metadata = list(filter(lambda x: len(x[\"boxes\"]) <= 4, metadata))\n",
+    "random.shuffle(metadata)"
    ]
   },
   {
@@ -247,7 +274,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Setting up an image preprocessing and augmentation pipeline"
+    "#### Creating a YOLO model"
    ]
   },
   {
@@ -258,29 +285,15 @@
    },
    "outputs": [],
    "source": [
-    "import keras\n",
+    "image_size = 448\n",
     "\n",
-    "BBOX_FORMAT = \"yxyx\"\n",
-    "\n",
-    "def parse_record(record):\n",
-    "    image = record[\"image\"]\n",
-    "    h, w = tf.shape(image)[0], tf.shape(image)[1]\n",
-    "    rel_boxes = record[\"objects\"][\"bbox\"]\n",
-    "    abs_boxes = keras.utils.bounding_boxes.convert_format(\n",
-    "        rel_boxes,\n",
-    "        source=\"rel_yxyx\",\n",
-    "        target=BBOX_FORMAT,\n",
-    "        height=h,\n",
-    "        width=w,\n",
-    "    )\n",
-    "    labels = tf.cast(record[\"objects\"][\"label\"], dtype=tf.int32)\n",
-    "    return {\n",
-    "        \"images\": image,\n",
-    "        \"bounding_boxes\": {\n",
-    "            \"boxes\": abs_boxes,\n",
-    "            \"labels\": labels,\n",
-    "        },\n",
-    "    }"
+    "backbone = keras_hub.models.Backbone.from_preset(\n",
+    "    \"resnet_50_imagenet\",\n",
+    ")\n",
+    "preprocessor = keras_hub.layers.ImageConverter.from_preset(\n",
+    "    \"resnet_50_imagenet\",\n",
+    "    image_size=(image_size, image_size),\n",
+    ")"
    ]
   },
   {
@@ -291,65 +304,149 @@
    },
    "outputs": [],
    "source": [
-    "from keras.visualization import plot_bounding_box_gallery\n",
+    "from keras import layers\n",
     "\n",
-    "IMAGE_SIZE = (640, 640)\n",
-    "BATCH_SIZE = 4\n",
+    "grid_size = 6\n",
+    "num_labels = 91\n",
     "\n",
-    "resizing = keras.layers.Resizing(\n",
-    "    height=IMAGE_SIZE[0],\n",
-    "    width=IMAGE_SIZE[1],\n",
-    "    interpolation=\"bilinear\",\n",
-    "    pad_to_aspect_ratio=True,\n",
-    "    bounding_box_format=BBOX_FORMAT,\n",
-    ")\n",
+    "inputs = keras.Input(shape=(image_size, image_size, 3))\n",
+    "x = backbone(inputs)\n",
+    "x = layers.Conv2D(512, (3, 3), strides=(2, 2))(x)\n",
+    "x = keras.layers.Flatten()(x)\n",
+    "x = layers.Dense(2048, activation=\"relu\", kernel_initializer=\"glorot_normal\")(x)\n",
+    "x = layers.Dropout(0.5)(x)\n",
+    "x = layers.Dense(grid_size * grid_size * (num_labels + 5))(x)\n",
+    "x = layers.Reshape((grid_size, grid_size, num_labels + 5))(x)\n",
+    "box_predictions = x[..., :5]\n",
+    "class_predictions = layers.Activation(\"softmax\")(x[..., 5:])\n",
+    "outputs = {\"box\": box_predictions, \"class\": class_predictions}\n",
+    "model = keras.Model(inputs, outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Readying the COCO data for the YOLO model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def to_grid(box):\n",
+    "    x, y, w, h = box\n",
+    "    cx, cy = (x + w / 2) * grid_size, (y + h / 2) * grid_size\n",
+    "    ix, iy = int(cx), int(cy)\n",
+    "    return (ix, iy), (cx - ix, cy - iy, w, h)\n",
     "\n",
-    "max_box_layer = keras.layers.MaxNumBoundingBoxes(\n",
-    "    max_number=100,\n",
-    "    bounding_box_format=BBOX_FORMAT,\n",
-    ")\n",
+    "def from_grid(loc, box):\n",
+    "    (xi, yi), (x, y, w, h) = loc, box\n",
+    "    x = (xi + x) / grid_size - w / 2\n",
+    "    y = (yi + y) / grid_size - h / 2\n",
+    "    return (x, y, w, h)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import math\n",
     "\n",
-    "data_augmentation_layers = [\n",
-    "    keras.layers.RandomFlip(mode=\"horizontal\", bounding_box_format=BBOX_FORMAT),\n",
-    "]\n",
+    "class_array = np.zeros((len(metadata), grid_size, grid_size))\n",
+    "box_array = np.zeros((len(metadata), grid_size, grid_size, 5))\n",
     "\n",
-    "def prepare_dataset(ds, batch_size=4):\n",
-    "    ds = ds.map(parse_record)\n",
-    "    ds = ds.map(lambda x: resizing(x))\n",
-    "    for layer in data_augmentation_layers:\n",
-    "        ds = ds.map(lambda x: layer(x))\n",
-    "    ds = ds.map(max_box_layer)\n",
-    "    ds = ds.batch(batch_size, drop_remainder=True)\n",
-    "    return ds.prefetch(tf.data.AUTOTUNE)\n",
+    "for index, sample in enumerate(metadata):\n",
+    "    boxes, labels = sample[\"boxes\"], sample[\"labels\"]\n",
+    "    for box, label in zip(boxes, labels):\n",
+    "        (x, y, w, h) = box\n",
+    "        left, right = math.floor(x * grid_size), math.ceil((x + w) * grid_size)\n",
+    "        bottom, top = math.floor(y * grid_size), math.ceil((y + h) * grid_size)\n",
+    "        class_array[index, bottom:top, left:right] = label\n",
     "\n",
-    "train_ds_prepared = prepare_dataset(train_ds, batch_size=BATCH_SIZE)\n",
-    "eval_ds_prepared = prepare_dataset(eval_ds, batch_size=BATCH_SIZE)\n",
+    "for index, sample in enumerate(metadata):\n",
+    "    boxes, labels = sample[\"boxes\"], sample[\"labels\"]\n",
+    "    for box, label in zip(boxes, labels):\n",
+    "        (xi, yi), (grid_box) = to_grid(box)\n",
+    "        box_array[index, yi, xi] = [*grid_box, 1.0]\n",
+    "        class_array[index, yi, xi] = label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def draw_prediction(image, boxes, classes, cutoff=None):\n",
+    "    fig, ax = plt.subplots(dpi=300)\n",
+    "    draw_image(ax, image)\n",
+    "    for yi, row in enumerate(classes):\n",
+    "        for xi, label in enumerate(row):\n",
+    "            color = label_to_color(label) if label else \"none\"\n",
+    "            x, y, w, h = (v / grid_size for v in (xi, yi, 1.0, 1.0))\n",
+    "            r = Rectangle((x, y), w, h, lw=2, ec=\"black\", fc=color, alpha=0.5)\n",
+    "            ax.add_patch(r)\n",
+    "    for yi, row in enumerate(boxes):\n",
+    "        for xi, box in enumerate(row):\n",
+    "            box, confidence = box[:4], box[4]\n",
+    "            if not cutoff or confidence >= cutoff:\n",
+    "                box = from_grid((xi, yi), box)\n",
+    "                label = classes[yi, xi]\n",
+    "                color = label_to_color(label)\n",
+    "                name = keras_hub.utils.coco_id_to_name(label)\n",
+    "                draw_box(ax, box, f\"{name} {max(confidence, 0):.2f}\", color)\n",
+    "    plt.show()\n",
     "\n",
-    "first_images_unprepared = next(iter(train_ds.take(1)))\n",
+    "draw_prediction(metadata[0][\"path\"], box_array[0], class_array[0], cutoff=1.0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
     "\n",
-    "plot_bounding_box_gallery(\n",
-    "    np.array([first_images_unprepared[\"image\"]]),\n",
-    "    bounding_box_format=\"rel_yxyx\",\n",
-    "    y_true={\n",
-    "        \"boxes\": np.array([first_images_unprepared[\"objects\"][\"bbox\"]]),\n",
-    "        \"labels\": np.array([first_images_unprepared[\"objects\"][\"label\"]]),\n",
-    "    },\n",
-    "    scale=4,\n",
-    "    class_mapping=CLASSES,\n",
-    ")\n",
+    "def load_image(path):\n",
+    "    x = tf.io.read_file(path)\n",
+    "    x = tf.image.decode_jpeg(x, channels=3)\n",
+    "    return preprocessor(x)\n",
     "\n",
-    "first_images_prepared = next(iter(train_ds_prepared.unbatch().take(1)))\n",
+    "images = tf.data.Dataset.from_tensor_slices([x[\"path\"] for x in metadata])\n",
+    "images = images.map(load_image, num_parallel_calls=8)\n",
+    "labels = {\"box\": box_array, \"class\": class_array}\n",
+    "labels = tf.data.Dataset.from_tensor_slices(labels)\n",
     "\n",
-    "plot_bounding_box_gallery(\n",
-    "    np.array([first_images_prepared[\"images\"]]),\n",
-    "    bounding_box_format=\"yxyx\",\n",
-    "    y_true={\n",
-    "        \"boxes\": np.array([first_images_prepared[\"bounding_boxes\"][\"boxes\"]]),\n",
-    "        \"labels\": np.array([first_images_prepared[\"bounding_boxes\"][\"labels\"]]),\n",
-    "    },\n",
-    "    scale=4,\n",
-    "    class_mapping=CLASSES,\n",
-    ")"
+    "dataset = tf.data.Dataset.zip(images, labels).batch(16).prefetch(2)\n",
+    "val_dataset, train_dataset = dataset.take(500), dataset.skip(500)"
    ]
   },
   {
@@ -358,7 +455,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Fine-tuning the RetinaNet object detection model"
+    "#### Training the YOLO model"
    ]
   },
   {
@@ -369,11 +466,28 @@
    },
    "outputs": [],
    "source": [
-    "import keras_hub\n",
+    "from keras import ops\n",
     "\n",
-    "model = keras_hub.models.ImageObjectDetector.from_preset(\n",
-    "    \"retinanet_resnet50_fpn_coco\"\n",
-    ")"
+    "def unpack(box):\n",
+    "    return box[..., 0], box[..., 1], box[..., 2], box[..., 3]\n",
+    "\n",
+    "def intersection(box1, box2):\n",
+    "    cx1, cy1, w1, h1 = unpack(box1)\n",
+    "    cx2, cy2, w2, h2 = unpack(box2)\n",
+    "    left = ops.maximum(cx1 - w1 / 2, cx2 - w2 / 2)\n",
+    "    bottom = ops.maximum(cy1 - h1 / 2, cy2 - h2 / 2)\n",
+    "    right = ops.minimum(cx1 + w1 / 2, cx2 + w2 / 2)\n",
+    "    top = ops.minimum(cy1 + h1 / 2, cy2 + h2 / 2)\n",
+    "    return ops.maximum(0.0, right - left) * ops.maximum(0.0, top - bottom)\n",
+    "\n",
+    "def intersection_over_union(box1, box2):\n",
+    "    cx1, cy1, w1, h1 = unpack(box1)\n",
+    "    cx2, cy2, w2, h2 = unpack(box2)\n",
+    "    intersection_area = intersection(box1, box2)\n",
+    "    a1 = ops.maximum(w1, 0.0) * ops.maximum(h1, 0.0)\n",
+    "    a2 = ops.maximum(w2, 0.0) * ops.maximum(h2, 0.0)\n",
+    "    union_area = a1 + a2 - intersection_area\n",
+    "    return ops.divide_no_nan(intersection_area, union_area)"
    ]
   },
   {
@@ -384,10 +498,27 @@
    },
    "outputs": [],
    "source": [
-    "model_with_random_head = keras_hub.models.ImageObjectDetector.from_preset(\n",
-    "    \"retinanet_resnet50_fpn_coco\",\n",
-    "    num_classes=len(CLASSES),\n",
-    ")"
+    "def signed_sqrt(x):\n",
+    "    return ops.sign(x) * ops.sqrt(ops.absolute(x) + keras.config.epsilon())\n",
+    "\n",
+    "def box_loss(true, pred):\n",
+    "    xy_true, wh_true, conf_true = true[..., :2], true[..., 2:4], true[..., 4:]\n",
+    "    xy_pred, wh_pred, conf_pred = pred[..., :2], pred[..., 2:4], pred[..., 4:]\n",
+    "    no_object = conf_true == 0.0\n",
+    "    xy_error = ops.square(xy_true - xy_pred)\n",
+    "    wh_error = ops.square(signed_sqrt(wh_true) - signed_sqrt(wh_pred))\n",
+    "    iou = intersection_over_union(true, pred)\n",
+    "    conf_target = ops.where(no_object, 0.0, ops.expand_dims(iou, -1))\n",
+    "    conf_error = ops.square(conf_target - conf_pred)\n",
+    "    error = ops.concatenate(\n",
+    "        (\n",
+    "            ops.where(no_object, 0.0, xy_error * 5.0),\n",
+    "            ops.where(no_object, 0.0, wh_error * 5.0),\n",
+    "            ops.where(no_object, conf_error * 0.5, conf_error),\n",
+    "        ),\n",
+    "        axis=-1,\n",
+    "    )\n",
+    "    return ops.sum(error, axis=(1, 2, 3))"
    ]
   },
   {
@@ -398,40 +529,51 @@
    },
    "outputs": [],
    "source": [
-    "def split_labels(x):\n",
-    "    return (\n",
-    "        x[\"images\"],\n",
-    "        {\n",
-    "            \"boxes\": x[\"bounding_boxes\"][\"boxes\"],\n",
-    "            \"classes\": x[\"bounding_boxes\"][\"labels\"],\n",
-    "        },\n",
-    "    )\n",
-    "\n",
-    "train_ds_prepared = train_ds_prepared.map(split_labels)\n",
-    "eval_ds_prepared = eval_ds_prepared.map(split_labels)\n",
-    "\n",
-    "callbacks = [\n",
-    "    keras.callbacks.ModelCheckpoint(\n",
-    "        \"pascal_voc_detection.keras\",\n",
-    "        save_best_only=True,\n",
-    "        monitor=\"val_loss\",\n",
-    "    )\n",
-    "]\n",
-    "history = model.fit(\n",
-    "    train_ds_prepared,\n",
-    "    validation_data=eval_ds_prepared,\n",
-    "    epochs=10,\n",
-    "    callbacks=callbacks,\n",
+    "model.compile(\n",
+    "    optimizer=keras.optimizers.Adam(2e-4),\n",
+    "    loss={\"box\": box_loss, \"class\": \"sparse_categorical_crossentropy\"},\n",
+    ")\n",
+    "model.fit(\n",
+    "    train_dataset,\n",
+    "    validation_data=val_dataset,\n",
+    "    epochs=4,\n",
     ")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "x, y = next(iter(val_dataset.rebatch(1)))\n",
+    "preds = model.predict(x)\n",
+    "boxes = preds[\"box\"][0]\n",
+    "classes = np.argmax(preds[\"class\"][0], axis=-1)\n",
+    "path = metadata[0][\"path\"]\n",
+    "draw_prediction(path, boxes, classes, cutoff=0.1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "draw_prediction(path, boxes, classes, cutoff=None)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
     "colab_type": "text"
    },
    "source": [
-    "##### Metrics, evaluation, and inference"
+    "### Using a pretrained RetinaNet detector"
    ]
   },
   {
@@ -442,26 +584,67 @@
    },
    "outputs": [],
    "source": [
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "model = keras.models.load_model(\"pascal_voc_detection.keras\")\n",
-    "images, gt_boxes = next(iter(eval_ds_prepared))\n",
-    "predictions = model.predict(images)\n",
-    "\n",
-    "plot_bounding_box_gallery(\n",
-    "    images,\n",
-    "    bounding_box_format=BBOX_FORMAT,\n",
-    "    y_true={\n",
-    "        \"boxes\": gt_boxes[\"boxes\"],\n",
-    "        \"labels\": gt_boxes[\"classes\"],\n",
-    "    },\n",
-    "    y_pred={\n",
-    "        \"boxes\": predictions[\"boxes\"],\n",
-    "        \"labels\": predictions[\"classes\"],\n",
-    "    },\n",
-    "    scale=8,\n",
-    "    class_mapping=CLASSES,\n",
-    ")"
+    "url = (\n",
+    "    \"https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/\"\n",
+    "    \"A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg\"\n",
+    ")\n",
+    "path = keras.utils.get_file(origin=url)\n",
+    "image = np.array([keras.utils.load_img(path)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "detector = keras_hub.models.ObjectDetector.from_preset(\n",
+    "    \"retinanet_resnet50_fpn_v2_coco\",\n",
+    ")\n",
+    "predictions = detector.predict(image)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "[(k, v.shape) for k, v in predictions.items()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "predictions[\"boxes\"][0][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "ig, ax = plt.subplots(dpi=300)\n",
+    "draw_image(ax, path)\n",
+    "for i in range(predictions[\"num_detections\"][0]):\n",
+    "    y1, x1, y2, x2 = predictions[\"boxes\"][0][i]\n",
+    "    box = (x1 / 800, y1 / 800, (x2 - x1) / 800, (y2 - y1) / 800)\n",
+    "    label = predictions[\"labels\"][0][i]\n",
+    "    label_name = keras_hub.utils.coco_id_to_name(label)\n",
+    "    draw_box(ax, box, label_name, label_to_color(label))\n",
+    "plt.show()"
    ]
   },
   {

From 6dfbb5eecc3591ad10f49157e7c3389e4d2fee6c Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Sat, 26 Apr 2025 14:02:01 -0700
Subject: [PATCH 09/24] Chapter 10 fix

---
 chapter10_interpreting-what-convnets-learn.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapter10_interpreting-what-convnets-learn.ipynb b/chapter10_interpreting-what-convnets-learn.ipynb
index 543491650d..3bc8da4286 100644
--- a/chapter10_interpreting-what-convnets-learn.ipynb
+++ b/chapter10_interpreting-what-convnets-learn.ipynb
@@ -225,7 +225,7 @@
    "source": [
     "import keras_hub\n",
     "\n",
-    "backbone = keras_hub.models.Backbone.from_preset(\n",
+    "model = keras_hub.models.Backbone.from_preset(\n",
     "    \"xception_41_imagenet\",\n",
     ")\n",
     "preprocessor = keras_hub.layers.ImageConverter.from_preset(\n",
@@ -598,7 +598,7 @@
    },
    "outputs": [],
    "source": [
-    "keras_hub.utils.decode_imagenet_predictions(preds[0])"
+    "keras_hub.utils.decode_imagenet_predictions(preds)"
    ]
   },
   {

From a253729a11e02ffbc212e844a75939ff9ab40ebe Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Sat, 26 Apr 2025 18:29:24 -0700
Subject: [PATCH 10/24] More fixes

---
 chapter12_object-detection.ipynb              |   2 +-
 ...18_best-practices-for-the-real-world.ipynb | 250 +-----------------
 2 files changed, 3 insertions(+), 249 deletions(-)

diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index 1f3804cc43..a559fab192 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -585,7 +585,7 @@
    "outputs": [],
    "source": [
     "url = (\n",
-    "    \"https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/\"\n",
+    "    \"https://upload.wikimedia.org/wikipedia/commons/7/7d/\"\n",
     "    \"A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg\"\n",
     ")\n",
     "path = keras.utils.get_file(origin=url)\n",
diff --git a/chapter18_best-practices-for-the-real-world.ipynb b/chapter18_best-practices-for-the-real-world.ipynb
index b6bb430d83..f7f17539d0 100644
--- a/chapter18_best-practices-for-the-real-world.ipynb
+++ b/chapter18_best-practices-for-the-real-world.ipynb
@@ -142,7 +142,7 @@
     "tuner = kt.BayesianOptimization(\n",
     "    build_model,\n",
     "    objective=\"val_accuracy\",\n",
-    "    max_trials=100,\n",
+    "    max_trials=20,\n",
     "    executions_per_trial=2,\n",
     "    directory=\"mnist_kt_test\",\n",
     "    overwrite=True,\n",
@@ -241,6 +241,7 @@
    "source": [
     "def get_best_trained_model(hp):\n",
     "    best_epoch = get_best_epoch(hp)\n",
+    "    model = build_model(hp)\n",
     "    model.fit(\n",
     "        x_train_full, y_train_full, batch_size=128, epochs=int(best_epoch * 1.2)\n",
     "    )\n",
@@ -327,44 +328,6 @@
     "##### Model parallelism: split your model across multiple GPUs"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "model = keras.Sequential(\n",
-    "    [\n",
-    "        keras.layers.Input(shape=(16000,)),\n",
-    "        keras.layers.Dense(64000, activation=\"relu\"),\n",
-    "        keras.layers.Dense(8000, activation=\"sigmoid\"),\n",
-    "    ]\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "half_kernel_0 = kernel[:, :32000]\n",
-    "half_bias_0 = bias[:32000]\n",
-    "\n",
-    "half_kernel_1 = kernel[:, 32000:]\n",
-    "half_bias_1 = bias[32000:]\n",
-    "\n",
-    "with keras.device(\"gpu:0\"):\n",
-    "    half_output_0 = keras.ops.matmul(inputs, half_kernel_0) + half_bias_0\n",
-    "\n",
-    "with keras.device(\"gpu:1\"):\n",
-    "    half_output_1 = keras.ops.matmul(inputs, half_kernel_1) + half_bias_1"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -392,41 +355,6 @@
     "###### Using data parallelism with JAX"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "keras.distribution.set_distribution(keras.distribution.DataParallel())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "keras.distribution.list_devices()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "keras.distribution.set_distribution(\n",
-    "    keras.distribution.DataParallel([\"gpu:0\", \"gpu:1\"])\n",
-    ")"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -436,117 +364,6 @@
     "###### Using model parallelism with JAX"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "mesh = keras.distribution.DeviceMesh(\n",
-    "    shape=(2, 4),\n",
-    "    axis_names=[\"data\", \"model\"],\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "devices = [f\"gpu:{i}\" for i in range(8)]\n",
-    "mesh = keras.distribution.DeviceMesh(\n",
-    "    shape=(2, 4),\n",
-    "    axis_names=[\"data\", \"model\"],\n",
-    "    devices=devices,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "for v in model.variables:\n",
-    "    print(v.path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "sequential/dense/kernel\n",
-    "sequential/dense/bias\n",
-    "sequential/dense_1/kernel\n",
-    "sequential/dense_1/bias"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "layout_map = keras.distribution.LayoutMap(device_mesh)\n",
-    "layout_map[\"sequential/dense/kernel\"] = (None, \"model\")\n",
-    "layout_map[\"sequential/dense/bias\"] = (\"model\",)\n",
-    "layout_map[\"sequential/dense_1/kernel\"] = (None, \"model\")\n",
-    "layout_map[\"sequential/dense_1/bias\"] = (\"model\",)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "model_parallel = keras.distribution.ModelParallel(\n",
-    "    layout_map=layout_map,\n",
-    "    batch_dim_name=\"data\",\n",
-    ")\n",
-    "keras.distribution.set_distribution(model_parallel)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "model.layers[0].kernel.value.sharding"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "import jax\n",
-    "\n",
-    "value = model.layers[0].kernel.value\n",
-    "jax.debug.visualize_sharding(value.shape, value.sharding)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -592,19 +409,6 @@
     "##### Float16 inference"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "import keras\n",
-    "\n",
-    "keras.config.set_dtype_policy(\"float16\")"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -614,19 +418,6 @@
     "##### Mixed-precision training"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "import keras\n",
-    "\n",
-    "keras.config.set_dtype_policy(\"mixed_float16\")"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -636,30 +427,6 @@
     "##### Using loss scaling with mixed precision"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = keras.optimizers.Adam(learning_rate=1e-3, loss_scale_factor=10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = keras.optimizers.LossScaleOptimizer(\n",
-    "    keras.optimizers.Adam(learning_rate=1e-3)\n",
-    ")"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -746,19 +513,6 @@
     "ops.matmul(x, kernel)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "model = ...\n",
-    "model.quantize(\"int8\")\n",
-    "predictions = model.predict(...)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {

From 7a4892707579c770a3304bd86576ed26cfc286c8 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Sat, 26 Apr 2025 18:31:18 -0700
Subject: [PATCH 11/24] Simpler bounding box

---
 chapter12_object-detection.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index a559fab192..765e84149a 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -602,6 +602,7 @@
    "source": [
     "detector = keras_hub.models.ObjectDetector.from_preset(\n",
     "    \"retinanet_resnet50_fpn_v2_coco\",\n",
+    "    bounding_box_format=\"rel_xywh\",\n",
     ")\n",
     "predictions = detector.predict(image)"
    ]
@@ -639,8 +640,7 @@
     "ig, ax = plt.subplots(dpi=300)\n",
     "draw_image(ax, path)\n",
     "for i in range(predictions[\"num_detections\"][0]):\n",
-    "    y1, x1, y2, x2 = predictions[\"boxes\"][0][i]\n",
-    "    box = (x1 / 800, y1 / 800, (x2 - x1) / 800, (y2 - y1) / 800)\n",
+    "    box = predictions[\"boxes\"][0][i]\n",
     "    label = predictions[\"labels\"][0][i]\n",
     "    label_name = keras_hub.utils.coco_id_to_name(label)\n",
     "    draw_box(ax, box, label_name, label_to_color(label))\n",

From fd3fc5ea6bc6c04449e37a153d10bfea539300c2 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Sun, 27 Apr 2025 20:48:41 -0700
Subject: [PATCH 12/24] Update chapter 17

---
 chapter17_image-generation.ipynb | 427 ++++++-------------------------
 1 file changed, 85 insertions(+), 342 deletions(-)

diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index 65fd10a079..ee00309514 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -358,12 +358,10 @@
     "    if input_width == width:\n",
     "        residual = x\n",
     "    else:\n",
-    "        residual = layers.Conv2D(width, kernel_size=1)(x)\n",
+    "        residual = layers.Conv2D(width, 1)(x)\n",
     "    x = layers.BatchNormalization(center=False, scale=False)(x)\n",
-    "    x = layers.Conv2D(width, kernel_size=3, padding=\"same\", activation=\"swish\")(\n",
-    "        x\n",
-    "    )\n",
-    "    x = layers.Conv2D(width, kernel_size=3, padding=\"same\")(x)\n",
+    "    x = layers.Conv2D(width, 3, padding=\"same\", activation=\"swish\")(x)\n",
+    "    x = layers.Conv2D(width, 3, padding=\"same\")(x)\n",
     "    x = x + residual\n",
     "    return x\n",
     "\n",
@@ -371,10 +369,8 @@
     "    noisy_images = keras.Input(shape=(image_size, image_size, 3))\n",
     "    noise_rates = keras.Input(shape=(1, 1, 1))\n",
     "\n",
-    "    x = layers.Conv2D(widths[0], kernel_size=1)(noisy_images)\n",
-    "    n = layers.UpSampling2D(size=image_size, interpolation=\"nearest\")(\n",
-    "        noise_rates\n",
-    "    )\n",
+    "    x = layers.Conv2D(widths[0], 1)(noisy_images)\n",
+    "    n = layers.UpSampling2D(image_size, interpolation=\"nearest\")(noise_rates)\n",
     "    x = layers.Concatenate()([x, n])\n",
     "\n",
     "    skips = []\n",
@@ -393,9 +389,7 @@
     "            x = layers.Concatenate()([x, skips.pop()])\n",
     "            x = residual_block(x, width)\n",
     "\n",
-    "    pred_noise_masks = layers.Conv2D(\n",
-    "        3, kernel_size=1, kernel_initializer=\"zeros\"\n",
-    "    )(x)\n",
+    "    pred_noise_masks = layers.Conv2D(3, 1, kernel_initializer=\"zeros\")(x)\n",
     "\n",
     "    return keras.Model([noisy_images, noise_rates], pred_noise_masks)"
    ]
@@ -458,7 +452,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The training process"
+    "#### The training process"
    ]
   },
   {
@@ -515,7 +509,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The generation process\n",
+    "#### The generation process\n",
     "\n",
     "    def generate(self, num_images, diffusion_steps):\n",
     "        noisy_images = keras.random.normal(\n",
@@ -548,7 +542,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Visualizing results with a custom callback"
+    "#### Visualizing results with a custom callback"
    ]
   },
   {
@@ -590,7 +584,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### It's go time!"
+    "#### It's go time!"
    ]
   },
   {
@@ -667,14 +661,14 @@
    "source": [
     "import keras_hub\n",
     "\n",
-    "model = keras_hub.models.TextToImage.from_preset(\n",
+    "height, width = 512, 512\n",
+    "task = keras_hub.models.TextToImage.from_preset(\n",
     "    \"stable_diffusion_3_medium\",\n",
-    "    image_shape=(512, 512, 3),\n",
+    "    image_shape=(height, width, 3),\n",
     "    dtype=\"float16\",\n",
     ")\n",
-    "image = model.generate(\n",
-    "    \"photograph of an astronaut riding a horse, detailed, 8k\",\n",
-    ")"
+    "prompt = \"A NASA astraunaut riding an origami elephant in New York City\"\n",
+    "task.generate(prompt)"
    ]
   },
   {
@@ -685,48 +679,12 @@
    },
    "outputs": [],
    "source": [
-    "from PIL import Image\n",
-    "\n",
-    "prompts = [\n",
-    "    \"A photograph of a cat wearing a top hat, photorealistic\",\n",
-    "    \"A neon sci-fi skyline at night, illustration\",\n",
-    "]\n",
-    "images = model.generate(\n",
-    "    prompts,\n",
-    "    num_steps=25,\n",
-    "    guidance_scale=7.5,\n",
-    ")\n",
-    "\n",
-    "for i, img in enumerate(images):\n",
-    "    pil_image = Image.fromarray(img)\n",
-    "    pil_image.save(f\"generated_image_{i}.png\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### Exploring the latent space of a text-to-image model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "##### Latent manifold continuity and latent space walking"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "##### Generating a GIF showing prompt interpolation"
+    "task.generate(\n",
+    "    {\n",
+    "        \"prompts\": prompt,\n",
+    "        \"negative_prompts\": \"blue color\",\n",
+    "    }\n",
+    ")"
    ]
   },
   {
@@ -737,28 +695,22 @@
    },
    "outputs": [],
    "source": [
-    "import math\n",
+    "import numpy as np\n",
+    "from PIL import Image\n",
     "\n",
-    "height, width = 512, 512\n",
-    "num_steps = 28\n",
-    "guidance_scale = 7.0\n",
-    "dtype = \"float16\""
+    "def display(images):\n",
+    "    return Image.fromarray(np.concatenate(images, axis=1))\n",
+    "\n",
+    "display([task.generate(prompt, num_steps=x) for x in [5, 10, 15, 20, 25]])"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 0,
+   "cell_type": "markdown",
    "metadata": {
-    "colab_type": "code"
+    "colab_type": "text"
    },
-   "outputs": [],
    "source": [
-    "backbone = keras_hub.models.Backbone.from_preset(\n",
-    "    \"stable_diffusion_3_medium\", image_shape=(height, width, 3), dtype=dtype\n",
-    ")\n",
-    "preprocessor = keras_hub.models.TextToImagePreprocessor.from_preset(\n",
-    "    \"stable_diffusion_3_medium\"\n",
-    ")"
+    "#### Exploring the latent space of a text-to-image model"
    ]
   },
   {
@@ -769,81 +721,32 @@
    },
    "outputs": [],
    "source": [
-    "def get_text_embeddings(prompt):\n",
-    "    token_ids = preprocessor.generate_preprocess([prompt])\n",
-    "    negative_token_ids = preprocessor.generate_preprocess([\"\"])\n",
-    "    (\n",
-    "        positive_embeddings,\n",
-    "        negative_embeddings,\n",
-    "        positive_pooled_embeddings,\n",
-    "        negative_pooled_embeddings,\n",
-    "    ) = backbone.encode_text_step(token_ids, negative_token_ids)\n",
-    "    return (\n",
-    "        positive_embeddings,\n",
-    "        negative_embeddings,\n",
-    "        positive_pooled_embeddings,\n",
-    "        negative_pooled_embeddings,\n",
-    "    )\n",
-    "\n",
-    "def decode_to_images(x, height, width):\n",
-    "    x = ops.concatenate(x, axis=0)\n",
-    "    x = ops.reshape(x, (-1, height, width, 3))\n",
-    "    x = ops.clip((x + 1.0) / 2.0, 0.0, 1.0)\n",
-    "    return ops.cast(ops.round(x * 255.0), \"uint8\")\n",
+    "from keras import random\n",
     "\n",
-    "def generate_with_latents_and_embeddings(\n",
-    "    latents, embeddings, num_steps, guidance_scale\n",
-    "):\n",
-    "    def body_fun(step, latents):\n",
-    "        return backbone.denoise_step(\n",
-    "            latents, embeddings, step, num_steps, guidance_scale\n",
+    "def get_text_embeddings(prompt):\n",
+    "    token_ids = task.preprocessor.generate_preprocess([prompt])\n",
+    "    negative_token_ids = task.preprocessor.generate_preprocess([\"\"])\n",
+    "    return task.backbone.encode_text_step(token_ids, negative_token_ids)\n",
+    "\n",
+    "def denoise_with_text_embeddings(embeddings, num_steps=28, guidance_scale=7.0):\n",
+    "    latents = random.normal((1, height // 8, width // 8, 16))\n",
+    "    for step in range(num_steps):\n",
+    "        latents = task.backbone.denoise_step(\n",
+    "            latents,\n",
+    "            embeddings,\n",
+    "            step,\n",
+    "            num_steps,\n",
+    "            guidance_scale,\n",
     "        )\n",
+    "    return task.backbone.decode_step(latents)[0]\n",
     "\n",
-    "    latents = ops.fori_loop(0, num_steps, body_fun, latents)\n",
-    "    return backbone.decode_step(latents)\n",
-    "\n",
-    "def export_as_gif(filename, images, frames_per_second=10, no_rubber_band=False):\n",
-    "    if not no_rubber_band:\n",
-    "        images += images[2:-1][::-1]\n",
-    "    images[0].save(\n",
-    "        filename,\n",
-    "        save_all=True,\n",
-    "        append_images=images[1:],\n",
-    "        duration=1000 // frames_per_second,\n",
-    "        loop=0,\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "keras.config.set_backend(\"jax\")\n",
-    "import itertools\n",
-    "import jax\n",
-    "\n",
-    "@jax.jit\n",
-    "def compiled_function(state, *args, **kwargs):\n",
-    "    (trainable_variables, non_trainable_variables) = state\n",
-    "    mapping = itertools.chain(\n",
-    "        zip(backbone.trainable_variables, trainable_variables),\n",
-    "        zip(backbone.non_trainable_variables, non_trainable_variables),\n",
-    "    )\n",
-    "    with keras.StatelessScope(state_mapping=mapping):\n",
-    "        return generate_with_latents_and_embeddings(*args, **kwargs)\n",
+    "def scale_output(x):\n",
+    "    x = np.clip((x + 1.0) / 2.0, 0.0, 1.0)\n",
+    "    return np.round(x * 255.0).astype(\"uint8\")\n",
     "\n",
-    "def wrapped_jax_generate_function(*args, **kwargs):\n",
-    "    state = (\n",
-    "        [v.value for v in backbone.trainable_variables],\n",
-    "        [v.value for v in backbone.non_trainable_variables],\n",
-    "    )\n",
-    "    return compiled_function(state, *args, **kwargs)\n",
-    "\n",
-    "generate_function = wrapped_jax_generate_function"
+    "embeddings = get_text_embeddings(prompt)\n",
+    "image = denoise_with_text_embeddings(embeddings)\n",
+    "scale_output(image)"
    ]
   },
   {
@@ -854,29 +757,7 @@
    },
    "outputs": [],
    "source": [
-    "def slerp(v1, v2, num):\n",
-    "    ori_dtype = v1.dtype\n",
-    "    v1 = ops.cast(v1, \"float32\")\n",
-    "    v2 = ops.cast(v2, \"float32\")\n",
-    "\n",
-    "    def interpolation(t, v1, v2, dot_threshold=0.9995):\n",
-    "        norm = ops.linalg.norm(ops.ravel(v1)) * ops.linalg.norm(ops.ravel(v2))\n",
-    "        dot = ops.sum(v1 * v2 / norm)\n",
-    "        if ops.abs(dot) > dot_threshold:\n",
-    "            v2 = (1 - t) * v1 + t * v2\n",
-    "        else:\n",
-    "            theta_0 = ops.arccos(dot)\n",
-    "            sin_theta_0 = ops.sin(theta_0)\n",
-    "            theta_t = theta_0 * t\n",
-    "            sin_theta_t = ops.sin(theta_t)\n",
-    "            s0 = ops.sin(theta_0 - theta_t) / sin_theta_0\n",
-    "            s1 = sin_theta_t / sin_theta_0\n",
-    "            v2 = s0 * v1 + s1 * v2\n",
-    "        return v2\n",
-    "\n",
-    "    t = ops.linspace(0, 1, num)\n",
-    "    interpolated = [interpolation(t[i], v1, v2) for i in range(num)]\n",
-    "    return ops.cast(ops.stack(interpolated), ori_dtype)"
+    "[x.shape for x in embeddings]"
    ]
   },
   {
@@ -887,77 +768,33 @@
    },
    "outputs": [],
    "source": [
-    "prompt_1 = (\n",
-    "    \"Victorian mechanical butterfly made of brass and clockwork, gears visible \"\n",
-    "    \"through delicate filigree wings\"\n",
-    ")\n",
-    "prompt_2 = (\n",
-    "    \"Bioluminescent butterfly made of flowing plasma and starlight, trailing \"\n",
-    "    \"cosmic dust\"\n",
-    ")\n",
-    "\n",
-    "encoding_1 = get_text_embeddings(prompt_1)\n",
-    "encoding_2 = get_text_embeddings(prompt_2)\n",
-    "pos_emb_1, neg_emb_1, pos_pool_1, neg_pool_1 = encoding_1\n",
-    "pos_emb_2, neg_emb_2, pos_pool_2, neg_pool_2 = encoding_2\n",
-    "\n",
-    "interpolation_steps = 64\n",
-    "batch_size = 4\n",
-    "batches = interpolation_steps // batch_size\n",
-    "\n",
-    "interpolated_pos_emb = slerp(pos_emb_1, pos_emb_2, interpolation_steps)\n",
-    "interpolated_pos_pool = slerp(pos_pool_1, pos_pool_2, interpolation_steps)\n",
-    "\n",
-    "pos_emb_shape = ops.shape(pos_emb_1)\n",
-    "pos_pool_shape = ops.shape(pos_pool_1)\n",
-    "\n",
-    "interpolated_pos_emb = ops.reshape(\n",
-    "    interpolated_pos_emb,\n",
-    "    (batches, batch_size, pos_emb_shape[-2], pos_emb_shape[-1]),\n",
-    ")\n",
-    "interpolated_pos_pool = ops.reshape(\n",
-    "    interpolated_pos_pool, (batches, batch_size, pos_pool_shape[-1])\n",
-    ")\n",
-    "\n",
-    "negative_embeddings = ops.tile(neg_emb_1, (batch_size, 1, 1))\n",
-    "negative_pooled_embeddings = ops.tile(neg_pool_1, (batch_size, 1))\n",
-    "\n",
-    "latents = keras.random.normal((1, height // 8, width // 8, 16), seed=42)\n",
-    "latents = ops.tile(latents, (batch_size, 1, 1, 1))\n",
+    "from keras import ops\n",
     "\n",
-    "images = []\n",
-    "progbar = keras.utils.Progbar(batches)\n",
-    "for i in range(batches):\n",
-    "    images.append(\n",
-    "        generate_function(\n",
-    "            latents,\n",
+    "def slerp(t, v1, v2):\n",
+    "    v1, v2 = ops.cast(v1, \"float32\"), ops.cast(v2, \"float32\")\n",
+    "    v1_norm = ops.linalg.norm(ops.ravel(v1))\n",
+    "    v2_norm = ops.linalg.norm(ops.ravel(v2))\n",
+    "    dot = ops.sum(v1 * v2 / (v1_norm * v2_norm))\n",
+    "    theta_0 = ops.arccos(dot)\n",
+    "    sin_theta_0 = ops.sin(theta_0)\n",
+    "    theta_t = theta_0 * t\n",
+    "    sin_theta_t = ops.sin(theta_t)\n",
+    "    s0 = ops.sin(theta_0 - theta_t) / sin_theta_0\n",
+    "    s1 = sin_theta_t / sin_theta_0\n",
+    "    return s0 * v1 + s1 * v2\n",
+    "\n",
+    "def interpolate_text_embeddings(e1, e2, start=0, stop=1, num=10):\n",
+    "    embeddings = []\n",
+    "    for t in np.linspace(start, stop, num):\n",
+    "        embeddings.append(\n",
     "            (\n",
-    "                interpolated_pos_emb[i],\n",
-    "                negative_embeddings,\n",
-    "                interpolated_pos_pool[i],\n",
-    "                negative_pooled_embeddings,\n",
-    "            ),\n",
-    "            ops.convert_to_tensor(num_steps),\n",
-    "            ops.convert_to_tensor(guidance_scale),\n",
+    "                slerp(t, e1[0], e2[0]),\n",
+    "                e1[1],\n",
+    "                slerp(t, e1[2], e2[2]),\n",
+    "                e1[3],\n",
+    "            )\n",
     "        )\n",
-    "    )\n",
-    "    progbar.update(i + 1, finalize=(i == batches - 1))\n",
-    "\n",
-    "images = ops.convert_to_numpy(decode_to_images(images, height, width))\n",
-    "export_as_gif(\n",
-    "    \"dog_to_cat_64.gif\",\n",
-    "    [Image.fromarray(image) for image in images],\n",
-    "    frames_per_second=2,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "##### 4-way interpolation to make image grids"
+    "    return embeddings"
    ]
   },
   {
@@ -968,110 +805,16 @@
    },
    "outputs": [],
    "source": [
-    "prompt_1 = \"Pearl white seashell, spiral form, smooth iridescent surface\"\n",
-    "prompt_2 = \"Porcelain teacup, curved handle, glossy white ceramic\"\n",
-    "prompt_3 = \"White orchid flower, delicate petals, curved organic form\"\n",
-    "prompt_4 = \"Paper origami crane, crisp folds, pure white surface\"\n",
-    "\n",
-    "interpolation_steps = 8\n",
-    "batch_size = 4\n",
-    "batches = (interpolation_steps**2) // batch_size\n",
-    "\n",
-    "enc1 = get_text_embeddings(prompt_1)\n",
-    "enc2 = get_text_embeddings(prompt_2)\n",
-    "enc3 = get_text_embeddings(prompt_3)\n",
-    "enc4 = get_text_embeddings(prompt_4)\n",
-    "\n",
-    "pos_emb_1, neg_emb_1, pos_pool_1, neg_pool_1 = enc1\n",
-    "pos_emb_2, neg_emb_2, pos_pool_2, neg_pool_2 = enc2\n",
-    "pos_emb_3, neg_emb_3, pos_pool_3, neg_pool_3 = enc3\n",
-    "pos_emb_4, neg_emb_4, pos_pool_4, neg_pool_4 = enc4\n",
-    "\n",
-    "pos_emb_shape = ops.shape(pos_emb_1)\n",
-    "pos_pool_shape = ops.shape(pos_pool_1)\n",
-    "\n",
-    "interpolated_12_emb = slerp(pos_emb_1, pos_emb_2, interpolation_steps)\n",
-    "interpolated_34_emb = slerp(pos_emb_3, pos_emb_4, interpolation_steps)\n",
-    "interpolated_12_pool = slerp(pos_pool_1, pos_pool_2, interpolation_steps)\n",
-    "interpolated_34_pool = slerp(pos_pool_3, pos_pool_4, interpolation_steps)\n",
-    "\n",
-    "interpolated_pos_emb = slerp(\n",
-    "    interpolated_12_emb, interpolated_34_emb, interpolation_steps\n",
-    ")\n",
-    "interpolated_pos_pool = slerp(\n",
-    "    interpolated_12_pool, interpolated_34_pool, interpolation_steps\n",
-    ")\n",
-    "\n",
-    "interpolated_pos_emb = ops.reshape(\n",
-    "    interpolated_pos_emb,\n",
-    "    (batches, batch_size, pos_emb_shape[-2], pos_emb_shape[-1]),\n",
-    ")\n",
-    "interpolated_pos_pool = ops.reshape(\n",
-    "    interpolated_pos_pool, (batches, batch_size, pos_pool_shape[-1])\n",
-    ")\n",
-    "\n",
-    "negative_embeddings = ops.tile(neg_emb_1, (batch_size, 1, 1))\n",
-    "negative_pooled_embeddings = ops.tile(neg_pool_1, (batch_size, 1))\n",
-    "\n",
-    "latents = keras.random.normal((1, height // 8, width // 8, 16), seed=42)\n",
-    "latents = ops.tile(latents, (batch_size, 1, 1, 1))\n",
+    "prompt1 = \"A friendly dog looking up in a field of flowers\"\n",
+    "prompt2 = \"A horrifying, tentacled creature hovering over a field of flowers\"\n",
+    "e1 = get_text_embeddings(prompt1)\n",
+    "e2 = get_text_embeddings(prompt2)\n",
     "\n",
     "images = []\n",
-    "progbar = keras.utils.Progbar(batches)\n",
-    "for i in range(batches):\n",
-    "    images.append(\n",
-    "        generate_function(\n",
-    "            latents,\n",
-    "            (\n",
-    "                interpolated_pos_emb[i],\n",
-    "                negative_embeddings,\n",
-    "                interpolated_pos_pool[i],\n",
-    "                negative_pooled_embeddings,\n",
-    "            ),\n",
-    "            ops.convert_to_tensor(num_steps),\n",
-    "            ops.convert_to_tensor(guidance_scale),\n",
-    "        )\n",
-    "    )\n",
-    "    progbar.update(i + 1, finalize=(i == batches - 1))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "def plot_grid(images, path, grid_size, scale=2):\n",
-    "    fig, axs = plt.subplots(\n",
-    "        grid_size, grid_size, figsize=(grid_size * scale, grid_size * scale)\n",
-    "    )\n",
-    "    fig.tight_layout()\n",
-    "    plt.subplots_adjust(wspace=0, hspace=0)\n",
-    "    plt.axis(\"off\")\n",
-    "    for ax in axs.flat:\n",
-    "        ax.axis(\"off\")\n",
-    "\n",
-    "    for i in range(min(grid_size * grid_size, len(images))):\n",
-    "        ax = axs.flat[i]\n",
-    "        ax.imshow(images[i])\n",
-    "        ax.axis(\"off\")\n",
-    "\n",
-    "    for i in range(len(images), grid_size * grid_size):\n",
-    "        axs.flat[i].axis(\"off\")\n",
-    "        axs.flat[i].remove()\n",
-    "\n",
-    "    plt.savefig(\n",
-    "        fname=path,\n",
-    "        pad_inches=0,\n",
-    "        bbox_inches=\"tight\",\n",
-    "        transparent=False,\n",
-    "        dpi=60,\n",
-    "    )\n",
-    "\n",
-    "images = ops.convert_to_numpy(decode_to_images(images, height, width))\n",
-    "plot_grid(images, \"4-way-interpolation.jpg\", interpolation_steps)"
+    "for et in interpolate_text_embeddings(e1, e2, start=0.5, stop=0.6, num=9):\n",
+    "    image = denoise_with_text_embeddings(et)\n",
+    "    images.append(scale_output(image))\n",
+    "display(images)"
    ]
   },
   {

From 944c7d3a7b4566bdfc5adfc08894ae7339032759 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Wed, 30 Apr 2025 20:20:57 -0700
Subject: [PATCH 13/24] Section title updates and chapter 16 code changes

---
 chapter03_introduction-to-ml-frameworks.ipynb |   9 -
 chapter11_image-segmentation.ipynb            |   6 +-
 chapter13_timeseries-forecasting.ipynb        |  22 +-
 chapter14_text-classification.ipynb           |   2 +-
 ..._language-models-and-the-transformer.ipynb |   6 +-
 chapter16_text-generation.ipynb               | 304 +++++++++++-------
 chapter17_image-generation.ipynb              |   4 +-
 7 files changed, 210 insertions(+), 143 deletions(-)

diff --git a/chapter03_introduction-to-ml-frameworks.ipynb b/chapter03_introduction-to-ml-frameworks.ipynb
index 3a20c3e25c..73c446456c 100644
--- a/chapter03_introduction-to-ml-frameworks.ipynb
+++ b/chapter03_introduction-to-ml-frameworks.ipynb
@@ -1432,15 +1432,6 @@
     "import keras"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Anatomy of a neural network: understanding core Keras APIs"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
index cf44c2f249..6ee3055738 100644
--- a/chapter11_image-segmentation.ipynb
+++ b/chapter11_image-segmentation.ipynb
@@ -330,7 +330,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Downloading the model"
+    "#### Downloading the Segment Anything model"
    ]
   },
   {
@@ -449,7 +449,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Prompting SAM with a target point"
+    "#### Prompting the model with a target point"
    ]
   },
   {
@@ -575,7 +575,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Prompting SAM with a target box"
+    "#### Prompting the model with a target box"
    ]
   },
   {
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
index 7678ce6ec1..eab1cd836d 100644
--- a/chapter13_timeseries-forecasting.ipynb
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -149,7 +149,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Preparing the data"
+    "#### Preparing the data"
    ]
   },
   {
@@ -256,7 +256,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### A common-sense, non-machine-learning baseline"
+    "#### A common-sense, non-machine-learning baseline"
    ]
   },
   {
@@ -286,7 +286,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Let's try a basic machine learning model"
+    "#### Let's try a basic machine learning model"
    ]
   },
   {
@@ -348,7 +348,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Let's try a 1D convolutional model"
+    "#### Let's try a 1D convolutional model"
    ]
   },
   {
@@ -390,7 +390,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### A first recurrent baseline"
+    "### Recurrent neural networks"
    ]
   },
   {
@@ -427,7 +427,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Understanding recurrent neural networks"
+    "#### Understanding recurrent neural networks"
    ]
   },
   {
@@ -462,7 +462,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### A recurrent layer in Keras"
+    "#### A recurrent layer in Keras"
    ]
   },
   {
@@ -528,7 +528,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Getting the most out of recurrent neural networks"
+    "#### Getting the most out of recurrent neural networks"
    ]
   },
   {
@@ -537,7 +537,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Using recurrent dropout to fight overfitting"
+    "#### Using recurrent dropout to fight overfitting"
    ]
   },
   {
@@ -574,7 +574,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Stacking recurrent layers"
+    "#### Stacking recurrent layers"
    ]
   },
   {
@@ -614,7 +614,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Using bidirectional RNNs"
+    "#### Using bidirectional RNNs"
    ]
   },
   {
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
index f9de93c622..6b82a5ba51 100644
--- a/chapter14_text-classification.ipynb
+++ b/chapter14_text-classification.ipynb
@@ -1362,7 +1362,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter Summary"
+    "### Chapter summary"
    ]
   }
  ],
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
index b81cf93a9b..5756663f0f 100644
--- a/chapter15_language-models-and-the-transformer.ipynb
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -466,7 +466,7 @@
    "outputs": [],
    "source": [
     "inputs, targets, sample_weights = next(iter(train_ds))\n",
-    "print(inputs['english'].shape)"
+    "print(inputs[\"english\"].shape)"
    ]
   },
   {
@@ -477,7 +477,7 @@
    },
    "outputs": [],
    "source": [
-    "print(inputs['spanish'].shape)"
+    "print(inputs[\"spanish\"].shape)"
    ]
   },
   {
@@ -1132,7 +1132,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter Summary"
+    "### Chapter summary"
    ]
   }
  ],
diff --git a/chapter16_text-generation.ipynb b/chapter16_text-generation.ipynb
index 8d48b4aebe..6393ce3f69 100644
--- a/chapter16_text-generation.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -56,7 +56,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Training a miniature GPT"
+    "### Training a mini-GPT"
    ]
   },
   {
@@ -67,17 +67,10 @@
    },
    "outputs": [],
    "source": [
-    "import keras\n",
-    "import pathlib\n",
+    "import os\n",
     "\n",
-    "extract_dir = keras.utils.get_file(\n",
-    "    fname=\"mini-c4\",\n",
-    "    origin=(\n",
-    "        \"https://hf.co/datasets/mattdangerw/mini-c4/resolve/main/mini-c4.zip\"\n",
-    "    ),\n",
-    "    extract=True,\n",
-    ")\n",
-    "extract_dir = pathlib.Path(extract_dir) / \"mini-c4\""
+    "# Free up more GPU memory on the Jax and TensorFlow backends.\n",
+    "os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"] = \"1.00\""
    ]
   },
   {
@@ -88,7 +81,17 @@
    },
    "outputs": [],
    "source": [
-    "os.listdir(extract_dir)"
+    "import keras\n",
+    "import pathlib\n",
+    "\n",
+    "extract_dir = keras.utils.get_file(\n",
+    "    fname=\"mini-c4\",\n",
+    "    origin=(\n",
+    "        \"https://hf.co/datasets/mattdangerw/mini-c4/resolve/main/mini-c4.zip\"\n",
+    "    ),\n",
+    "    extract=True,\n",
+    ")\n",
+    "extract_dir = pathlib.Path(extract_dir) / \"mini-c4\""
    ]
   },
   {
@@ -156,29 +159,18 @@
     "sequence_length = 256\n",
     "suffix = np.array([tokenizer.token_to_id(\"<|endoftext|>\")])\n",
     "\n",
-    "files = [extract_dir / file for file in os.listdir(extract_dir)]\n",
-    "ds = tf.data.TextLineDataset(files, num_parallel_reads=32)\n",
-    "ds = ds.map(\n",
-    "    lambda x: tf.strings.regex_replace(x, r\"\\\\n\", \"\\n\"),\n",
-    "    num_parallel_calls=32,\n",
-    ")\n",
-    "ds = ds.map(tokenizer, num_parallel_calls=32)\n",
-    "ds = ds.map(lambda x: tf.concat([x, suffix], -1), num_parallel_calls=32)\n",
+    "def read_file(filename):\n",
+    "    ds = tf.data.TextLineDataset(filename)\n",
+    "    ds = ds.map(lambda x: tf.strings.regex_replace(x, r\"\\\\n\", \"\\n\"))\n",
+    "    ds = ds.map(tokenizer, num_parallel_calls=8)\n",
+    "    return ds.map(lambda x: tf.concat([x, suffix], -1))\n",
+    "\n",
+    "files = [str(file) for file in extract_dir.glob(\"*.txt\")]\n",
+    "ds = tf.data.Dataset.from_tensor_slices(files)\n",
+    "ds = ds.interleave(read_file, cycle_length=32, num_parallel_calls=32)\n",
     "ds = ds.rebatch(sequence_length + 1, drop_remainder=True)\n",
-    "ds = ds.map(lambda x: (x[:-1], x[1:]), num_parallel_calls=32)\n",
-    "ds = ds.batch(batch_size, num_parallel_calls=32).cache()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "num_batches = ds.reduce(0, lambda count, input: count + 1).numpy()\n",
-    "num_batches"
+    "ds = ds.map(lambda x: (x[:-1], x[1:]))\n",
+    "ds = ds.batch(batch_size).prefetch(8)"
    ]
   },
   {
@@ -189,10 +181,11 @@
    },
    "outputs": [],
    "source": [
+    "num_batches = 29373\n",
     "num_val_batches = 500\n",
     "num_train_batches = num_batches - num_val_batches\n",
-    "val_ds = ds.take(500)\n",
-    "train_ds = ds.skip(500).repeat()"
+    "val_ds = ds.take(num_val_batches).repeat()\n",
+    "train_ds = ds.skip(num_val_batches).repeat()"
    ]
   },
   {
@@ -276,6 +269,8 @@
    },
    "outputs": [],
    "source": [
+    "keras.config.set_dtype_policy(\"mixed_float16\")\n",
+    "\n",
     "vocab_size = tokenizer.vocabulary_size()\n",
     "hidden_dim = 512\n",
     "intermediate_dim = 2056\n",
@@ -311,7 +306,7 @@
    "source": [
     "class WarmupSchedule(keras.optimizers.schedules.LearningRateSchedule):\n",
     "    def __init__(self):\n",
-    "        self.rate = 1e-4\n",
+    "        self.rate = 2e-4\n",
     "        self.warmup_steps = 1_000.0\n",
     "\n",
     "    def __call__(self, step):\n",
@@ -347,9 +342,9 @@
    },
    "outputs": [],
    "source": [
-    "num_passes = 2\n",
-    "num_epochs = 16\n",
-    "steps_per_epoch = num_train_batches * num_passes // num_epochs\n",
+    "num_epochs = 8\n",
+    "steps_per_epoch = num_train_batches // num_epochs\n",
+    "validation_steps = num_val_batches\n",
     "\n",
     "mini_gpt.compile(\n",
     "    optimizer=keras.optimizers.Adam(schedule),\n",
@@ -361,6 +356,7 @@
     "    validation_data=val_ds,\n",
     "    epochs=num_epochs,\n",
     "    steps_per_epoch=steps_per_epoch,\n",
+    "    validation_steps=validation_steps,\n",
     ")"
    ]
   },
@@ -599,7 +595,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Prompting LLMs"
+    "#### Text generation with the Gemma model"
    ]
   },
   {
@@ -610,7 +606,23 @@
    },
    "outputs": [],
    "source": [
-    "gemma_lm = keras_hub.models.CausalLM.from_preset(\"gemma_2b_en\")"
+    "import kagglehub\n",
+    "\n",
+    "kagglehub.login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "gemma_lm = keras_hub.models.CausalLM.from_preset(\n",
+    "    \"gemma3_1b\",\n",
+    "    dtype=\"float32\",\n",
+    ")"
    ]
   },
   {
@@ -633,7 +645,7 @@
    "outputs": [],
    "source": [
     "gemma_lm.compile(sampler=\"greedy\")\n",
-    "gemma_lm.generate(\"A piece of advice\", max_length=64)"
+    "gemma_lm.generate(\"A piece of advice\", max_length=40)"
    ]
   },
   {
@@ -644,7 +656,7 @@
    },
    "outputs": [],
    "source": [
-    "gemma_lm.generate(\"How can I make brownies?\", max_length=64)"
+    "gemma_lm.generate(\"How can I make brownies?\", max_length=40)"
    ]
   },
   {
@@ -658,7 +670,7 @@
     "gemma_lm.generate(\n",
     "    \"The following brownie recipe is easy to make in just a few \"\n",
     "    \"steps.\\n\\nYou can start by\",\n",
-    "    max_length=64,\n",
+    "    max_length=40,\n",
     ")"
    ]
   },
@@ -671,8 +683,8 @@
    "outputs": [],
    "source": [
     "gemma_lm.generate(\n",
-    "    \"Tell me about the 61st president of the United States.\",\n",
-    "    max_length=64,\n",
+    "    \"Tell me about the 542nd president of the United States.\",\n",
+    "    max_length=40,\n",
     ")"
    ]
   },
@@ -682,7 +694,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Instruction fine-tuning an LLM"
+    "#### Instruction fine-tuning"
    ]
   },
   {
@@ -695,10 +707,8 @@
    "source": [
     "import json\n",
     "\n",
-    "TEMPLATE = \"\"\"\"[instruction]\n",
-    "{instruction}[end]\n",
-    "[reponse]\n",
-    "{response}[end]\"\"\"\n",
+    "PROMPT_TEMPLATE = \"\"\"[instruction]\\n{}[end]\\n[response]\\n\"\"\"\n",
+    "RESPONSE_TEMPLATE = \"\"\"{}[end]\"\"\"\n",
     "\n",
     "dataset_path = keras.utils.get_file(\n",
     "    origin=(\n",
@@ -706,13 +716,25 @@
     "        \"resolve/main/databricks-dolly-15k.jsonl\"\n",
     "    ),\n",
     ")\n",
-    "data = []\n",
+    "data = {\"prompts\": [], \"responses\": []}\n",
     "with open(dataset_path) as file:\n",
     "    for line in file:\n",
     "        features = json.loads(line)\n",
     "        if features[\"context\"]:\n",
     "            continue\n",
-    "        data.append(TEMPLATE.format(**features))"
+    "        data[\"prompts\"].append(PROMPT_TEMPLATE.format(features[\"instruction\"]))\n",
+    "        data[\"responses\"].append(RESPONSE_TEMPLATE.format(features[\"response\"]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "data[\"prompts\"][0]"
    ]
   },
   {
@@ -723,7 +745,7 @@
    },
    "outputs": [],
    "source": [
-    "data[0]"
+    "data[\"responses\"][0]"
    ]
   },
   {
@@ -804,7 +826,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Low-Rank Adaptation (LoRA) fine-tuning"
+    "#### Low-Rank Adaptation (LoRA)"
    ]
   },
   {
@@ -869,7 +891,7 @@
    "outputs": [],
    "source": [
     "gemma_lm.generate(\n",
-    "    \"[instruction]\\nWho is the 44th president of the United States?[end]\\n\"\n",
+    "    \"[instruction]\\nWhat is a proper noun?[end]\\n\"\n",
     "    \"[response]\\n\",\n",
     "    max_length=512,\n",
     ")"
@@ -884,7 +906,7 @@
    "outputs": [],
    "source": [
     "gemma_lm.generate(\n",
-    "    \"[instruction]\\nWho is the 61st president of the United States?[end]\\n\"\n",
+    "    \"[instruction]\\nWho is the 542nd president of the United States?[end]\\n\"\n",
     "    \"[response]\\n\",\n",
     "    max_length=512,\n",
     ")"
@@ -896,7 +918,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Reinforcement Learning with Human Feedback"
+    "### Going further with LLMs"
    ]
   },
   {
@@ -905,25 +927,47 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Reinforcement Learning with Chain of Thought Reasoning"
+    "#### Reinforcement Learning with Human Feedback (RLHF)"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "### Beyond text data"
+    "# NOTE: If you are running on the free tier Colab GPUs, you will need to restart\n",
+    "# your runtime and run the notebook from here to free up memory for this\n",
+    "# 4 billion parameter model.\n",
+    "import os\n",
+    "\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n",
+    "# Free up more GPU memory on the Jax and TensorFlow backends.\n",
+    "os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"] = \"1.00\"\n",
+    "\n",
+    "import keras\n",
+    "import keras_hub\n",
+    "import kagglehub\n",
+    "import numpy as np\n",
+    "\n",
+    "kagglehub.login()"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "#### Extending an LLM for image input"
+    "gemma_lm = keras_hub.models.CausalLM.from_preset(\n",
+    "    \"gemma3_instruct_4b\",\n",
+    "    dtype=\"bfloat16\",\n",
+    ")\n",
+    "gemma_lm.preprocessor.sequence_length = 512"
    ]
   },
   {
@@ -934,18 +978,10 @@
    },
    "outputs": [],
    "source": [
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "image_url = (\n",
-    "    \"https://github.com/mattdangerw/keras-nlp-scripts/\"\n",
-    "    \"blob/main/learned-python.png?raw=true\"\n",
-    ")\n",
-    "image_path = keras.utils.get_file(origin=image_url)\n",
-    "\n",
-    "image = keras.utils.load_img(image_path)\n",
-    "plt.axis(\"off\")\n",
-    "plt.imshow(image)\n",
-    "plt.savefig(\"pali-gemma-test-image.png\", dpi=300)"
+    "PROMPT_TEMPLATE = \"\"\"<start_of_turn>user\n",
+    "{}<end_of_turn>\n",
+    "<start_of_turn>model\n",
+    "\"\"\""
    ]
   },
   {
@@ -956,7 +992,17 @@
    },
    "outputs": [],
    "source": [
-    "pali_gemma_lm = keras_hub.models.CausalLM.from_preset(\"pali_gemma_3b_mix_448\")"
+    "prompt = \"Who is the 542nd president of the United States?\"\n",
+    "gemma_lm.generate(PROMPT_TEMPLATE.format(prompt))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Multi-modal LLMs"
    ]
   },
   {
@@ -967,7 +1013,7 @@
    },
    "outputs": [],
    "source": [
-    "pali_gemma_lm.summary(line_length=80)"
+    "gemma_lm.summary(line_length=80)"
    ]
   },
   {
@@ -978,10 +1024,18 @@
    },
    "outputs": [],
    "source": [
-    "pali_gemma_lm.generate({\n",
-    "    \"images\": image,\n",
-    "    \"prompts\": \"cap en\\n\",\n",
-    "})"
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "image_url = (\n",
+    "    \"https://github.com/mattdangerw/keras-nlp-scripts/\"\n",
+    "    \"blob/main/learned-python.png?raw=true\"\n",
+    ")\n",
+    "image_path = keras.utils.get_file(origin=image_url)\n",
+    "\n",
+    "image = np.array(keras.utils.load_img(image_path))\n",
+    "plt.axis(\"off\")\n",
+    "plt.imshow(image)\n",
+    "plt.savefig(\"pali-gemma-test-image.png\", dpi=300)"
    ]
   },
   {
@@ -992,9 +1046,12 @@
    },
    "outputs": [],
    "source": [
-    "pali_gemma_lm.generate({\n",
-    "    \"images\": image,\n",
-    "    \"prompts\": \"answer en where is the snake doing?\\n\",\n",
+    "gemma_lm.preprocessor.max_images_per_prompt = 1\n",
+    "gemma_lm.compile(sampler=\"greedy\")\n",
+    "prompt = \"What is going on in this image? Be concise!<start_of_image>\"\n",
+    "gemma_lm.generate({\n",
+    "    \"prompts\": PROMPT_TEMPLATE.format(prompt),\n",
+    "    \"images\": [image],\n",
     "})"
    ]
   },
@@ -1006,12 +1063,40 @@
    },
    "outputs": [],
    "source": [
-    "pali_gemma_lm.generate({\n",
-    "    \"images\": image,\n",
-    "    \"prompts\": \"detect glasses\\n\",\n",
+    "prompt = \"What is the snake wearing?<start_of_image>\"\n",
+    "gemma_lm.generate({\n",
+    "    \"prompts\": PROMPT_TEMPLATE.format(prompt),\n",
+    "    \"images\": [image],\n",
     "})"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Foundation models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Retrieval Augmented Generation (RAG)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### \"Thinking\" models and Agents"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1020,42 +1105,33 @@
    },
    "outputs": [],
    "source": [
-    "import re\n",
-    "import matplotlib.patches as patches\n",
+    "gemma_lm.compile(sampler=\"random\")\n",
     "\n",
-    "response = \"<loc0280><loc0371><loc0380><loc0685> glasses\"\n",
-    "box = [int(d) for d in re.findall(r\"\\d+\", response)]\n",
-    "scale = image.shape[0] / 1024.0\n",
-    "y1, x1, y2, x2 = (c * scale for c in box)\n",
-    "width, height = x2 - x1, y2 - y1\n",
-    "\n",
-    "fig, ax = plt.subplots()\n",
-    "ax.imshow(image)\n",
-    "ax.add_patch(\n",
-    "    patches.Rectangle(\n",
-    "        (x1, y1), width, height, linewidth=1, edgecolor=\"r\", facecolor=\"none\"\n",
-    "    )\n",
-    ")\n",
-    "plt.axis(\"off\")\n",
-    "plt.savefig(\"pali-gemma-detect-box.png\", dpi=300, bbox_inches=\"tight\")"
+    "prompt = \"\"\"Judy wrote a 2-page letter to 3 friends twice a week for 3 months.\n",
+    "How many letters did she write?\n",
+    "Be brief, and add \"ANSWER:\" before your final answer.\"\"\""
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "#### Retrieval Augmented Generation"
+    "gemma_lm.generate(PROMPT_TEMPLATE.format(prompt))"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "#### Foundation models"
+    "gemma_lm.generate(PROMPT_TEMPLATE.format(prompt))"
    ]
   },
   {
@@ -1073,7 +1149,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter Summary"
+    "### Chapter summary"
    ]
   }
  ],
diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index ee00309514..afb1fc671a 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -333,7 +333,7 @@
     "for batch in dataset:\n",
     "    img = batch.numpy()[0]\n",
     "    break\n",
-    "plt.imshow(img)"
+    "plt.imshow(img.astype(\"uint8\"))"
    ]
   },
   {
@@ -823,7 +823,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Wrapping up"
+    "### Chapter summary"
    ]
   }
  ],

From 255354b4f9ddff7145f4028f324f4f0a488f9466 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Wed, 4 Jun 2025 16:25:43 -0700
Subject: [PATCH 14/24] Use stable keras and keras-hub, bug fixes

---
 chapter02_mathematical-building-blocks.ipynb  |  4 +-
 chapter03_introduction-to-ml-frameworks.ipynb | 26 ++++---
 chapter04_classification-and-regression.ipynb | 28 +++----
 chapter05_fundamentals-of-ml.ipynb            | 25 +++---
 chapter07_deep-dive-keras.ipynb               | 36 ++++++---
 chapter08_image-classification.ipynb          |  2 +-
 chapter09_convnet-architecture-patterns.ipynb |  4 +-
 ...r10_interpreting-what-convnets-learn.ipynb |  2 +-
 chapter11_image-segmentation.ipynb            | 16 ++--
 chapter12_object-detection.ipynb              | 12 +--
 chapter13_timeseries-forecasting.ipynb        |  4 +-
 chapter14_text-classification.ipynb           |  4 +-
 ..._language-models-and-the-transformer.ipynb |  8 +-
 chapter16_text-generation.ipynb               | 78 +++++++++----------
 chapter17_image-generation.ipynb              | 20 ++++-
 ...18_best-practices-for-the-real-world.ipynb |  4 +-
 16 files changed, 155 insertions(+), 118 deletions(-)

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index de84db9738..a0ad2070d5 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -430,7 +430,7 @@
     "\n",
     "digit = train_images[4]\n",
     "plt.imshow(digit, cmap=plt.cm.binary)\n",
-    "plt.savefig(\"The-fourth-sample-in-our-dataset.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
diff --git a/chapter03_introduction-to-ml-frameworks.ipynb b/chapter03_introduction-to-ml-frameworks.ipynb
index 73c446456c..523816aec4 100644
--- a/chapter03_introduction-to-ml-frameworks.ipynb
+++ b/chapter03_introduction-to-ml-frameworks.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -159,7 +159,7 @@
     "colab_type": "text"
    },
    "source": [
-    "###### Tensor assignment and the Variable class"
+    "###### Tensor assignment and the `Variable` class"
    ]
   },
   {
@@ -413,7 +413,7 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "plt.scatter(inputs[:, 0], inputs[:, 1], c=targets[:, 0])\n",
-    "plt.savefig(\"linear_model_inputs.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -499,7 +499,7 @@
    "source": [
     "predictions = model(inputs, W, b)\n",
     "plt.scatter(inputs[:, 0], inputs[:, 1], c=predictions[:, 0] > 0.5)\n",
-    "plt.savefig(\"linear_model_predictions.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -684,7 +684,7 @@
     "c = torch.sqrt(a)\n",
     "d = b + c\n",
     "e = torch.matmul(a, b)\n",
-    "f = torch.cat((a, b), axis=0)"
+    "f = torch.cat((a, b), dim=0)"
    ]
   },
   {
@@ -825,7 +825,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Packaging state and computation with Modules"
+    "##### Packaging state and computation with the `Module` class"
    ]
   },
   {
@@ -914,7 +914,7 @@
    },
    "outputs": [],
    "source": [
-    "compiled_model = model.compile()"
+    "compiled_model = torch.compile(model)"
    ]
   },
   {
@@ -1006,7 +1006,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Random tensors"
+    "#### Random number generation in JAX"
    ]
   },
   {
@@ -1060,7 +1060,7 @@
    "source": [
     "import jax\n",
     "\n",
-    "seed_key = jax.random.PRNGKey(1337)"
+    "seed_key = jax.random.key(1337)"
    ]
   },
   {
@@ -1071,7 +1071,7 @@
    },
    "outputs": [],
    "source": [
-    "seed_key = jax.random.PRNGKey(0)\n",
+    "seed_key = jax.random.key(0)\n",
     "jax.random.normal(seed_key, shape=(3,))"
    ]
   },
@@ -1083,7 +1083,7 @@
    },
    "outputs": [],
    "source": [
-    "seed_key = jax.random.PRNGKey(123)\n",
+    "seed_key = jax.random.key(123)\n",
     "jax.random.normal(seed_key, shape=(3,))"
    ]
   },
@@ -1106,7 +1106,7 @@
    },
    "outputs": [],
    "source": [
-    "seed_key = jax.random.PRNGKey(123)\n",
+    "seed_key = jax.random.key(123)\n",
     "jax.random.normal(seed_key, shape=(3,))"
    ]
   },
@@ -1353,6 +1353,8 @@
    },
    "outputs": [],
    "source": [
+    "learning_rate = 0.1\n",
+    "\n",
     "@jax.jit\n",
     "def training_step(inputs, targets, W, b):\n",
     "    loss, grads = grad_fn((W, b), inputs, targets)\n",
diff --git a/chapter04_classification-and-regression.ipynb b/chapter04_classification-and-regression.ipynb
index 8bfd94c95b..b4c0d81ec0 100644
--- a/chapter04_classification-and-regression.ipynb
+++ b/chapter04_classification-and-regression.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -172,8 +172,8 @@
    },
    "outputs": [],
    "source": [
-    "y_train = np.asarray(train_labels).astype(\"float32\")\n",
-    "y_test = np.asarray(test_labels).astype(\"float32\")"
+    "y_train = train_labels.astype(\"float32\")\n",
+    "y_test = test_labels.astype(\"float32\")"
    ]
   },
   {
@@ -310,7 +310,7 @@
     "plt.xticks(epochs)\n",
     "plt.ylabel(\"Loss\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"imdb_loss_plot.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -331,7 +331,7 @@
     "plt.xticks(epochs)\n",
     "plt.ylabel(\"Accuracy\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"imdb_accuracy_plot.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -654,7 +654,7 @@
     "plt.xticks(epochs)\n",
     "plt.ylabel(\"Loss\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"reuters_loss_plot.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -675,7 +675,7 @@
     "plt.xticks(epochs)\n",
     "plt.ylabel(\"Accuracy\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"reuters_accuracy_plot.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -696,7 +696,7 @@
     "plt.xticks(epochs)\n",
     "plt.ylabel(\"Top-3 accuracy\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"reuters_top_3_accuracy_plot.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -750,7 +750,7 @@
     "import copy\n",
     "test_labels_copy = copy.copy(test_labels)\n",
     "np.random.shuffle(test_labels_copy)\n",
-    "hits_array = np.array(test_labels)\n",
+    "hits_array = np.array(test_labels == test_labels_copy)\n",
     "hits_array.mean()"
    ]
   },
@@ -824,8 +824,8 @@
    },
    "outputs": [],
    "source": [
-    "y_train = np.array(train_labels)\n",
-    "y_test = np.array(test_labels)"
+    "y_train = train_labels\n",
+    "y_test = test_labels"
    ]
   },
   {
@@ -1163,7 +1163,7 @@
     "plt.plot(epochs, average_mae_history)\n",
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Validation MAE\")\n",
-    "plt.savefig(\"california_housing_validation_mae_plot.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -1179,7 +1179,7 @@
     "plt.plot(epochs, truncated_mae_history)\n",
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Validation MAE\")\n",
-    "plt.savefig(\"california_housing_validation_mae_plot_zoomed.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -1225,7 +1225,7 @@
    },
    "outputs": [],
    "source": [
-    "predictions = model.predict(test_data)\n",
+    "predictions = model.predict(x_test)\n",
     "predictions[0]"
    ]
   },
diff --git a/chapter05_fundamentals-of-ml.ipynb b/chapter05_fundamentals-of-ml.ipynb
index 9e72d01825..2e152b2147 100644
--- a/chapter05_fundamentals-of-ml.ipynb
+++ b/chapter05_fundamentals-of-ml.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -175,7 +175,7 @@
     "plt.xticks(epochs)\n",
     "plt.ylabel(\"Accuracy\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"mnist_with_added_noise_channels_or_zeros_channels.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -445,7 +445,7 @@
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Loss\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"effect_of_insufficient_model_capacity_on_val_loss.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -485,14 +485,14 @@
    },
    "outputs": [],
    "source": [
-    "val_loss = history_small_model.history[\"val_loss\"]\n",
+    "val_loss = history_large_model.history[\"val_loss\"]\n",
     "epochs = range(1, 21)\n",
     "plt.plot(epochs, val_loss, \"b-\", label=\"Validation loss\")\n",
     "plt.title(\"Validation loss for a model with appropriate capacity\")\n",
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Loss\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"effect_of_correct_model_capacity_on_val_loss.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -507,6 +507,7 @@
     "    [\n",
     "        layers.Dense(2048, activation=\"relu\"),\n",
     "        layers.Dense(2048, activation=\"relu\"),\n",
+    "        layers.Dense(2048, activation=\"relu\"),\n",
     "        layers.Dense(10, activation=\"softmax\"),\n",
     "    ]\n",
     ")\n",
@@ -519,7 +520,7 @@
     "    train_images,\n",
     "    train_labels,\n",
     "    epochs=20,\n",
-    "    batch_size=128,\n",
+    "    batch_size=32,\n",
     "    validation_split=0.2,\n",
     ")"
    ]
@@ -539,7 +540,7 @@
     "plt.xlabel(\"Epochs\")\n",
     "plt.ylabel(\"Loss\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"effect_of_excessive_model_capacity_on_val_loss.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -694,7 +695,7 @@
     "plt.ylabel(\"Loss\")\n",
     "plt.xticks(epochs)\n",
     "plt.legend()\n",
-    "plt.savefig(\"original_model_vs_smaller_model_imdb.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -735,7 +736,7 @@
    "outputs": [],
    "source": [
     "original_val_loss = history_original.history[\"val_loss\"]\n",
-    "larger_model_val_loss = history_smaller_model.history[\"val_loss\"]\n",
+    "larger_model_val_loss = history_larger_model.history[\"val_loss\"]\n",
     "epochs = range(1, 21)\n",
     "plt.plot(\n",
     "    epochs,\n",
@@ -754,7 +755,7 @@
     "plt.ylabel(\"Loss\")\n",
     "plt.xticks(epochs)\n",
     "plt.legend()\n",
-    "plt.savefig(\"original_model_vs_larger_model_imdb.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -827,7 +828,7 @@
     "plt.ylabel(\"Loss\")\n",
     "plt.xticks(epochs)\n",
     "plt.legend()\n",
-    "plt.savefig(\"original_model_vs_l2_regularized_model_imdb.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -914,7 +915,7 @@
     "plt.ylabel(\"Loss\")\n",
     "plt.xticks(epochs)\n",
     "plt.legend()\n",
-    "plt.savefig(\"original_model_vs_dropout_regularized_model_imdb.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
diff --git a/chapter07_deep-dive-keras.ipynb b/chapter07_deep-dive-keras.ipynb
index 04cd33c47c..855fd0e619 100644
--- a/chapter07_deep-dive-keras.ipynb
+++ b/chapter07_deep-dive-keras.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -351,11 +351,11 @@
     "tags_data = np.random.randint(0, 2, size=(num_samples, num_tags))\n",
     "\n",
     "priority_data = np.random.random(size=(num_samples, 1))\n",
-    "department_data = np.random.randint(0, 2, size=(num_samples, num_departments))\n",
+    "department_data = np.random.randint(0, num_departments, size=(num_samples, 1))\n",
     "\n",
     "model.compile(\n",
     "    optimizer=\"adam\",\n",
-    "    loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
+    "    loss=[\"mean_squared_error\", \"sparse_categorical_crossentropy\"],\n",
     "    metrics=[[\"mean_absolute_error\"], [\"accuracy\"]],\n",
     ")\n",
     "model.fit(\n",
@@ -383,7 +383,7 @@
     "    optimizer=\"adam\",\n",
     "    loss={\n",
     "        \"priority\": \"mean_squared_error\",\n",
-    "        \"department\": \"categorical_crossentropy\",\n",
+    "        \"department\": \"sparse_categorical_crossentropy\",\n",
     "    },\n",
     "    metrics={\n",
     "        \"priority\": [\"mean_absolute_error\"],\n",
@@ -596,7 +596,7 @@
    "source": [
     "model.compile(\n",
     "    optimizer=\"adam\",\n",
-    "    loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n",
+    "    loss=[\"mean_squared_error\", \"sparse_categorical_crossentropy\"],\n",
     "    metrics=[[\"mean_absolute_error\"], [\"accuracy\"]],\n",
     ")\n",
     "model.fit(\n",
@@ -781,7 +781,7 @@
     "\n",
     "    def reset_state(self):\n",
     "        self.mse_sum.assign(0.)\n",
-    "        self.total_samples.assign(0)"
+    "        self.total_samples.assign(0.)"
    ]
   },
   {
@@ -1192,7 +1192,7 @@
     "    trainable_variables, non_trainable_variables, inputs, targets\n",
     "):\n",
     "    outputs, non_trainable_variables = model.stateless_call(\n",
-    "        trainable_variables, non_trainable_variables, inputs\n",
+    "        trainable_variables, non_trainable_variables, inputs, training=True\n",
     "    )\n",
     "    loss = loss_fn(targets, outputs)\n",
     "    return loss, non_trainable_variables"
@@ -1370,8 +1370,8 @@
     "        with tf.GradientTape() as tape:\n",
     "            predictions = self(inputs, training=True)\n",
     "            loss = loss_fn(targets, predictions)\n",
-    "        gradients = tape.gradient(loss, model.trainable_weights)\n",
-    "        self.optimizer.apply(gradients, model.trainable_weights)\n",
+    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
+    "        self.optimizer.apply(gradients, self.trainable_weights)\n",
     "\n",
     "        loss_tracker.update_state(loss)\n",
     "        return {\"loss\": loss_tracker.result()}\n",
@@ -1626,8 +1626,8 @@
     "            predictions = self(inputs, training=True)\n",
     "            loss = self.compute_loss(y=targets, y_pred=predictions)\n",
     "\n",
-    "        gradients = tape.gradient(loss, model.trainable_weights)\n",
-    "        self.optimizer.apply(gradients, model.trainable_weights)\n",
+    "        gradients = tape.gradient(loss, self.trainable_weights)\n",
+    "        self.optimizer.apply(gradients, self.trainable_weights)\n",
     "\n",
     "        for metric in self.metrics:\n",
     "            if metric.name == \"loss\":\n",
@@ -1758,6 +1758,20 @@
    },
    "outputs": [],
    "source": [
+    "keras.config.set_backend(\"jax\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras import layers\n",
+    "\n",
     "class CustomModel(keras.Model):\n",
     "    def compute_loss_and_updates(\n",
     "        self,\n",
diff --git a/chapter08_image-classification.ipynb b/chapter08_image-classification.ipynb
index a89cc072bb..40320aeaf5 100644
--- a/chapter08_image-classification.ipynb
+++ b/chapter08_image-classification.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
diff --git a/chapter09_convnet-architecture-patterns.ipynb b/chapter09_convnet-architecture-patterns.ipynb
index 97656034a1..60c3a9ce01 100644
--- a/chapter09_convnet-architecture-patterns.ipynb
+++ b/chapter09_convnet-architecture-patterns.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -320,7 +320,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Chapter summary"
+    "### Chapter summary"
    ]
   }
  ],
diff --git a/chapter10_interpreting-what-convnets-learn.ipynb b/chapter10_interpreting-what-convnets-learn.ipynb
index 3bc8da4286..64d98c05a5 100644
--- a/chapter10_interpreting-what-convnets-learn.ipynb
+++ b/chapter10_interpreting-what-convnets-learn.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
index 6ee3055738..607ab9b026 100644
--- a/chapter11_image-segmentation.ipynb
+++ b/chapter11_image-segmentation.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -481,9 +481,9 @@
    "source": [
     "outputs = model.predict(\n",
     "    {\n",
-    "        \"images\": np.expand_dims(image, axis=0),\n",
-    "        \"points\": np.expand_dims(input_point, axis=0),\n",
-    "        \"labels\": np.expand_dims(input_label, axis=0),\n",
+    "        \"images\": ops.expand_dims(image, axis=0),\n",
+    "        \"points\": ops.expand_dims(input_point, axis=0),\n",
+    "        \"labels\": ops.expand_dims(input_label, axis=0),\n",
     "    }\n",
     ")"
    ]
@@ -508,7 +508,7 @@
    "outputs": [],
    "source": [
     "def get_mask(sam_outputs, index=0):\n",
-    "    mask = outputs[\"masks\"][0][index]\n",
+    "    mask = sam_outputs[\"masks\"][0][index]\n",
     "    mask = np.expand_dims(mask, axis=-1)\n",
     "    mask = resize_and_pad(mask)\n",
     "    return ops.convert_to_numpy(mask) > 0.0\n",
@@ -535,9 +535,9 @@
     "\n",
     "outputs = model.predict(\n",
     "    {\n",
-    "        \"images\": np.expand_dims(image, axis=0),\n",
-    "        \"points\": np.expand_dims(input_point, axis=0),\n",
-    "        \"labels\": np.expand_dims(input_label, axis=0),\n",
+    "        \"images\": ops.expand_dims(image, axis=0),\n",
+    "        \"points\": ops.expand_dims(input_point, axis=0),\n",
+    "        \"labels\": ops.expand_dims(input_label, axis=0),\n",
     "    }\n",
     ")\n",
     "mask = get_mask(outputs, index=0)\n",
diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index 765e84149a..1ce5479941 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -585,8 +585,9 @@
    "outputs": [],
    "source": [
     "url = (\n",
-    "    \"https://upload.wikimedia.org/wikipedia/commons/7/7d/\"\n",
-    "    \"A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg\"\n",
+    "    \"https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/\"\n",
+    "    \"A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg/\"\n",
+    "    \"1280px-A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg\"\n",
     ")\n",
     "path = keras.utils.get_file(origin=url)\n",
     "image = np.array([keras.utils.load_img(path)])"
@@ -637,9 +638,10 @@
    },
    "outputs": [],
    "source": [
-    "ig, ax = plt.subplots(dpi=300)\n",
+    "fig, ax = plt.subplots(dpi=300)\n",
     "draw_image(ax, path)\n",
-    "for i in range(predictions[\"num_detections\"][0]):\n",
+    "num_detections = predictions[\"num_detections\"][0]\n",
+    "for i in range(num_detections):\n",
     "    box = predictions[\"boxes\"][0][i]\n",
     "    label = predictions[\"labels\"][0][i]\n",
     "    label_name = keras_hub.utils.coco_id_to_name(label)\n",
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
index eab1cd836d..0413202fc4 100644
--- a/chapter13_timeseries-forecasting.ipynb
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -653,7 +653,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Chapter summary"
+    "### Chapter summary"
    ]
   }
  ],
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
index 6b82a5ba51..7f0e90c7b4 100644
--- a/chapter14_text-classification.ipynb
+++ b/chapter14_text-classification.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -829,7 +829,7 @@
     "plt.plot(epochs, val_accuracy, \"b\", label=\"Validation accuracy\")\n",
     "plt.title(\"Training and validation accuracy\")\n",
     "plt.legend()\n",
-    "plt.savefig(\"bag-of-words-acc.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
index 5756663f0f..dc2a065206 100644
--- a/chapter15_language-models-and-the-transformer.ipynb
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -266,7 +266,7 @@
     "state = keras.ops.zeros(shape=(1, hidden_dim))\n",
     "for token_id in input_ids:\n",
     "    inputs = keras.ops.expand_dims([token_id], axis=0)\n",
-    "    predictions, state = generation_model((inputs, state))"
+    "    predictions, state = generation_model.predict((inputs, state), verbose=0)"
    ]
   },
   {
@@ -282,10 +282,10 @@
     "generated_ids = []\n",
     "max_length = 250\n",
     "for i in range(max_length):\n",
-    "    next_char = int(np.array(keras.ops.argmax(predictions, axis=-1)[0]))\n",
+    "    next_char = int(np.argmax(predictions, axis=-1)[0])\n",
     "    generated_ids.append(next_char)\n",
     "    inputs = keras.ops.expand_dims([next_char], axis=0)\n",
-    "    predictions, state = generation_model((inputs, state))"
+    "    predictions, state = generation_model.predict((inputs, state), verbose=0)"
    ]
   },
   {
diff --git a/chapter16_text-generation.ipynb b/chapter16_text-generation.ipynb
index 6393ce3f69..8f6f0c94cf 100644
--- a/chapter16_text-generation.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -32,15 +32,6 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### The potential of generative modeling"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -103,7 +94,7 @@
    "outputs": [],
    "source": [
     "with open(extract_dir / \"shard0.txt\", \"r\") as f:\n",
-    "   print(f.readline().replace(\"\\\\n\", \"\\n\")[:100])"
+    "    print(f.readline().replace(\"\\\\n\", \"\\n\")[:100])"
    ]
   },
   {
@@ -327,11 +318,11 @@
     "\n",
     "schedule = WarmupSchedule()\n",
     "x = range(0, 5_000, 100)\n",
-    "y = [schedule(step) for step in x]\n",
+    "y = [ops.convert_to_numpy(schedule(step)) for step in x]\n",
     "plt.plot(x, y)\n",
     "plt.xlabel(\"Train Step\")\n",
     "plt.ylabel(\"Learning Rate\")\n",
-    "plt.savefig(\"learning-rate-warmup.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -378,11 +369,11 @@
    "outputs": [],
    "source": [
     "def generate(prompt, max_length=64):\n",
-    "    tokens = list(tokenizer(prompt))\n",
+    "    tokens = list(ops.convert_to_numpy(tokenizer(prompt)))\n",
     "    prompt_length = len(tokens)\n",
     "    for _ in range(max_length - prompt_length):\n",
-    "        prediction = mini_gpt(np.array([tokens]))\n",
-    "        prediction = prediction[0, -1]\n",
+    "        prediction = mini_gpt(ops.convert_to_numpy([tokens]))\n",
+    "        prediction = ops.convert_to_numpy(prediction[0, -1])\n",
     "        tokens.append(np.argmax(prediction).item())\n",
     "    return tokenizer.detokenize(tokens)"
    ]
@@ -408,7 +399,7 @@
    "outputs": [],
    "source": [
     "def compiled_generate(prompt, max_length=64):\n",
-    "    tokens = list(tokenizer(prompt))\n",
+    "    tokens = list(ops.convert_to_numpy(tokenizer(prompt)))\n",
     "    prompt_length = len(tokens)\n",
     "    tokens = tokens + [0] * (max_length - prompt_length)\n",
     "    for i in range(prompt_length, max_length):\n",
@@ -449,13 +440,13 @@
    "outputs": [],
    "source": [
     "def compiled_generate(prompt, sample_fn, max_length=64):\n",
-    "    tokens = list(tokenizer(prompt))\n",
+    "    tokens = list(ops.convert_to_numpy(tokenizer(prompt)))\n",
     "    prompt_length = len(tokens)\n",
     "    tokens = tokens + [0] * (max_length - prompt_length)\n",
     "    for i in range(prompt_length, max_length):\n",
     "        prediction = mini_gpt.predict(np.array([tokens]), verbose=0)\n",
     "        prediction = prediction[0, i - 1]\n",
-    "        next_token = sample_fn(prediction)\n",
+    "        next_token = ops.convert_to_numpy(sample_fn(prediction))\n",
     "        tokens[i] = np.array(next_token).item()\n",
     "    return tokenizer.detokenize(tokens)"
    ]
@@ -707,7 +698,7 @@
    "source": [
     "import json\n",
     "\n",
-    "PROMPT_TEMPLATE = \"\"\"\"[instruction]\\n{}[end]\\n[reponse]\\n\"\"\"\n",
+    "PROMPT_TEMPLATE = \"\"\"\"[instruction]\\n{}[end]\\n[response]\\n\"\"\"\n",
     "RESPONSE_TEMPLATE = \"\"\"{}[end]\"\"\"\n",
     "\n",
     "dataset_path = keras.utils.get_file(\n",
@@ -756,7 +747,7 @@
    },
    "outputs": [],
    "source": [
-    "ds = tf.data.Dataset.from_tensor_slices(data).shuffle(2000).batch(8)\n",
+    "ds = tf.data.Dataset.from_tensor_slices(data).shuffle(2000).batch(2)\n",
     "val_ds = ds.take(100)\n",
     "train_ds = ds.skip(100)"
    ]
@@ -930,6 +921,15 @@
     "#### Reinforcement Learning with Human Feedback (RLHF)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Using a chatbot trained with RLHF"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -966,8 +966,7 @@
     "gemma_lm = keras_hub.models.CausalLM.from_preset(\n",
     "    \"gemma3_instruct_4b\",\n",
     "    dtype=\"bfloat16\",\n",
-    ")\n",
-    "gemma_lm.preprocessor.sequence_length = 512"
+    ")"
    ]
   },
   {
@@ -992,28 +991,29 @@
    },
    "outputs": [],
    "source": [
-    "prompt = \"Who is the 542nd president of the United States?\"\n",
-    "gemma_lm.generate(PROMPT_TEMPLATE.format(prompt))"
+    "prompt = \"Why can't you assign values in Jax tensors? Be brief!\"\n",
+    "gemma_lm.generate(PROMPT_TEMPLATE.format(prompt), max_length=512)"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 0,
    "metadata": {
-    "colab_type": "text"
+    "colab_type": "code"
    },
+   "outputs": [],
    "source": [
-    "#### Multi-modal LLMs"
+    "prompt = \"Who is the 542nd president of the United States?\"\n",
+    "gemma_lm.generate(PROMPT_TEMPLATE.format(prompt), max_length=512)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 0,
+   "cell_type": "markdown",
    "metadata": {
-    "colab_type": "code"
+    "colab_type": "text"
    },
-   "outputs": [],
    "source": [
-    "gemma_lm.summary(line_length=80)"
+    "#### Multimodal LLMs"
    ]
   },
   {
@@ -1035,7 +1035,7 @@
     "image = np.array(keras.utils.load_img(image_path))\n",
     "plt.axis(\"off\")\n",
     "plt.imshow(image)\n",
-    "plt.savefig(\"pali-gemma-test-image.png\", dpi=300)"
+    "plt.show()"
    ]
   },
   {
@@ -1047,7 +1047,7 @@
    "outputs": [],
    "source": [
     "gemma_lm.preprocessor.max_images_per_prompt = 1\n",
-    "gemma_lm.compile(sampler=\"greedy\")\n",
+    "gemma_lm.preprocessor.sequence_length = 512\n",
     "prompt = \"What is going on in this image? Be concise!<start_of_image>\"\n",
     "gemma_lm.generate({\n",
     "    \"prompts\": PROMPT_TEMPLATE.format(prompt),\n",
@@ -1094,7 +1094,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### \"Thinking\" models and Agents"
+    "#### \"Reasoning\" models"
    ]
   },
   {
@@ -1105,11 +1105,11 @@
    },
    "outputs": [],
    "source": [
-    "gemma_lm.compile(sampler=\"random\")\n",
-    "\n",
     "prompt = \"\"\"Judy wrote a 2-page letter to 3 friends twice a week for 3 months.\n",
     "How many letters did she write?\n",
-    "Be brief, and add \"ANSWER:\" before your final answer.\"\"\""
+    "Be brief, and add \"ANSWER:\" before your final answer.\"\"\"\n",
+    "\n",
+    "gemma_lm.compile(sampler=\"random\")"
    ]
   },
   {
diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index afb1fc671a..d69809894d 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -651,6 +651,23 @@
     "### Text-to-image models"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "if keras.config.backend() == \"torch\":\n",
+    "    # The rest of this chapter will not do any training. The following keeps\n",
+    "    # PyTorch from using too much memory by disabling gradients. TensorFlow and\n",
+    "    # JAX use a much smaller memory footprint and do not need this hack.\n",
+    "    import torch\n",
+    "\n",
+    "    torch.set_grad_enabled(False)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -741,6 +758,7 @@
     "    return task.backbone.decode_step(latents)[0]\n",
     "\n",
     "def scale_output(x):\n",
+    "    x = ops.convert_to_numpy(x)\n",
     "    x = np.clip((x + 1.0) / 2.0, 0.0, 1.0)\n",
     "    return np.round(x * 255.0).astype(\"uint8\")\n",
     "\n",
diff --git a/chapter18_best-practices-for-the-real-world.ipynb b/chapter18_best-practices-for-the-real-world.ipynb
index f7f17539d0..33a0adbdab 100644
--- a/chapter18_best-practices-for-the-real-world.ipynb
+++ b/chapter18_best-practices-for-the-real-world.ipynb
@@ -17,7 +17,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install keras-nightly keras-hub-nightly --upgrade -q"
+    "!pip install keras keras-hub --upgrade -q"
    ]
   },
   {
@@ -38,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Getting the most our of your models"
+    "### Getting the most out of your models"
    ]
   },
   {

From ef481bd7293fbcbdf535f7af02bfa8f6ed52101d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Chollet?= <francois.chollet@gmail.com>
Date: Fri, 1 Aug 2025 13:38:36 -0700
Subject: [PATCH 15/24] Update README.md

---
 README.md | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index b97bd9f0e8..506d410202 100644
--- a/README.md
+++ b/README.md
@@ -1,35 +1,29 @@
-# Companion Jupyter notebooks for the book "Deep Learning with Python"
+# Companion Jupyter notebooks for the book "Deep Learning with Python" (2025)
 
-This repository contains Jupyter notebooks implementing the code samples found in the book [Deep Learning with Python, 2nd Edition (Manning Publications)](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff).
+This repository contains Jupyter notebooks implementing the code samples found in the book [Deep Learning with Python, third edition](https://www.manning.com/books/deep-learning-with-python-third-edition?a_aid=keras&a_bid=76564dff)
+by Francois Chollet and Matthew Watson.
+
+In addition, you will find the legacy notebooks for the [second edition (2021)](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff)
+and the [first edition (2017)](https://www.manning.com/books/deep-learning-with-python?a_aid=keras&a_bid=76564dff).
 
 For readability, these notebooks only contain runnable code blocks and section titles, and omit everything else in the book: text paragraphs, figures, and pseudocode.
 **If you want to be able to follow what's going on, I recommend reading the notebooks side by side with your copy of the book.**
 
-These notebooks use TensorFlow 2.6.
-
 ## Table of contents
 
 * [Chapter 2: The mathematical building blocks of neural networks](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter02_mathematical-building-blocks.ipynb)
-* [Chapter 3: Introduction to Keras and TensorFlow](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter03_introduction-to-keras-and-tf.ipynb)
-* [Chapter 4: Getting started with neural networks: classification and regression](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter04_getting-started-with-neural-networks.ipynb)
+* [Chapter 3: Introduction to TensorFlow, PyTorch, JAX, and Keras](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter03_introduction-to-ml-frameworks.ipynb)
+* [Chapter 4: Classification and regression](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter04_classification-and-regression.ipynb)
 * [Chapter 5: Fundamentals of machine learning](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter05_fundamentals-of-ml.ipynb)
-* [Chapter 7: Working with Keras: a deep dive](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter07_working-with-keras.ipynb)
-* [Chapter 8: Introduction to deep learning for computer vision](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter08_intro-to-dl-for-computer-vision.ipynb)
-* Chapter 9: Advanced deep learning for computer vision
-    - [Part 1: Image segmentation](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter09_part01_image-segmentation.ipynb)
-    - [Part 2: Modern convnet architecture patterns](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter09_part02_modern-convnet-architecture-patterns.ipynb)
-    - [Part 3: Interpreting what convnets learn](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter09_part03_interpreting-what-convnets-learn.ipynb)
-* [Chapter 10: Deep learning for timeseries](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter10_dl-for-timeseries.ipynb)
-* Chapter 11: Deep learning for text
-    - [Part 1: Introduction](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter11_part01_introduction.ipynb)
-    - [Part 2: Sequence models](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter11_part02_sequence-models.ipynb)
-    - [Part 3: Transformer](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter11_part03_transformer.ipynb)
-    - [Part 4: Sequence-to-sequence learning](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter11_part04_sequence-to-sequence-learning.ipynb)
-* Chapter 12: Generative deep learning
-    - [Part 1: Text generation](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter12_part01_text-generation.ipynb)
-    - [Part 2: Deep Dream](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter12_part02_deep-dream.ipynb)
-    - [Part 3: Neural style transfer](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter12_part03_neural-style-transfer.ipynb)
-    - [Part 4: Variational autoencoders](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter12_part04_variational-autoencoders.ipynb)
-    - [Part 5: Generative adversarial networks](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter12_part05_gans.ipynb)
-* [Chapter 13: Best practices for the real world](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter13_best-practices-for-the-real-world.ipynb)
-* [Chapter 14: Conclusions](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter14_conclusions.ipynb)
+* [Chapter 7: A deep dive on Keras](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter07_deep-dive-keras.ipynb)
+* [Chapter 8: Image classification](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter08_image-classification.ipynb)
+* [Chapter 9: Convnet architecture patterns](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter09_convnet-architecture-patterns.ipynb)
+* [Chapter 10: Interpreting what ConvNets learn](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter10_interpreting-what-convnets-learn.ipynb)
+* [Chapter 11: Image segmentation](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter11_image-segmentation.ipynb)
+* [Chapter 12: Object detection](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter12_object-detection.ipynb)
+* [Chapter 13: Timeseries forecasting](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter13_timeseries-forecasting.ipynb)
+* [Chapter 14: Text classification](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter14_text-classification.ipynb)
+* [Chapter 15: Language models and the Transformer](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter15_language-models-and-the-transformer.ipynb)
+* [Chapter 16: Text generation](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter16_text-generation.ipynb)
+* [Chapter 17: Image generation](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter17_image-generation.ipynb)
+* [Chapter 18: Best practices for the real world](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter18_best-practices-for-the-real-world.ipynb)

From faea02e0100d1b5de43a1809b9af05cd1ff7e3c6 Mon Sep 17 00:00:00 2001
From: Francois Chollet <francois.chollet@gmail.com>
Date: Fri, 1 Aug 2025 13:54:00 -0700
Subject: [PATCH 16/24] Update notebooks

---
 chapter02_mathematical-building-blocks.ipynb  | 17 +++-
 chapter03_introduction-to-ml-frameworks.ipynb | 49 +++++++-----
 chapter04_classification-and-regression.ipynb | 17 +++-
 chapter05_fundamentals-of-ml.ipynb            | 21 +++--
 chapter07_deep-dive-keras.ipynb               | 79 +++++++++++++++----
 chapter08_image-classification.ipynb          | 15 +++-
 chapter09_convnet-architecture-patterns.ipynb | 15 +++-
 ...r10_interpreting-what-convnets-learn.ipynb | 13 ++-
 chapter11_image-segmentation.ipynb            | 17 +++-
 chapter12_object-detection.ipynb              | 15 +++-
 chapter13_timeseries-forecasting.ipynb        | 13 ++-
 chapter14_text-classification.ipynb           | 19 +++--
 ..._language-models-and-the-transformer.ipynb | 25 ++++--
 chapter16_text-generation.ipynb               | 13 ++-
 chapter17_image-generation.ipynb              | 15 +++-
 ...18_best-practices-for-the-real-world.ipynb | 45 ++++++++---
 second_edition/README.md                      | 30 +++++++
 17 files changed, 323 insertions(+), 95 deletions(-)
 create mode 100644 second_edition/README.md

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index a0ad2070d5..72e8af3300 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## The mathematical building blocks of neural networks"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -958,7 +967,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The engine of neural networks: gradient-based optimization"
+    "### The engine of neural networks: Gradient-based optimization"
    ]
   },
   {
@@ -994,7 +1003,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Chaining derivatives: the Backpropagation algorithm"
+    "#### Chaining derivatives: The Backpropagation algorithm"
    ]
   },
   {
@@ -1402,7 +1411,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter03_introduction-to-ml-frameworks.ipynb b/chapter03_introduction-to-ml-frameworks.ipynb
index 523816aec4..5fe3309cd0 100644
--- a/chapter03_introduction-to-ml-frameworks.ipynb
+++ b/chapter03_introduction-to-ml-frameworks.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Introduction to TensorFlow, PyTorch, JAX, and Keras"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -74,7 +83,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Tensors and Variables in TensorFlow"
+    "##### Tensors and variables in TensorFlow"
    ]
   },
   {
@@ -159,7 +168,7 @@
     "colab_type": "text"
    },
    "source": [
-    "###### Tensor assignment and the `Variable` class"
+    "###### Tensor assignment and the Variable class"
    ]
   },
   {
@@ -227,7 +236,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Tensor operations: doing math in TensorFlow"
+    "##### Tensor operations: Doing math in TensorFlow"
    ]
   },
   {
@@ -264,7 +273,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Gradients in TensorFlow: a second look at the `GradientTape` API"
+    "##### Gradients in TensorFlow: A second look at the GradientTape API"
    ]
   },
   {
@@ -353,7 +362,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### An end-to-end example: a linear classifier in pure TensorFlow"
+    "#### An end-to-end example: A linear classifier in pure TensorFlow"
    ]
   },
   {
@@ -549,7 +558,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Tensors and Parameters in PyTorch"
+    "##### Tensors and parameters in PyTorch"
    ]
   },
   {
@@ -668,7 +677,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Tensor operations: doing math in PyTorch"
+    "##### Tensor operations: Doing math in PyTorch"
    ]
   },
   {
@@ -753,7 +762,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### An end-to-end example: a linear classifier in pure PyTorch"
+    "#### An end-to-end example: A linear classifier in pure PyTorch"
    ]
   },
   {
@@ -825,7 +834,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Packaging state and computation with the `Module` class"
+    "##### Packaging state and computation with the Module class"
    ]
   },
   {
@@ -1149,7 +1158,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Tensor operations: doing math in JAX"
+    "##### Tensor operations: Doing math in JAX"
    ]
   },
   {
@@ -1278,7 +1287,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Making JAX functions fast with `@jax.jit`"
+    "##### Making JAX functions fast with @jax.jit"
    ]
   },
   {
@@ -1300,7 +1309,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### An end-to-end example: a linear classifier in pure JAX"
+    "#### An end-to-end example: A linear classifier in pure JAX"
    ]
   },
   {
@@ -1440,7 +1449,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Layers: the building blocks of deep learning"
+    "#### Layers: The building blocks of deep learning"
    ]
   },
   {
@@ -1502,7 +1511,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Automatic shape inference: building layers on the fly"
+    "##### Automatic shape inference: Building layers on the fly"
    ]
   },
   {
@@ -1570,7 +1579,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### The \"compile\" step: configuring the learning process"
+    "#### The \"compile\" step: Configuring the learning process"
    ]
   },
   {
@@ -1619,7 +1628,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Understanding the `fit` method"
+    "#### Understanding the fit method"
    ]
   },
   {
@@ -1655,7 +1664,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Monitoring loss & metrics on validation data"
+    "#### Monitoring loss and metrics on validation data"
    ]
   },
   {
@@ -1697,7 +1706,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Inference: using a model after training"
+    "#### Inference: Using a model after training"
    ]
   },
   {
@@ -1718,7 +1727,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter04_classification-and-regression.ipynb b/chapter04_classification-and-regression.ipynb
index b4c0d81ec0..0b6e32a3ec 100644
--- a/chapter04_classification-and-regression.ipynb
+++ b/chapter04_classification-and-regression.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -38,7 +38,16 @@
     "colab_type": "text"
    },
    "source": [
-    "### Classifying movie reviews: a binary classification example"
+    "## Classification and regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Classifying movie reviews: A binary classification example"
    ]
   },
   {
@@ -413,7 +422,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Classifying newswires: a multiclass classification example"
+    "### Classifying newswires: A multiclass classification example"
    ]
   },
   {
@@ -1244,7 +1253,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter05_fundamentals-of-ml.ipynb b/chapter05_fundamentals-of-ml.ipynb
index 2e152b2147..0623b7f16b 100644
--- a/chapter05_fundamentals-of-ml.ipynb
+++ b/chapter05_fundamentals-of-ml.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -38,7 +38,16 @@
     "colab_type": "text"
    },
    "source": [
-    "### Generalization: the goal of machine learning"
+    "## Fundamentals of machine learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Generalization: The goal of machine learning"
    ]
   },
   {
@@ -397,7 +406,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Leveraging better architecture priors"
+    "#### Using better architecture priors"
    ]
   },
   {
@@ -894,7 +903,7 @@
    "outputs": [],
    "source": [
     "original_val_loss = history_original.history[\"val_loss\"]\n",
-    "l2_val_loss = history_l2_reg.history[\"val_loss\"]\n",
+    "dropout_val_loss = history_dropout.history[\"val_loss\"]\n",
     "epochs = range(1, 21)\n",
     "plt.plot(\n",
     "    epochs,\n",
@@ -904,7 +913,7 @@
     ")\n",
     "plt.plot(\n",
     "    epochs,\n",
-    "    l2_val_loss,\n",
+    "    dropout_val_loss,\n",
     "    \"b-\",\n",
     "    label=\"Validation loss of dropout-regularized model\",\n",
     ")\n",
@@ -924,7 +933,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter07_deep-dive-keras.ipynb b/chapter07_deep-dive-keras.ipynb
index 855fd0e619..54dc92c023 100644
--- a/chapter07_deep-dive-keras.ipynb
+++ b/chapter07_deep-dive-keras.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## A deep dive on Keras"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -410,7 +419,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### The power of the Functional API: access to layer connectivity"
+    "##### The power of the Functional API: Access to layer connectivity"
    ]
   },
   {
@@ -529,7 +538,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Subclassing the `Model` class"
+    "#### Subclassing the Model class"
    ]
   },
   {
@@ -619,7 +628,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Beware: what subclassed models don't support"
+    "##### Beware: What subclassed models don't support"
    ]
   },
   {
@@ -690,7 +699,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Remember: use the right tool for the job"
+    "#### Remember: Use the right tool for the job"
    ]
   },
   {
@@ -813,7 +822,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Using Callbacks"
+    "#### Using callbacks"
    ]
   },
   {
@@ -822,7 +831,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### The `EarlyStopping` and `ModelCheckpoint` callbacks"
+    "##### The EarlyStopping and ModelCheckpoint callbacks"
    ]
   },
   {
@@ -996,7 +1005,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Training versus inference"
+    "#### Training vs. inference"
    ]
   },
   {
@@ -1327,7 +1336,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Leveraging `fit()` with a custom training loop"
+    "#### Using fit() with a custom training loop"
    ]
   },
   {
@@ -1336,7 +1345,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Customizing `fit()` with TensorFlow"
+    "##### Customizing fit() with TensorFlow"
    ]
   },
   {
@@ -1417,7 +1426,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Customizing `fit()` with PyTorch"
+    "##### Customizing fit() with PyTorch"
    ]
   },
   {
@@ -1466,6 +1475,24 @@
     "        return [loss_tracker]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_custom_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = CustomModel(inputs, outputs)\n",
+    "    model.compile(optimizer=keras.optimizers.Adam())\n",
+    "    return model"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1484,7 +1511,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Customizing `fit()` with JAX"
+    "##### Customizing fit() with JAX"
    ]
   },
   {
@@ -1567,6 +1594,24 @@
     "        return logs, state"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "def get_custom_model():\n",
+    "    inputs = keras.Input(shape=(28 * 28,))\n",
+    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
+    "    features = layers.Dropout(0.5)(features)\n",
+    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
+    "    model = CustomModel(inputs, outputs)\n",
+    "    model.compile(optimizer=keras.optimizers.Adam())\n",
+    "    return model"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1585,7 +1630,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Handling metrics in a custom `train_step()`"
+    "#### Handling metrics in a custom train_step()"
    ]
   },
   {
@@ -1594,7 +1639,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### `train_step()` metrics handling with TensorFlow"
+    "##### train_step() metrics handling with TensorFlow"
    ]
   },
   {
@@ -1669,7 +1714,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### `train_step()` metrics handling with PyTorch"
+    "##### train_step() metrics handling with PyTorch"
    ]
   },
   {
@@ -1747,7 +1792,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### `train_step()` metrics handling with JAX"
+    "##### train_step() metrics handling with JAX"
    ]
   },
   {
@@ -1847,7 +1892,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter08_image-classification.ipynb b/chapter08_image-classification.ipynb
index 40320aeaf5..dddce9ae36 100644
--- a/chapter08_image-classification.ipynb
+++ b/chapter08_image-classification.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Image classification"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -644,7 +653,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Leveraging a pretrained model"
+    "### Using a pretrained model"
    ]
   },
   {
@@ -969,7 +978,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter09_convnet-architecture-patterns.ipynb b/chapter09_convnet-architecture-patterns.ipynb
index 60c3a9ce01..8e0350e2df 100644
--- a/chapter09_convnet-architecture-patterns.ipynb
+++ b/chapter09_convnet-architecture-patterns.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Convnet architecture patterns"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -142,7 +151,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Putting it together: a mini Xception-like model"
+    "### Putting it together: A mini Xception-like model"
    ]
   },
   {
@@ -320,7 +329,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter10_interpreting-what-convnets-learn.ipynb b/chapter10_interpreting-what-convnets-learn.ipynb
index 64d98c05a5..6fa924d04f 100644
--- a/chapter10_interpreting-what-convnets-learn.ipynb
+++ b/chapter10_interpreting-what-convnets-learn.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Interpreting what convnets learn"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -929,7 +938,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
index 607ab9b026..59753dce15 100644
--- a/chapter11_image-segmentation.ipynb
+++ b/chapter11_image-segmentation.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Image segmentation"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -330,7 +339,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Downloading the Segment Anything model"
+    "#### Downloading the Segment Anything Model"
    ]
   },
   {
@@ -372,7 +381,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Prepare a test image"
+    "#### Preparing a test image"
    ]
   },
   {
@@ -627,7 +636,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index 1ce5479941..e67c085924 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -38,7 +38,16 @@
     "colab_type": "text"
    },
    "source": [
-    "### Single-stage vs two-stage object detectors"
+    "## Object detection"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### Single-stage vs. two-stage object detectors"
    ]
   },
   {
@@ -655,7 +664,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
index 0413202fc4..d13f5a0ec5 100644
--- a/chapter13_timeseries-forecasting.ipynb
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Timeseries forecasting"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -653,7 +662,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
index 7f0e90c7b4..4bc767c81b 100644
--- a/chapter14_text-classification.ipynb
+++ b/chapter14_text-classification.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -38,7 +38,16 @@
     "colab_type": "text"
    },
    "source": [
-    "### A brief history of Natural Language Processing"
+    "## Text classification"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "### A brief history of natural language processing"
    ]
   },
   {
@@ -586,7 +595,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Sets vs. Sequences"
+    "### Sets vs. sequences"
    ]
   },
   {
@@ -595,7 +604,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Loading the IMDb Classification Dataset"
+    "#### Loading the IMDb classification dataset"
    ]
   },
   {
@@ -1362,7 +1371,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
index dc2a065206..4094560108 100644
--- a/chapter15_language-models-and-the-transformer.ipynb
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -38,7 +38,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The Language Model"
+    "## Language models and the Transformer"
    ]
   },
   {
@@ -47,7 +47,16 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Training a Shakespeare Language Model"
+    "### The language model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Training a Shakespeare language model"
    ]
   },
   {
@@ -315,7 +324,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### English to Spanish Translation"
+    "#### English-to-Spanish translation"
    ]
   },
   {
@@ -632,7 +641,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Transformer Encoder block"
+    "#### Transformer encoder block"
    ]
   },
   {
@@ -673,7 +682,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Transformer Decoder block"
+    "#### Transformer decoder block"
    ]
   },
   {
@@ -967,7 +976,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Preprocessing IMDb Movie reviews"
+    "#### Preprocessing IMDb movie reviews"
    ]
   },
   {
@@ -1132,7 +1141,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter16_text-generation.ipynb b/chapter16_text-generation.ipynb
index 8f6f0c94cf..682a623fe8 100644
--- a/chapter16_text-generation.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io).\n"
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Text generation"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -1149,7 +1158,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index d69809894d..85a5645a2f 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Image generation"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -400,7 +409,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### The concept of \"diffusion time\" and \"diffusion schedule\""
+    "#### The concepts of diffusion time and diffusion schedule"
    ]
   },
   {
@@ -841,7 +850,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/chapter18_best-practices-for-the-real-world.ipynb b/chapter18_best-practices-for-the-real-world.ipynb
index 33a0adbdab..a2aec3cc18 100644
--- a/chapter18_best-practices-for-the-real-world.ipynb
+++ b/chapter18_best-practices-for-the-real-world.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](TODO). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,15 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Best practices for the real world"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -316,7 +325,25 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Data parallelism: replicate your model on each GPU"
+    "##### Data parallelism: Replicating your model on each GPU"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "##### Model parallelism: Splitting your model across multiple GPUs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### Distributed training in practice"
    ]
   },
   {
@@ -325,7 +352,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Model parallelism: split your model across multiple GPUs"
+    "##### Getting your hands on two or more GPUs"
    ]
   },
   {
@@ -334,7 +361,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Distributed training in practice"
+    "##### Using data parallelism with JAX"
    ]
   },
   {
@@ -343,7 +370,7 @@
     "colab_type": "text"
    },
    "source": [
-    "###### Getting your hands on two or more GPUs"
+    "##### Using model parallelism with JAX"
    ]
   },
   {
@@ -352,7 +379,7 @@
     "colab_type": "text"
    },
    "source": [
-    "###### Using data parallelism with JAX"
+    "###### The DeviceMesh API"
    ]
   },
   {
@@ -361,7 +388,7 @@
     "colab_type": "text"
    },
    "source": [
-    "###### Using model parallelism with JAX"
+    "###### The LayoutMap API"
    ]
   },
   {
@@ -379,7 +406,7 @@
     "colab_type": "text"
    },
    "source": [
-    "##### Leveraging step fusing to improve TPU utilization"
+    "##### Using step fusing to improve TPU utilization"
    ]
   },
   {
@@ -519,7 +546,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Chapter summary"
+    "### Summary"
    ]
   }
  ],
diff --git a/second_edition/README.md b/second_edition/README.md
new file mode 100644
index 0000000000..53b72c363f
--- /dev/null
+++ b/second_edition/README.md
@@ -0,0 +1,30 @@
+# Second edition notebooks
+
+These are the notebooks for the second edition of the book, originally published in 2021. These notebooks use `tf.keras` with TensorFlow 2.16.
+
+## Table of contents
+
+* [Chapter 2: The mathematical building blocks of neural networks](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter02_mathematical-building-blocks.ipynb)
+* [Chapter 3: Introduction to Keras and TensorFlow](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter03_introduction-to-keras-and-tf.ipynb)
+* [Chapter 4: Getting started with neural networks: classification and regression](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter04_getting-started-with-neural-networks.ipynb)
+* [Chapter 5: Fundamentals of machine learning](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter05_fundamentals-of-ml.ipynb)
+* [Chapter 7: Working with Keras: a deep dive](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter07_working-with-keras.ipynb)
+* [Chapter 8: Introduction to deep learning for computer vision](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter08_intro-to-dl-for-computer-vision.ipynb)
+* Chapter 9: Advanced deep learning for computer vision
+    - [Part 1: Image segmentation](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter09_part01_image-segmentation.ipynb)
+    - [Part 2: Modern convnet architecture patterns](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter09_part02_modern-convnet-architecture-patterns.ipynb)
+    - [Part 3: Interpreting what convnets learn](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter09_part03_interpreting-what-convnets-learn.ipynb)
+* [Chapter 10: Deep learning for timeseries](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter10_dl-for-timeseries.ipynb)
+* Chapter 11: Deep learning for text
+    - [Part 1: Introduction](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter11_part01_introduction.ipynb)
+    - [Part 2: Sequence models](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter11_part02_sequence-models.ipynb)
+    - [Part 3: Transformer](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter11_part03_transformer.ipynb)
+    - [Part 4: Sequence-to-sequence learning](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter11_part04_sequence-to-sequence-learning.ipynb)
+* Chapter 12: Generative deep learning
+    - [Part 1: Text generation](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter12_part01_text-generation.ipynb)
+    - [Part 2: Deep Dream](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter12_part02_deep-dream.ipynb)
+    - [Part 3: Neural style transfer](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter12_part03_neural-style-transfer.ipynb)
+    - [Part 4: Variational autoencoders](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter12_part04_variational-autoencoders.ipynb)
+    - [Part 5: Generative adversarial networks](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter12_part05_gans.ipynb)
+* [Chapter 13: Best practices for the real world](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter13_best-practices-for-the-real-world.ipynb)
+* [Chapter 14: Conclusions](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/second_edition/chapter14_conclusions.ipynb)

From 54335cf4834678b04b5ba072648123778c7b7f48 Mon Sep 17 00:00:00 2001
From: Francois Chollet <francois.chollet@gmail.com>
Date: Fri, 1 Aug 2025 13:56:39 -0700
Subject: [PATCH 17/24] Update notebooks

---
 chapter02_mathematical-building-blocks.ipynb        | 9 ---------
 chapter03_introduction-to-ml-frameworks.ipynb       | 9 ---------
 chapter04_classification-and-regression.ipynb       | 9 ---------
 chapter05_fundamentals-of-ml.ipynb                  | 9 ---------
 chapter07_deep-dive-keras.ipynb                     | 9 ---------
 chapter08_image-classification.ipynb                | 9 ---------
 chapter09_convnet-architecture-patterns.ipynb       | 9 ---------
 chapter10_interpreting-what-convnets-learn.ipynb    | 9 ---------
 chapter11_image-segmentation.ipynb                  | 9 ---------
 chapter12_object-detection.ipynb                    | 9 ---------
 chapter13_timeseries-forecasting.ipynb              | 9 ---------
 chapter14_text-classification.ipynb                 | 9 ---------
 chapter15_language-models-and-the-transformer.ipynb | 9 ---------
 chapter16_text-generation.ipynb                     | 9 ---------
 chapter17_image-generation.ipynb                    | 9 ---------
 chapter18_best-practices-for-the-real-world.ipynb   | 9 ---------
 16 files changed, 144 deletions(-)

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index 72e8af3300..56492a76b8 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -1404,15 +1404,6 @@
     "matches = predicted_labels == test_labels\n",
     "f\"accuracy: {ops.mean(matches):.2f}\""
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter03_introduction-to-ml-frameworks.ipynb b/chapter03_introduction-to-ml-frameworks.ipynb
index 5fe3309cd0..a4a46b0891 100644
--- a/chapter03_introduction-to-ml-frameworks.ipynb
+++ b/chapter03_introduction-to-ml-frameworks.ipynb
@@ -1720,15 +1720,6 @@
     "predictions = model.predict(val_inputs, batch_size=128)\n",
     "print(predictions[:10])"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter04_classification-and-regression.ipynb b/chapter04_classification-and-regression.ipynb
index 0b6e32a3ec..5ddea4ecf1 100644
--- a/chapter04_classification-and-regression.ipynb
+++ b/chapter04_classification-and-regression.ipynb
@@ -1246,15 +1246,6 @@
    "source": [
     "#### Wrapping up"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter05_fundamentals-of-ml.ipynb b/chapter05_fundamentals-of-ml.ipynb
index 0623b7f16b..0d3a0d1437 100644
--- a/chapter05_fundamentals-of-ml.ipynb
+++ b/chapter05_fundamentals-of-ml.ipynb
@@ -926,15 +926,6 @@
     "plt.legend()\n",
     "plt.show()"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter07_deep-dive-keras.ipynb b/chapter07_deep-dive-keras.ipynb
index 54dc92c023..52f369009c 100644
--- a/chapter07_deep-dive-keras.ipynb
+++ b/chapter07_deep-dive-keras.ipynb
@@ -1885,15 +1885,6 @@
     "        )\n",
     "        return logs, state"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter08_image-classification.ipynb b/chapter08_image-classification.ipynb
index dddce9ae36..c10e8007c9 100644
--- a/chapter08_image-classification.ipynb
+++ b/chapter08_image-classification.ipynb
@@ -971,15 +971,6 @@
     "test_loss, test_acc = model.evaluate(test_dataset)\n",
     "print(f\"Test accuracy: {test_acc:.3f}\")"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter09_convnet-architecture-patterns.ipynb b/chapter09_convnet-architecture-patterns.ipynb
index 8e0350e2df..3bd8218151 100644
--- a/chapter09_convnet-architecture-patterns.ipynb
+++ b/chapter09_convnet-architecture-patterns.ipynb
@@ -322,15 +322,6 @@
    "source": [
     "### Beyond convolution: Vision Transformers"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter10_interpreting-what-convnets-learn.ipynb b/chapter10_interpreting-what-convnets-learn.ipynb
index 6fa924d04f..5cbdb658c5 100644
--- a/chapter10_interpreting-what-convnets-learn.ipynb
+++ b/chapter10_interpreting-what-convnets-learn.ipynb
@@ -931,15 +931,6 @@
    "source": [
     "### Visualizing the latent space of a convnet"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
index 59753dce15..fd5125ba6c 100644
--- a/chapter11_image-segmentation.ipynb
+++ b/chapter11_image-segmentation.ipynb
@@ -629,15 +629,6 @@
     "show_box(input_box, plt.gca())\n",
     "plt.show()"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index e67c085924..fab80f2268 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -657,15 +657,6 @@
     "    draw_box(ax, box, label_name, label_to_color(label))\n",
     "plt.show()"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
index d13f5a0ec5..f5b2034024 100644
--- a/chapter13_timeseries-forecasting.ipynb
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -655,15 +655,6 @@
    "source": [
     "### Going even further"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
index 4bc767c81b..b4e40559c1 100644
--- a/chapter14_text-classification.ipynb
+++ b/chapter14_text-classification.ipynb
@@ -1364,15 +1364,6 @@
     "test_loss, test_acc = model.evaluate(sequence_test_ds)\n",
     "test_acc"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
index 4094560108..2e79052525 100644
--- a/chapter15_language-models-and-the-transformer.ipynb
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -1134,15 +1134,6 @@
    "source": [
     "### What makes the Transformer effective?"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter16_text-generation.ipynb b/chapter16_text-generation.ipynb
index 682a623fe8..49a83e68e8 100644
--- a/chapter16_text-generation.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -1151,15 +1151,6 @@
    "source": [
     "### Where are LLMs heading next?"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index 85a5645a2f..62636cc7fe 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -843,15 +843,6 @@
     "    images.append(scale_output(image))\n",
     "display(images)"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {
diff --git a/chapter18_best-practices-for-the-real-world.ipynb b/chapter18_best-practices-for-the-real-world.ipynb
index a2aec3cc18..7b8f10cdb2 100644
--- a/chapter18_best-practices-for-the-real-world.ipynb
+++ b/chapter18_best-practices-for-the-real-world.ipynb
@@ -539,15 +539,6 @@
    "source": [
     "ops.matmul(x, kernel)"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Summary"
-   ]
   }
  ],
  "metadata": {

From 503b0f4bf9e43b8a17d805aababd860fd3293f7c Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Mon, 4 Aug 2025 13:46:24 -0700
Subject: [PATCH 18/24] Remove set_backend() from all notebooks

---
 chapter02_mathematical-building-blocks.ipynb  |  41 +++-
 chapter03_introduction-to-ml-frameworks.ipynb |  25 +++
 chapter04_classification-and-regression.ipynb |  25 +++
 chapter05_fundamentals-of-ml.ipynb            |  25 +++
 chapter07_deep-dive-keras.ipynb               | 181 +++++-----------
 chapter08_image-classification.ipynb          |  25 +++
 chapter09_convnet-architecture-patterns.ipynb |  25 +++
 ...r10_interpreting-what-convnets-learn.ipynb | 202 +++---------------
 chapter11_image-segmentation.ipynb            |  25 +++
 chapter12_object-detection.ipynb              |  25 +++
 chapter13_timeseries-forecasting.ipynb        |  25 +++
 chapter14_text-classification.ipynb           |  25 +++
 ..._language-models-and-the-transformer.ipynb |  25 +++
 chapter16_text-generation.ipynb               |  27 ++-
 chapter17_image-generation.ipynb              |  25 +++
 ...18_best-practices-for-the-real-world.ipynb |  25 +++
 16 files changed, 436 insertions(+), 315 deletions(-)

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index 56492a76b8..4f6e642623 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -1113,17 +1138,6 @@
     "##### A simple Dense class"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "keras.config.set_backend(\"tensorflow\")"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1308,6 +1322,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "import tensorflow as tf\n",
     "\n",
     "x = tf.zeros(shape=())\n",
@@ -1324,6 +1339,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "def one_training_step(model, images_batch, labels_batch):\n",
     "    with tf.GradientTape() as tape:\n",
     "        predictions = model(images_batch)\n",
@@ -1351,6 +1367,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "def fit(model, images, labels, epochs, batch_size=128):\n",
     "    for epoch_counter in range(epochs):\n",
     "        print(f\"Epoch {epoch_counter}\")\n",
@@ -1370,6 +1387,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "from keras.datasets import mnist\n",
     "\n",
     "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n",
@@ -1399,6 +1417,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "predictions = model(test_images)\n",
     "predicted_labels = ops.argmax(predictions, axis=1)\n",
     "matches = predicted_labels == test_labels\n",
diff --git a/chapter03_introduction-to-ml-frameworks.ipynb b/chapter03_introduction-to-ml-frameworks.ipynb
index a4a46b0891..7d29c2f859 100644
--- a/chapter03_introduction-to-ml-frameworks.ipynb
+++ b/chapter03_introduction-to-ml-frameworks.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter04_classification-and-regression.ipynb b/chapter04_classification-and-regression.ipynb
index 5ddea4ecf1..1e2e7e8225 100644
--- a/chapter04_classification-and-regression.ipynb
+++ b/chapter04_classification-and-regression.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter05_fundamentals-of-ml.ipynb b/chapter05_fundamentals-of-ml.ipynb
index 0d3a0d1437..2aadcc9b85 100644
--- a/chapter05_fundamentals-of-ml.ipynb
+++ b/chapter05_fundamentals-of-ml.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter07_deep-dive-keras.ipynb b/chapter07_deep-dive-keras.ipynb
index 52f369009c..8211088123 100644
--- a/chapter07_deep-dive-keras.ipynb
+++ b/chapter07_deep-dive-keras.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io).\n"
    ]
   },
   {
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -1034,28 +1059,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"tensorflow\")\n",
-    "\n",
-    "import keras\n",
-    "from keras import layers\n",
-    "\n",
-    "def get_mnist_model():\n",
-    "    inputs = keras.Input(shape=(28 * 28,))\n",
-    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
-    "    features = layers.Dropout(0.5)(features)\n",
-    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
-    "    model = keras.Model(inputs, outputs)\n",
-    "    return model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend tensorflow\n",
     "import tensorflow as tf\n",
     "\n",
     "model = get_mnist_model()\n",
@@ -1079,6 +1083,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "batch_size = 32\n",
     "inputs = train_images[:batch_size]\n",
     "targets = train_labels[:batch_size]\n",
@@ -1102,28 +1107,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"torch\")\n",
-    "\n",
-    "import keras\n",
-    "from keras import layers\n",
-    "\n",
-    "def get_mnist_model():\n",
-    "    inputs = keras.Input(shape=(28 * 28,))\n",
-    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
-    "    features = layers.Dropout(0.5)(features)\n",
-    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
-    "    model = keras.Model(inputs, outputs)\n",
-    "    return model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend torch\n",
     "import torch\n",
     "\n",
     "model = get_mnist_model()\n",
@@ -1149,6 +1133,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend torch\n",
     "batch_size = 32\n",
     "inputs = train_images[:batch_size]\n",
     "targets = train_labels[:batch_size]\n",
@@ -1172,28 +1157,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"jax\")\n",
-    "\n",
-    "import keras\n",
-    "from keras import layers\n",
-    "\n",
-    "def get_mnist_model():\n",
-    "    inputs = keras.Input(shape=(28 * 28,))\n",
-    "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
-    "    features = layers.Dropout(0.5)(features)\n",
-    "    outputs = layers.Dense(10, activation=\"softmax\")(features)\n",
-    "    model = keras.Model(inputs, outputs)\n",
-    "    return model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend jax\n",
     "model = get_mnist_model()\n",
     "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n",
     "\n",
@@ -1215,6 +1179,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend jax\n",
     "import jax\n",
     "\n",
     "grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)"
@@ -1228,6 +1193,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend jax\n",
     "optimizer = keras.optimizers.Adam()\n",
     "optimizer.build(model.trainable_variables)\n",
     "\n",
@@ -1254,6 +1220,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend jax\n",
     "batch_size = 32\n",
     "inputs = train_images[:batch_size]\n",
     "targets = train_labels[:batch_size]\n",
@@ -1356,17 +1323,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"tensorflow\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend tensorflow\n",
     "import keras\n",
     "from keras import layers\n",
     "\n",
@@ -1398,6 +1355,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "def get_custom_model():\n",
     "    inputs = keras.Input(shape=(28 * 28,))\n",
     "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
@@ -1416,6 +1374,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "model = get_custom_model()\n",
     "model.fit(train_images, train_labels, epochs=3)"
    ]
@@ -1437,17 +1396,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"torch\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend torch\n",
     "import keras\n",
     "from keras import layers\n",
     "\n",
@@ -1483,6 +1432,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend torch\n",
     "def get_custom_model():\n",
     "    inputs = keras.Input(shape=(28 * 28,))\n",
     "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
@@ -1501,6 +1451,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend torch\n",
     "model = get_custom_model()\n",
     "model.fit(train_images, train_labels, epochs=3)"
    ]
@@ -1522,17 +1473,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"jax\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend jax\n",
     "import keras\n",
     "from keras import layers\n",
     "\n",
@@ -1602,6 +1543,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend jax\n",
     "def get_custom_model():\n",
     "    inputs = keras.Input(shape=(28 * 28,))\n",
     "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
@@ -1620,6 +1562,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend jax\n",
     "model = get_custom_model()\n",
     "model.fit(train_images, train_labels, epochs=3)"
    ]
@@ -1650,17 +1593,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"tensorflow\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend tensorflow\n",
     "import keras\n",
     "from keras import layers\n",
     "\n",
@@ -1691,6 +1624,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend tensorflow\n",
     "def get_custom_model():\n",
     "    inputs = keras.Input(shape=(28 * 28,))\n",
     "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
@@ -1725,17 +1659,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"torch\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend torch\n",
     "import keras\n",
     "from keras import layers\n",
     "\n",
@@ -1769,6 +1693,7 @@
    },
    "outputs": [],
    "source": [
+    "%%backend torch\n",
     "def get_custom_model():\n",
     "    inputs = keras.Input(shape=(28 * 28,))\n",
     "    features = layers.Dense(512, activation=\"relu\")(inputs)\n",
@@ -1803,17 +1728,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"jax\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend jax\n",
     "import keras\n",
     "from keras import layers\n",
     "\n",
diff --git a/chapter08_image-classification.ipynb b/chapter08_image-classification.ipynb
index c10e8007c9..3ce065e2b3 100644
--- a/chapter08_image-classification.ipynb
+++ b/chapter08_image-classification.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter09_convnet-architecture-patterns.ipynb b/chapter09_convnet-architecture-patterns.ipynb
index 3bd8218151..218546a3c6 100644
--- a/chapter09_convnet-architecture-patterns.ipynb
+++ b/chapter09_convnet-architecture-patterns.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter10_interpreting-what-convnets-learn.ipynb b/chapter10_interpreting-what-convnets-learn.ipynb
index 5cbdb658c5..1ff0164326 100644
--- a/chapter10_interpreting-what-convnets-learn.ipynb
+++ b/chapter10_interpreting-what-convnets-learn.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -313,32 +338,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"tensorflow\")\n",
-    "\n",
-    "import keras\n",
-    "import keras_hub\n",
-    "from keras import ops\n",
-    "\n",
-    "model = keras_hub.models.Backbone.from_preset(\"xception_41_imagenet\")\n",
-    "\n",
-    "layer_name = \"block3_sepconv1\"\n",
-    "layer = model.get_layer(name=layer_name)\n",
-    "feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)\n",
-    "\n",
-    "def compute_loss(image, filter_index):\n",
-    "    activation = feature_extractor(image)\n",
-    "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
-    "    return ops.mean(filter_activation)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend tensorflow\n",
     "import tensorflow as tf\n",
     "\n",
     "@tf.function\n",
@@ -369,32 +369,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"torch\")\n",
-    "\n",
-    "import keras\n",
-    "import keras_hub\n",
-    "from keras import ops\n",
-    "\n",
-    "model = keras_hub.models.Backbone.from_preset(\"xception_41_imagenet\")\n",
-    "\n",
-    "layer_name = \"block3_sepconv1\"\n",
-    "layer = model.get_layer(name=layer_name)\n",
-    "feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)\n",
-    "\n",
-    "def compute_loss(image, filter_index):\n",
-    "    activation = feature_extractor(image)\n",
-    "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
-    "    return ops.mean(filter_activation)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend torch\n",
     "import torch\n",
     "\n",
     "def gradient_ascent_step(image, filter_index, learning_rate):\n",
@@ -424,32 +399,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"jax\")\n",
-    "\n",
-    "import keras\n",
-    "import keras_hub\n",
-    "from keras import ops\n",
-    "\n",
-    "model = keras_hub.models.Backbone.from_preset(\"xception_41_imagenet\")\n",
-    "\n",
-    "layer_name = \"block3_sepconv1\"\n",
-    "layer = model.get_layer(name=layer_name)\n",
-    "feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)\n",
-    "\n",
-    "def compute_loss(image, filter_index):\n",
-    "    activation = feature_extractor(image)\n",
-    "    filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
-    "    return ops.mean(filter_activation)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend jax\n",
     "import jax\n",
     "\n",
     "grad_fn = jax.grad(compute_loss)\n",
@@ -677,36 +627,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"tensorflow\")\n",
-    "\n",
-    "import keras\n",
-    "import keras_hub\n",
-    "from keras import ops\n",
-    "\n",
-    "model = keras_hub.models.ImageClassifier.from_preset(\n",
-    "    \"xception_41_imagenet\",\n",
-    "    activation=\"softmax\",\n",
-    ")\n",
-    "\n",
-    "last_conv_layer_name = \"block14_sepconv2_act\"\n",
-    "last_conv_layer = model.backbone.get_layer(last_conv_layer_name)\n",
-    "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
-    "\n",
-    "classifier_input = last_conv_layer.output\n",
-    "x = classifier_input\n",
-    "for layer_name in [\"pooler\", \"predictions\"]:\n",
-    "    x = model.get_layer(layer_name)(x)\n",
-    "classifier_model = keras.Model(classifier_input, x)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend tensorflow\n",
     "import tensorflow as tf\n",
     "\n",
     "def get_top_class_gradients(img_array):\n",
@@ -742,36 +663,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"torch\")\n",
-    "\n",
-    "import keras\n",
-    "import keras_hub\n",
-    "from keras import ops\n",
-    "\n",
-    "model = keras_hub.models.ImageClassifier.from_preset(\n",
-    "    \"xception_41_imagenet\",\n",
-    "    activation=\"softmax\",\n",
-    ")\n",
-    "\n",
-    "last_conv_layer_name = \"block14_sepconv2_act\"\n",
-    "last_conv_layer = model.backbone.get_layer(last_conv_layer_name)\n",
-    "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
-    "\n",
-    "classifier_input = last_conv_layer.output\n",
-    "x = classifier_input\n",
-    "for layer_name in [\"pooler\", \"predictions\"]:\n",
-    "    x = model.get_layer(layer_name)(x)\n",
-    "classifier_model = keras.Model(classifier_input, x)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend torch\n",
     "def get_top_class_gradients(img_array):\n",
     "    last_conv_layer_output = last_conv_layer_model(img_array)\n",
     "    last_conv_layer_output = (\n",
@@ -806,36 +698,7 @@
    },
    "outputs": [],
    "source": [
-    "keras.config.set_backend(\"jax\")\n",
-    "\n",
-    "import keras\n",
-    "import keras_hub\n",
-    "from keras import ops\n",
-    "\n",
-    "model = keras_hub.models.ImageClassifier.from_preset(\n",
-    "    \"xception_41_imagenet\",\n",
-    "    activation=\"softmax\",\n",
-    ")\n",
-    "\n",
-    "last_conv_layer_name = \"block14_sepconv2_act\"\n",
-    "last_conv_layer = model.backbone.get_layer(last_conv_layer_name)\n",
-    "last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)\n",
-    "\n",
-    "classifier_input = last_conv_layer.output\n",
-    "x = classifier_input\n",
-    "for layer_name in [\"pooler\", \"predictions\"]:\n",
-    "    x = model.get_layer(layer_name)(x)\n",
-    "classifier_model = keras.Model(classifier_input, x)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
+    "%%backend jax\n",
     "import jax\n",
     "\n",
     "def loss_fn(last_conv_layer_output):\n",
@@ -919,8 +782,7 @@
     "superimposed_img = jet_heatmap * 0.4 + img\n",
     "superimposed_img = keras.utils.array_to_img(superimposed_img)\n",
     "\n",
-    "save_path = \"elephant_cam.jpg\"\n",
-    "superimposed_img.save(save_path)"
+    "plt.imshow(superimposed_img)"
    ]
   },
   {
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
index fd5125ba6c..02956e9c79 100644
--- a/chapter11_image-segmentation.ipynb
+++ b/chapter11_image-segmentation.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index fab80f2268..6b562082e3 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
index f5b2034024..0fe4788f17 100644
--- a/chapter13_timeseries-forecasting.ipynb
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
index b4e40559c1..75c19d9911 100644
--- a/chapter14_text-classification.ipynb
+++ b/chapter14_text-classification.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
index 2e79052525..d43b72e86e 100644
--- a/chapter15_language-models-and-the-transformer.ipynb
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter16_text-generation.ipynb b/chapter16_text-generation.ipynb
index 49a83e68e8..d8431361ff 100644
--- a/chapter16_text-generation.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io).\n"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index 62636cc7fe..f78b560505 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
diff --git a/chapter18_best-practices-for-the-real-world.ipynb b/chapter18_best-practices-for-the-real-world.ipynb
index 7b8f10cdb2..d7e28359aa 100644
--- a/chapter18_best-practices-for-the-real-world.ipynb
+++ b/chapter18_best-practices-for-the-real-world.ipynb
@@ -32,6 +32,31 @@
     "os.environ[\"KERAS_BACKEND\"] = \"jax\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "cellView": "form",
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# @title\n",
+    "import os\n",
+    "from IPython.core.magic import register_cell_magic\n",
+    "\n",
+    "@register_cell_magic\n",
+    "def backend(line, cell):\n",
+    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
+    "    if current == required:\n",
+    "        get_ipython().run_cell(cell)\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
+    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
+    "        )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {

From dea84a4e12cc3bb3cb61ce952a0168bca113d315 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Mon, 4 Aug 2025 14:59:40 -0700
Subject: [PATCH 19/24] Update the readme

---
 README.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 506d410202..a57c07e7c5 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,61 @@
-# Companion Jupyter notebooks for the book "Deep Learning with Python" (2025)
+# Companion notebooks for Deep Learning with Python
 
-This repository contains Jupyter notebooks implementing the code samples found in the book [Deep Learning with Python, third edition](https://www.manning.com/books/deep-learning-with-python-third-edition?a_aid=keras&a_bid=76564dff)
-by Francois Chollet and Matthew Watson.
-
-In addition, you will also find the legacy notebooks for the [second edition (2021)](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff)
+This repository contains Jupyter notebooks implementing the code samples found in the book [Deep Learning with Python, third edition (2025)](https://www.manning.com/books/deep-learning-with-python-third-edition?a_aid=keras&a_bid=76564dff)
+by Francois Chollet and Matthew Watson. In addition, you will also find the legacy notebooks for the [second edition (2021)](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff)
 and the [first edition (2017)](https://www.manning.com/books/deep-learning-with-python?a_aid=keras&a_bid=76564dff).
 
 For readability, these notebooks only contain runnable code blocks and section titles, and omit everything else in the book: text paragraphs, figures, and pseudocode.
 **If you want to be able to follow what's going on, I recommend reading the notebooks side by side with your copy of the book.**
 
+## Running the code
+
+We recommend running these notebooks on [Colab](https://colab.google), which
+provides a hosted runtime with all the dependencies you will need. You can also
+run these notebooks locally, either by setting up your own Jupyter environment,
+or using Colab's instructions for
+[running locally](https://research.google.com/colaboratory/local-runtimes.html).
+
+By default, all notebooks will run on Colab's free tier GPU runtime, which
+is sufficient to run all code in this book. Chapters 8-18 will benefit
+from a faster GPU if you have a Colab Pro subscription. You can change your
+runtime type using **Runtime -> Change runtime type** in Colab's dropdown menus.
+
+## Choosing a backend
+
+The code for the third edition is written using Keras 3. As such, it can be run with
+JAX, TensorFlow or PyTorch as a backend. To set the backend, update the backend
+in the cell at the top of the colab that looks like this:
+
+```python
+import os
+os.environ["KERAS_BACKEND"] = "jax"
+```
+
+This must be done only once per session before importing Keras. If you are
+in the middle of running a notebook, you will need to restart the notebook session
+and rerun all relevant notebook cells. This can be done using
+**Runtime -> Restart Session** in Colab's dropdown menus.
+
+## Using Kaggle data
+
+This book uses datasets and model weights provided by Kaggle, an online Machine
+Learning community and platform. You will need to create a Kaggle login to run
+Kaggle code in this book; instructions are given in Chapter 8.
+
+For chapters that need Kaggle data, you can log in to Kaggle once per session
+when you hit the notebook cell with `kagglehub.login()`. Alternatively,
+you can set up your Kaggle login information once as Colab secrets:
+
+ * Go to https://www.kaggle.com/ and sign in.
+ * Go to https://www.kaggle.com/settings and generate a Kaggle API key.
+ * Open the secrets tab in Colab by clicking the key icon on the left.
+ * Add two secrets, `KAGGLE_USERNAME` and `KAGGLE_KEY` with the username and key
+   you just created.
+
+Following this approach you will only need to copy your Kaggle secret key once,
+though you will need to allow each notebook to access your secrets when running
+the relevant Kaggle code.
+
 ## Table of contents
 
 * [Chapter 2: The mathematical building blocks of neural networks](https://colab.research.google.com/github/fchollet/deep-learning-with-python-notebooks/blob/master/chapter02_mathematical-building-blocks.ipynb)

From 691bd68d6ca48fbb5effdb760ad9cd225649d0af Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Tue, 19 Aug 2025 23:17:31 -0700
Subject: [PATCH 20/24] Switch chapter 2 backend to tf

---
 chapter02_mathematical-building-blocks.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index 4f6e642623..de53b0dd64 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -29,7 +29,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+    "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\""
    ]
   },
   {

From 5bcc7b77ff1909e01c8c48c6f5ca31b15e796197 Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Thu, 21 Aug 2025 13:33:47 -0700
Subject: [PATCH 21/24] Update notebooks

---
 chapter07_deep-dive-keras.ipynb     |  2 +-
 chapter11_image-segmentation.ipynb  | 13 +++++++++++++
 chapter14_text-classification.ipynb | 16 ++++++++++++++++
 chapter16_text-generation.ipynb     |  8 ++++----
 4 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/chapter07_deep-dive-keras.ipynb b/chapter07_deep-dive-keras.ipynb
index 8211088123..be5963473c 100644
--- a/chapter07_deep-dive-keras.ipynb
+++ b/chapter07_deep-dive-keras.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io).\n"
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
    ]
   },
   {
diff --git a/chapter11_image-segmentation.ipynb b/chapter11_image-segmentation.ipynb
index 02956e9c79..74f4c6b13d 100644
--- a/chapter11_image-segmentation.ipynb
+++ b/chapter11_image-segmentation.ipynb
@@ -259,6 +259,19 @@
     "model = get_model(img_size=img_size, num_classes=3)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# \u26a0\ufe0fNOTE\u26a0\ufe0f: The following IoU metric is *very* slow on the PyTorch backend!\n",
+    "# If you are running with PyTorch, we recommend re-running the notebook with Jax\n",
+    "# or TensorFlow, or skipping to the next section of this chapter."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
diff --git a/chapter14_text-classification.ipynb b/chapter14_text-classification.ipynb
index 75c19d9911..15e34f0f0c 100644
--- a/chapter14_text-classification.ipynb
+++ b/chapter14_text-classification.ipynb
@@ -1106,6 +1106,22 @@
     "model.summary(line_length=80)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# \u26a0\ufe0fNOTE\u26a0\ufe0f: The following fit call will error on a T4 GPU on the TensorFlow\n",
+    "# backend due to a bug in TensorFlow. If the following cell errors out,\n",
+    "# do one of the following:\n",
+    "# - Skip the following two cells.\n",
+    "# - Switch to the Jax or Torch backend and re-run this notebook.\n",
+    "# - Change the GPU type in your runtime (requires Colab Pro as of this writing)."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
diff --git a/chapter16_text-generation.ipynb b/chapter16_text-generation.ipynb
index d8431361ff..06ce65647d 100644
--- a/chapter16_text-generation.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -6,7 +6,7 @@
     "colab_type": "text"
    },
    "source": [
-    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
+    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io).\n"
    ]
   },
   {
@@ -972,9 +972,9 @@
    },
    "outputs": [],
    "source": [
-    "# NOTE: If you are running on the free tier Colab GPUs, you will need to restart\n",
-    "# your runtime and run the notebook from here to free up memory for this\n",
-    "# 4 billion parameter model.\n",
+    "# \u26a0\ufe0fNOTE\u26a0\ufe0f: If you are running on the free tier Colab GPUs, you will need to\n",
+    "# restart your runtime and run the notebook from here to free up memory for\n",
+    "# this 4 billion parameter model.\n",
     "import os\n",
     "\n",
     "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n",

From 7732282de5d1f17a7e53f006feb36c4fdc20c84b Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Wed, 27 Aug 2025 11:13:56 -0700
Subject: [PATCH 22/24] Fix chapter 17 notebook

---
 chapter17_image-generation.ipynb | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/chapter17_image-generation.ipynb b/chapter17_image-generation.ipynb
index f78b560505..b34e4d5839 100644
--- a/chapter17_image-generation.ipynb
+++ b/chapter17_image-generation.ipynb
@@ -534,16 +534,7 @@
     "\n",
     "    def compute_loss(self, x, y, y_pred, sample_weight=None, training=True):\n",
     "        _, pred_noise_masks, noise_masks = y_pred\n",
-    "        return self.loss(noise_masks, pred_noise_masks)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### The generation process\n",
+    "        return self.loss(noise_masks, pred_noise_masks)\n",
     "\n",
     "    def generate(self, num_images, diffusion_steps):\n",
     "        noisy_images = keras.random.normal(\n",
@@ -570,6 +561,15 @@
     "        return ops.clip(images, 0.0, 255.0)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "#### The generation process"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {

From 11e68ca13e41dc80300ac47c32993a849966653b Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Wed, 10 Sep 2025 13:46:11 -0700
Subject: [PATCH 23/24] Updates to chapter 12 and chapter 16 notebooks

---
 chapter12_object-detection.ipynb |  6 +-----
 chapter16_text-generation.ipynb  | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/chapter12_object-detection.ipynb b/chapter12_object-detection.ipynb
index 6b562082e3..47bf3e1008 100644
--- a/chapter12_object-detection.ipynb
+++ b/chapter12_object-detection.ipynb
@@ -618,11 +618,7 @@
    },
    "outputs": [],
    "source": [
-    "url = (\n",
-    "    \"https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/\"\n",
-    "    \"A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg/\"\n",
-    "    \"1280px-A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg\"\n",
-    ")\n",
+    "url = \"https://s3.us-east-1.amazonaws.com/book.keras.io/3e/seurat.jpg\"\n",
     "path = keras.utils.get_file(origin=url)\n",
     "image = np.array([keras.utils.load_img(path)])"
    ]
diff --git a/chapter16_text-generation.ipynb b/chapter16_text-generation.ipynb
index 06ce65647d..4dfc93d3e7 100644
--- a/chapter16_text-generation.ipynb
+++ b/chapter16_text-generation.ipynb
@@ -180,7 +180,7 @@
    "source": [
     "import tensorflow as tf\n",
     "\n",
-    "batch_size = 128\n",
+    "batch_size = 64\n",
     "sequence_length = 256\n",
     "suffix = np.array([tokenizer.token_to_id(\"<|endoftext|>\")])\n",
     "\n",
@@ -206,7 +206,7 @@
    },
    "outputs": [],
    "source": [
-    "num_batches = 29373\n",
+    "num_batches = 58746\n",
     "num_val_batches = 500\n",
     "num_train_batches = num_batches - num_val_batches\n",
     "val_ds = ds.take(num_val_batches).repeat()\n",
@@ -359,6 +359,19 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "# \u26a0\ufe0fNOTE\u26a0\ufe0f: If you can run the following with a Colab Pro GPU, we suggest you\n",
+    "# do so. This fit() call will take many hours on free tier GPUs. You can also\n",
+    "# reduce steps_per_epoch to try the code with a less trained model."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,

From fbf7f1bf2041093c4081e72544ec5905ace9dc8f Mon Sep 17 00:00:00 2001
From: Matt Watson <mattdangerw@gmail.com>
Date: Wed, 17 Sep 2025 22:12:28 -0700
Subject: [PATCH 24/24] Update header names to agree with final book edits

---
 chapter02_mathematical-building-blocks.ipynb        | 4 ++--
 chapter04_classification-and-regression.ipynb       | 2 +-
 chapter08_image-classification.ipynb                | 4 ++--
 chapter09_convnet-architecture-patterns.ipynb       | 2 +-
 chapter10_interpreting-what-convnets-learn.ipynb    | 6 +++---
 chapter13_timeseries-forecasting.ipynb              | 2 +-
 chapter15_language-models-and-the-transformer.ipynb | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/chapter02_mathematical-building-blocks.ipynb b/chapter02_mathematical-building-blocks.ipynb
index de53b0dd64..3c419b7b8f 100644
--- a/chapter02_mathematical-building-blocks.ipynb
+++ b/chapter02_mathematical-building-blocks.ipynb
@@ -639,7 +639,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### The gears of neural networks: tensor operations"
+    "### The gears of neural networks: Tensor operations"
    ]
   },
   {
@@ -1010,7 +1010,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### Derivative of a tensor operation: the gradient"
+    "#### Derivative of a tensor operation: The gradient"
    ]
   },
   {
diff --git a/chapter04_classification-and-regression.ipynb b/chapter04_classification-and-regression.ipynb
index 1e2e7e8225..6e68704a45 100644
--- a/chapter04_classification-and-regression.ipynb
+++ b/chapter04_classification-and-regression.ipynb
@@ -939,7 +939,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Predicting house prices: a regression example"
+    "### Predicting house prices: A regression example"
    ]
   },
   {
diff --git a/chapter08_image-classification.ipynb b/chapter08_image-classification.ipynb
index 3ce065e2b3..63d8e640f7 100644
--- a/chapter08_image-classification.ipynb
+++ b/chapter08_image-classification.ipynb
@@ -72,7 +72,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Introduction to convnets"
+    "### Introduction to ConvNets"
    ]
   },
   {
@@ -213,7 +213,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Training a convnet from scratch on a small dataset"
+    "### Training a ConvNet from scratch on a small dataset"
    ]
   },
   {
diff --git a/chapter09_convnet-architecture-patterns.ipynb b/chapter09_convnet-architecture-patterns.ipynb
index 218546a3c6..136ebaa12d 100644
--- a/chapter09_convnet-architecture-patterns.ipynb
+++ b/chapter09_convnet-architecture-patterns.ipynb
@@ -63,7 +63,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Convnet architecture patterns"
+    "## ConvNet architecture patterns"
    ]
   },
   {
diff --git a/chapter10_interpreting-what-convnets-learn.ipynb b/chapter10_interpreting-what-convnets-learn.ipynb
index 1ff0164326..869c82d8f5 100644
--- a/chapter10_interpreting-what-convnets-learn.ipynb
+++ b/chapter10_interpreting-what-convnets-learn.ipynb
@@ -63,7 +63,7 @@
     "colab_type": "text"
    },
    "source": [
-    "## Interpreting what convnets learn"
+    "## Interpreting what ConvNets learn"
    ]
   },
   {
@@ -246,7 +246,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Visualizing convnet filters"
+    "### Visualizing ConvNet filters"
    ]
   },
   {
@@ -791,7 +791,7 @@
     "colab_type": "text"
    },
    "source": [
-    "### Visualizing the latent space of a convnet"
+    "### Visualizing the latent space of a ConvNet"
    ]
   }
  ],
diff --git a/chapter13_timeseries-forecasting.ipynb b/chapter13_timeseries-forecasting.ipynb
index 0fe4788f17..2c60dd76be 100644
--- a/chapter13_timeseries-forecasting.ipynb
+++ b/chapter13_timeseries-forecasting.ipynb
@@ -290,7 +290,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### A common-sense, non-machine-learning baseline"
+    "#### A commonsense, non-machine-learning baseline"
    ]
   },
   {
diff --git a/chapter15_language-models-and-the-transformer.ipynb b/chapter15_language-models-and-the-transformer.ipynb
index d43b72e86e..6f8182e1bc 100644
--- a/chapter15_language-models-and-the-transformer.ipynb
+++ b/chapter15_language-models-and-the-transformer.ipynb
@@ -349,7 +349,7 @@
     "colab_type": "text"
    },
    "source": [
-    "#### English-to-Spanish Translation"
+    "#### English-to-Spanish translation"
    ]
   },
   {