pytorch · parmeet · Mar 13, 2022 · Mar 12, 2022
diff --git a/advanced_source/ddp_pipeline.py b/advanced_source/ddp_pipeline.py
@@ -139,8 +139,10 @@ def run_worker(rank, world_size):
 
 
 ######################################################################
-# The training process uses Wikitext-2 dataset from ``torchtext``. The
-# vocab object is built based on the train dataset and is used to numericalize
+# The training process uses the Wikitext-2 dataset from ``torchtext``.
+# To access torchtext datasets, please install torchdata by following the instructions at https://github.com/pytorch/data.
+#
+# The vocab object is built based on the train dataset and is used to numericalize
 # tokens into tensors. Starting from sequential data, the ``batchify()``
 # function arranges the dataset into columns, trimming off any tokens remaining
 # after the data has been divided into batches of size ``batch_size``.

diff --git a/beginner_source/text_sentiment_ngrams_tutorial.py b/beginner_source/text_sentiment_ngrams_tutorial.py
@@ -15,6 +15,9 @@
 # -----------------------------------
 #
 # The torchtext library provides a few raw dataset iterators, which yield the raw text strings. For example, the ``AG_NEWS`` dataset iterators yield the raw data as a tuple of label and text.
+#
+# To access torchtext datasets, please install torchdata by following the instructions at https://github.com/pytorch/data.
+#
 
 import torch
 from torchtext.datasets import AG_NEWS

diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py
@@ -132,8 +132,10 @@ def forward(self, x: Tensor) -> Tensor:
 
 
 ######################################################################
-# This tutorial uses ``torchtext`` to generate Wikitext-2 dataset. The
-# vocab object is built based on the train dataset and is used to numericalize
+# This tutorial uses ``torchtext`` to generate the Wikitext-2 dataset.
+# To access torchtext datasets, please install torchdata by following the instructions at https://github.com/pytorch/data.
+#
+# The vocab object is built based on the train dataset and is used to numericalize
 # tokens into tensors. Wikitext-2 represents rare tokens as `<unk>`.
 #
 # Given a 1-D vector of sequential data, ``batchify()`` arranges the data

diff --git a/beginner_source/translation_transformer.py b/beginner_source/translation_transformer.py
@@ -2,9 +2,9 @@
 Language Translation with nn.Transformer and torchtext
 ======================================================
 
-This tutorial shows, how to train a translation model from scratch using
-Transformer. We will be using `Multi30k <http://www.statmt.org/wmt16/multimodal-task.html#task1>`__ 
-dataset to train a German to English translation model.
+This tutorial shows:
+    - How to train a translation model from scratch using Transformer.
+    - How to use the torchtext library to access the `Multi30k <http://www.statmt.org/wmt16/multimodal-task.html#task1>`__ dataset to train a German to English translation model.
 """
 
 
@@ -19,6 +19,7 @@
 # `Multi30k dataset from torchtext library <https://pytorch.org/text/stable/datasets.html#multi30k>`__
 # that yields a pair of source-target raw sentences. 
 #
+# To access torchtext datasets, please install torchdata by following the instructions at https://github.com/pytorch/data.
 #
 
 from torchtext.data.utils import get_tokenizer

diff --git a/intermediate_source/pipeline_tutorial.py b/intermediate_source/pipeline_tutorial.py
@@ -122,8 +122,10 @@ def forward(self, x):
 
 
 ######################################################################
-# The training process uses Wikitext-2 dataset from ``torchtext``. The
-# vocab object is built based on the train dataset and is used to numericalize
+# The training process uses the Wikitext-2 dataset from ``torchtext``.
+# To access torchtext datasets, please install torchdata by following the instructions at https://github.com/pytorch/data.
+#
+# The vocab object is built based on the train dataset and is used to numericalize
 # tokens into tensors. Starting from sequential data, the ``batchify()``
 # function arranges the dataset into columns, trimming off any tokens remaining
 # after the data has been divided into batches of size ``batch_size``.