diff --git a/docs/source/models.rst b/docs/source/models.rst
index c9156e13b68..3aba993a2c0 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -64,6 +64,7 @@ These can be constructed by passing ``pretrained=True``:
     shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
     mobilenet_v2 = models.mobilenet_v2(pretrained=True)
     mobilenet_v3_large = models.mobilenet_v3_large(pretrained=True)
+    mobilenet_v3_small = models.mobilenet_v3_small(pretrained=True)
     resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
     wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
     mnasnet = models.mnasnet1_0(pretrained=True)
@@ -143,6 +144,7 @@ ShuffleNet V2 x1.0         69.362      88.316
 ShuffleNet V2 x0.5         60.552      81.746
 MobileNet V2               71.878      90.286
 MobileNet V3 Large         74.042      91.340
+MobileNet V3 Small         67.668      87.402
 ResNeXt-50-32x4d           77.618      93.698
 ResNeXt-101-32x8d          79.312      94.526
 Wide ResNet-50-2           78.468      94.086
diff --git a/references/classification/README.md b/references/classification/README.md
index 1694b25c7a8..7a3144b7cac 100644
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -54,14 +54,18 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
 ```
 
-### MobileNetV3 Large
+### MobileNetV3 Large & Small
 ```
 python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
-    --model mobilenet_v3_large --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\
+    --model $MODEL --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\
     --wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2
 ```
 
-Then we averaged the parameters of the last 3 checkpoints that improved the Acc@1. See [#3182](https://github.com/pytorch/vision/pull/3182) for details.
+Here `$MODEL` is one of `mobilenet_v3_large` or `mobilenet_v3_small`.
+
+Then we averaged the parameters of the last 3 checkpoints that improved the Acc@1. See [#3182](https://github.com/pytorch/vision/pull/3182)
+and [#3354](https://github.com/pytorch/vision/pull/3354) for details.
+
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex).
 
 
diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py
index f6117dcb989..1e2606daa42 100644
--- a/torchvision/models/mobilenetv3.py
+++ b/torchvision/models/mobilenetv3.py
@@ -14,7 +14,7 @@
 
 model_urls = {
     "mobilenet_v3_large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
-    "mobilenet_v3_small": None,
+    "mobilenet_v3_small": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth",
 }