Commit 2151259

Merge branch 'main' into main

2 parents 366fcbb + 37462ab

File tree: 9 files changed, +242 -10 lines

_static/img/distributed/fsdp_sharding.png (binary image, 91 KB)
advanced_source/neural_style_tutorial.py (+2 -2)

@@ -56,7 +56,7 @@
 import matplotlib.pyplot as plt

 import torchvision.transforms as transforms
-import torchvision.models as models
+from torchvision.models import vgg19, VGG19_Weights

 import copy

@@ -262,7 +262,7 @@ def forward(self, input):
 # network to evaluation mode using ``.eval()``.
 #

-cnn = models.vgg19(pretrained=True).features.eval()
+cnn = vgg19(weights=VGG19_Weights.DEFAULT).features.eval()

docathon-leaderboard.md (+43)

@@ -1,3 +1,46 @@
+# 🎉 Docathon H2 2023 Leaderboard 🎉
+
+This is the list of docathon contributors who participated in and contributed to the H2 2023 PyTorch docathon.
+A big shout out to everyone who participated! We awarded points for each merged PR:
+2 points for the **easy** label, 5 points for the **medium** label, and 10 points for the
+**advanced** label. In some cases, we awarded half credit for PRs that were not merged or
+for issues that were closed without a merged PR. Thank you all for your awesome contributions! 🎉
+
+| Author | Points | PR |
+|--- | --- | ---|
+| ahoblitz | 25 | https://github.com/pytorch/pytorch/pull/112992, https://github.com/pytorch/tutorials/pull/2662, https://github.com/pytorch/tutorials/pull/2647, https://github.com/pytorch/tutorials/pull/2642, https://github.com/pytorch/tutorials/pull/2640, https://github.com/pytorch/pytorch/pull/113092, https://github.com/pytorch/pytorch/pull/113348 |
+| ChanBong | 22 | https://github.com/pytorch/pytorch/pull/113337, https://github.com/pytorch/pytorch/pull/113336, https://github.com/pytorch/pytorch/pull/113335, https://github.com/pytorch/tutorials/pull/2644, https://github.com/pytorch/tutorials/pull/2639 |
+| alperenunlu | 22 | https://github.com/pytorch/pytorch/pull/113260, https://github.com/pytorch/tutorials/pull/2673, https://github.com/pytorch/tutorials/pull/2660, https://github.com/pytorch/tutorials/pull/2656, https://github.com/pytorch/tutorials/pull/2649, https://github.com/pytorch/pytorch/pull/113505, https://github.com/pytorch/pytorch/pull/113218, https://github.com/pytorch/pytorch/pull/113505 |
+| spzala | 22 | https://github.com/pytorch/pytorch/pull/113200, https://github.com/pytorch/pytorch/pull/112693, https://github.com/pytorch/tutorials/pull/2667, https://github.com/pytorch/tutorials/pull/2635 |
+| bjhargrave | 21 | https://github.com/pytorch/pytorch/pull/113358, https://github.com/pytorch/pytorch/pull/113206, https://github.com/pytorch/pytorch/pull/112786, https://github.com/pytorch/tutorials/pull/2661, https://github.com/pytorch/tutorials/pull/1272 |
+| zabboud | 21 | https://github.com/pytorch/pytorch/pull/113233, https://github.com/pytorch/pytorch/pull/113227, https://github.com/pytorch/pytorch/pull/113177, https://github.com/pytorch/pytorch/pull/113219, https://github.com/pytorch/pytorch/pull/113311 |
+| nvs-abhilash | 20 | https://github.com/pytorch/pytorch/pull/113241, https://github.com/pytorch/pytorch/pull/112765, https://github.com/pytorch/pytorch/pull/112695, https://github.com/pytorch/pytorch/pull/112657 |
+| guptaaryan16 | 19 | https://github.com/pytorch/pytorch/pull/112817, https://github.com/pytorch/pytorch/pull/112735, https://github.com/pytorch/tutorials/pull/2674, https://github.com/pytorch/pytorch/pull/113196, https://github.com/pytorch/pytorch/pull/113532 |
+| min-jean-cho | 17 | https://github.com/pytorch/pytorch/pull/113195, https://github.com/pytorch/pytorch/pull/113183, https://github.com/pytorch/pytorch/pull/113178, https://github.com/pytorch/pytorch/pull/113109, https://github.com/pytorch/pytorch/pull/112892 |
+| markstur | 14 | https://github.com/pytorch/pytorch/pull/113250, https://github.com/pytorch/tutorials/pull/2643, https://github.com/pytorch/tutorials/pull/2638, https://github.com/pytorch/tutorials/pull/2636 |
+| RustyGrackle | 13 | https://github.com/pytorch/pytorch/pull/113371, https://github.com/pytorch/pytorch/pull/113266, https://github.com/pytorch/pytorch/pull/113435 |
+| Viditagarwal7479 | 12 | https://github.com/pytorch/pytorch/pull/112860, https://github.com/pytorch/tutorials/pull/2659, https://github.com/pytorch/tutorials/pull/2671 |
+| kiszk | 10 | https://github.com/pytorch/pytorch/pull/113523, https://github.com/pytorch/pytorch/pull/112751 |
+| awaelchli | 10 | https://github.com/pytorch/pytorch/pull/113216, https://github.com/pytorch/pytorch/pull/112674 |
+| pilot-j | 10 | https://github.com/pytorch/pytorch/pull/112964, https://github.com/pytorch/pytorch/pull/112856 |
+| krishnakalyan3 | 7 | https://github.com/pytorch/tutorials/pull/2653, https://github.com/pytorch/tutorials/pull/1235, https://github.com/pytorch/tutorials/pull/1705 |
+| ash-01xor | 5 | https://github.com/pytorch/pytorch/pull/113511 |
+| IvanLauLinTiong | 5 | https://github.com/pytorch/pytorch/pull/113052 |
+| Senthi1Kumar | 5 | https://github.com/pytorch/pytorch/pull/113021 |
+| ooooo-create | 5 | https://github.com/pytorch/pytorch/pull/112953 |
+| stanleyedward | 5 | https://github.com/pytorch/pytorch/pull/112864, https://github.com/pytorch/pytorch/pull/112617 |
+| leslie-fang-intel | 5 | https://github.com/pytorch/tutorials/pull/2668 |
+| measty | 5 | https://github.com/pytorch/tutorials/pull/2675 |
+| Hhhhhhao | 5 | https://github.com/pytorch/tutorials/pull/2676 |
+| andrewashere | 3 | https://github.com/pytorch/pytorch/pull/112721 |
+| aalhendi | 3 | https://github.com/pytorch/pytorch/pull/112947 |
+| sitamgithub-MSIT | 3 | https://github.com/pytorch/pytorch/pull/113264 |
+| Jarlaze | 3 | https://github.com/pytorch/pytorch/pull/113531 |
+| jingxu10 | 2 | https://github.com/pytorch/tutorials/pull/2657 |
+| cirquit | 2 | https://github.com/pytorch/tutorials/pull/2529 |
+| prithviraj-maurya | 1 | https://github.com/pytorch/tutorials/pull/2652 |
+| MirMustafaAli | 1 | https://github.com/pytorch/tutorials/pull/2645 |
+
 # 🎉 Docathon H1 2023 Leaderboard 🎉
 This is the list of the docathon contributors that have participated and contributed to the PyTorch docathon.
 A big shout out to everyone who have participated! We have awarded points for each merged PR.

index.rst (+2 -1)

@@ -293,7 +293,7 @@ What's new in PyTorch tutorials?
    :header: Introduction to ONNX Registry
    :card_description: Demonstrate end-to-end how to address unsupported operators by using ONNX Registry.
    :image: _static/img/thumbnails/cropped/Exporting-PyTorch-Models-to-ONNX-Graphs.png
-   :link: advanced/onnx_registry_tutorial.html
+   :link: advanced/onnx_registry_tutorial.html
    :tags: Production,ONNX,Backends

 .. Reinforcement Learning

@@ -1050,6 +1050,7 @@ Additional Resources
    intermediate/scaled_dot_product_attention_tutorial
    beginner/knowledge_distillation_tutorial

+
 .. toctree::
    :maxdepth: 2
    :includehidden:

intermediate_source/FSDP_tutorial.rst (+9)

@@ -46,6 +46,15 @@ At a high level FSDP works as follow:
 * Run reduce_scatter to sync gradients
 * Discard parameters.

+One way to view FSDP's sharding is to decompose the DDP gradient all-reduce into a reduce-scatter and an all-gather. Specifically, during the backward pass, FSDP reduces and scatters gradients, ensuring that each rank holds a shard of the gradients. It then updates the corresponding shard of the parameters in the optimizer step. Finally, in the subsequent forward pass, it performs an all-gather operation to collect and combine the updated parameter shards.
+
+.. figure:: /_static/img/distributed/fsdp_sharding.png
+   :width: 100%
+   :align: center
+   :alt: FSDP allreduce
+
+   FSDP Allreduce
+
 How to use FSDP
 ---------------

 Here we use a toy model to run training on the MNIST dataset for demonstration purposes. The APIs and logic can be applied to training larger models as well.
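
To make the decomposition concrete, here is a minimal sketch (not part of the diff) of the collective identity the added paragraph describes: an all-reduce equals a reduce-scatter followed by an all-gather. It assumes an initialized ``torch.distributed`` process group and a tensor whose element count is divisible by the world size; note that FSDP itself all-gathers parameters, not gradients, in the forward pass.

    import torch
    import torch.distributed as dist

    def allreduce_via_reduce_scatter(grad: torch.Tensor) -> torch.Tensor:
        """Same result as dist.all_reduce(grad): reduce-scatter, then all-gather."""
        world_size = dist.get_world_size()
        flat = grad.reshape(-1)
        shard = torch.empty(flat.numel() // world_size, dtype=flat.dtype, device=flat.device)
        dist.reduce_scatter_tensor(shard, flat)   # each rank now holds the summed values of its shard
        out = torch.empty_like(flat)
        dist.all_gather_into_tensor(out, shard)   # every rank reassembles the full reduced tensor
        return out.view_as(grad)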

intermediate_source/reinforcement_q_learning.py (+4 -4)

@@ -7,7 +7,7 @@

 This tutorial shows how to use PyTorch to train a Deep Q Learning (DQN) agent
-on the CartPole-v1 task from `Gymnasium <https://www.gymnasium.farama.org>`__.
+on the CartPole-v1 task from `Gymnasium <https://gymnasium.farama.org>`__.

 **Task**

@@ -283,7 +283,7 @@ def select_action(state):
             # t.max(1) will return the largest column value of each row.
             # second column on max result is index of where max element was
             # found, so we pick action with the larger expected reward.
-            return policy_net(state).max(1)[1].view(1, 1)
+            return policy_net(state).max(1).indices.view(1, 1)
     else:
         return torch.tensor([[env.action_space.sample()]], device=device, dtype=torch.long)

@@ -360,12 +360,12 @@ def optimize_model():

     # Compute V(s_{t+1}) for all next states.
     # Expected values of actions for non_final_next_states are computed based
-    # on the "older" target_net; selecting their best reward with max(1)[0].
+    # on the "older" target_net; selecting their best reward with max(1).values
     # This is merged based on the mask, such that we'll have either the expected
     # state value or 0 in case the state was final.
     next_state_values = torch.zeros(BATCH_SIZE, device=device)
     with torch.no_grad():
-        next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0]
+        next_state_values[non_final_mask] = target_net(non_final_next_states).max(1).values
     # Compute the expected Q values
     expected_state_action_values = (next_state_values * GAMMA) + reward_batch
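
The ``[0]``/``[1]`` to ``.values``/``.indices`` changes above are purely cosmetic: ``Tensor.max(dim)`` returns a named tuple, so both spellings are equivalent, as this small check shows:

    import torch

    q = torch.tensor([[0.1, 0.9], [0.7, 0.3]])
    result = q.max(1)  # torch.return_types.max(values=..., indices=...)
    assert torch.equal(result.values, q.max(1)[0])
    assert torch.equal(result.indices, q.max(1)[1])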

prototype_source/pt2e_quant_ptq_x86_inductor.rst (+29 -3)

@@ -165,11 +165,37 @@ After we get the quantized model, we will further lower it to the inductor backend

 ::

-    optimized_model = torch.compile(converted_model)
+    with torch.no_grad():
+        optimized_model = torch.compile(converted_model)

-    # Running some benchmark
-    optimized_model(*example_inputs)
+        # Running some benchmark
+        optimized_model(*example_inputs)
+
+In a more advanced scenario, int8-mixed-bf16 quantization comes into play: a Convolution or GEMM
+operator produces BFloat16 output instead of Float32 when it has no subsequent quantization node.
+The BFloat16 tensor then propagates through the following pointwise operators, reducing memory
+usage and potentially improving performance. Using this feature is as simple as using regular
+BFloat16 Autocast: wrap the script within the BFloat16 Autocast context.
+
+::
+
+    with torch.autocast(device_type="cpu", dtype=torch.bfloat16, enabled=True), torch.no_grad():
+        # Turn on Autocast to use int8-mixed-bf16 quantization. After lowering into the Inductor CPP backend,
+        # for operators such as QConvolution and QLinear:
+        # * The input data type is consistently defined as int8, due to the pair of
+        #   quantization and dequantization nodes inserted at the input.
+        # * The computation precision remains int8.
+        # * The output data type may be either int8 or BFloat16, depending on the presence
+        #   of a pair of quantization and dequantization nodes at the output.
+        # For non-quantizable pointwise operators, the data type is inherited from the previous node,
+        # potentially resulting in BFloat16 in this scenario.
+        # For quantizable pointwise operators such as QMaxpool2D, input and output stay int8.
+        optimized_model = torch.compile(converted_model)
+
+        # Running some benchmark
+        optimized_model(*example_inputs)

 Putting all this code together, we have the toy example code.
 Please note that since the Inductor ``freeze`` feature is not turned on by default yet, run your example code with ``TORCHINDUCTOR_FREEZING=1``.

recipes_source/recipes_index.rst (+9)

@@ -324,6 +324,15 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch features
    :link: ../recipes/DCP_tutorial.html
    :tags: Distributed-Training

+.. TorchServe
+
+.. customcarditem::
+   :header: Deploying a PyTorch Stable Diffusion model as a Vertex AI Endpoint
+   :card_description: Learn how to deploy a model to Vertex AI with TorchServe
+   :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png
+   :link: ../recipes/torchserve_vertexai_tutorial.html
+   :tags: Production
+
 .. End of tutorial card section

 .. raw:: html
recipes_source/torchserve_vertexai_tutorial.rst (new file, +144)

@@ -0,0 +1,144 @@
+Deploying a PyTorch Stable Diffusion model as a Vertex AI Endpoint
+==================================================================
+
+Deploying large models, like Stable Diffusion, can be challenging and time-consuming.
+In this recipe, we will show how you can streamline the deployment of a PyTorch Stable Diffusion
+model by leveraging Vertex AI.
+
+PyTorch is the framework used by Stability AI for Stable Diffusion v1.5. Vertex AI is a
+fully-managed machine learning platform with tools and infrastructure designed to help ML
+practitioners accelerate and scale ML in production with the benefit of open-source
+frameworks like PyTorch.
+
+Deploying a PyTorch Stable Diffusion model (v1.5) on a Vertex AI Endpoint can be done in four steps:
+
+* Create a custom TorchServe handler.
+
+* Upload model artifacts to Google Cloud Storage (GCS).
+
+* Create a Vertex AI model with the model artifacts and a prebuilt PyTorch container image.
+
+* Deploy the Vertex AI model onto an endpoint.
+
+Let's have a look at each step in more detail. You can follow and implement the steps using the
+`Notebook example <https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/vertex_endpoints/torchserve/dreambooth_stablediffusion.ipynb>`__.
+
+NOTE: Please keep in mind that this recipe requires billable Vertex AI resources, as explained in more detail in the notebook example.
+
+Create a custom TorchServe handler
+----------------------------------
+
+TorchServe is an easy and flexible tool for serving PyTorch models. The model deployed to Vertex AI
+uses TorchServe to handle requests and return responses from the model.
+You must create a custom TorchServe handler to include in the model artifacts uploaded to Vertex AI. Include the handler file in the
+directory with the other model artifacts, like this: `model_artifacts/handler.py`.
+
+After creating the handler file, you must package the handler as a model archiver (MAR) file.
+The output file must be named `model.mar`.
+
+.. code:: shell
+
+    !torch-model-archiver \
+    -f \
+    --model-name <your_model_name> \
+    --version 1.0 \
+    --handler model_artifacts/handler.py \
+    --export-path model_artifacts
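
The recipe does not reproduce the handler itself. As a rough, hypothetical sketch of what ``model_artifacts/handler.py`` could look like (the ``diffusers`` pipeline, the request parsing, and the base64 JPEG response format are all assumptions here; the notebook's actual handler may differ):

    import base64
    from io import BytesIO

    import torch
    from diffusers import StableDiffusionPipeline
    from ts.torch_handler.base_handler import BaseHandler


    class DiffusionHandler(BaseHandler):
        def initialize(self, context):
            # Model files ship inside the MAR; load the pipeline once per worker.
            model_dir = context.system_properties.get("model_dir")
            self.pipe = StableDiffusionPipeline.from_pretrained(model_dir)
            self.pipe.to("cuda" if torch.cuda.is_available() else "cpu")
            self.initialized = True

        def preprocess(self, data):
            # TorchServe passes a list of request rows; with Vertex AI each row
            # typically carries the JSON instance, e.g. {"prompt": "..."}.
            prompts = []
            for row in data:
                payload = row.get("data") or row.get("body") or row
                prompts.append(payload["prompt"])
            return prompts

        def inference(self, prompts):
            return self.pipe(prompts).images

        def postprocess(self, images):
            # Return one base64-encoded JPEG per request, matching the
            # base64.b64decode(...) call in the prediction snippet below.
            results = []
            for img in images:
                buf = BytesIO()
                img.save(buf, format="JPEG")
                results.append(base64.b64encode(buf.getvalue()).decode("utf-8"))
            return results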
+
+Upload model artifacts to Google Cloud Storage (GCS)
+----------------------------------------------------
+
+In this step we upload the
+`model artifacts <https://github.com/pytorch/serve/tree/master/model-archiver#artifact-details>`__,
+such as the model file and the handler, to GCS. The advantage of storing your artifacts on GCS is that you can
+track the artifacts in a central bucket.
+
+.. code:: python
+
+    BUCKET_NAME = "your-bucket-name-unique"  # @param {type:"string"}
+    BUCKET_URI = f"gs://{BUCKET_NAME}/"
+
+    # Will copy the artifacts into the bucket
+    !gsutil cp -r model_artifacts $BUCKET_URI
+
+Create a Vertex AI model with the model artifacts and a prebuilt PyTorch container image
+----------------------------------------------------------------------------------------
+
+Once you've uploaded the model artifacts into a GCS bucket, you can upload your PyTorch model to
+`Vertex AI Model Registry <https://cloud.google.com/vertex-ai/docs/model-registry/introduction>`__.
+From the Vertex AI Model Registry, you have an overview of your models
+so you can better organize, track, and train new versions. For this you can use the
+`Vertex AI SDK <https://cloud.google.com/vertex-ai/docs/python-sdk/use-vertex-ai-python-sdk>`__
+and this
+`pre-built PyTorch container <https://cloud.google.com/blog/products/ai-machine-learning/prebuilt-containers-with-pytorch-and-vertex-ai>`__.
+
+.. code:: python
+
+    from google.cloud import aiplatform as vertexai
+
+    PYTORCH_PREDICTION_IMAGE_URI = (
+        "us-docker.pkg.dev/vertex-ai/prediction/pytorch-gpu.1-12:latest"
+    )
+    MODEL_DISPLAY_NAME = "stable_diffusion_1_5-unique"
+    MODEL_DESCRIPTION = "stable_diffusion_1_5 container"
+
+    vertexai.init(project='your_project', location='us-central1', staging_bucket=BUCKET_NAME)
+
+    model = vertexai.Model.upload(
+        display_name=MODEL_DISPLAY_NAME,
+        description=MODEL_DESCRIPTION,
+        serving_container_image_uri=PYTORCH_PREDICTION_IMAGE_URI,
+        artifact_uri=BUCKET_URI,
+    )
+
+Deploy the Vertex AI model onto an endpoint
+-------------------------------------------
+
+Once the model has been uploaded to the Vertex AI Model Registry, you can deploy
+it to a Vertex AI Endpoint. For this you can use the Console or the Vertex AI SDK. In this
+example, you will deploy the model on an NVIDIA Tesla P100 GPU and an n1-standard-8 machine. You can
+specify your own machine type.
+
+.. code:: python
+
+    ENDPOINT_DISPLAY_NAME = "stable_diffusion_1_5-endpoint"  # any display name works
+    endpoint = vertexai.Endpoint.create(display_name=ENDPOINT_DISPLAY_NAME)
+
+    model.deploy(
+        endpoint=endpoint,
+        deployed_model_display_name=MODEL_DISPLAY_NAME,
+        machine_type="n1-standard-8",
+        accelerator_type="NVIDIA_TESLA_P100",
+        accelerator_count=1,
+        traffic_percentage=100,
+        deploy_request_timeout=1200,
+        sync=True,
+    )
+
+If you follow this
+`notebook <https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/vertex_endpoints/torchserve/dreambooth_stablediffusion.ipynb>`__
+you can also get online predictions using the Vertex AI SDK, as shown in the following snippet.
+
+.. code:: python
+
+    import base64
+    from IPython import display
+
+    instances = [{"prompt": "An examplePup dog with a baseball jersey."}]
+    response = endpoint.predict(instances=instances)
+
+    with open("img.jpg", "wb") as g:
+        g.write(base64.b64decode(response.predictions[0]))
+
+    display.Image("img.jpg")
+
+More resources
+--------------
+
+This tutorial was created using the vendor documentation. To refer to the original documentation on the vendor site, please see the
+`torchserve example <https://cloud.google.com/blog/products/ai-machine-learning/get-your-genai-model-going-in-four-easy-steps>`__.
