diff --git a/.circleci/.gitignore b/.circleci/.gitignore deleted file mode 100644 index 485dee64bcf..00000000000 --- a/.circleci/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.idea diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 7e754846023..00000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,1239 +0,0 @@ -version: 2.1 - -# How to test the Linux jobs: -# - Install CircleCI local CLI: https://circleci.com/docs/2.0/local-cli/ -# - circleci config process .circleci/config.yml > gen.yml && circleci local execute -c gen.yml --job binary_linux_wheel_py3.7 -# - Replace binary_linux_wheel_py3.7 with the name of the job you want to test. -# Job names are 'name:' key. - -orbs: - win: circleci/windows@2.0.0 - -executors: - windows-gpu-prototype: - machine: - resource_class: windows.gpu.small.prototype - image: windows-server-2019-nvidia:201908-28 - shell: bash.exe - -commands: - checkout_merge: - description: "checkout merge branch" - steps: - - checkout -# - run: -# name: Checkout merge branch -# command: | -# set -ex -# BRANCH=$(git rev-parse --abbrev-ref HEAD) -# if [[ "$BRANCH" != "master" ]]; then -# git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} -# git checkout "merged/$CIRCLE_BRANCH" -# fi - -binary_common: &binary_common - parameters: - # Edit these defaults to do a release` - build_version: - description: "version number of release binary; by default, build a nightly" - type: string - default: "" - pytorch_version: - description: "PyTorch version to build against; by default, use a nightly" - type: string - default: "" - # Don't edit these - python_version: - description: "Python version to build against (e.g., 3.7)" - type: string - cu_version: - description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" - type: string - unicode_abi: - description: "Python 2.7 wheel only: whether or not we are cp27mu (default: no)" - type: string - default: "" - wheel_docker_image: - description: 
"Wheel only: what docker image to use" - type: string - default: "soumith/manylinux-cuda101" - environment: - PYTHON_VERSION: << parameters.python_version >> - BUILD_VERSION: << parameters.build_version >> - PYTORCH_VERSION: << parameters.pytorch_version >> - UNICODE_ABI: << parameters.unicode_abi >> - CU_VERSION: << parameters.cu_version >> - -jobs: - circleci_consistency: - docker: - - image: circleci/python:3.7 - steps: - - checkout - - run: - command: | - pip install --user --progress-bar off jinja2 pyyaml - python .circleci/regenerate.py - git diff --exit-code || (echo ".circleci/config.yml not in sync with config.yml.in! Run .circleci/regenerate.py to update config"; exit 1) - - binary_linux_wheel: - <<: *binary_common - docker: - - image: << parameters.wheel_docker_image >> - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_linux_conda: - <<: *binary_common - docker: - - image: "soumith/conda-cuda" - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: packaging/build_conda.sh - - store_artifacts: - path: /opt/conda/conda-bld/linux-64 - - persist_to_workspace: - root: /opt/conda/conda-bld/linux-64 - paths: - - "*" - - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: ubuntu-1604:201903-01 - resource_class: gpu.medium - steps: - - checkout_merge - - run: - name: Setup environment - command: | - set -e - - curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add - - curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add - - - sudo apt-get update - - sudo apt-get install \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - sudo add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ 
- stable" - - sudo apt-get update - export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial" - sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io=1.2.6-3 - - # Add the package repositories - distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - - export NVIDIA_CONTAINER_VERSION="1.0.3-1" - sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION} - sudo systemctl restart docker - - DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run" - wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" - sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) - nvidia-smi - - - run: - name: Pull docker image - command: | - set -e - export DOCKER_IMAGE=soumith/conda-cuda - echo Pulling docker image $DOCKER_IMAGE - docker pull $DOCKER_IMAGE >/dev/null - - - run: - name: Build and run tests - command: | - set -e - - cd ${HOME}/project/ - - export DOCKER_IMAGE=soumith/conda-cuda - export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh - - binary_win_conda: - <<: *binary_common - executor: - name: win/default - shell: bash.exe - steps: - - checkout_merge - - run: - command: | - choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - bash packaging/build_conda.sh - shell: powershell.exe - - binary_win_conda_cuda: - <<: *binary_common - executor: windows-gpu-prototype - steps: - - checkout_merge - - run: - command: | 
- choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - bash packaging/build_conda.sh - shell: powershell.exe - - binary_macos_wheel: - <<: *binary_common - macos: - xcode: "9.0" - steps: - - checkout_merge - - run: - # Cannot easily deduplicate this as source'ing activate - # will set environment variables which we need to propagate - # to build_wheel.sh - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_macos_conda: - <<: *binary_common - macos: - xcode: "9.0" - steps: - - checkout_merge - - run: - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - conda install -yq conda-build - packaging/build_conda.sh - - store_artifacts: - path: /Users/distiller/miniconda3/conda-bld/osx-64 - - persist_to_workspace: - root: /Users/distiller/miniconda3/conda-bld/osx-64 - paths: - - "*" - - # Requires org-member context - binary_conda_upload: - docker: - - image: continuumio/miniconda - steps: - - attach_workspace: - at: ~/workspace - - run: - command: | - # Prevent credential from leaking - conda install -yq anaconda-client - set +x - anaconda login \ - --username "$PYTORCH_BINARY_PJH5_CONDA_USERNAME" \ - --password "$PYTORCH_BINARY_PJH5_CONDA_PASSWORD" - set -x - anaconda upload ~/workspace/*.tar.bz2 -u pytorch-nightly --label main --no-progress --force - - # Requires org-member context - binary_wheel_upload: - parameters: - subfolder: - description: "What whl subfolder to upload to, e.g., blank or cu100/ (trailing slash is important)" - type: string - docker: - - image: 
circleci/python:3.7 - steps: - - attach_workspace: - at: ~/workspace - - checkout - - run: - command: | - pip install --user awscli - export PATH="$HOME/.local/bin:$PATH" - # Prevent credential from leaking - set +x - export AWS_ACCESS_KEY_ID="${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}" - export AWS_SECRET_ACCESS_KEY="${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}" - set -x - for pkg in ~/workspace/*.whl; do - aws s3 cp "$pkg" "s3://pytorch/whl/nightly/<< parameters.subfolder >>" --acl public-read - done - - -workflows: - build: - jobs: - - circleci_consistency - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py2.7_cpu - python_version: '2.7' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py2.7u_cpu - python_version: '2.7' - unicode_abi: '1' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py2.7_cu92 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py2.7u_cu92 - python_version: '2.7' - unicode_abi: '1' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py2.7_cu100 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py2.7u_cu100 - python_version: '2.7' - unicode_abi: '1' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py2.7_cu101 - python_version: '2.7' - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py2.7u_cu101 - python_version: '2.7' - unicode_abi: '1' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.5_cpu - python_version: '3.5' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.5_cu92 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: 
binary_linux_wheel_py3.5_cu100 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.5_cu101 - python_version: '3.5' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.6_cpu - python_version: '3.6' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py3.6_cu100 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.6_cu101 - python_version: '3.6' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.7_cpu - python_version: '3.7' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.7_cu92 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py3.7_cu100 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.7_cu101 - python_version: '3.7' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py2.7_cpu - python_version: '2.7' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py2.7u_cpu - python_version: '2.7' - unicode_abi: '1' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.5_cpu - python_version: '3.5' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.6_cpu - python_version: '3.6' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.7_cpu - python_version: '3.7' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py2.7_cpu - python_version: '2.7' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py2.7_cu92 - python_version: '2.7' - 
wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py2.7_cu100 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py2.7_cu101 - python_version: '2.7' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.5_cpu - python_version: '3.5' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.5_cu92 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py3.5_cu100 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.5_cu101 - python_version: '3.5' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.6_cpu - python_version: '3.6' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py3.6_cu100 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.6_cu101 - python_version: '3.6' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.7_cpu - python_version: '3.7' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.7_cu92 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py3.7_cu100 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.7_cu101 - python_version: '3.7' - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py2.7_cpu - python_version: '2.7' - - binary_macos_conda: - cu_version: 
cpu - name: binary_macos_conda_py3.5_cpu - python_version: '3.5' - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.6_cpu - python_version: '3.6' - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.7_cpu - python_version: '3.7' - - binary_linux_conda_cuda: - name: torchvision_linux_py3.7_cu100 - python_version: "3.7" - cu_version: "cu100" - - binary_win_conda: - name: torchvision_win_py3.6_cpu - python_version: "3.6" - cu_version: "cpu" - - binary_win_conda_cuda: - name: torchvision_win_py3.6_cu101 - python_version: "3.6" - cu_version: "cu101" - - nightly: - jobs: - - circleci_consistency - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cpu - python_version: '2.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cpu_upload - requires: - - nightly_binary_linux_wheel_py2.7_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cpu - python_version: '2.7' - unicode_abi: '1' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cpu_upload - requires: - - nightly_binary_linux_wheel_py2.7u_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cu92 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cu92_upload - requires: - - nightly_binary_linux_wheel_py2.7_cu92 - subfolder: cu92/ - - binary_linux_wheel: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cu92 - python_version: '2.7' - unicode_abi: '1' - wheel_docker_image: 
soumith/manylinux-cuda92 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cu92_upload - requires: - - nightly_binary_linux_wheel_py2.7u_cu92 - subfolder: cu92/ - - binary_linux_wheel: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cu100 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cu100_upload - requires: - - nightly_binary_linux_wheel_py2.7_cu100 - subfolder: cu100/ - - binary_linux_wheel: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cu100 - python_version: '2.7' - unicode_abi: '1' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cu100_upload - requires: - - nightly_binary_linux_wheel_py2.7u_cu100 - subfolder: cu100/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cu101 - python_version: '2.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7_cu101_upload - requires: - - nightly_binary_linux_wheel_py2.7_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cu101 - python_version: '2.7' - unicode_abi: '1' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py2.7u_cu101_upload - requires: - - nightly_binary_linux_wheel_py2.7u_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cpu - python_version: 
'3.5' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.5_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cu92 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cu92_upload - requires: - - nightly_binary_linux_wheel_py3.5_cu92 - subfolder: cu92/ - - binary_linux_wheel: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cu100 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cu100_upload - requires: - - nightly_binary_linux_wheel_py3.5_cu100 - subfolder: cu100/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cu101 - python_version: '3.5' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.5_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.5_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cpu - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.6_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_wheel_upload: - 
context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cu92_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu92 - subfolder: cu92/ - - binary_linux_wheel: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cu100 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cu100_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu100 - subfolder: cu100/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cu101 - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.6_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cpu - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.7_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cu92 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cu92_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu92 - subfolder: cu92/ - - binary_linux_wheel: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cu100 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_wheel_upload: - context: org-member - 
filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cu100_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu100 - subfolder: cu100/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cu101 - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_wheel_py3.7_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu101 - subfolder: cu101/ - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py2.7_cpu - python_version: '2.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py2.7_cpu_upload - requires: - - nightly_binary_macos_wheel_py2.7_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py2.7u_cpu - python_version: '2.7' - unicode_abi: '1' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py2.7u_cpu_upload - requires: - - nightly_binary_macos_wheel_py2.7u_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py3.5_cpu - python_version: '3.5' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py3.5_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.5_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py3.6_cpu - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py3.6_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.6_cpu - subfolder: '' - - 
binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py3.7_cpu - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_wheel_py3.7_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.7_cpu - subfolder: '' - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cpu - python_version: '2.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cpu_upload - requires: - - nightly_binary_linux_conda_py2.7_cpu - - binary_linux_conda: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cu92 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cu92_upload - requires: - - nightly_binary_linux_conda_py2.7_cu92 - - binary_linux_conda: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cu100 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cu100_upload - requires: - - nightly_binary_linux_conda_py2.7_cu100 - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cu101 - python_version: '2.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py2.7_cu101_upload - requires: - - nightly_binary_linux_conda_py2.7_cu101 - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cpu - python_version: '3.5' - - 
binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cpu_upload - requires: - - nightly_binary_linux_conda_py3.5_cpu - - binary_linux_conda: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cu92 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cu92_upload - requires: - - nightly_binary_linux_conda_py3.5_cu92 - - binary_linux_conda: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cu100 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cu100_upload - requires: - - nightly_binary_linux_conda_py3.5_cu100 - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cu101 - python_version: '3.5' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.5_cu101_upload - requires: - - nightly_binary_linux_conda_py3.5_cu101 - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.6_cpu - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.6_cpu_upload - requires: - - nightly_binary_linux_conda_py3.6_cpu - - binary_linux_conda: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: 
nightly_binary_linux_conda_py3.6_cu92_upload - requires: - - nightly_binary_linux_conda_py3.6_cu92 - - binary_linux_conda: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.6_cu100 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.6_cu100_upload - requires: - - nightly_binary_linux_conda_py3.6_cu100 - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.6_cu101 - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.6_cu101_upload - requires: - - nightly_binary_linux_conda_py3.6_cu101 - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cpu - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cpu_upload - requires: - - nightly_binary_linux_conda_py3.7_cpu - - binary_linux_conda: - cu_version: cu92 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cu92 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cu92_upload - requires: - - nightly_binary_linux_conda_py3.7_cu92 - - binary_linux_conda: - cu_version: cu100 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cu100 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cu100_upload - requires: - - nightly_binary_linux_conda_py3.7_cu100 - - binary_linux_conda: 
- cu_version: cu101 - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cu101 - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_linux_conda_py3.7_cu101_upload - requires: - - nightly_binary_linux_conda_py3.7_cu101 - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py2.7_cpu - python_version: '2.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py2.7_cpu_upload - requires: - - nightly_binary_macos_conda_py2.7_cpu - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py3.5_cpu - python_version: '3.5' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py3.5_cpu_upload - requires: - - nightly_binary_macos_conda_py3.5_cpu - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py3.6_cpu - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py3.6_cpu_upload - requires: - - nightly_binary_macos_conda_py3.6_cpu - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py3.7_cpu - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - name: nightly_binary_macos_conda_py3.7_cpu_upload - requires: - - nightly_binary_macos_conda_py3.7_cpu \ No newline at end of file diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in deleted file mode 100644 index 999904576b9..00000000000 --- a/.circleci/config.yml.in +++ /dev/null @@ -1,317 +0,0 @@ -version: 2.1 - -# How to test the Linux jobs: -# - Install CircleCI local CLI: 
https://circleci.com/docs/2.0/local-cli/ -# - circleci config process .circleci/config.yml > gen.yml && circleci local execute -c gen.yml --job binary_linux_wheel_py3.7 -# - Replace binary_linux_wheel_py3.7 with the name of the job you want to test. -# Job names are 'name:' key. - -orbs: - win: circleci/windows@2.0.0 - -executors: - windows-gpu-prototype: - machine: - resource_class: windows.gpu.small.prototype - image: windows-server-2019-nvidia:201908-28 - shell: bash.exe - -commands: - checkout_merge: - description: "checkout merge branch" - steps: - - checkout -# - run: -# name: Checkout merge branch -# command: | -# set -ex -# BRANCH=$(git rev-parse --abbrev-ref HEAD) -# if [[ "$BRANCH" != "master" ]]; then -# git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} -# git checkout "merged/$CIRCLE_BRANCH" -# fi - -binary_common: &binary_common - parameters: - # Edit these defaults to do a release` - build_version: - description: "version number of release binary; by default, build a nightly" - type: string - default: "" - pytorch_version: - description: "PyTorch version to build against; by default, use a nightly" - type: string - default: "" - # Don't edit these - python_version: - description: "Python version to build against (e.g., 3.7)" - type: string - cu_version: - description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" - type: string - unicode_abi: - description: "Python 2.7 wheel only: whether or not we are cp27mu (default: no)" - type: string - default: "" - wheel_docker_image: - description: "Wheel only: what docker image to use" - type: string - default: "soumith/manylinux-cuda101" - environment: - PYTHON_VERSION: << parameters.python_version >> - BUILD_VERSION: << parameters.build_version >> - PYTORCH_VERSION: << parameters.pytorch_version >> - UNICODE_ABI: << parameters.unicode_abi >> - CU_VERSION: << parameters.cu_version >> - -jobs: - circleci_consistency: - docker: - - image: circleci/python:3.7 - steps: - 
- checkout - - run: - command: | - pip install --user --progress-bar off jinja2 pyyaml - python .circleci/regenerate.py - git diff --exit-code || (echo ".circleci/config.yml not in sync with config.yml.in! Run .circleci/regenerate.py to update config"; exit 1) - - binary_linux_wheel: - <<: *binary_common - docker: - - image: << parameters.wheel_docker_image >> - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_linux_conda: - <<: *binary_common - docker: - - image: "soumith/conda-cuda" - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: packaging/build_conda.sh - - store_artifacts: - path: /opt/conda/conda-bld/linux-64 - - persist_to_workspace: - root: /opt/conda/conda-bld/linux-64 - paths: - - "*" - - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: ubuntu-1604:201903-01 - resource_class: gpu.medium - steps: - - checkout_merge - - run: - name: Setup environment - command: | - set -e - - curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add - - curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add - - - sudo apt-get update - - sudo apt-get install \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - sudo add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - - sudo apt-get update - export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial" - sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io=1.2.6-3 - - # Add the package repositories - distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - - export NVIDIA_CONTAINER_VERSION="1.0.3-1" - sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION} - sudo systemctl restart docker - - DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run" - wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" - sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) - nvidia-smi - - - run: - name: Pull docker image - command: | - set -e - export DOCKER_IMAGE=soumith/conda-cuda - echo Pulling docker image $DOCKER_IMAGE - docker pull $DOCKER_IMAGE >/dev/null - - - run: - name: Build and run tests - command: | - set -e - - cd ${HOME}/project/ - - export DOCKER_IMAGE=soumith/conda-cuda - export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh - - binary_win_conda: - <<: *binary_common - executor: - name: win/default - shell: bash.exe - steps: - - checkout_merge - - run: - command: | - choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - bash packaging/build_conda.sh - shell: powershell.exe - - binary_win_conda_cuda: - <<: *binary_common - executor: windows-gpu-prototype - steps: - - checkout_merge - - run: - command: | - choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - bash 
packaging/build_conda.sh - shell: powershell.exe - - binary_macos_wheel: - <<: *binary_common - macos: - xcode: "9.0" - steps: - - checkout_merge - - run: - # Cannot easily deduplicate this as source'ing activate - # will set environment variables which we need to propagate - # to build_wheel.sh - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_macos_conda: - <<: *binary_common - macos: - xcode: "9.0" - steps: - - checkout_merge - - run: - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - conda install -yq conda-build - packaging/build_conda.sh - - store_artifacts: - path: /Users/distiller/miniconda3/conda-bld/osx-64 - - persist_to_workspace: - root: /Users/distiller/miniconda3/conda-bld/osx-64 - paths: - - "*" - - # Requires org-member context - binary_conda_upload: - docker: - - image: continuumio/miniconda - steps: - - attach_workspace: - at: ~/workspace - - run: - command: | - # Prevent credential from leaking - conda install -yq anaconda-client - set +x - anaconda login \ - --username "$PYTORCH_BINARY_PJH5_CONDA_USERNAME" \ - --password "$PYTORCH_BINARY_PJH5_CONDA_PASSWORD" - set -x - anaconda upload ~/workspace/*.tar.bz2 -u pytorch-nightly --label main --no-progress --force - - # Requires org-member context - binary_wheel_upload: - parameters: - subfolder: - description: "What whl subfolder to upload to, e.g., blank or cu100/ (trailing slash is important)" - type: string - docker: - - image: circleci/python:3.7 - steps: - - attach_workspace: - at: ~/workspace - - checkout - - run: - command: | - pip install --user awscli - export PATH="$HOME/.local/bin:$PATH" - # Prevent credential from leaking - set +x - export 
AWS_ACCESS_KEY_ID="${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}" - export AWS_SECRET_ACCESS_KEY="${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}" - set -x - for pkg in ~/workspace/*.whl; do - aws s3 cp "$pkg" "s3://pytorch/whl/nightly/<< parameters.subfolder >>" --acl public-read - done - - -workflows: - build: -{%- if True %} - jobs: - - circleci_consistency - {{ workflows() }} - - binary_linux_conda_cuda: - name: torchvision_linux_py3.7_cu100 - python_version: "3.7" - cu_version: "cu100" - - binary_win_conda: - name: torchvision_win_py3.6_cpu - python_version: "3.6" - cu_version: "cpu" - - binary_win_conda_cuda: - name: torchvision_win_py3.6_cu101 - python_version: "3.6" - cu_version: "cu101" - - nightly: -{%- endif %} - jobs: - - circleci_consistency - {{ workflows(prefix="nightly_", filter_branch="nightly", upload=True) }} diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py deleted file mode 100755 index e7d85d2f911..00000000000 --- a/.circleci/regenerate.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 - -""" -This script should use a very simple, functional programming style. -Avoid Jinja macros in favor of native Python functions. - -Don't go overboard on code generation; use Python only to generate -content that can't be easily declared statically using CircleCI's YAML API. - -Data declarations (e.g. the nested loops for defining the configuration matrix) -should be at the top of the file for easy updating. 
- -See this comment for design rationale: -https://github.com/pytorch/vision/pull/1321#issuecomment-531033978 -""" - -import jinja2 -import yaml -import os.path - - -def workflows(prefix='', filter_branch=None, upload=False, indentation=6): - w = [] - for btype in ["wheel", "conda"]: - for os_type in ["linux", "macos"]: - for python_version in ["2.7", "3.5", "3.6", "3.7"]: - for cu_version in (["cpu", "cu92", "cu100", "cu101"] if os_type == "linux" else ["cpu"]): - for unicode in ([False, True] if btype == "wheel" and python_version == "2.7" else [False]): - w += workflow_pair( - btype, os_type, python_version, cu_version, - unicode, prefix, upload, filter_branch=filter_branch) - - return indent(indentation, w) - - -def workflow_pair(btype, os_type, python_version, cu_version, unicode, prefix='', upload=False, *, filter_branch=None): - - w = [] - unicode_suffix = "u" if unicode else "" - base_workflow_name = f"{prefix}binary_{os_type}_{btype}_py{python_version}{unicode_suffix}_{cu_version}" - - w.append(generate_base_workflow( - base_workflow_name, python_version, cu_version, - unicode, os_type, btype, filter_branch=filter_branch)) - - if upload: - w.append(generate_upload_workflow(base_workflow_name, os_type, btype, cu_version, filter_branch=filter_branch)) - - return w - - -def generate_base_workflow(base_workflow_name, python_version, cu_version, - unicode, os_type, btype, *, filter_branch=None): - - d = { - "name": base_workflow_name, - "python_version": python_version, - "cu_version": cu_version, - } - - if unicode: - d["unicode_abi"] = '1' - - if cu_version == "cu92": - d["wheel_docker_image"] = "soumith/manylinux-cuda92" - elif cu_version == "cu100": - d["wheel_docker_image"] = "soumith/manylinux-cuda100" - - if filter_branch is not None: - d["filters"] = {"branches": {"only": filter_branch}} - - return {f"binary_{os_type}_{btype}": d} - - -def generate_upload_workflow(base_workflow_name, os_type, btype, cu_version, *, filter_branch=None): - d = { - "name": 
f"{base_workflow_name}_upload", - "context": "org-member", - "requires": [base_workflow_name], - } - - if btype == 'wheel': - d["subfolder"] = "" if os_type == 'macos' else cu_version + "/" - - if filter_branch is not None: - d["filters"] = {"branches": {"only": filter_branch}} - - return {f"binary_{btype}_upload": d} - - -def indent(indentation, data_list): - return ("\n" + " " * indentation).join( - yaml.dump(data_list, default_flow_style=False).splitlines()) - - -if __name__ == "__main__": - d = os.path.dirname(__file__) - env = jinja2.Environment( - loader=jinja2.FileSystemLoader(d), - lstrip_blocks=True, - autoescape=False, - ) - - with open(os.path.join(d, 'config.yml'), 'w') as f: - f.write(env.get_template('config.yml.in').render(workflows=workflows)) diff --git a/.clang-format b/.clang-format index 6d0ab740db4..9f20a44fe9b 100644 --- a/.clang-format +++ b/.clang-format @@ -1,88 +1,5 @@ --- -AccessModifierOffset: -1 -AlignAfterOpenBracket: AlwaysBreak -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: false -AlignTrailingComments: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Empty -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: true -BinPackArguments: false -BinPackParameters: false -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Attach -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakAfterJavaFieldAnnotations: false 
-BreakStringLiterals: false -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -#CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] -IncludeCategories: - - Regex: '^<.*\.h(pp)?>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IndentCaseLabels: true -IndentWidth: 2 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 2000000 -PointerAlignment: Left -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp11 -TabWidth: 8 -UseTab: Never +Language: ObjC +DisableFormat: true +SortIncludes: false ... diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index c765e471155..00000000000 --- a/.coveragerc +++ /dev/null @@ -1,7 +0,0 @@ -[run] -branch = True - -[paths] -source = - torchvision - /**/site-packages/torchvision diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000000..5e88f5b9bb7 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,13 @@ +# This file keeps git blame clean. 
+# See https://docs.github.com/en/repositories/working-with-files/using-files/viewing-a-file#ignore-commits-in-the-blame-view + +# Add ufmt (usort + black) as code formatter (#4384) +5f0edb97b46e5bff71dc19dedef05c5396eeaea2 +# update python syntax >=3.6 (#4585) +d367a01a18a3ae6bee13d8be3b63fd6a581ea46f +# Upgrade usort to 1.0.2 and black to 22.3.0 (#5106) +6ca9c76adb6daf2695d603ad623a9cf1c4f4806f +# Fix unnecessary exploded black formatting (#7709) +a335d916db0694770e8152f41e19195de3134523 +# Renaming: `BoundingBox` -> `BoundingBoxes` (#7778) +332bff937c6711666191880fab57fa2f23ae772e diff --git a/.gitattributes b/.gitattributes index a476e7afb59..22d0452f8d7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,8 @@ *.pkl binary +# Jupyter notebook + +# For text count +# *.ipynb text + +# To ignore it use below +*.ipynb linguist-documentation diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 00000000000..ba811554c43 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,60 @@ +name: 🐛 Bug Report +description: Create a report to help us reproduce and fix the bug + +body: +- type: markdown + attributes: + value: > + #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/pytorch/vision/issues?q=is%3Aissue+sort%3Acreated-desc+). +- type: textarea + attributes: + label: 🐛 Describe the bug + description: | + Please provide a clear and concise description of what the bug is. + + If relevant, add a minimal example so that we can reproduce the error by running the code. It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. 
For example: + + ```python + # All necessary imports at the beginning + import torch + import torchvision + from torchvision.ops import nms + + # A succinct reproducing example trimmed down to the essential parts: + N = 5 + boxes = torch.rand(N, 4) # Note: the bug is here, we should enforce that x1 < x2 and y1 < y2! + scores = torch.rand(N) + nms(boxes, scores, iou_threshold=.9) + ``` + + If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com. + + Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. + placeholder: | + A clear and concise description of what the bug is. + + ```python + Sample code to reproduce the problem + ``` + + ``` + The error message you got, with the full traceback. + ```` + validations: + required: true +- type: textarea + attributes: + label: Versions + description: | + Please run the following and paste the output below. + ```sh + wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py + # For security purposes, please check the contents of collect_env.py before running it. + python collect_env.py + ``` + validations: + required: true +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! 
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000000..bdb6d3614f3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Usage questions + url: https://discuss.pytorch.org/ + about: Ask questions and discuss with other torchvision community members diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml new file mode 100644 index 00000000000..a7fb6c04c63 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.yml @@ -0,0 +1,20 @@ +name: 📚 Documentation +description: Report an issue related to https://pytorch.org/vision/stable/index.html + +body: +- type: textarea + attributes: + label: 📚 The doc issue + description: > + A clear and concise description of what content in https://pytorch.org/vision/stable/index.html is an issue. If this has to do with the general https://pytorch.org website, please file an issue at https://github.com/pytorch/pytorch.github.io/issues/new/choose instead. If this has to do with https://pytorch.org/tutorials, please file an issue at https://github.com/pytorch/tutorials/issues/new. + validations: + required: true +- type: textarea + attributes: + label: Suggest a potential alternative/fix + description: > + Tell us how we could improve the documentation in this regard. +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! 
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml new file mode 100644 index 00000000000..85c727dbcf5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -0,0 +1,32 @@ +name: 🚀 Feature request +description: Submit a proposal/request for a new torchvision feature + +body: +- type: textarea + attributes: + label: 🚀 The feature + description: > + A clear and concise description of the feature proposal + validations: + required: true +- type: textarea + attributes: + label: Motivation, pitch + description: > + Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too. + validations: + required: true +- type: textarea + attributes: + label: Alternatives + description: > + A description of any alternative solutions or features you've considered, if any. +- type: textarea + attributes: + label: Additional context + description: > + Add any other context or screenshots about the feature request. +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000000..f267cc7da50 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1 @@ + diff --git a/.github/failed_schedule_issue_template.md b/.github/failed_schedule_issue_template.md new file mode 100644 index 00000000000..5e2d77550ac --- /dev/null +++ b/.github/failed_schedule_issue_template.md @@ -0,0 +1,13 @@ +--- +title: Scheduled workflow failed +labels: + - bug + - "module: datasets" +--- + +Oh no, something went wrong in the scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }}. +Please look into it: + +https://github.com/{{ env.REPO }}/actions/runs/{{ env.ID }} + +Feel free to close this if this was just a one-off error. 
diff --git a/.github/process_commit.py b/.github/process_commit.py new file mode 100644 index 00000000000..e1e534d98f4 --- /dev/null +++ b/.github/process_commit.py @@ -0,0 +1,81 @@ +""" +This script finds the merger responsible for labeling a PR by a commit SHA. It is used by the workflow in +'.github/workflows/pr-labels.yml'. If there exists no PR associated with the commit or the PR is properly labeled, +this script is a no-op. + +Note: we ping the merger only, not the reviewers, as the reviewers can sometimes be external to torchvision +with no labeling responsibility, so we don't want to bother them. +""" + +import sys +from typing import Any, Optional, Set, Tuple + +import requests + +# For a PR to be properly labeled it should have one primary label and one secondary label +PRIMARY_LABELS = { + "new feature", + "bug", + "code quality", + "enhancement", + "bc-breaking", + "deprecation", + "other", + "prototype", +} + +SECONDARY_LABELS = { + "dependency issue", + "module: c++ frontend", + "module: ci", + "module: datasets", + "module: documentation", + "module: io", + "module: models.quantization", + "module: models", + "module: onnx", + "module: ops", + "module: reference scripts", + "module: rocm", + "module: tests", + "module: transforms", + "module: utils", + "module: video", + "Perf", + "Revert(ed)", + "topic: build", +} + + +def query_torchvision(cmd: str, *, accept) -> Any: + response = requests.get(f"https://api.github.com/repos/pytorch/vision/{cmd}", headers=dict(Accept=accept)) + return response.json() + + +def get_pr_number(commit_hash: str) -> Optional[int]: + # See https://docs.github.com/en/rest/reference/repos#list-pull-requests-associated-with-a-commit + data = query_torchvision(f"commits/{commit_hash}/pulls", accept="application/vnd.github.groot-preview+json") + if not data: + return None + return data[0]["number"] + + +def get_pr_merger_and_labels(pr_number: int) -> Tuple[str, Set[str]]: + # See 
https://docs.github.com/en/rest/reference/pulls#get-a-pull-request + data = query_torchvision(f"pulls/{pr_number}", accept="application/vnd.github.v3+json") + merger = data["merged_by"]["login"] + labels = {label["name"] for label in data["labels"]} + return merger, labels + + +if __name__ == "__main__": + commit_hash = sys.argv[1] + pr_number = get_pr_number(commit_hash) + if not pr_number: + sys.exit(0) + + merger, labels = get_pr_merger_and_labels(pr_number) + is_properly_labeled = bool(PRIMARY_LABELS.intersection(labels) and SECONDARY_LABELS.intersection(labels)) + + if not is_properly_labeled: + print(f"@{merger}") diff --git a/.github/pytorch-probot.yml b/.github/pytorch-probot.yml new file mode 100644 index 00000000000..1a3402466f4 --- /dev/null +++ b/.github/pytorch-probot.yml @@ -0,0 +1,10 @@ +tracking_issue: 2447 + +# List of workflows that will be re-run in case of failures +# https://github.com/pytorch/test-infra/blob/main/torchci/lib/bot/retryBot.ts +retryable_workflows: +- Build Linux +- Build Macos +- Build M1 +- Build Windows +- Tests diff --git a/.github/scripts/cmake.sh b/.github/scripts/cmake.sh new file mode 100755 index 00000000000..4217a9d24be --- /dev/null +++ b/.github/scripts/cmake.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +./.github/scripts/setup-env.sh + +# Activate conda environment +set +x && eval "$($(which conda) shell.bash hook)" && conda deactivate && conda activate ci && set -x + +# Setup the OS_TYPE environment variable that should be used for conditions involving the OS below. 
+case $(uname) in + Linux) + OS_TYPE=linux + ;; + Darwin) + OS_TYPE=macos + ;; + MSYS*) + OS_TYPE=windows + ;; + *) + echo "Unknown OS type:" $(uname) + exit 1 + ;; +esac + +if [[ $OS_TYPE == macos ]]; then + JOBS=$(sysctl -n hw.logicalcpu) +else + JOBS=$(nproc) +fi + +if [[ $OS_TYPE == linux ]]; then + export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}" +fi + +TORCH_PATH=$(python -c "import pathlib, torch; print(pathlib.Path(torch.__path__[0]))") +if [[ $OS_TYPE == windows ]]; then + PACKAGING_DIR="${PWD}/packaging" + export PATH="${TORCH_PATH}/lib:${PATH}" +fi + +Torch_DIR="${TORCH_PATH}/share/cmake/Torch" +if [[ "${GPU_ARCH_TYPE}" == "cuda" ]]; then + WITH_CUDA=1 +else + WITH_CUDA=0 +fi + +echo '::group::Prepare CMake builds' +mkdir -p cpp_build + +pushd examples/cpp +python script_model.py +mkdir -p build +mv resnet18.pt fasterrcnn_resnet50_fpn.pt build +popd + +# This was only needed for the tracing above +pip uninstall -y torchvision +echo '::endgroup::' + +echo '::group::Build and install libtorchvision' +pushd cpp_build + + +# On macOS, CMake is looking for the library (*.dylib) and the header (*.h) separately. By default, it prefers to load +# the header from other packages that install the library. This easily leads to a mismatch if the library installed +# from conda doesn't have the exact same version. Thus, we need to explicitly set CMAKE_FIND_FRAMEWORK=NEVER to force +# it to not load anything from other installed frameworks. Resources: +# https://stackoverflow.com/questions/36523911/osx-homebrew-cmake-libpng-version-mismatch-issue +# https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_FRAMEWORK.html +cmake .. 
-DTorch_DIR="${Torch_DIR}" -DWITH_CUDA="${WITH_CUDA}" \ + -DCMAKE_PREFIX_PATH="${CONDA_PREFIX}" \ + -DCMAKE_FIND_FRAMEWORK=NEVER \ + -DCMAKE_INSTALL_PREFIX="${CONDA_PREFIX}" +if [[ $OS_TYPE == windows ]]; then + "${PACKAGING_DIR}/windows/internal/vc_env_helper.bat" "${PACKAGING_DIR}/windows/internal/build_cmake.bat" $JOBS +else + make -j$JOBS + make install +fi + +popd +echo '::endgroup::' + +echo '::group::Build and run C++ example' +pushd examples/cpp/build + +cmake .. -DTorch_DIR="${Torch_DIR}" \ + -DCMAKE_PREFIX_PATH="${CONDA_PREFIX}" \ + -DCMAKE_FIND_FRAMEWORK=NEVER \ + -DUSE_TORCHVISION=ON # Needed for faster-rcnn since it's using torchvision ops like NMS. +if [[ $OS_TYPE == windows ]]; then + "${PACKAGING_DIR}/windows/internal/vc_env_helper.bat" "${PACKAGING_DIR}/windows/internal/build_cpp_example.bat" $JOBS + cd Release + cp ../resnet18.pt . + cp ../fasterrcnn_resnet50_fpn.pt . +else + make -j$JOBS +fi + +./run_model resnet18.pt +./run_model fasterrcnn_resnet50_fpn.pt + +popd +echo '::endgroup::' diff --git a/.github/scripts/export_IS_M1_CONDA_BUILD_JOB.sh b/.github/scripts/export_IS_M1_CONDA_BUILD_JOB.sh new file mode 100755 index 00000000000..1cca56ddc56 --- /dev/null +++ b/.github/scripts/export_IS_M1_CONDA_BUILD_JOB.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export IS_M1_CONDA_BUILD_JOB=1 diff --git a/travis-scripts/run-clang-format/run-clang-format.py b/.github/scripts/run-clang-format.py similarity index 62% rename from travis-scripts/run-clang-format/run-clang-format.py rename to .github/scripts/run-clang-format.py index 3f16c833b63..670fd97833a 100755 --- a/travis-scripts/run-clang-format/run-clang-format.py +++ b/.github/scripts/run-clang-format.py @@ -1,5 +1,28 @@ #!/usr/bin/env python -"""A wrapper script around clang-format, suitable for linting multiple files +""" +MIT License + +Copyright (c) 2017 Guillaume Papin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 
"Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +A wrapper script around clang-format, suitable for linting multiple files and to use for continuous integration. This is an alternative API for the clang-format command line. @@ -8,20 +31,15 @@ """ -from __future__ import print_function, unicode_literals - import argparse -import codecs import difflib import fnmatch -import io import multiprocessing import os import signal import subprocess import sys import traceback - from functools import partial try: @@ -30,7 +48,7 @@ DEVNULL = open(os.devnull, "wb") -DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx' +DEFAULT_EXTENSIONS = "c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu,mm" class ExitStatus: @@ -54,14 +72,8 @@ def list_files(files, recursive=False, extensions=None, exclude=None): # os.walk() supports trimming down the dnames list # by modifying it in-place, # to avoid unnecessary directory listings. 
- dnames[:] = [ - x for x in dnames - if - not fnmatch.fnmatch(os.path.join(dirpath, x), pattern) - ] - fpaths = [ - x for x in fpaths if not fnmatch.fnmatch(x, pattern) - ] + dnames[:] = [x for x in dnames if not fnmatch.fnmatch(os.path.join(dirpath, x), pattern)] + fpaths = [x for x in fpaths if not fnmatch.fnmatch(x, pattern)] for f in fpaths: ext = os.path.splitext(f)[1][1:] if ext in extensions: @@ -74,22 +86,20 @@ def list_files(files, recursive=False, extensions=None, exclude=None): def make_diff(file, original, reformatted): return list( difflib.unified_diff( - original, - reformatted, - fromfile='{}\t(original)'.format(file), - tofile='{}\t(reformatted)'.format(file), - n=3)) + original, reformatted, fromfile=f"{file}\t(original)", tofile=f"{file}\t(reformatted)", n=3 + ) + ) class DiffError(Exception): def __init__(self, message, errs=None): - super(DiffError, self).__init__(message) + super().__init__(message) self.errs = errs or [] class UnexpectedError(Exception): def __init__(self, message, exc=None): - super(UnexpectedError, self).__init__(message) + super().__init__(message) self.formatted_traceback = traceback.format_exc() self.exc = exc @@ -101,15 +111,14 @@ def run_clang_format_diff_wrapper(args, file): except DiffError: raise except Exception as e: - raise UnexpectedError('{}: {}: {}'.format(file, e.__class__.__name__, - e), e) + raise UnexpectedError(f"{file}: {e.__class__.__name__}: {e}", e) def run_clang_format_diff(args, file): try: - with io.open(file, 'r', encoding='utf-8') as f: + with open(file, encoding="utf-8") as f: original = f.readlines() - except IOError as exc: + except OSError as exc: raise DiffError(str(exc)) invocation = [args.clang_format_executable, file] @@ -129,33 +138,16 @@ def run_clang_format_diff(args, file): # > Each translation completely replaces the format string # > for the diagnostic. 
# > -- http://clang.llvm.org/docs/InternalsManual.html#internals-diag-translation - # - # It's not pretty, due to Python 2 & 3 compatibility. - encoding_py3 = {} - if sys.version_info[0] >= 3: - encoding_py3['encoding'] = 'utf-8' try: proc = subprocess.Popen( - invocation, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - **encoding_py3) - except OSError as exc: - raise DiffError( - "Command '{}' failed to start: {}".format( - subprocess.list2cmdline(invocation), exc - ) + invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, encoding="utf-8" ) + except OSError as exc: + raise DiffError(f"Command '{subprocess.list2cmdline(invocation)}' failed to start: {exc}") proc_stdout = proc.stdout proc_stderr = proc.stderr - if sys.version_info[0] < 3: - # make the pipes compatible with Python 3, - # reading lines should output unicode - encoding = 'utf-8' - proc_stdout = codecs.getreader(encoding)(proc_stdout) - proc_stderr = codecs.getreader(encoding)(proc_stderr) + # hopefully the stderr pipe won't get full and block the process outs = list(proc_stdout.readlines()) errs = list(proc_stderr.readlines()) @@ -171,30 +163,30 @@ def run_clang_format_diff(args, file): def bold_red(s): - return '\x1b[1m\x1b[31m' + s + '\x1b[0m' + return "\x1b[1m\x1b[31m" + s + "\x1b[0m" def colorize(diff_lines): def bold(s): - return '\x1b[1m' + s + '\x1b[0m' + return "\x1b[1m" + s + "\x1b[0m" def cyan(s): - return '\x1b[36m' + s + '\x1b[0m' + return "\x1b[36m" + s + "\x1b[0m" def green(s): - return '\x1b[32m' + s + '\x1b[0m' + return "\x1b[32m" + s + "\x1b[0m" def red(s): - return '\x1b[31m' + s + '\x1b[0m' + return "\x1b[31m" + s + "\x1b[0m" for line in diff_lines: - if line[:4] in ['--- ', '+++ ']: + if line[:4] in ["--- ", "+++ "]: yield bold(line) - elif line.startswith('@@ '): + elif line.startswith("@@ "): yield cyan(line) - elif line.startswith('+'): + elif line.startswith("+"): yield green(line) - elif line.startswith('-'): + 
elif line.startswith("-"): yield red(line) else: yield line @@ -203,61 +195,50 @@ def red(s): def print_diff(diff_lines, use_color): if use_color: diff_lines = colorize(diff_lines) - if sys.version_info[0] < 3: - sys.stdout.writelines((l.encode('utf-8') for l in diff_lines)) - else: - sys.stdout.writelines(diff_lines) + sys.stdout.writelines(diff_lines) def print_trouble(prog, message, use_colors): - error_text = 'error:' + error_text = "error:" if use_colors: error_text = bold_red(error_text) - print("{}: {} {}".format(prog, error_text, message), file=sys.stderr) + print(f"{prog}: {error_text} {message}", file=sys.stderr) def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--clang-format-executable', - metavar='EXECUTABLE', - help='path to the clang-format executable', - default='clang-format') - parser.add_argument( - '--extensions', - help='comma separated list of file extensions (default: {})'.format( - DEFAULT_EXTENSIONS), - default=DEFAULT_EXTENSIONS) + "--clang-format-executable", + metavar="EXECUTABLE", + help="path to the clang-format executable", + default="clang-format", + ) parser.add_argument( - '-r', - '--recursive', - action='store_true', - help='run recursively over directories') - parser.add_argument('files', metavar='file', nargs='+') + "--extensions", + help=f"comma separated list of file extensions (default: {DEFAULT_EXTENSIONS})", + default=DEFAULT_EXTENSIONS, + ) + parser.add_argument("-r", "--recursive", action="store_true", help="run recursively over directories") + parser.add_argument("files", metavar="file", nargs="+") + parser.add_argument("-q", "--quiet", action="store_true") parser.add_argument( - '-q', - '--quiet', - action='store_true') - parser.add_argument( - '-j', - metavar='N', + "-j", + metavar="N", type=int, default=0, - help='run N clang-format jobs in parallel' - ' (default number of cpus + 1)') + help="run N clang-format jobs in parallel (default number of cpus + 1)", + ) 
parser.add_argument( - '--color', - default='auto', - choices=['auto', 'always', 'never'], - help='show colored diff (default: auto)') + "--color", default="auto", choices=["auto", "always", "never"], help="show colored diff (default: auto)" + ) parser.add_argument( - '-e', - '--exclude', - metavar='PATTERN', - action='append', + "-e", + "--exclude", + metavar="PATTERN", + action="append", default=[], - help='exclude paths matching the given glob-like pattern(s)' - ' from recursive search') + help="exclude paths matching the given glob-like pattern(s) from recursive search", + ) args = parser.parse_args() @@ -274,14 +255,14 @@ def main(): colored_stdout = False colored_stderr = False - if args.color == 'always': + if args.color == "always": colored_stdout = True colored_stderr = True - elif args.color == 'auto': + elif args.color == "auto": colored_stdout = sys.stdout.isatty() colored_stderr = sys.stderr.isatty() - version_invocation = [args.clang_format_executable, str("--version")] + version_invocation = [args.clang_format_executable, "--version"] try: subprocess.check_call(version_invocation, stdout=DEVNULL) except subprocess.CalledProcessError as e: @@ -290,19 +271,15 @@ def main(): except OSError as e: print_trouble( parser.prog, - "Command '{}' failed to start: {}".format( - subprocess.list2cmdline(version_invocation), e - ), + f"Command '{subprocess.list2cmdline(version_invocation)}' failed to start: {e}", use_colors=colored_stderr, ) return ExitStatus.TROUBLE retcode = ExitStatus.SUCCESS files = list_files( - args.files, - recursive=args.recursive, - exclude=args.exclude, - extensions=args.extensions.split(',')) + args.files, recursive=args.recursive, exclude=args.exclude, extensions=args.extensions.split(",") + ) if not files: return @@ -319,8 +296,7 @@ def main(): pool = None else: pool = multiprocessing.Pool(njobs) - it = pool.imap_unordered( - partial(run_clang_format_diff_wrapper, args), files) + it = 
pool.imap_unordered(partial(run_clang_format_diff_wrapper, args), files) while True: try: outs, errs = next(it) @@ -351,5 +327,5 @@ def main(): return retcode -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh new file mode 100755 index 00000000000..24e7aa97986 --- /dev/null +++ b/.github/scripts/setup-env.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +# Prepare conda +set +x && eval "$($(which conda) shell.bash hook)" && set -x + +# Setup the OS_TYPE environment variable that should be used for conditions involving the OS below. +case $(uname) in + Linux) + OS_TYPE=linux + ;; + Darwin) + OS_TYPE=macos + ;; + MSYS*) + OS_TYPE=windows + ;; + *) + echo "Unknown OS type:" $(uname) + exit 1 + ;; +esac + +echo '::group::Create build environment' +# See https://github.com/pytorch/vision/issues/7296 for ffmpeg +conda create \ + --name ci \ + --quiet --yes \ + python="${PYTHON_VERSION}" pip \ + ninja cmake \ + libpng \ + libwebp \ + 'ffmpeg<4.3' +conda activate ci +conda install --quiet --yes libjpeg-turbo -c pytorch +pip install --progress-bar=off --upgrade setuptools==72.1.0 + +# See https://github.com/pytorch/vision/issues/6790 +if [[ "${PYTHON_VERSION}" != "3.11" ]]; then + pip install --progress-bar=off av!=10.0.0 +fi + +echo '::endgroup::' + +if [[ "${OS_TYPE}" == windows && "${GPU_ARCH_TYPE}" == cuda ]]; then + echo '::group::Install VisualStudio CUDA extensions on Windows' + if [[ "${VC_YEAR:-}" == "2022" ]]; then + TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2022/BuildTools/MSBuild/Microsoft/VC/v170/BuildCustomizations" + else + TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations" + fi + mkdir -p "${TARGET_DIR}" + cp -r "${CUDA_HOME}/MSBuildExtensions/"* "${TARGET_DIR}" + echo '::endgroup::' +fi + +echo '::group::Install PyTorch' +# TODO: Can we maybe have this as 
an environment variable in the job template? For example, `IS_RELEASE`. +if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then + CHANNEL=test +else + CHANNEL=nightly +fi + +case $GPU_ARCH_TYPE in + cpu) + GPU_ARCH_ID="cpu" + ;; + cuda) + VERSION_WITHOUT_DOT=$(echo "${GPU_ARCH_VERSION}" | sed 's/\.//') + GPU_ARCH_ID="cu${VERSION_WITHOUT_DOT}" + ;; + *) + echo "Unknown GPU_ARCH_TYPE=${GPU_ARCH_TYPE}" + exit 1 + ;; +esac +PYTORCH_WHEEL_INDEX="https://download.pytorch.org/whl/${CHANNEL}/${GPU_ARCH_ID}" +pip install --progress-bar=off --pre torch --index-url="${PYTORCH_WHEEL_INDEX}" + +if [[ $GPU_ARCH_TYPE == 'cuda' ]]; then + python -c "import torch; exit(not torch.cuda.is_available())" +fi +echo '::endgroup::' + +echo '::group::Install third party dependencies prior to TorchVision install' +# Installing with `easy_install`, e.g. `python setup.py install` or `python setup.py develop`, has some quirks when +# pulling in third-party dependencies. For example: +# - On Windows, we often hit an SSL error although `pip` can install just fine. +# - It happily pulls in pre-releases, which can lead to more problems down the line. +# `pip` does not unless explicitly told to do so. +# Thus, we use `easy_install` to extract the third-party dependencies here and install them upfront with `pip`. +python setup.py egg_info +# The requires.txt cannot be used with `pip install -r` directly. The requirements are listed at the top and the +# optional dependencies come in non-standard syntax after a blank line. Thus, we just extract the header.
+sed -e '/^$/,$d' *.egg-info/requires.txt | tee requirements.txt +pip install --progress-bar=off -r requirements.txt +echo '::endgroup::' + +echo '::group::Install TorchVision' +python setup.py develop +echo '::endgroup::' + +echo '::group::Install torchvision-extra-decoders' +# This can be done after torchvision was built +pip install torchvision-extra-decoders +echo '::endgroup::' + +echo '::group::Collect environment information' +conda list +python -m torch.utils.collect_env +echo '::endgroup::' diff --git a/.github/scripts/unittest.sh b/.github/scripts/unittest.sh new file mode 100755 index 00000000000..da8a06928ea --- /dev/null +++ b/.github/scripts/unittest.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -euo pipefail + +./.github/scripts/setup-env.sh + +# Activate conda environment +eval "$($(which conda) shell.bash hook)" && conda deactivate && conda activate ci + +echo '::group::Install testing utilities' +# TODO: remove the <8 constraint on pytest when https://github.com/pytorch/vision/issues/8238 is closed +pip install --progress-bar=off "pytest<8" pytest-mock pytest-cov expecttest!=0.2.0 requests +echo '::endgroup::' + +python test/smoke_test.py + +# We explicitly ignore the video tests until we resolve https://github.com/pytorch/vision/issues/8162 +pytest --ignore-glob="*test_video*" --junit-xml="${RUNNER_TEST_RESULTS_DIR}/test-results.xml" -v --durations=25 diff --git a/.github/workflows/build-cmake.yml b/.github/workflows/build-cmake.yml new file mode 100644 index 00000000000..9cee3bfc26d --- /dev/null +++ b/.github/workflows/build-cmake.yml @@ -0,0 +1,84 @@ +name: CMake + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + workflow_dispatch: + +jobs: + linux: + strategy: + matrix: + include: + - runner: linux.12xlarge + gpu-arch-type: cpu + - runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "11.8" + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + 
repository: pytorch/vision + runner: ${{ matrix.runner }} + gpu-arch-type: ${{ matrix.gpu-arch-type }} + gpu-arch-version: ${{ matrix.gpu-arch-version }} + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=3.9 + export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} + export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} + ./.github/scripts/cmake.sh + + macos: + strategy: + matrix: + include: + - runner: macos-m1-stable + fail-fast: false + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + with: + repository: pytorch/vision + runner: ${{ matrix.runner }} + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=3.9 + export GPU_ARCH_TYPE=cpu + export GPU_ARCH_VERSION='' + + ${CONDA_RUN} ./.github/scripts/cmake.sh + + windows: + strategy: + matrix: + include: + - runner: windows.4xlarge + gpu-arch-type: cpu + - runner: windows.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "11.8" + fail-fast: false + uses: pytorch/test-infra/.github/workflows/windows_job.yml@main + with: + repository: pytorch/vision + runner: ${{ matrix.runner }} + gpu-arch-type: ${{ matrix.gpu-arch-type }} + gpu-arch-version: ${{ matrix.gpu-arch-version }} + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=3.9 + export VC_YEAR=2022 + export VSDEVCMD_ARGS="" + export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} + export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} + + ./.github/scripts/cmake.sh diff --git a/.github/workflows/build-conda-linux.yml b/.github/workflows/build-conda-linux.yml new file mode 100644 index 00000000000..a445ef9af25 --- /dev/null +++ b/.github/workflows/build-conda-linux.yml @@ -0,0 +1,52 @@ +name: Build Linux Conda + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + 
workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: conda + os: linux + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build: + needs: generate-matrix + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/vision + pre-script: "" + post-script: "" + conda-package-directory: packaging/torchvision + smoke-test-script: test/smoke_test.py + package-name: torchvision + name: ${{ matrix.repository }} + uses: pytorch/test-infra/.github/workflows/build_conda_linux.yml@main + with: + conda-package-directory: ${{ matrix.conda-package-directory }} + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} + secrets: + CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} diff --git a/.github/workflows/build-conda-m1.yml b/.github/workflows/build-conda-m1.yml new file mode 100644 index 00000000000..e8f6546a678 --- /dev/null +++ b/.github/workflows/build-conda-m1.yml @@ -0,0 +1,54 @@ +name: Build M1 Conda + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: conda + os: macos-arm64 + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build: + needs: 
generate-matrix + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/vision + pre-script: "" + post-script: "" + conda-package-directory: packaging/torchvision + smoke-test-script: test/smoke_test.py + package-name: torchvision + name: ${{ matrix.repository }} + uses: pytorch/test-infra/.github/workflows/build_conda_macos.yml@main + with: + conda-package-directory: ${{ matrix.conda-package-directory }} + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + env-var-script: ./.github/scripts/export_IS_M1_CONDA_BUILD_JOB.sh + pre-script: ${{ matrix.pre-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + runner-type: macos-m1-stable + trigger-event: ${{ github.event_name }} + secrets: + CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} diff --git a/.github/workflows/build-conda-windows.yml b/.github/workflows/build-conda-windows.yml new file mode 100644 index 00000000000..f404c06b888 --- /dev/null +++ b/.github/workflows/build-conda-windows.yml @@ -0,0 +1,53 @@ +name: Build Windows Conda + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: conda + os: windows + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build: + needs: generate-matrix + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/vision + pre-script: "" + post-script: "" + env-script: 
packaging/windows/internal/vc_env_helper.bat + conda-package-directory: packaging/torchvision + smoke-test-script: test/smoke_test.py + package-name: torchvision + name: ${{ matrix.repository }} + uses: pytorch/test-infra/.github/workflows/build_conda_windows.yml@main + with: + conda-package-directory: ${{ matrix.conda-package-directory }} + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} + secrets: + CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} diff --git a/.github/workflows/build-wheels-aarch64-linux.yml b/.github/workflows/build-wheels-aarch64-linux.yml new file mode 100644 index 00000000000..05c83991d5b --- /dev/null +++ b/.github/workflows/build-wheels-aarch64-linux.yml @@ -0,0 +1,54 @@ +name: Build Aarch64 Linux Wheels + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +permissions: + id-token: write + contents: read + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: linux-aarch64 + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-cuda: disable + build: + needs: generate-matrix + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/vision + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: test/smoke_test.py + 
package-name: torchvision + name: ${{ matrix.repository }} + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} + architecture: aarch64 + setup-miniconda: false diff --git a/.github/workflows/build-wheels-linux.yml b/.github/workflows/build-wheels-linux.yml new file mode 100644 index 00000000000..818f32c102b --- /dev/null +++ b/.github/workflows/build-wheels-linux.yml @@ -0,0 +1,52 @@ +name: Build Linux Wheels + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +permissions: + id-token: write + contents: read + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: linux + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-xpu: enable + build: + needs: generate-matrix + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/vision + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: test/smoke_test.py + package-name: torchvision + name: ${{ matrix.repository }} + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: ${{ 
matrix.pre-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} diff --git a/.github/workflows/build-wheels-m1.yml b/.github/workflows/build-wheels-m1.yml new file mode 100644 index 00000000000..76709b755e8 --- /dev/null +++ b/.github/workflows/build-wheels-m1.yml @@ -0,0 +1,52 @@ +name: Build M1 Wheels + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +permissions: + id-token: write + contents: read + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: macos-arm64 + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build: + needs: generate-matrix + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/vision + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: test/smoke_test.py + package-name: torchvision + name: ${{ matrix.repository }} + uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + runner-type: macos-m1-stable + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} diff --git a/.github/workflows/build-wheels-windows.yml b/.github/workflows/build-wheels-windows.yml new file mode 100644 index 00000000000..a269aea2604 --- /dev/null +++ 
b/.github/workflows/build-wheels-windows.yml @@ -0,0 +1,54 @@ +name: Build Windows Wheels + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +permissions: + id-token: write + contents: read + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: windows + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-xpu: enable + build: + needs: generate-matrix + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/vision + pre-script: packaging/pre_build_script.sh + env-script: packaging/windows/internal/vc_env_helper.bat + post-script: "python packaging/wheel/relocate.py" + smoke-test-script: test/smoke_test.py + package-name: torchvision + name: ${{ matrix.repository }} + uses: pytorch/test-infra/.github/workflows/build_wheels_windows.yml@main + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + env-script: ${{ matrix.env-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000000..f6ec4201da3 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,129 @@ +name: Docs + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + tags: + - v[0-9]+.[0-9]+.[0-9] + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + build: + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 
+ with: + repository: pytorch/vision + upload-artifact: docs + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=3.10 + export GPU_ARCH_TYPE=cpu + export GPU_ARCH_VERSION='' + ./.github/scripts/setup-env.sh + + # Prepare conda + CONDA_PATH=$(which conda) + eval "$(${CONDA_PATH} shell.bash hook)" + conda activate ci + # FIXME: not sure why we need this. `ldd torchvision/video_reader.so` shows that it + # already links against the one pulled from conda. However, at runtime it pulls from + # /lib64 + # Should we maybe always do this in `./.github/scripts/setup-env.sh` so that we don't + # have to pay attention in all other workflows? + export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}" + + cd docs + + echo '::group::Install doc requirements' + pip install --progress-bar=off -r requirements.txt + echo '::endgroup::' + + if [[ ${{ github.event_name }} == push && (${{ github.ref_type }} == tag || (${{ github.ref_type }} == branch && ${{ github.ref_name }} == release/*)) ]]; then + echo '::group::Enable version string sanitization' + # This environment variable just has to exist and must not be empty. The actual value is arbitrary. + # See docs/source/conf.py for details + export TORCHVISION_SANITIZE_VERSION_STR_IN_DOCS=1 + echo '::endgroup::' + fi + + # The runner does not have sufficient memory to run with as many processes as there are + # cores (`-j auto`). Thus, we limit to a single process (`-j 1`) here. + sed -i -e 's/-j auto/-j 1/' Makefile + make html + + # Below is an imperfect way for us to add "try on Colab" links to all of our gallery examples. + # sphinx-gallery will convert all gallery examples to .ipynb notebooks and store them in + # build/html/_downloads/<hash>/<example>.ipynb + # We copy all those ipynb files into a more convenient folder so that we can more easily link to them.
+ mkdir build/html/_generated_ipynb_notebooks + for file in `find build/html/_downloads`; do + if [[ $file == *.ipynb ]]; then + cp $file build/html/_generated_ipynb_notebooks/ + fi + done + + cp -r build/html "${RUNNER_ARTIFACT_DIR}" + + # On PRs we also want to upload the docs into our S3 bucket for preview. The event name is compared in bash because a bare `[[ true ]]` / `[[ false ]]` is always true. + if [[ ${{ github.event_name }} == pull_request ]]; then + cp -r build/html/* "${RUNNER_DOCS_DIR}" + fi + + upload: + needs: build + if: github.repository == 'pytorch/vision' && github.event_name == 'push' && + ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag') + permissions: + contents: write + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/vision + download-artifact: docs + ref: gh-pages + test-infra-ref: main + script: | + set -euo pipefail + + REF_TYPE=${{ github.ref_type }} + REF_NAME=${{ github.ref_name }} + + if [[ "${REF_TYPE}" == branch ]]; then + TARGET_FOLDER="${REF_NAME}" + elif [[ "${REF_TYPE}" == tag ]]; then + case "${REF_NAME}" in + *-rc*) + echo "Aborting upload since this is an RC tag: ${REF_NAME}" + exit 0 + ;; + *) + # Strip the leading "v" as well as the trailing patch version.
For example: + # 'v0.15.2' -> '0.15' + TARGET_FOLDER=$(echo "${REF_NAME}" | sed 's/v\([0-9]\+\)\.\([0-9]\+\)\.[0-9]\+/\1.\2/') + ;; + esac + fi + echo "Target Folder: ${TARGET_FOLDER}" + + mkdir -p "${TARGET_FOLDER}" + rm -rf "${TARGET_FOLDER}"/* + mv "${RUNNER_ARTIFACT_DIR}"/html/* "${TARGET_FOLDER}" + git add "${TARGET_FOLDER}" || true + + if [[ "${TARGET_FOLDER}" == main ]]; then + mkdir -p _static + rm -rf _static/* + cp -r "${TARGET_FOLDER}"/_static/* _static + git add _static || true + fi + + git config user.name 'pytorchbot' + git config user.email 'soumith+bot@pytorch.org' + git config http.postBuffer 524288000 + git commit -m "auto-generating sphinx docs" || true + git push diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000000..22ada97fba8 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,102 @@ +name: Lint + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + workflow_dispatch: + +jobs: + python-source-and-configs: + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/vision + test-infra-ref: main + script: | + set -euo pipefail + + echo '::group::Setup environment' + CONDA_PATH=$(which conda) + eval "$(${CONDA_PATH} shell.bash hook)" + conda create --name ci --quiet --yes python=3.9 pip + conda activate ci + echo '::endgroup::' + + echo '::group::Install lint tools' + pip install --progress-bar=off pre-commit + echo '::endgroup::' + + set +e + pre-commit run --all-files + + if [ $? 
-ne 0 ]; then + git --no-pager diff + exit 1 + fi + + c-source: + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/vision + test-infra-ref: main + script: | + set -euo pipefail + + echo '::group::Setup environment' + CONDA_PATH=$(which conda) + eval "$(${CONDA_PATH} shell.bash hook)" + conda create --name ci --quiet --yes -c conda-forge python=3.9 clang-format + conda activate ci + echo '::endgroup::' + + + echo '::group::Lint C source' + set +e + ./.github/scripts/run-clang-format.py -r torchvision/csrc --exclude "torchvision/csrc/io/image/cpu/giflib/*" + + if [ $? -ne 0 ]; then + git --no-pager diff + exit 1 + fi + echo '::endgroup::' + + + python-types: + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/vision + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=3.11 + export GPU_ARCH_TYPE=cpu + export GPU_ARCH_VERSION='' + + ./.github/scripts/setup-env.sh + + CONDA_PATH=$(which conda) + eval "$(${CONDA_PATH} shell.bash hook)" + conda activate ci + + echo '::group::Install lint tools' + pip install --progress-bar=off "mypy==1.13.0" + echo '::endgroup::' + + echo '::group::Lint Python types' + mypy --install-types --non-interactive --config-file mypy.ini + echo '::endgroup::' + + bc: + if: github.event.pull_request + runs-on: ubuntu-latest + steps: + - name: Run BC Lint Action + uses: pytorch/test-infra/.github/actions/bc-lint@main + with: + repo: ${{ github.event.pull_request.head.repo.full_name }} + base_sha: ${{ github.event.pull_request.base.sha }} + head_sha: ${{ github.event.pull_request.head.sha }} diff --git a/.github/workflows/pr-labels.yml b/.github/workflows/pr-labels.yml new file mode 100644 index 00000000000..bf6349ab02e --- /dev/null +++ b/.github/workflows/pr-labels.yml @@ -0,0 +1,40 @@ +name: pr-labels + +on: + push: + branches: + - main + +jobs: + is-properly-labeled: + runs-on: ubuntu-latest + permissions: + pull-requests: 
write + + steps: + - name: Set up python + uses: actions/setup-python@v5 + + - name: Install requests + run: pip install requests + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Process commit and find merger responsible for labeling + id: commit + run: | + MERGER=$(python .github/process_commit.py ${{ github.sha }}) + echo "merger=${MERGER}" | tee --append $GITHUB_OUTPUT + + - name: Ping merger responsible for labeling if necessary + if: ${{ steps.commit.outputs.merger != '' }} + uses: mshick/add-pr-comment@v2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + message: | + Hey ${{ steps.commit.outputs.merger }}! + + You merged this PR, but no labels were added. + The list of valid labels is available at https://github.com/pytorch/vision/blob/main/.github/process_commit.py diff --git a/.github/workflows/prototype-tests-linux-gpu.yml b/.github/workflows/prototype-tests-linux-gpu.yml new file mode 100644 index 00000000000..e1d6498761b --- /dev/null +++ b/.github/workflows/prototype-tests-linux-gpu.yml @@ -0,0 +1,57 @@ +name: Prototype tests on Linux + +# IMPORTANT: This workflow has been manually disabled from the GitHub interface +# in June 2024. The file is kept for reference in case we ever put this back. 
+ +on: + pull_request: + +jobs: + unittests-prototype: + strategy: + matrix: + python-version: + - "3.9" + - "3.10" + - "3.11" + - "3.12" + runner: ["linux.12xlarge"] + gpu-arch-type: ["cpu"] + include: + - python-version: "3.9" + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "11.8" + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/vision + runner: ${{ matrix.runner }} + gpu-arch-type: ${{ matrix.gpu-arch-type }} + gpu-arch-version: ${{ matrix.gpu-arch-version }} + timeout: 120 + script: | + set -euo pipefail + + export PYTHON_VERSION=${{ matrix.python-version }} + export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} + export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} + ./.github/scripts/setup-env.sh + + # Prepare conda + CONDA_PATH=$(which conda) + eval "$(${CONDA_PATH} shell.bash hook)" + conda activate ci + + echo '::group::Install testing utilities' + pip install --progress-bar=off pytest pytest-mock pytest-cov + echo '::endgroup::' + + # We don't want to run the prototype datasets tests. Since the positional glob into `pytest`, i.e. + # `test/test_prototype*.py` takes the highest priority, neither `--ignore` nor `--ignore-glob` can help us here. 
+ rm test/test_prototype_datasets*.py + pytest \ + -v --durations=25 \ + --cov=torchvision/prototype --cov-report=term-missing \ + --junit-xml="${RUNNER_TEST_RESULTS_DIR}/test-results.xml" \ + test/test_prototype_*.py diff --git a/.github/workflows/tests-schedule.yml b/.github/workflows/tests-schedule.yml new file mode 100644 index 00000000000..3cba2ef59d8 --- /dev/null +++ b/.github/workflows/tests-schedule.yml @@ -0,0 +1,60 @@ +name: tests + +on: + pull_request: + paths: + - "test/test_datasets_download.py" + - ".github/failed_schedule_issue_template.md" + - ".github/workflows/tests-schedule.yml" + + schedule: + - cron: "0 9 * * *" + +jobs: + download: + runs-on: ubuntu-latest + + steps: + - name: Set up python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Upgrade system packages + run: python -m pip install --upgrade pip setuptools wheel + + - name: SSL + run: python -c 'import ssl; print(ssl.OPENSSL_VERSION)' + + - name: Checkout repository + uses: actions/checkout@v2 + + - name: TODO REMOVE THIS! Install non pre-release version of mpmath. + run: pip install "mpmath<1.4" + + - name: Install torch nightly build + run: pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + + - name: Install torchvision + run: pip install --no-build-isolation --editable . 
+ + - name: Install all optional dataset requirements + run: pip install scipy pycocotools lmdb gdown + + - name: Install tests requirements + run: pip install pytest + + - name: Run tests + run: pytest -ra -v test/test_datasets_download.py + + - uses: JasonEtco/create-an-issue@v2.4.0 + name: Create issue if download tests failed + if: failure() && github.event_name == 'schedule' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + WORKFLOW: ${{ github.workflow }} + JOB: ${{ github.job }} + ID: ${{ github.run_id }} + with: + filename: .github/failed_schedule_issue_template.md diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000000..b4a74733967 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,170 @@ +name: Tests + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + workflow_dispatch: + +jobs: + unittests-linux: + strategy: + matrix: + python-version: + - "3.9" + - "3.10" + - "3.11" + - "3.12" + runner: ["linux.12xlarge"] + gpu-arch-type: ["cpu"] + include: + - python-version: 3.9 + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "11.8" + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/vision + runner: ${{ matrix.runner }} + gpu-arch-type: ${{ matrix.gpu-arch-type }} + gpu-arch-version: ${{ matrix.gpu-arch-version }} + timeout: 120 + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=${{ matrix.python-version }} + export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} + export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} + + ./.github/scripts/unittest.sh + + unittests-macos: + strategy: + matrix: + python-version: + - "3.9" + - "3.10" + - "3.11" + - "3.12" + runner: ["macos-m1-stable"] + fail-fast: false + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + with: + repository: pytorch/vision + timeout: 240 + runner: 
${{ matrix.runner }} + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=${{ matrix.python-version }} + export GPU_ARCH_TYPE=cpu + export GPU_ARCH_VERSION='' + + ${CONDA_RUN} ./.github/scripts/unittest.sh + + unittests-windows: + strategy: + matrix: + python-version: + - "3.9" + - "3.10" + - "3.11" + - "3.12" + runner: ["windows.4xlarge"] + gpu-arch-type: ["cpu"] + include: + - python-version: "3.9" + runner: windows.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "11.8" + fail-fast: false + uses: pytorch/test-infra/.github/workflows/windows_job.yml@main + with: + repository: pytorch/vision + runner: ${{ matrix.runner }} + gpu-arch-type: ${{ matrix.gpu-arch-type }} + gpu-arch-version: ${{ matrix.gpu-arch-version }} + timeout: 120 + test-infra-ref: main + script: | + set -euxo pipefail + + export PYTHON_VERSION=${{ matrix.python-version }} + export VC_YEAR=2019 + export VSDEVCMD_ARGS="" + export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} + export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} + + ./.github/scripts/unittest.sh + + onnx: + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/vision + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=3.10 + export GPU_ARCH_TYPE=cpu + export GPU_ARCH_VERSION='' + + ./.github/scripts/setup-env.sh + + # Prepare conda + CONDA_PATH=$(which conda) + eval "$(${CONDA_PATH} shell.bash hook)" + conda activate ci + + echo '::group::Install ONNX' + pip install --progress-bar=off onnx onnxruntime + echo '::endgroup::' + + echo '::group::Install testing utilities' + pip install --progress-bar=off pytest "numpy<2" + echo '::endgroup::' + + echo '::group::Run ONNX tests' + pytest --junit-xml="${RUNNER_TEST_RESULTS_DIR}/test-results.xml" -v --durations=25 test/test_onnx.py + echo '::endgroup::' + + unittests-extended: + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + if: 
contains(github.event.pull_request.labels.*.name, 'run-extended') + with: + repository: pytorch/vision + test-infra-ref: main + script: | + set -euo pipefail + + export PYTHON_VERSION=3.9 + export GPU_ARCH_TYPE=cpu + export GPU_ARCH_VERSION='' + + ./.github/scripts/setup-env.sh + + # Prepare conda + CONDA_PATH=$(which conda) + eval "$(${CONDA_PATH} shell.bash hook)" + conda activate ci + + echo '::group::Pre-download model weights' + pip install --progress-bar=off aiohttp aiofiles tqdm + python scripts/download_model_urls.py + echo '::endgroup::' + + echo '::group::Install testing utilities' + # TODO: remove the <8 constraint on pytest when https://github.com/pytorch/vision/issues/8238 is closed + pip install --progress-bar=off "pytest<8" + echo '::endgroup::' + + echo '::group::Run extended unittests' + export PYTORCH_TEST_WITH_EXTENDED=1 + pytest --junit-xml="${RUNNER_TEST_RESULTS_DIR}/test-results.xml" -v --durations=25 test/test_extended_*.py + echo '::endgroup::' diff --git a/.gitignore b/.gitignore index 5f483c84327..c2d4d2a1c42 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,17 @@ torchvision/version.py */**/**/*.pyc */**/*~ *~ + docs/build +# sphinx-gallery +docs/source/auto_examples/ +docs/source/gen_modules/ +docs/source/generated/ +docs/source/models/generated/ +docs/source/sg_execution_times.rst +# pytorch-sphinx-theme gets installed here +docs/src + .coverage htmlcov .*.swp @@ -20,3 +30,18 @@ htmlcov *.swp *.swo gen.yml +.mypy_cache +.vscode/ +.idea/ +*.orig +*-checkpoint.ipynb +*.venv + +## Xcode User settings +xcuserdata/ + +# direnv +.direnv +.envrc + +scripts/release_notes/data.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000000..762ebf6fce0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,32 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-docstring-first + - id: check-toml + - id: check-yaml + exclude: packaging/.* + args: + - 
--allow-multiple-documents + - id: mixed-line-ending + args: [--fix=lf] + - id: end-of-file-fixer + + - repo: https://github.com/omnilib/ufmt + rev: v1.3.3 + hooks: + - id: ufmt + additional_dependencies: + - black == 22.3.0 + - usort == 1.0.2 + + - repo: https://github.com/PyCQA/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + args: [--config=setup.cfg] + + - repo: https://github.com/PyCQA/pydocstyle + rev: 6.1.1 + hooks: + - id: pydocstyle diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 69c34f0f690..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,86 +0,0 @@ -language: python - -dist: xenial -matrix: - include: - - env: FORMAT_CHECK - language: cpp - addons: - apt: - sources: - - llvm-toolchain-xenial-7 - packages: - - clang-7 - - clang-format-7 - before_install: skip - install: skip - script: ./travis-scripts/run-clang-format/run-clang-format.py -r torchvision/csrc - - env: LINT_CHECK - python: "2.7" - install: pip install flake8 typing - script: flake8 --exclude .circleci - after_success: [] - - env: LINT_CHECK - python: "3.6" - install: pip install flake8 typing - script: flake8 .circleci - after_success: [] - - python: "2.7" - env: IMAGE_BACKEND=Pillow-SIMD - - python: "2.7" - - python: "3.6" - env: IMAGE_BACKEND=Pillow-SIMD - - python: "3.6" - -before_install: - - sudo apt-get update - - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - hash -r - - conda config --set always_yes yes --set changeps1 no - # Useful for debugging any issues with conda - - conda info -a - - - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytorch scipy -c pytorch-nightly - - source activate test-environment - - | - if [[ "$IMAGE_BACKEND" == "Pillow-SIMD" ]]; then - pip uninstall -y pillow && CC="cc -march=native" pip install --force-reinstall pillow-simd - fi - - pip install future - - pip install pytest 
pytest-cov codecov - - pip install mock - - pip install typing - - | - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then - pip install onnxruntime - fi - - conda install av -c conda-forge - - -install: - # Using pip instead of setup.py ensures we install a non-compressed version of the package - # (as opposed to an egg), which is necessary to collect coverage. - # We still get the benefit of testing an installed version over the - # test version to iron out installation file-inclusion bugs but can - # also collect coverage. - - pip install . - # Move to home dir, otherwise we'll end up with the path to the - # package in $PWD rather than the installed v - - | - cd $HOME - export TV_INSTALL_PATH="$(python -c 'import os; import torchvision; print(os.path.dirname(os.path.abspath(torchvision.__file__)))')" - echo "$TV_INSTALL_PATH" - cd - - -script: - - pytest --cov-config .coveragerc --cov torchvision --cov $TV_INSTALL_PATH -k 'not TestVideoReader and not TestVideoTransforms' test - - pytest test/test_hub.py - -after_success: - # Necessary to run coverage combine to rewrite paths from - # /travis/env/path/site-packages/torchvision to actual path - - coverage combine .coverage - - coverage report - - codecov diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000000..37db28b2bad --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,14 @@ +cff-version: 1.2.0 +title: "TorchVision: PyTorch's Computer Vision library" +message: >- + If you find TorchVision useful in your work, please + consider citing the following BibTeX entry. 
+type: software +authors: + - given-names: TorchVision maintainers and contributors +url: "https://github.com/pytorch/vision" +license: "BSD-3-Clause" +date-released: "2016-11-06" +journal: "GitHub repository" +publisher: "GitHub" +key: "torchvision2016" diff --git a/CMakeLists.txt b/CMakeLists.txt index df77482c870..f2430559909 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,180 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.18) project(torchvision) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) +file(STRINGS version.txt TORCHVISION_VERSION) + +option(WITH_CUDA "Enable CUDA support" OFF) +option(WITH_MPS "Enable MPS support" OFF) +option(WITH_PNG "Enable features requiring LibPNG." ON) +option(WITH_JPEG "Enable features requiring LibJPEG." ON) +# Libwebp is disabled by default, which means enabling it from cmake is largely +# untested. Since building from cmake is very low pri anyway, this is OK. If +# you're a user and you need this, please open an issue (and a PR!). +option(WITH_WEBP "Enable features requiring LibWEBP." OFF) +# Same here +option(WITH_AVIF "Enable features requiring LibAVIF." 
OFF) + +if(WITH_CUDA) + enable_language(CUDA) + add_definitions(-D__CUDA_NO_HALF_OPERATORS__) + add_definitions(-DWITH_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") +endif() + +if(WITH_MPS) + enable_language(OBJC OBJCXX) + add_definitions(-DWITH_MPS) +endif() find_package(Torch REQUIRED) -file(GLOB HEADERS torchvision/csrc/vision.h) -file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h) -file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp) +if (WITH_PNG) + add_definitions(-DPNG_FOUND) + find_package(PNG REQUIRED) +endif() + +if (WITH_JPEG) + add_definitions(-DJPEG_FOUND) + find_package(JPEG REQUIRED) +endif() + +if (WITH_WEBP) + add_definitions(-DWEBP_FOUND) + find_package(WEBP REQUIRED) +endif() + +if (WITH_AVIF) + add_definitions(-DAVIF_FOUND) + find_package(AVIF REQUIRED) +endif() + +function(CUDA_CONVERT_FLAGS EXISTING_TARGET) + get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS) + if(NOT "${old_flags}" STREQUAL "") + string(REPLACE ";" "," CUDA_flags "${old_flags}") + set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS + "$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CXX>>:${old_flags}>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${CUDA_flags}>" + ) + endif() +endfunction() + +if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4819") + if(WITH_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/wd4819") + foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration + set_but_not_used field_without_dll_interface + base_class_has_different_dll_interface + dll_interface_conflict_none_assumed + dll_interface_conflict_dllexport_assumed + implicit_return_from_non_void_function + unsigned_compare_with_zero + declared_but_not_referenced + bad_friend_decl) + string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=${diag}") + endforeach() + CUDA_CONVERT_FLAGS(torch_cpu) + if(TARGET torch_cuda) + CUDA_CONVERT_FLAGS(torch_cuda) + endif() + if(TARGET torch_cuda_cu) + CUDA_CONVERT_FLAGS(torch_cuda_cu) 
+ endif() + if(TARGET torch_cuda_cpp) + CUDA_CONVERT_FLAGS(torch_cuda_cpp) + endif() + endif() +endif() + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +set(TVCPP torchvision/csrc) +list(APPEND ALLOW_LISTED ${TVCPP} ${TVCPP}/io/image ${TVCPP}/io/image/cpu ${TVCPP}/io/image/cpu/giflib ${TVCPP}/models ${TVCPP}/ops + ${TVCPP}/ops/autograd ${TVCPP}/ops/cpu ${TVCPP}/io/image/cuda) +if(WITH_CUDA) + list(APPEND ALLOW_LISTED ${TVCPP}/ops/cuda ${TVCPP}/ops/autocast) +endif() +if(WITH_MPS) + list(APPEND ALLOW_LISTED ${TVCPP}/ops/mps) +endif() + +FOREACH(DIR ${ALLOW_LISTED}) + file(GLOB ALL_SOURCES ${ALL_SOURCES} ${DIR}/*.*) +ENDFOREACH() + +add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES}) +target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) + +if(WITH_MPS) + find_library(metal NAMES Metal) + find_library(foundation NAMES Foundation) + target_link_libraries(${PROJECT_NAME} PRIVATE ${metal} ${foundation}) +endif() + +if (WITH_PNG) + target_link_libraries(${PROJECT_NAME} PRIVATE ${PNG_LIBRARY}) +endif() + +if (WITH_JPEG) + target_link_libraries(${PROJECT_NAME} PRIVATE ${JPEG_LIBRARIES}) +endif() + +if (WITH_WEBP) + target_link_libraries(${PROJECT_NAME} PRIVATE ${WEBP_LIBRARIES}) +endif() + +if (WITH_AVIF) + target_link_libraries(${PROJECT_NAME} PRIVATE ${AVIF_LIBRARIES}) +endif() + +set_target_properties(${PROJECT_NAME} PROPERTIES + EXPORT_NAME TorchVision + INSTALL_RPATH ${TORCH_INSTALL_PREFIX}/lib) + +include_directories(torchvision/csrc) + +if (WITH_PNG) + include_directories(${PNG_INCLUDE_DIRS}) +endif() + +if (WITH_JPEG) + include_directories(${JPEG_INCLUDE_DIRS}) +endif() + +if (WITH_WEBP) + include_directories(${WEBP_INCLUDE_DIRS}) +endif() + +if (WITH_AVIF) + include_directories(${AVIF_INCLUDE_DIRS}) +endif() + +set(TORCHVISION_CMAKECONFIG_INSTALL_DIR "share/cmake/TorchVision" CACHE STRING "install path for TorchVisionConfig.cmake") + +configure_package_config_file(cmake/TorchVisionConfig.cmake.in + 
"${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfig.cmake" + INSTALL_DESTINATION ${TORCHVISION_CMAKECONFIG_INSTALL_DIR}) + +write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfigVersion.cmake + VERSION ${TORCHVISION_VERSION} + COMPATIBILITY AnyNewerVersion) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfigVersion.cmake + DESTINATION ${TORCHVISION_CMAKECONFIG_INSTALL_DIR}) + +install(TARGETS ${PROJECT_NAME} + EXPORT TorchVisionTargets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) -add_library (${PROJECT_NAME} SHARED ${MODELS_SOURCES}) -target_link_libraries(${PROJECT_NAME} PUBLIC "${TORCH_LIBRARIES}") +install(EXPORT TorchVisionTargets + NAMESPACE TorchVision:: + DESTINATION ${TORCHVISION_CMAKECONFIG_INSTALL_DIR}) -install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) -install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) -install(FILES ${MODELS_HEADERS} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}/models) +FOREACH(INPUT_DIR ${ALLOW_LISTED}) + string(REPLACE "${TVCPP}" "${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}" OUTPUT_DIR ${INPUT_DIR}) + file(GLOB INPUT_FILES ${INPUT_DIR}/*.*) + install(FILES ${INPUT_FILES} DESTINATION ${OUTPUT_DIR}) +ENDFOREACH() diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000000..b91e23b17c0 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic +address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a +professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..41ecd860055 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,234 @@ +# Contributing to Torchvision + +We want to make contributing to this project as easy and transparent as possible. + +## TL;DR + +We appreciate all contributions. If you are interested in contributing to Torchvision, there are many ways to help out. +Your contributions may fall into the following categories: + +- It helps the project if you could + - Report issues you're facing + - Give a :+1: on issues that others reported and that are relevant to you + +- Answering queries on the issue tracker, investigating bugs are very valuable contributions to the project. + +- You would like to improve the documentation. This is no less important than improving the library itself! +If you find a typo in the documentation, do not hesitate to submit a GitHub pull request. 
+ +- If you would like to fix a bug + - please pick one from the [list of open issues labelled as "help wanted"](https://github.com/pytorch/vision/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) + - comment on the issue that you want to work on this issue + - send a PR with your fix, see below. + +- If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. + +## Issues + +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +## Development installation + + +### Dependencies + +Start by installing the **nightly** build of PyTorch following the [official +instructions](https://pytorch.org/get-started/locally/). Note that the official +instructions may ask you to install torchvision itself. If you are doing development +on torchvision, you should not install prebuilt torchvision packages. + +**Optionally**, install `libpng` and `libjpeg-turbo` if you want to enable +support for +native encoding / decoding of PNG and JPEG formats in +[torchvision.io](https://pytorch.org/vision/stable/io.html#image): + +```bash +conda install libpng libjpeg-turbo -c pytorch +``` + +Note: you can use the `TORCHVISION_INCLUDE` and `TORCHVISION_LIBRARY` +environment variables to tell the build system where to find those libraries if +they are in specific locations. Take a look at +[setup.py](https://github.com/pytorch/vision/blob/main/setup.py) for more +details. + +### Clone and install torchvision + +```bash +git clone https://github.com/pytorch/vision.git +cd vision +python setup.py develop # use install instead of develop if you don't care about development. 
+# or, for OSX +# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py develop +# for C++ debugging, use DEBUG=1 +# DEBUG=1 python setup.py develop +``` + +By default, GPU support is built if CUDA is found and `torch.cuda.is_available()` is true. It's possible to force +building GPU support by setting `FORCE_CUDA=1` environment variable, which is useful when building a docker image. + +We don't officially support building from source using `pip`, but _if_ you do, you'll need to use the +`--no-build-isolation` flag. + +#### Other development dependencies (some of these are needed to run tests): + +``` +pip install expecttest flake8 typing mypy pytest pytest-mock scipy requests +``` + +## Development Process + +If you plan to modify the code or documentation, please follow the steps below: + +1. Fork the repository and create your branch from `main`. +2. If you have modified the code (new feature or bug-fix), please add unit tests. +3. If you have changed APIs, update the documentation. Make sure the documentation builds. +4. Ensure the test suite passes. +5. Make sure your code passes the formatting checks (see below). + +For more details about pull requests, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). + +If you would like to contribute a new model, please see [here](#New-architecture-or-improved-model-weights). + +If you would like to contribute a new dataset, please see [here](#New-dataset). + +### Code formatting and typing + +#### Formatting + +The torchvision code is formatted by [black](https://black.readthedocs.io/en/stable/), +and checked against pep8 compliance with [flake8](https://flake8.pycqa.org/en/latest/). +Instead of relying directly on `black` however, we rely on +[ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook +internal infrastructure. 
+ +To format your code, install `ufmt` with `pip install ufmt==1.3.3 black==22.3.0 usort==1.0.2` and use e.g.: + +```bash +ufmt format torchvision +``` + +For the vast majority of cases, this is all you should need to run. For the +formatting to be a bit faster, you can also choose to only apply `ufmt` to the +files that were edited in your PR with e.g.: + +```bash +ufmt format `git diff main --name-only` +``` + +Similarly, you can check for `flake8` errors with `flake8 torchvision`, although +they should be fairly rare considering that most of the errors are automatically +taken care of by `ufmt` already. + +##### Pre-commit hooks + +For convenience and **purely optionally**, you can rely on [pre-commit +hooks](https://pre-commit.com/) which will run both `ufmt` and `flake8` prior to +every commit. + +First install the `pre-commit` package with `pip install pre-commit`, and then +run `pre-commit install` at the root of the repo for the hooks to be set up - +that's it. + +Feel free to read the [pre-commit docs](https://pre-commit.com/#usage) to learn +more and improve your workflow. You'll see for example that `pre-commit run +--all-files` will run both `ufmt` and `flake8` without the need for you to +commit anything, and that the `--no-verify` flag can be added to `git commit` to +temporarily deactivate the hooks. + +#### Type annotations + +The codebase has type annotations, please make sure to add type hints if required. We use `mypy` tool for type checking: +```bash +mypy --config-file mypy.ini +``` + +### Unit tests + +Before running tests make sure to install [test dependencies](#other-development-dependencies-some-of-these-are-needed-to-run-tests). + +If you have modified the code by adding a new feature or a bug-fix, please add unit tests for that. To run a specific +test: +```bash +pytest test/<test-module.py> -vvv -k <test_myfunc> +# e.g. 
pytest test/test_transforms.py -vvv -k test_center_crop +``` + +If you would like to run all tests: +```bash +pytest test -vvv +``` + +Tests that require internet access should be in +`test/test_internet.py`. + +### Documentation + +Torchvision uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) +for formatting docstrings. Length of line inside docstrings block must be limited to 120 characters. + +Please, follow the instructions to build and deploy the documentation locally. + +#### Install requirements + +```bash +cd docs +pip install -r requirements.txt +``` + +#### Build + +```bash +cd docs +make html-noplot +``` + +Then open `docs/build/html/index.html` in your favorite browser. + +The docs are also automatically built when you submit a PR. The job that +builds the docs is named `build_docs`. You can access the rendered docs by +clicking on that job and then going to the "Artifacts" tab. + +You can clean the built docs and re-start the build from scratch by doing ``make +clean``. + +#### Building the example gallery - or not + +In most cases, running `make html-noplot` is enough to build the docs for your +specific use-case. The `noplot` part tells sphinx **not** to build the examples +in the [gallery](https://pytorch.org/vision/stable/auto_examples/index.html), +which saves a lot of building time. + +If you need to build all the examples in the gallery, then you can use `make +html`. + +You can also choose to only build a subset of the examples by using the +``EXAMPLES_PATTERN`` env variable, which accepts a regular expression. For +example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples +with "transforms" in their name. + +### New architecture or improved model weights + +Please refer to the guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). 
+ +### New dataset + +Please, do not send any PR with a new dataset without discussing +it in an issue as, most likely, it will not be accepted. + +### Pull Request + +If all previous checks (flake8, mypy, unit tests) are passing, please send a PR. Submitted PR will pass other tests on +different operating systems, python versions and hardware. + +For more details about pull requests workflow, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). + +## License + +By contributing to Torchvision, you agree that your contributions will be licensed +under the LICENSE file in the root directory of this source tree. + +Contributors are also required to [sign our Contributor License Agreement](https://code.facebook.com/cla). diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md new file mode 100644 index 00000000000..390a25a0f89 --- /dev/null +++ b/CONTRIBUTING_MODELS.md @@ -0,0 +1,65 @@ +# Contributing to Torchvision - Models + +- [New Model Architectures - Overview](#new-model-architectures---overview) + +- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) + +## New Model Architectures - Overview + +For someone who would be interested in adding a model architecture, it is also expected to train the model, so here are a few important considerations: + +- Training big models requires lots of resources and the cost quickly adds up + +- Reproducing models is fun but also risky as you might not always get the results reported on the paper. 
It might require a huge amount of effort to close the gap + +- The contribution might not get merged if we significantly lack in terms of accuracy, speed etc + +- Including new models in TorchVision might not be the best approach, so other options such as releasing the model through to [Pytorch Hub](https://pytorch.org/hub/) should be considered + +So, before starting any work and submitting a PR there are a few critical things that need to be taken into account in order to make sure the planned contribution is within the context of TorchVision, and the requirements and expectations are discussed beforehand. If this step is skipped and a PR is submitted without prior discussion it will almost certainly be rejected. + +### 1. Preparation work + +- Start by looking into this [issue](https://github.com/pytorch/vision/issues/2707) in order to have an idea of the models that are being considered, express your willingness to add a new model and discuss with the community whether this model should be included in TorchVision. It is very important at this stage to make sure that there is an agreement on the value of having this model in TorchVision and there is no one else already working on it. + +- If the decision is to include the new model, then please create a new ticket which will be used for all design and implementation discussions prior to the PR. One of the TorchVision maintainers will reach out at this stage and this will be your POC from this point onwards in order to provide support, guidance and regular feedback. + +### 2. Implement the model + +Please take a look at existing models in TorchVision to get familiar with the idioms. Also, please look at recent contributions for new models. If in doubt about any design decisions you can ask for feedback on the issue created in step 1. 
Examples of things to take into account: + +- The implementation should be as close as possible to the canonical implementation/paper +- The PR must include the code implementation, documentation and tests +- It should also extend the existing reference scripts used to train the model +- The weights need to reproduce closely the results of the paper in terms of accuracy, even though the final weights to be deployed will be those trained by the TorchVision maintainers +- The PR description should include commands/configuration used to train the model, so that the TorchVision maintainers can easily run them to verify the implementation and generate the final model to be released +- Make sure we re-use existing components as much as possible (inheritance) +- New primitives (transforms, losses, etc.) can be added if necessary, but the final location will be determined after discussion with the dedicated maintainer +- Please take a look at the detailed [implementation and documentation guidelines](https://github.com/pytorch/vision/issues/5319) for a fine-grained list of things not to be missed + +### 3. Train the model with reference scripts + +To validate the new model against the common benchmark, as well as to generate pre-trained weights, you must use TorchVision’s reference scripts to train the model. + +Make sure all logs and a final (or best) checkpoint are saved, because it is expected that a submission shows that a model has been successfully trained and the results are in line with the original paper/repository. This will allow the reviewers to quickly check the validity of the submission, but please note that the final model to be released will be re-trained by the maintainers in order to verify reproducibility, ensure that the changes that occurred during the PR review did not introduce any bugs, and to avoid moving around a large amount of data (including all checkpoints and logs). + +### 4. Submit a PR + +Submit a PR and tag the assigned maintainer.
This PR should: + +- Link the original ticket +- Provide a link for the original paper and the original repository if available +- Highlight the important test metrics and how they compare to the original paper +- Highlight any design choices that deviate from the original paper/implementation and rationale for these choices + +## New Weights for Existing Model Architectures + +The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: + +1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. + +2. Train the model using TorchVision reference scripts. You can add new primitives (transforms, losses, etc) when necessary, but the final location will be determined after discussion with the dedicated maintainer. + +3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. + +4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! 
diff --git a/MANIFEST.in b/MANIFEST.in index 75f238c0a2c..9e45188df35 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include README.rst +include README.md include LICENSE recursive-exclude * __pycache__ diff --git a/README.md b/README.md new file mode 100644 index 00000000000..1076a7a186d --- /dev/null +++ b/README.md @@ -0,0 +1,128 @@ +# torchvision + +[![total torchvision downloads](https://pepy.tech/badge/torchvision)](https://pepy.tech/project/torchvision) +[![documentation](https://img.shields.io/badge/dynamic/json.svg?label=docs&url=https%3A%2F%2Fpypi.org%2Fpypi%2Ftorchvision%2Fjson&query=%24.info.version&colorB=brightgreen&prefix=v)](https://pytorch.org/vision/stable/index.html) + +The torchvision package consists of popular datasets, model architectures, and common image transformations for computer +vision. + +## Installation + +Please refer to the [official +instructions](https://pytorch.org/get-started/locally/) to install the stable +versions of `torch` and `torchvision` on your system. + +To build from source, refer to our [contributing +page](https://github.com/pytorch/vision/blob/main/CONTRIBUTING.md#development-installation). + +The following are the corresponding `torchvision` versions and supported Python +versions. + +| `torch` | `torchvision` | Python | +| ------------------ | ------------------ | ------------------- | +| `main` / `nightly` | `main` / `nightly` | `>=3.9`, `<=3.12` | +| `2.5` | `0.20` | `>=3.9`, `<=3.12` | +| `2.4` | `0.19` | `>=3.8`, `<=3.12` | +| `2.3` | `0.18` | `>=3.8`, `<=3.12` | +| `2.2` | `0.17` | `>=3.8`, `<=3.11` | +| `2.1` | `0.16` | `>=3.8`, `<=3.11` | +| `2.0` | `0.15` | `>=3.8`, `<=3.11` | +
+ older versions + +| `torch` | `torchvision` | Python | +|---------|-------------------|---------------------------| +| `1.13` | `0.14` | `>=3.7.2`, `<=3.10` | +| `1.12` | `0.13` | `>=3.7`, `<=3.10` | +| `1.11` | `0.12` | `>=3.7`, `<=3.10` | +| `1.10` | `0.11` | `>=3.6`, `<=3.9` | +| `1.9` | `0.10` | `>=3.6`, `<=3.9` | +| `1.8` | `0.9` | `>=3.6`, `<=3.9` | +| `1.7` | `0.8` | `>=3.6`, `<=3.9` | +| `1.6` | `0.7` | `>=3.6`, `<=3.8` | +| `1.5` | `0.6` | `>=3.5`, `<=3.8` | +| `1.4` | `0.5` | `==2.7`, `>=3.5`, `<=3.8` | +| `1.3` | `0.4.2` / `0.4.3` | `==2.7`, `>=3.5`, `<=3.7` | +| `1.2` | `0.4.1` | `==2.7`, `>=3.5`, `<=3.7` | +| `1.1` | `0.3` | `==2.7`, `>=3.5`, `<=3.7` | +| `<=1.0` | `0.2` | `==2.7`, `>=3.5`, `<=3.7` | + +
+ +## Image Backends + +Torchvision currently supports the following image backends: + +- torch tensors +- PIL images: + - [Pillow](https://python-pillow.org/) + - [Pillow-SIMD](https://github.com/uploadcare/pillow-simd) - a **much faster** drop-in replacement for Pillow with SIMD. + +Read more in our [docs](https://pytorch.org/vision/stable/transforms.html). + +## [UNSTABLE] Video Backend + +Torchvision currently supports the following video backends: + +- [pyav](https://github.com/PyAV-Org/PyAV) (default) - Pythonic binding for ffmpeg libraries. +- video_reader - This needs ffmpeg to be installed and torchvision to be built from source. There shouldn't be any + conflicting version of ffmpeg installed. Currently, this is only supported on Linux. + +``` +conda install -c conda-forge 'ffmpeg<4.3' +python setup.py install +``` + +# Using the models on C++ + +Refer to [examples/cpp](https://github.com/pytorch/vision/tree/main/examples/cpp). + +**DISCLAIMER**: the `libtorchvision` library includes the torchvision +custom ops as well as most of the C++ torchvision APIs. Those APIs do not come +with any backward-compatibility guarantees and may change from one version to +the next. Only the Python APIs are stable and come with backward-compatibility +guarantees. So, if you need stability within a C++ environment, your best bet is +to export the Python APIs via torchscript. + +## Documentation + +You can find the API documentation on the pytorch website: <https://pytorch.org/vision/stable/index.html> + +## Contributing + +See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out. + +## Disclaimer on Datasets + +This is a utility library that downloads and prepares public datasets. We do not host or distribute these datasets, +vouch for their quality or fairness, or claim that you have license to use the dataset. It is your responsibility to +determine whether you have permission to use the dataset under the dataset's license.
+ +If you're a dataset owner and wish to update any part of it (description, citation, etc.), or do not want your dataset +to be included in this library, please get in touch through a GitHub issue. Thanks for your contribution to the ML +community! + +## Pre-trained Model License + +The pre-trained models provided in this library may have their own licenses or terms and conditions derived from the +dataset used for training. It is your responsibility to determine whether you have permission to use the models for your +use case. + +More specifically, SWAG models are released under the CC-BY-NC 4.0 license. See +[SWAG LICENSE](https://github.com/facebookresearch/SWAG/blob/main/LICENSE) for additional details. + +## Citing TorchVision + +If you find TorchVision useful in your work, please consider citing the following BibTeX entry: + +```bibtex +@software{torchvision2016, + title = {TorchVision: PyTorch's Computer Vision library}, + author = {TorchVision maintainers and contributors}, + year = 2016, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/pytorch/vision}} +} +``` diff --git a/README.rst b/README.rst deleted file mode 100644 index 30dce8b4639..00000000000 --- a/README.rst +++ /dev/null @@ -1,89 +0,0 @@ -torchvision -=========== - -.. image:: https://travis-ci.org/pytorch/vision.svg?branch=master - :target: https://travis-ci.org/pytorch/vision - -.. image:: https://codecov.io/gh/pytorch/vision/branch/master/graph/badge.svg - :target: https://codecov.io/gh/pytorch/vision - -.. image:: https://pepy.tech/badge/torchvision - :target: https://pepy.tech/project/torchvision - -.. 
image:: https://img.shields.io/badge/dynamic/json.svg?label=docs&url=https%3A%2F%2Fpypi.org%2Fpypi%2Ftorchvision%2Fjson&query=%24.info.version&colorB=brightgreen&prefix=v - :target: https://pytorch.org/docs/stable/torchvision/index.html - - -The torchvision package consists of popular datasets, model architectures, and common image transformations for computer vision. - -Installation -============ - -TorchVision requires PyTorch 1.2 or newer. - -Anaconda: - -.. code:: bash - - conda install torchvision -c pytorch - -pip: - -.. code:: bash - - pip install torchvision - -From source: - -.. code:: bash - - python setup.py install - # or, for OSX - # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install - -By default, GPU support is built if CUDA is found and ``torch.cuda.is_available()`` is true. -It's possible to force building GPU support by setting ``FORCE_CUDA=1`` environment variable, -which is useful when building a docker image. - -Image Backend -============= -Torchvision currently supports the following image backends: - -* `Pillow`_ (default) - -* `Pillow-SIMD`_ - a **much faster** drop-in replacement for Pillow with SIMD. If installed will be used as the default. - -* `accimage`_ - if installed can be activated by calling :code:`torchvision.set_image_backend('accimage')` - -.. _Pillow : https://python-pillow.org/ -.. _Pillow-SIMD : https://github.com/uploadcare/pillow-simd -.. _accimage: https://github.com/pytorch/accimage - -C++ API -======= -TorchVision also offers a C++ API that contains C++ equivalent of python models. - -Installation From source: - -.. code:: bash - - mkdir build - cd build - cmake .. - make - make install - -Documentation -============= -You can find the API documentation on the pytorch website: http://pytorch.org/docs/master/torchvision/ - -Contributing -============ -We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. 
If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. - -Disclaimer on Datasets -====================== - -This is a utility library that downloads and prepares public datasets. We do not host or distribute these datasets, vouch for their quality or fairness, or claim that you have license to use the dataset. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license. - -If you're a dataset owner and wish to update any part of it (description, citation, etc.), or do not want your dataset to be included in this library, please get in touch through a GitHub issue. Thanks for your contribution to the ML community! diff --git a/android/.gitignore b/android/.gitignore new file mode 100644 index 00000000000..adcfad04c91 --- /dev/null +++ b/android/.gitignore @@ -0,0 +1,6 @@ +local.properties +**/*.iml +.gradle +.idea/* +.externalNativeBuild +build diff --git a/android/README.md b/android/README.md new file mode 100644 index 00000000000..788c83f26de --- /dev/null +++ b/android/README.md @@ -0,0 +1,3 @@ +## Status + +The Android demo of TorchVision is currently unmaintained, untested and likely out-of-date. 
diff --git a/android/build.gradle b/android/build.gradle new file mode 100644 index 00000000000..f7995a07f5b --- /dev/null +++ b/android/build.gradle @@ -0,0 +1,40 @@ +allprojects { + buildscript { + ext { + minSdkVersion = 21 + targetSdkVersion = 28 + compileSdkVersion = 28 + buildToolsVersion = '28.0.3' + + coreVersion = "1.2.0" + extJUnitVersion = "1.1.1" + runnerVersion = "1.2.0" + rulesVersion = "1.2.0" + junitVersion = "4.12" + + androidSupportAppCompatV7Version = "28.0.0" + fbjniJavaOnlyVersion = "0.0.3" + soLoaderNativeLoaderVersion = "0.10.5" + pytorchAndroidVersion = "1.12" + } + + repositories { + google() + mavenCentral() + } + + dependencies { + classpath 'com.android.tools.build:gradle:4.1.2' + classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2' + } + } + + repositories { + google() + mavenCentral() + } +} + +ext.deps = [ + jsr305: 'com.google.code.findbugs:jsr305:3.0.1', +] diff --git a/android/gradle.properties b/android/gradle.properties new file mode 100644 index 00000000000..8204b73b051 --- /dev/null +++ b/android/gradle.properties @@ -0,0 +1,24 @@ +ABI_FILTERS=armeabi-v7a,arm64-v8a,x86,x86_64 + +VERSION_NAME=0.15.0-SNAPSHOT +GROUP=org.pytorch +MAVEN_GROUP=org.pytorch +SONATYPE_STAGING_PROFILE=orgpytorch +POM_URL=https://github.com/pytorch/vision/ +POM_SCM_URL=https://github.com/pytorch/vision.git +POM_SCM_CONNECTION=scm:git:https://github.com/pytorch/vision +POM_SCM_DEV_CONNECTION=scm:git:git@github.com:pytorch/vision.git +POM_LICENSE_NAME=BSD 3-Clause +POM_LICENSE_URL=https://github.com/pytorch/vision/blob/main/LICENSE +POM_ISSUES_URL=https://github.com/pytorch/vision/issues +POM_LICENSE_DIST=repo +POM_DEVELOPER_ID=pytorch +POM_DEVELOPER_NAME=pytorch + +# Gradle internals +android.useAndroidX=true +android.enableJetifier=true + +testAppAllVariantsEnabled=false + +org.gradle.jvmargs=-Xmx12g diff --git a/android/gradle/wrapper/gradle-wrapper.jar b/android/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 
00000000000..94336fcae91 Binary files /dev/null and b/android/gradle/wrapper/gradle-wrapper.jar differ diff --git a/android/gradle/wrapper/gradle-wrapper.properties b/android/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000000..442d9132ea3 --- /dev/null +++ b/android/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/android/gradle_scripts/android_tasks.gradle b/android/gradle_scripts/android_tasks.gradle new file mode 100644 index 00000000000..6bba126b2f6 --- /dev/null +++ b/android/gradle_scripts/android_tasks.gradle @@ -0,0 +1,11 @@ +afterEvaluate { project -> + if (POM_PACKAGING == 'aar') { + task headersJar(type: Jar) { + archiveClassifier.set('headers') + from("$rootDir/cxx/") { + include '**/*.h' + } + } + artifacts.add('archives', headersJar) + } +} diff --git a/android/gradle_scripts/release.gradle b/android/gradle_scripts/release.gradle new file mode 100644 index 00000000000..ada97f33964 --- /dev/null +++ b/android/gradle_scripts/release.gradle @@ -0,0 +1,3 @@ +apply from: rootProject.file('gradle_scripts/android_tasks.gradle') + +apply plugin: 'com.vanniktech.maven.publish' diff --git a/android/gradlew b/android/gradlew new file mode 100755 index 00000000000..cccdd3d517f --- /dev/null +++ b/android/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. 
+while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? 
-eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/android/gradlew.bat b/android/gradlew.bat new file mode 100644 index 00000000000..f9553162f12 --- /dev/null +++ b/android/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/android/ops/CMakeLists.txt b/android/ops/CMakeLists.txt new file mode 100644 index 00000000000..fb8d4348e8e --- /dev/null +++ b/android/ops/CMakeLists.txt @@ -0,0 +1,47 @@ +cmake_minimum_required(VERSION 3.4.1) +set(TARGET torchvision_ops) +project(${TARGET} CXX) +set(CMAKE_CXX_STANDARD 17) + +string(APPEND CMAKE_CXX_FLAGS " -DMOBILE") + +set(build_DIR ${CMAKE_SOURCE_DIR}/build) +set(root_DIR ${CMAKE_CURRENT_LIST_DIR}/..) 
+ +file(GLOB VISION_SRCS + ../../torchvision/csrc/ops/cpu/*.h + ../../torchvision/csrc/ops/cpu/*.cpp + ../../torchvision/csrc/ops/*.h + ../../torchvision/csrc/ops/*.cpp) + +add_library(${TARGET} SHARED + ${VISION_SRCS} +) + +file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") +file(GLOB PYTORCH_INCLUDE_DIRS_CSRC "${build_DIR}/pytorch_android*.aar/headers/torch/csrc/api/include") +file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") + +target_compile_options(${TARGET} PRIVATE + -fexceptions +) + +set(BUILD_SUBDIR ${ANDROID_ABI}) + +find_library(PYTORCH_LIBRARY pytorch_jni + PATHS ${PYTORCH_LINK_DIRS} + NO_CMAKE_FIND_ROOT_PATH) + +find_library(FBJNI_LIBRARY fbjni + PATHS ${PYTORCH_LINK_DIRS} + NO_CMAKE_FIND_ROOT_PATH) + +target_include_directories(${TARGET} PRIVATE + ${PYTORCH_INCLUDE_DIRS} + ${PYTORCH_INCLUDE_DIRS_CSRC} +) + +target_link_libraries(${TARGET} PRIVATE + ${PYTORCH_LIBRARY} + ${FBJNI_LIBRARY} +) diff --git a/android/ops/build.gradle b/android/ops/build.gradle new file mode 100644 index 00000000000..bfa2c393833 --- /dev/null +++ b/android/ops/build.gradle @@ -0,0 +1,93 @@ +apply plugin: 'com.android.library' +apply plugin: 'maven' + +repositories { + mavenCentral() + maven { + url "https://oss.sonatype.org/content/repositories/snapshots" + } + flatDir { + dirs 'aars' + } +} + +android { + configurations { + extractForNativeBuild + } + compileSdkVersion rootProject.compileSdkVersion + buildToolsVersion rootProject.buildToolsVersion + + + defaultConfig { + minSdkVersion rootProject.minSdkVersion + targetSdkVersion rootProject.targetSdkVersion + versionCode 0 + versionName "0.1" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + ndk { + abiFilters ABI_FILTERS.split(",") + } + } + + buildTypes { + debug { + minifyEnabled false + debuggable true + } + release { + minifyEnabled false + } + } + + externalNativeBuild { + cmake { + path "CMakeLists.txt" + } + } + + useLibrary 
'android.test.runner' + useLibrary 'android.test.base' + useLibrary 'android.test.mock' +} + +dependencies { + implementation 'com.android.support:appcompat-v7:' + rootProject.androidSupportAppCompatV7Version + + extractForNativeBuild "org.pytorch:pytorch_android:$pytorchAndroidVersion" + + // For testing: deps on local aar files + //implementation(name: 'pytorch_android-release', ext: 'aar') + //extractForNativeBuild(name: 'pytorch_android-release', ext: 'aar') + //implementation 'com.facebook.fbjni:fbjni-java-only:0.0.3' +} + +task extractAARForNativeBuild { + doLast { + configurations.extractForNativeBuild.files.each { + def file = it.absoluteFile + copy { + from zipTree(file) + into "$buildDir/$file.name" + include "headers/**" + include "jni/**" + } + } + } +} + +tasks.whenTaskAdded { task -> + if (task.name.contains('externalNativeBuild')) { + task.dependsOn(extractAARForNativeBuild) + } +} + +apply from: rootProject.file('gradle_scripts/release.gradle') + +task sourcesJar(type: Jar) { + from android.sourceSets.main.java.srcDirs + classifier = 'sources' +} + +artifacts.add('archives', sourcesJar) diff --git a/android/ops/gradle.properties b/android/ops/gradle.properties new file mode 100644 index 00000000000..5a4ea2f3aba --- /dev/null +++ b/android/ops/gradle.properties @@ -0,0 +1,4 @@ +POM_NAME=torchvision ops +POM_DESCRIPTION=torchvision ops +POM_ARTIFACT_ID=torchvision_ops +POM_PACKAGING=aar diff --git a/android/ops/src/main/AndroidManifest.xml b/android/ops/src/main/AndroidManifest.xml new file mode 100644 index 00000000000..8ca386493c4 --- /dev/null +++ b/android/ops/src/main/AndroidManifest.xml @@ -0,0 +1 @@ + diff --git a/android/settings.gradle b/android/settings.gradle new file mode 100644 index 00000000000..6d34eb8d51a --- /dev/null +++ b/android/settings.gradle @@ -0,0 +1,4 @@ +include ':ops', ':test_app' + +project(':ops').projectDir = file('ops') +project(':test_app').projectDir = file('test_app/app') diff --git 
a/android/test_app/app/build.gradle b/android/test_app/app/build.gradle new file mode 100644 index 00000000000..84cf1d82e6b --- /dev/null +++ b/android/test_app/app/build.gradle @@ -0,0 +1,135 @@ +apply plugin: 'com.android.application' + +repositories { + mavenCentral() + maven { + url "https://oss.sonatype.org/content/repositories/snapshots" + } + flatDir { + dirs 'aars' + } +} + +android { + configurations { + extractForNativeBuild + } + compileOptions { + sourceCompatibility 1.8 + targetCompatibility 1.8 + } + compileSdkVersion rootProject.compileSdkVersion + buildToolsVersion rootProject.buildToolsVersion + defaultConfig { + applicationId "org.pytorch.testapp" + minSdkVersion rootProject.minSdkVersion + targetSdkVersion rootProject.targetSdkVersion + versionCode 1 + versionName "1.0" + ndk { + abiFilters ABI_FILTERS.split(",") + } + externalNativeBuild { + cmake { + abiFilters ABI_FILTERS.split(",") + arguments "-DANDROID_STL=c++_shared" + } + } + buildConfigField("String", "MODULE_ASSET_NAME", "\"frcnn_mnetv3.pt\"") + buildConfigField("String", "LOGCAT_TAG", "@string/app_name") + buildConfigField("long[]", "INPUT_TENSOR_SHAPE", "new long[]{3, 96, 96}") + addManifestPlaceholders([APP_NAME: "@string/app_name", MAIN_ACTIVITY: "org.pytorch.testapp.MainActivity"]) + } + buildTypes { + debug { + minifyEnabled false + debuggable true + } + release { + minifyEnabled false + } + } + flavorDimensions "model", "activity", "build" + productFlavors { + frcnnMnetv3 { + dimension "model" + applicationIdSuffix ".frcnnMnetv3" + buildConfigField("String", "MODULE_ASSET_NAME", "\"frcnn_mnetv3.pt\"") + addManifestPlaceholders([APP_NAME: "TV_FRCNN_MNETV3"]) + buildConfigField("String", "LOGCAT_TAG", "\"pytorch-frcnn-mnetv3\"") + } + camera { + dimension "activity" + addManifestPlaceholders([APP_NAME: "TV_CAMERA_FRCNN"]) + addManifestPlaceholders([MAIN_ACTIVITY: "org.pytorch.testapp.CameraActivity"]) + } + base { + dimension "activity" + } + aar { + dimension "build" + } + local { 
+ dimension "build" + } + } + packagingOptions { + doNotStrip '**.so' + pickFirst '**.so' + } + + // Filtering for CI + if (!testAppAllVariantsEnabled.toBoolean()) { + variantFilter { variant -> + def names = variant.flavors*.name + if (names.contains("aar")) { + setIgnore(true) + } + } + } +} + +tasks.all { task -> + // Disable externalNativeBuild for all but nativeBuild variant + if (task.name.startsWith('externalNativeBuild') + && !task.name.contains('NativeBuild')) { + task.enabled = false + } +} + +dependencies { + implementation 'com.android.support:appcompat-v7:28.0.0' + implementation 'com.facebook.soloader:nativeloader:0.8.0' + localImplementation project(':ops') + + implementation "org.pytorch:pytorch_android:$pytorchAndroidVersion" + implementation "org.pytorch:pytorch_android_torchvision:$pytorchAndroidVersion" + + aarImplementation(name: 'pytorch_android-release', ext: 'aar') + aarImplementation(name: 'pytorch_android_torchvision-release', ext: 'aar') + + def camerax_version = "1.0.0-alpha05" + implementation "androidx.camera:camera-core:$camerax_version" + implementation "androidx.camera:camera-camera2:$camerax_version" + implementation 'com.google.android.material:material:1.0.0-beta01' +} + +task extractAARForNativeBuild { + doLast { + configurations.extractForNativeBuild.files.each { + def file = it.absoluteFile + copy { + from zipTree(file) + into "$buildDir/$file.name" + include "headers/**" + include "jni/**" + } + } + } +} + +tasks.whenTaskAdded { task -> + if (task.name.contains('externalNativeBuild')) { + task.dependsOn(extractAARForNativeBuild) + } +} diff --git a/android/test_app/app/src/main/AndroidManifest.xml b/android/test_app/app/src/main/AndroidManifest.xml new file mode 100644 index 00000000000..a83bf223bda --- /dev/null +++ b/android/test_app/app/src/main/AndroidManifest.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/BBox.java 
b/android/test_app/app/src/main/java/org/pytorch/testapp/BBox.java new file mode 100644 index 00000000000..6fd60791864 --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/BBox.java @@ -0,0 +1,23 @@ +package org.pytorch.testapp; + +/** Immutable holder for a score and the four coordinates (x0, y0, x1, y1) of a box. */ +class BBox { + public final float score; + public final float x0; + public final float y0; + public final float x1; + public final float y1; + + public BBox(float score, float x0, float y0, float x1, float y1) { + this.score = score; + this.x0 = x0; + this.y0 = y0; + this.x1 = x1; + this.y1 = y1; + } + + @Override + public String toString() { + return String.format("Box{score=%f x0=%f y0=%f x1=%f y1=%f}", score, x0, y0, x1, y1); + } +} diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/CameraActivity.java b/android/test_app/app/src/main/java/org/pytorch/testapp/CameraActivity.java new file mode 100644 index 00000000000..1c427bb82ba --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/CameraActivity.java @@ -0,0 +1,432 @@ +package org.pytorch.testapp; + +import android.Manifest; +import android.content.Context; +import android.content.pm.PackageManager; +import android.graphics.Bitmap; +import android.graphics.Canvas; +import android.graphics.Color; +import android.graphics.Paint; +import android.graphics.Rect; +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.os.SystemClock; +import android.util.DisplayMetrics; +import android.util.Log; +import android.util.Size; +import android.view.TextureView; +import android.view.ViewStub; +import android.widget.ImageView; +import android.widget.TextView; +import android.widget.Toast; +import androidx.annotation.Nullable; +import androidx.annotation.UiThread; +import androidx.annotation.WorkerThread; +import androidx.appcompat.app.AppCompatActivity; +import androidx.camera.core.CameraX; +import androidx.camera.core.ImageAnalysis; +import androidx.camera.core.ImageAnalysisConfig; +import
androidx.camera.core.ImageProxy; +import androidx.camera.core.Preview; +import androidx.camera.core.PreviewConfig; +import androidx.core.app.ActivityCompat; +import com.facebook.soloader.nativeloader.NativeLoader; +import com.facebook.soloader.nativeloader.SystemDelegate; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.FloatBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.pytorch.IValue; +import org.pytorch.Module; +import org.pytorch.Tensor; + +public class CameraActivity extends AppCompatActivity { + + private static final float BBOX_SCORE_DRAW_THRESHOLD = 0.5f; + private static final String TAG = BuildConfig.LOGCAT_TAG; + private static final int TEXT_TRIM_SIZE = 4096; + private static final int RGB_MAX_CHANNEL_VALUE = 262143; + + private static final int REQUEST_CODE_CAMERA_PERMISSION = 200; + private static final String[] PERMISSIONS = {Manifest.permission.CAMERA}; + + static { + if (!NativeLoader.isInitialized()) { + NativeLoader.init(new SystemDelegate()); + } + NativeLoader.loadLibrary("pytorch_jni"); + NativeLoader.loadLibrary("torchvision_ops"); + } + + private Bitmap mInputTensorBitmap; + private Bitmap mBitmap; + private Canvas mCanvas; + + private long mLastAnalysisResultTime; + + protected HandlerThread mBackgroundThread; + protected Handler mBackgroundHandler; + protected Handler mUIHandler; + + private TextView mTextView; + private ImageView mCameraOverlay; + private StringBuilder mTextViewStringBuilder = new StringBuilder(); + + private Paint mBboxPaint; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_camera); + mTextView = findViewById(R.id.text); + mCameraOverlay = findViewById(R.id.camera_overlay); + mUIHandler = new Handler(getMainLooper()); + 
startBackgroundThread(); + + if (ActivityCompat.checkSelfPermission(this, Manifest.permission.CAMERA) + != PackageManager.PERMISSION_GRANTED) { + ActivityCompat.requestPermissions(this, PERMISSIONS, REQUEST_CODE_CAMERA_PERMISSION); + } else { + setupCameraX(); + } + mBboxPaint = new Paint(); + mBboxPaint.setAntiAlias(true); + mBboxPaint.setDither(true); + mBboxPaint.setColor(Color.GREEN); + } + + @Override + protected void onPostCreate(@Nullable Bundle savedInstanceState) { + super.onPostCreate(savedInstanceState); + startBackgroundThread(); + } + + protected void startBackgroundThread() { + mBackgroundThread = new HandlerThread("ModuleActivity"); + mBackgroundThread.start(); + mBackgroundHandler = new Handler(mBackgroundThread.getLooper()); + } + + @Override + protected void onDestroy() { + stopBackgroundThread(); + super.onDestroy(); + } + + protected void stopBackgroundThread() { + mBackgroundThread.quitSafely(); + try { + mBackgroundThread.join(); + mBackgroundThread = null; + mBackgroundHandler = null; + } catch (InterruptedException e) { + Log.e(TAG, "Error on stopping background thread", e); + } + } + + @Override + public void onRequestPermissionsResult( + int requestCode, String[] permissions, int[] grantResults) { + if (requestCode == REQUEST_CODE_CAMERA_PERMISSION) { + if (grantResults[0] == PackageManager.PERMISSION_DENIED) { + Toast.makeText( + this, + "You can't use image classification example without granting CAMERA permission", + Toast.LENGTH_LONG) + .show(); + finish(); + } else { + setupCameraX(); + } + } + } + + private void setupCameraX() { + final TextureView textureView = + ((ViewStub) findViewById(R.id.camera_texture_view_stub)) + .inflate() + .findViewById(R.id.texture_view); + final PreviewConfig previewConfig = new PreviewConfig.Builder().build(); + final Preview preview = new Preview(previewConfig); + preview.setOnPreviewOutputUpdateListener( + new Preview.OnPreviewOutputUpdateListener() { + @Override + public void 
onUpdated(Preview.PreviewOutput output) { + textureView.setSurfaceTexture(output.getSurfaceTexture()); + } + }); + + final DisplayMetrics displayMetrics = new DisplayMetrics(); + getWindowManager().getDefaultDisplay().getMetrics(displayMetrics); + + final ImageAnalysisConfig imageAnalysisConfig = + new ImageAnalysisConfig.Builder() + .setTargetResolution(new Size(displayMetrics.widthPixels, displayMetrics.heightPixels)) + .setCallbackHandler(mBackgroundHandler) + .setImageReaderMode(ImageAnalysis.ImageReaderMode.ACQUIRE_LATEST_IMAGE) + .build(); + final ImageAnalysis imageAnalysis = new ImageAnalysis(imageAnalysisConfig); + imageAnalysis.setAnalyzer( + new ImageAnalysis.Analyzer() { + @Override + public void analyze(ImageProxy image, int rotationDegrees) { + if (SystemClock.elapsedRealtime() - mLastAnalysisResultTime < 500) { + return; + } + + final Result result = CameraActivity.this.analyzeImage(image, rotationDegrees); + + if (result != null) { + mLastAnalysisResultTime = SystemClock.elapsedRealtime(); + CameraActivity.this.runOnUiThread( + new Runnable() { + @Override + public void run() { + CameraActivity.this.handleResult(result); + } + }); + } + } + }); + + CameraX.bindToLifecycle(this, preview, imageAnalysis); + } + + private Module mModule; + private FloatBuffer mInputTensorBuffer; + private Tensor mInputTensor; + + private static int clamp0255(int x) { + if (x > 255) { + return 255; + } + return x < 0 ? 
0 : x; + } + + protected void fillInputTensorBuffer( + ImageProxy image, int rotationDegrees, FloatBuffer inputTensorBuffer) { + + if (mInputTensorBitmap == null) { + final int tensorSize = Math.min(image.getWidth(), image.getHeight()); + mInputTensorBitmap = Bitmap.createBitmap(tensorSize, tensorSize, Bitmap.Config.ARGB_8888); + } + + ImageProxy.PlaneProxy[] planes = image.getPlanes(); + ImageProxy.PlaneProxy Y = planes[0]; + ImageProxy.PlaneProxy U = planes[1]; + ImageProxy.PlaneProxy V = planes[2]; + ByteBuffer yBuffer = Y.getBuffer(); + ByteBuffer uBuffer = U.getBuffer(); + ByteBuffer vBuffer = V.getBuffer(); + final int imageWidth = image.getWidth(); + final int imageHeight = image.getHeight(); + final int tensorSize = Math.min(imageWidth, imageHeight); + + int widthAfterRtn = imageWidth; + int heightAfterRtn = imageHeight; + boolean oddRotation = rotationDegrees == 90 || rotationDegrees == 270; + if (oddRotation) { + widthAfterRtn = imageHeight; + heightAfterRtn = imageWidth; + } + + int minSizeAfterRtn = Math.min(heightAfterRtn, widthAfterRtn); + int cropWidthAfterRtn = minSizeAfterRtn; + int cropHeightAfterRtn = minSizeAfterRtn; + + int cropWidthBeforeRtn = cropWidthAfterRtn; + int cropHeightBeforeRtn = cropHeightAfterRtn; + if (oddRotation) { + cropWidthBeforeRtn = cropHeightAfterRtn; + cropHeightBeforeRtn = cropWidthAfterRtn; + } + + int offsetX = (int) ((imageWidth - cropWidthBeforeRtn) / 2.f); + int offsetY = (int) ((imageHeight - cropHeightBeforeRtn) / 2.f); + + int yRowStride = Y.getRowStride(); + int yPixelStride = Y.getPixelStride(); + int uvRowStride = U.getRowStride(); + int uvPixelStride = U.getPixelStride(); + + float scale = cropWidthAfterRtn / tensorSize; + int yIdx, uvIdx, yi, ui, vi; + final int channelSize = tensorSize * tensorSize; + for (int y = 0; y < tensorSize; y++) { + for (int x = 0; x < tensorSize; x++) { + final int centerCropX = (int) Math.floor(x * scale); + final int centerCropY = (int) Math.floor(y * scale); + int srcX = 
centerCropX + offsetX; + int srcY = centerCropY + offsetY; + + if (rotationDegrees == 90) { + srcX = offsetX + centerCropY; + srcY = offsetY + (minSizeAfterRtn - 1) - centerCropX; + } else if (rotationDegrees == 180) { + srcX = offsetX + (minSizeAfterRtn - 1) - centerCropX; + srcY = offsetY + (minSizeAfterRtn - 1) - centerCropY; + } else if (rotationDegrees == 270) { + srcX = offsetX + (minSizeAfterRtn - 1) - centerCropY; + srcY = offsetY + centerCropX; + } + + yIdx = srcY * yRowStride + srcX * yPixelStride; + uvIdx = (srcY >> 1) * uvRowStride + (srcX >> 1) * uvPixelStride; + + yi = yBuffer.get(yIdx) & 0xff; + ui = uBuffer.get(uvIdx) & 0xff; + vi = vBuffer.get(uvIdx) & 0xff; + + yi = (yi - 16) < 0 ? 0 : (yi - 16); + ui -= 128; + vi -= 128; + + int a0 = 1192 * yi; + int ri = (a0 + 1634 * vi); + int gi = (a0 - 833 * vi - 400 * ui); + int bi = (a0 + 2066 * ui); + + ri = ri > RGB_MAX_CHANNEL_VALUE ? RGB_MAX_CHANNEL_VALUE : (ri < 0 ? 0 : ri); + gi = gi > RGB_MAX_CHANNEL_VALUE ? RGB_MAX_CHANNEL_VALUE : (gi < 0 ? 0 : gi); + bi = bi > RGB_MAX_CHANNEL_VALUE ? RGB_MAX_CHANNEL_VALUE : (bi < 0 ? 
0 : bi); + + final int color = + 0xff000000 | ((ri << 6) & 0xff0000) | ((gi >> 2) & 0xff00) | ((bi >> 10) & 0xff); + mInputTensorBitmap.setPixel(x, y, color); + inputTensorBuffer.put(0 * channelSize + y * tensorSize + x, clamp0255(ri >> 10) / 255.f); + inputTensorBuffer.put(1 * channelSize + y * tensorSize + x, clamp0255(gi >> 10) / 255.f); + inputTensorBuffer.put(2 * channelSize + y * tensorSize + x, clamp0255(bi >> 10) / 255.f); + } + } + } + + public static String assetFilePath(Context context, String assetName) { + File file = new File(context.getFilesDir(), assetName); + if (file.exists() && file.length() > 0) { + return file.getAbsolutePath(); + } + + try (InputStream is = context.getAssets().open(assetName)) { + try (OutputStream os = new FileOutputStream(file)) { + byte[] buffer = new byte[4 * 1024]; + int read; + while ((read = is.read(buffer)) != -1) { + os.write(buffer, 0, read); + } + os.flush(); + } + return file.getAbsolutePath(); + } catch (IOException e) { + Log.e(TAG, "Error process asset " + assetName + " to file path"); + } + return null; + } + + @WorkerThread + @Nullable + protected Result analyzeImage(ImageProxy image, int rotationDegrees) { + Log.i(TAG, String.format("analyzeImage(%s, %d)", image, rotationDegrees)); + final int tensorSize = Math.min(image.getWidth(), image.getHeight()); + if (mModule == null) { + Log.i(TAG, "Loading module from asset '" + BuildConfig.MODULE_ASSET_NAME + "'"); + mInputTensorBuffer = Tensor.allocateFloatBuffer(3 * tensorSize * tensorSize); + mInputTensor = Tensor.fromBlob(mInputTensorBuffer, new long[] {3, tensorSize, tensorSize}); + final String modelFileAbsoluteFilePath = + new File(assetFilePath(this, BuildConfig.MODULE_ASSET_NAME)).getAbsolutePath(); + mModule = Module.load(modelFileAbsoluteFilePath); + } + + final long startTime = SystemClock.elapsedRealtime(); + fillInputTensorBuffer(image, rotationDegrees, mInputTensorBuffer); + + final long moduleForwardStartTime = SystemClock.elapsedRealtime(); + final 
IValue outputTuple = mModule.forward(IValue.listFrom(mInputTensor)); + final IValue out1 = outputTuple.toTuple()[1]; + final Map map = out1.toList()[0].toDictStringKey(); + + float[] boxesData = new float[] {}; + float[] scoresData = new float[] {}; + final List bboxes = new ArrayList<>(); + if (map.containsKey("boxes")) { + final Tensor boxesTensor = map.get("boxes").toTensor(); + final Tensor scoresTensor = map.get("scores").toTensor(); + boxesData = boxesTensor.getDataAsFloatArray(); + scoresData = scoresTensor.getDataAsFloatArray(); + final int n = scoresData.length; + for (int i = 0; i < n; i++) { + final BBox bbox = + new BBox( + scoresData[i], + boxesData[4 * i + 0], + boxesData[4 * i + 1], + boxesData[4 * i + 2], + boxesData[4 * i + 3]); + android.util.Log.i(TAG, String.format("Forward result %d: %s", i, bbox)); + bboxes.add(bbox); + } + } else { + android.util.Log.i(TAG, "Forward result empty"); + } + + final long moduleForwardDuration = SystemClock.elapsedRealtime() - moduleForwardStartTime; + final long analysisDuration = SystemClock.elapsedRealtime() - startTime; + return new Result(tensorSize, bboxes, moduleForwardDuration, analysisDuration); + } + + @UiThread + protected void handleResult(Result result) { + final int W = mCameraOverlay.getMeasuredWidth(); + final int H = mCameraOverlay.getMeasuredHeight(); + + final int size = Math.min(W, H); + final int offsetX = (W - size) / 2; + final int offsetY = (H - size) / 2; + + float scaleX = (float) size / result.tensorSize; + float scaleY = (float) size / result.tensorSize; + if (mBitmap == null) { + mBitmap = Bitmap.createBitmap(W, H, Bitmap.Config.ARGB_8888); + mCanvas = new Canvas(mBitmap); + } + + mCanvas.drawBitmap( + mInputTensorBitmap, + new Rect(0, 0, result.tensorSize, result.tensorSize), + new Rect(offsetX, offsetY, offsetX + size, offsetY + size), + null); + + for (final BBox bbox : result.bboxes) { + if (bbox.score < BBOX_SCORE_DRAW_THRESHOLD) { + continue; + } + + float c_x0 = offsetX + scaleX 
* bbox.x0; + float c_y0 = offsetY + scaleY * bbox.y0; + + float c_x1 = offsetX + scaleX * bbox.x1; + float c_y1 = offsetY + scaleY * bbox.y1; + + mCanvas.drawLine(c_x0, c_y0, c_x1, c_y0, mBboxPaint); + mCanvas.drawLine(c_x1, c_y0, c_x1, c_y1, mBboxPaint); + mCanvas.drawLine(c_x1, c_y1, c_x0, c_y1, mBboxPaint); + mCanvas.drawLine(c_x0, c_y1, c_x0, c_y0, mBboxPaint); + mCanvas.drawText(String.format("%.2f", bbox.score), c_x0, c_y0, mBboxPaint); + } + mCameraOverlay.setImageBitmap(mBitmap); + + String message = String.format("forwardDuration:%d", result.moduleForwardDuration); + Log.i(TAG, message); + mTextViewStringBuilder.insert(0, '\n').insert(0, message); + if (mTextViewStringBuilder.length() > TEXT_TRIM_SIZE) { + mTextViewStringBuilder.delete(TEXT_TRIM_SIZE, mTextViewStringBuilder.length()); + } + mTextView.setText(mTextViewStringBuilder.toString()); + } +} diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/MainActivity.java b/android/test_app/app/src/main/java/org/pytorch/testapp/MainActivity.java new file mode 100644 index 00000000000..a9c13bffa6e --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/MainActivity.java @@ -0,0 +1,159 @@ +package org.pytorch.testapp; + +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.os.SystemClock; +import android.util.Log; +import android.widget.TextView; +import androidx.annotation.Nullable; +import androidx.annotation.UiThread; +import androidx.annotation.WorkerThread; +import androidx.appcompat.app.AppCompatActivity; +import com.facebook.soloader.nativeloader.NativeLoader; +import com.facebook.soloader.nativeloader.SystemDelegate; +import java.nio.FloatBuffer; +import java.util.Map; +import org.pytorch.IValue; +import org.pytorch.Module; +import org.pytorch.PyTorchAndroid; +import org.pytorch.Tensor; + +public class MainActivity extends AppCompatActivity { + static { + if (!NativeLoader.isInitialized()) { + NativeLoader.init(new 
SystemDelegate()); + } + NativeLoader.loadLibrary("pytorch_jni"); + NativeLoader.loadLibrary("torchvision_ops"); + } + + private static final String TAG = BuildConfig.LOGCAT_TAG; + private static final int TEXT_TRIM_SIZE = 4096; + + private TextView mTextView; + + protected HandlerThread mBackgroundThread; + protected Handler mBackgroundHandler; + private Module mModule; + private FloatBuffer mInputTensorBuffer; + private Tensor mInputTensor; + private StringBuilder mTextViewStringBuilder = new StringBuilder(); + + private final Runnable mModuleForwardRunnable = + new Runnable() { + @Override + public void run() { + final Result result = doModuleForward(); + runOnUiThread( + () -> { + handleResult(result); + if (mBackgroundHandler != null) { + mBackgroundHandler.post(mModuleForwardRunnable); + } + }); + } + }; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + mTextView = findViewById(R.id.text); + startBackgroundThread(); + mBackgroundHandler.post(mModuleForwardRunnable); + } + + protected void startBackgroundThread() { + mBackgroundThread = new HandlerThread(TAG + "_bg"); + mBackgroundThread.start(); + mBackgroundHandler = new Handler(mBackgroundThread.getLooper()); + } + + @Override + protected void onDestroy() { + stopBackgroundThread(); + super.onDestroy(); + } + + protected void stopBackgroundThread() { + mBackgroundThread.quitSafely(); + try { + mBackgroundThread.join(); + mBackgroundThread = null; + mBackgroundHandler = null; + } catch (InterruptedException e) { + Log.e(TAG, "Error stopping background thread", e); + } + } + + @WorkerThread + @Nullable + protected Result doModuleForward() { + if (mModule == null) { + final long[] shape = BuildConfig.INPUT_TENSOR_SHAPE; + long numElements = 1; + for (int i = 0; i < shape.length; i++) { + numElements *= shape[i]; + } + mInputTensorBuffer = Tensor.allocateFloatBuffer((int) numElements); + mInputTensor = 
Tensor.fromBlob(mInputTensorBuffer, BuildConfig.INPUT_TENSOR_SHAPE); + PyTorchAndroid.setNumThreads(1); + mModule = PyTorchAndroid.loadModuleFromAsset(getAssets(), BuildConfig.MODULE_ASSET_NAME); + } + + final long startTime = SystemClock.elapsedRealtime(); + final long moduleForwardStartTime = SystemClock.elapsedRealtime(); + final IValue outputTuple = mModule.forward(IValue.listFrom(mInputTensor)); + final IValue[] outputArray = outputTuple.toTuple(); + final IValue out0 = outputArray[0]; + final Map map = out0.toDictStringKey(); + if (map.containsKey("boxes")) { + final Tensor boxes = map.get("boxes").toTensor(); + final Tensor scores = map.get("scores").toTensor(); + final float[] boxesData = boxes.getDataAsFloatArray(); + final float[] scoresData = scores.getDataAsFloatArray(); + final int n = scoresData.length; + for (int i = 0; i < n; i++) { + android.util.Log.i( + TAG, + String.format( + "Forward result %d: score %f box:(%f, %f, %f, %f)", + scoresData[i], + boxesData[4 * i + 0], + boxesData[4 * i + 1], + boxesData[4 * i + 2], + boxesData[4 * i + 3])); + } + } else { + android.util.Log.i(TAG, "Forward result empty"); + } + + final long moduleForwardDuration = SystemClock.elapsedRealtime() - moduleForwardStartTime; + final long analysisDuration = SystemClock.elapsedRealtime() - startTime; + return new Result(new float[] {}, moduleForwardDuration, analysisDuration); + } + + static class Result { + + private final float[] scores; + private final long totalDuration; + private final long moduleForwardDuration; + + public Result(float[] scores, long moduleForwardDuration, long totalDuration) { + this.scores = scores; + this.moduleForwardDuration = moduleForwardDuration; + this.totalDuration = totalDuration; + } + } + + @UiThread + protected void handleResult(Result result) { + String message = String.format("forwardDuration:%d", result.moduleForwardDuration); + mTextViewStringBuilder.insert(0, '\n').insert(0, message); + if (mTextViewStringBuilder.length() > 
TEXT_TRIM_SIZE) { + mTextViewStringBuilder.delete(TEXT_TRIM_SIZE, mTextViewStringBuilder.length()); + } + mTextView.setText(mTextViewStringBuilder.toString()); + } +} diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/Result.java b/android/test_app/app/src/main/java/org/pytorch/testapp/Result.java new file mode 100644 index 00000000000..ed7ebd006cd --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/Result.java @@ -0,0 +1,17 @@ +package org.pytorch.testapp; + +import java.util.List; + +class Result { + public final int tensorSize; + public final List bboxes; + public final long totalDuration; + public final long moduleForwardDuration; + + public Result(int tensorSize, List bboxes, long moduleForwardDuration, long totalDuration) { + this.tensorSize = tensorSize; + this.bboxes = bboxes; + this.moduleForwardDuration = moduleForwardDuration; + this.totalDuration = totalDuration; + } +} diff --git a/android/test_app/app/src/main/res/layout/activity_camera.xml b/android/test_app/app/src/main/res/layout/activity_camera.xml new file mode 100644 index 00000000000..7ba2e42b7c0 --- /dev/null +++ b/android/test_app/app/src/main/res/layout/activity_camera.xml @@ -0,0 +1,28 @@ + + + + + + + + + diff --git a/android/test_app/app/src/main/res/layout/activity_main.xml b/android/test_app/app/src/main/res/layout/activity_main.xml new file mode 100644 index 00000000000..556839a994c --- /dev/null +++ b/android/test_app/app/src/main/res/layout/activity_main.xml @@ -0,0 +1,17 @@ + + + + + + diff --git a/android/test_app/app/src/main/res/layout/texture_view.xml b/android/test_app/app/src/main/res/layout/texture_view.xml new file mode 100644 index 00000000000..6518c6c84c6 --- /dev/null +++ b/android/test_app/app/src/main/res/layout/texture_view.xml @@ -0,0 +1,5 @@ + + diff --git a/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher.png b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher.png new file mode 100644 index 
00000000000..64ba76f75e9 Binary files /dev/null and b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher.png differ diff --git a/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher_round.png b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher_round.png new file mode 100644 index 00000000000..dae5e082342 Binary files /dev/null and b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher_round.png differ diff --git a/android/test_app/app/src/main/res/values/colors.xml b/android/test_app/app/src/main/res/values/colors.xml new file mode 100644 index 00000000000..69b22338c65 --- /dev/null +++ b/android/test_app/app/src/main/res/values/colors.xml @@ -0,0 +1,6 @@ + + + #008577 + #00574B + #D81B60 + diff --git a/android/test_app/app/src/main/res/values/strings.xml b/android/test_app/app/src/main/res/values/strings.xml new file mode 100644 index 00000000000..cafbaad1511 --- /dev/null +++ b/android/test_app/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + TV_FRCNN + diff --git a/android/test_app/app/src/main/res/values/styles.xml b/android/test_app/app/src/main/res/values/styles.xml new file mode 100644 index 00000000000..5885930df6d --- /dev/null +++ b/android/test_app/app/src/main/res/values/styles.xml @@ -0,0 +1,11 @@ + + + + + + diff --git a/android/test_app/make_assets.py b/android/test_app/make_assets.py new file mode 100644 index 00000000000..f99933e9a9d --- /dev/null +++ b/android/test_app/make_assets.py @@ -0,0 +1,21 @@ +import torch +from torch.utils.mobile_optimizer import optimize_for_mobile +from torchvision.models.detection import ( + fasterrcnn_mobilenet_v3_large_320_fpn, + FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, +) + +print(torch.__version__) + +model = fasterrcnn_mobilenet_v3_large_320_fpn( + weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, + box_score_thresh=0.7, + rpn_post_nms_top_n_test=100, + rpn_score_thresh=0.4, + rpn_pre_nms_top_n_test=150, +) + +model.eval() +script_model = torch.jit.script(model) 
+opt_script_model = optimize_for_mobile(script_model) +opt_script_model.save("app/src/main/assets/frcnn_mnetv3.pt") diff --git a/benchmarks/encoding_decoding.py b/benchmarks/encoding_decoding.py new file mode 100644 index 00000000000..0cafdb2d8a6 --- /dev/null +++ b/benchmarks/encoding_decoding.py @@ -0,0 +1,99 @@ +import os +import platform +import statistics + +import torch +import torch.utils.benchmark as benchmark +import torchvision + + +def print_machine_specs(): + print("Processor:", platform.processor()) + print("Platform:", platform.platform()) + print("Logical CPUs:", os.cpu_count()) + print(f"\nCUDA device: {torch.cuda.get_device_name()}") + print(f"Total Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB") + + +def get_data(): + transform = torchvision.transforms.Compose( + [ + torchvision.transforms.PILToTensor(), + ] + ) + path = os.path.join(os.getcwd(), "data") + testset = torchvision.datasets.Places365( + root="./data", download=not os.path.exists(path), transform=transform, split="val" + ) + testloader = torch.utils.data.DataLoader( + testset, batch_size=1000, shuffle=False, num_workers=1, collate_fn=lambda batch: [r[0] for r in batch] + ) + return next(iter(testloader)) + + +def run_encoding_benchmark(decoded_images): + results = [] + for device in ["cpu", "cuda"]: + decoded_images_device = [t.to(device=device) for t in decoded_images] + for size in [1, 100, 1000]: + for num_threads in [1, 12, 24]: + for stmt, strat in zip( + [ + "[torchvision.io.encode_jpeg(img) for img in decoded_images_device_trunc]", + "torchvision.io.encode_jpeg(decoded_images_device_trunc)", + ], + ["unfused", "fused"], + ): + decoded_images_device_trunc = decoded_images_device[:size] + t = benchmark.Timer( + stmt=stmt, + setup="import torchvision", + globals={"decoded_images_device_trunc": decoded_images_device_trunc}, + label="Image Encoding", + sub_label=f"{device.upper()} ({strat}): {stmt}", + description=f"{size} images", + num_threads=num_threads, 
+ ) + results.append(t.blocked_autorange()) + compare = benchmark.Compare(results) + compare.print() + + +def run_decoding_benchmark(encoded_images): + results = [] + for device in ["cpu", "cuda"]: + for size in [1, 100, 1000]: + for num_threads in [1, 12, 24]: + for stmt, strat in zip( + [ + f"[torchvision.io.decode_jpeg(img, device='{device}') for img in encoded_images_trunc]", + f"torchvision.io.decode_jpeg(encoded_images_trunc, device='{device}')", + ], + ["unfused", "fused"], + ): + encoded_images_trunc = encoded_images[:size] + t = benchmark.Timer( + stmt=stmt, + setup="import torchvision", + globals={"encoded_images_trunc": encoded_images_trunc}, + label="Image Decoding", + sub_label=f"{device.upper()} ({strat}): {stmt}", + description=f"{size} images", + num_threads=num_threads, + ) + results.append(t.blocked_autorange()) + compare = benchmark.Compare(results) + compare.print() + + +if __name__ == "__main__": + print_machine_specs() + decoded_images = get_data() + mean_h, mean_w = statistics.mean(t.shape[-2] for t in decoded_images), statistics.mean( + t.shape[-1] for t in decoded_images + ) + print(f"\nMean image size: {int(mean_h)}x{int(mean_w)}") + run_encoding_benchmark(decoded_images) + encoded_images_cuda = torchvision.io.encode_jpeg([img.cuda() for img in decoded_images]) + encoded_images_cpu = [img.cpu() for img in encoded_images_cuda] + run_decoding_benchmark(encoded_images_cpu) diff --git a/cmake/TorchVisionConfig.cmake.in b/cmake/TorchVisionConfig.cmake.in new file mode 100644 index 00000000000..7f7e78817fa --- /dev/null +++ b/cmake/TorchVisionConfig.cmake.in @@ -0,0 +1,50 @@ +# TorchVisionConfig.cmake +# -------------------- +# +# Exported targets:: Vision +# + +@PACKAGE_INIT@ + +set(PN TorchVision) + +# location of include/torchvision +set(${PN}_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@") + +set(${PN}_LIBRARY "") +set(${PN}_DEFINITIONS USING_${PN}) + +check_required_components(${PN}) + + +if(NOT (CMAKE_VERSION VERSION_LESS 
3.0)) +#----------------------------------------------------------------------------- +# Don't include targets if this file is being picked up by another +# project which has already built this as a subproject +#----------------------------------------------------------------------------- +if(NOT TARGET ${PN}::${PN}) +include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake") + +target_include_directories(${PN}::${PN} INTERFACE "${${PN}_INCLUDE_DIR}") + +if(@WITH_CUDA@) + target_compile_definitions(${PN}::${PN} INTERFACE WITH_CUDA) +endif() + +find_package(Torch REQUIRED) +target_link_libraries(${PN}::${PN} INTERFACE torch) + +if(@WITH_PNG@) + find_package(PNG REQUIRED) + target_link_libraries(${PN}::${PN} INTERFACE ${PNG_LIBRARY}) + target_compile_definitions(${PN}::${PN} INTERFACE PNG_FOUND) +endif() + +if(@WITH_JPEG@) + find_package(JPEG REQUIRED) + target_link_libraries(${PN}::${PN} INTERFACE ${JPEG_LIBRARIES}) + target_compile_definitions(${PN}::${PN} INTERFACE JPEG_FOUND) +endif() + +endif() +endif() diff --git a/cmake/iOS.cmake b/cmake/iOS.cmake new file mode 100644 index 00000000000..935c57f11b9 --- /dev/null +++ b/cmake/iOS.cmake @@ -0,0 +1,207 @@ +# This file is based off of the Platform/Darwin.cmake and Platform/UnixPaths.cmake +# files which are included with CMake 2.8.4 +# It has been altered for iOS development + +# Options: +# +# IOS_PLATFORM = OS (default) or SIMULATOR +# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders +# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch. +# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch. +# +# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder +# By default this location is automatically chosen based on the IOS_PLATFORM value above. 
+# If set manually, it will override the default location and force the user of a particular Developer Platform +# +# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder +# By default this location is automatically chosen based on the CMAKE_IOS_DEVELOPER_ROOT value. +# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path. +# If set manually, this will force the use of a specific SDK version + +# Macros: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) +# A convenience macro for setting xcode specific properties on targets +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1") +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the iOS environment. +# Thanks to the android-cmake project for providing the command + +# Standard settings +set(CMAKE_SYSTEM_NAME Darwin) +set(CMAKE_SYSTEM_VERSION 1) +set(UNIX True) +set(APPLE True) +set(IOS True) + +# Required as of cmake 2.8.10 +set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) + +# Determine the cmake host system version so we know where to find the iOS SDKs +find_program(CMAKE_UNAME uname /bin /usr/bin /usr/local/bin) +if(CMAKE_UNAME) + exec_program(uname ARGS -r OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION) + string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION "${CMAKE_HOST_SYSTEM_VERSION}") +endif(CMAKE_UNAME) + +# Force the compilers to gcc for iOS +set(CMAKE_C_COMPILER /usr/bin/gcc CACHE STRING "") +set(CMAKE_CXX_COMPILER /usr/bin/g++ CACHE STRING "") +set(CMAKE_AR ar CACHE FILEPATH "" FORCE) +set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) +set(PKG_CONFIG_EXECUTABLE pkg-config CACHE FILEPATH "" FORCE) + +# Setup iOS platform unless specified manually with IOS_PLATFORM +if(NOT DEFINED IOS_PLATFORM) + set(IOS_PLATFORM "OS") +endif(NOT DEFINED IOS_PLATFORM) +set(IOS_PLATFORM 
${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") + +# Check the platform selection and setup for developer root +if(${IOS_PLATFORM} STREQUAL "OS") + set(IOS_PLATFORM_LOCATION "iPhoneOS.platform") + set(XCODE_IOS_PLATFORM iphoneos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos") +elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR") + set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform") + set(XCODE_IOS_PLATFORM iphonesimulator) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator") +elseif(${IOS_PLATFORM} STREQUAL "WATCHOS") + set(IOS_PLATFORM_LOCATION "WatchOS.platform") + set(XCODE_IOS_PLATFORM watchos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos") +else(${IOS_PLATFORM} STREQUAL "OS") + message(FATAL_ERROR + "Unsupported IOS_PLATFORM value selected. " + "Please choose OS, SIMULATOR, or WATCHOS.") +endif() + +# All iOS/Darwin specific settings - some may be redundant +set(CMAKE_SHARED_LIBRARY_PREFIX "lib") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set(CMAKE_SHARED_MODULE_PREFIX "lib") +set(CMAKE_SHARED_MODULE_SUFFIX ".so") +set(CMAKE_MODULE_EXISTS 1) +set(CMAKE_DL_LIBS "") + +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +if(IOS_DEPLOYMENT_TARGET) + set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") +endif() + +# Hidden visibility is required for cxx on iOS +set(CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}") +set(CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden") + +set(CMAKE_C_LINK_FLAGS 
"${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") +set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") + +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") + +# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree +# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache +# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun) +# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex +if(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) +endif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + +# Setup iOS deployment target +set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version") + +# Setup iOS developer location unless specified manually with CMAKE_IOS_DEVELOPER_ROOT +# Note Xcode 4.3 changed the installation location, choose the most recent one available +exec_program(/usr/bin/xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR) +set(XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") +set(XCODE_PRE_43_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer") +if(NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) + if(EXISTS ${XCODE_POST_43_ROOT}) + set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT}) + elseif(EXISTS ${XCODE_PRE_43_ROOT}) + set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT}) + endif(EXISTS ${XCODE_POST_43_ROOT}) +endif(NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) 
+set(CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform") + +# Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT +if(NOT DEFINED CMAKE_IOS_SDK_ROOT) + file(GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*") + if(_CMAKE_IOS_SDKS) + list(SORT _CMAKE_IOS_SDKS) + list(REVERSE _CMAKE_IOS_SDKS) + list(GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT) + else(_CMAKE_IOS_SDKS) + message(FATAL_ERROR "No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.") + endif(_CMAKE_IOS_SDKS) + message(STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}") +endif(NOT DEFINED CMAKE_IOS_SDK_ROOT) +set(CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") + +# Set the sysroot default to the most recent SDK +set(CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") + +# set the architecture for iOS +if(IOS_PLATFORM STREQUAL "OS") + set(DEFAULT_IOS_ARCH "arm64") +elseif(IOS_PLATFORM STREQUAL "SIMULATOR") + set(DEFAULT_IOS_ARCH "x86_64") +elseif(IOS_PLATFORM STREQUAL "WATCHOS") + set(DEFAULT_IOS_ARCH "armv7k;arm64_32") +endif() + +set(IOS_ARCH ${DEFAULT_IOS_ARCH} CACHE STRING "Build architecture for iOS") +set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE STRING "Build architecture for iOS") + +# Set the find root to the iOS developer roots and to user defined paths +set(CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} CACHE STRING "iOS find search path root") + +# default to searching for frameworks first +set(CMAKE_FIND_FRAMEWORK FIRST) + +# set up the default search directories for frameworks +set(CMAKE_SYSTEM_FRAMEWORK_PATH + ${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks + ${CMAKE_IOS_SDK_ROOT}/System/Library/PrivateFrameworks + ${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks +) + +# only search the iOS sdks, not the remainder 
of the host filesystem +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +# This little macro lets you set any XCode specific property +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) +endmacro(set_xcode_property) + +# This macro lets you find executable programs on the host system +macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(IOS FALSE) + + find_package(${ARGN}) + + set(IOS TRUE) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endmacro(find_host_package) diff --git a/docs/Makefile b/docs/Makefile index 2ca4b0d71a2..f462ff22303 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,8 +1,12 @@ # Minimal makefile for Sphinx documentation # +ifneq ($(EXAMPLES_PATTERN),) + EXAMPLES_PATTERN_OPTS := -D sphinx_gallery_conf.filename_pattern="$(EXAMPLES_PATTERN)" +endif + # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = -W -j auto $(EXAMPLES_PATTERN_OPTS) SPHINXBUILD = sphinx-build SPHINXPROJ = torchvision SOURCEDIR = source @@ -19,6 +23,18 @@ docset: html cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png +html-noplot: # Avoids running the gallery examples, which may take time + $(SPHINXBUILD) -D plot_gallery=0 -b html "${SOURCEDIR}" "$(BUILDDIR)"/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
+ +clean: + rm -rf $(BUILDDIR)/* + rm -rf $(SOURCEDIR)/auto_examples/ # sphinx-gallery + rm -rf $(SOURCEDIR)/gen_modules/ # sphinx-gallery + rm -rf $(SOURCEDIR)/generated/ # autosummary + rm -rf $(SOURCEDIR)/models/generated # autosummary + .PHONY: help Makefile docset # Catch-all target: route all unknown targets to Sphinx using the new diff --git a/docs/requirements.txt b/docs/requirements.txt index 014f642d0eb..2a50d9b8f45 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,8 @@ -sphinx==1.7.3 -sphinxcontrib-googleanalytics --e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme +matplotlib +numpy +sphinx-copybutton>=0.3.1 +sphinx-gallery>=0.11.1 +sphinx==5.0.0 +tabulate +-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme +pycocotools diff --git a/docs/source/_static/css/custom_torchvision.css b/docs/source/_static/css/custom_torchvision.css new file mode 100644 index 00000000000..07346d7b03f --- /dev/null +++ b/docs/source/_static/css/custom_torchvision.css @@ -0,0 +1,35 @@ +/* This rule should be removed once +https://github.com/pytorch/pytorch_sphinx_theme/issues/125 is fixed. + +We override the rule so that the links to the notebooks aren't hidden in the +gallery examples. pytorch_sphinx_theme is supposed to customize those links so +that they render nicely (look at the nice links on top of the tutorials +examples) but it doesn't work for repos that are not the tutorial repo, and in +torchvision it just hides the links. So we have to put them back here */ +article.pytorch-article .sphx-glr-download-link-note.admonition.note, +article.pytorch-article .reference.download.internal, article.pytorch-article .sphx-glr-signature { + display: block; +} + +/* These 2 rules below are for the weight tables (generated in conf.py) to look + * better. 
In particular we make their row height shorter */ +.table-weights td, .table-weights th { + margin-bottom: 0.2rem; + padding: 0 !important; + line-height: 1 !important; +} +.table-weights p { + margin-bottom: 0.2rem !important; +} + +/* Fix for Sphinx gallery 0.11 +See https://github.com/sphinx-gallery/sphinx-gallery/issues/990 +*/ +article.pytorch-article .sphx-glr-thumbnails .sphx-glr-thumbcontainer { + width: unset; + margin-right: 0; + margin-left: 0; +} +article.pytorch-article div.section div.wy-table-responsive tbody td { + width: 50%; +} diff --git a/docs/source/_static/css/pytorch_theme.css b/docs/source/_static/css/pytorch_theme.css deleted file mode 100644 index 0e54497643c..00000000000 --- a/docs/source/_static/css/pytorch_theme.css +++ /dev/null @@ -1,118 +0,0 @@ -body { - font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; -} - -/* Default header fonts are ugly */ -h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption { - font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; -} - -/* Use white for docs background */ -.wy-side-nav-search { - background-color: #fff; -} - -.wy-nav-content-wrap, .wy-menu li.current > a { - background-color: #fff; -} - -@media screen and (min-width: 1400px) { - .wy-nav-content-wrap { - background-color: rgba(0, 0, 0, 0.0470588); - } - - .wy-nav-content { - background-color: #fff; - } -} - -/* Fixes for mobile */ -.wy-nav-top { - background-color: #fff; - background-image: url('../img/pytorch-logo-dark.svg'); - background-repeat: no-repeat; - background-position: center; - padding: 0; - margin: 0.4045em 0.809em; - color: #333; -} - -.wy-nav-top > a { - display: none; -} - -@media screen and (max-width: 768px) { - .wy-side-nav-search>a img.logo { - height: 60px; - } -} - -/* This is needed to ensure that logo above search scales properly */ -.wy-side-nav-search a { - display: block; -} - -/* This ensures that multiple constructors will remain in separate lines. 
*/ -.rst-content dl:not(.docutils) dt { - display: table; -} - -/* Use our red for literals (it's very similar to the original color) */ -.rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal { - color: #F05732; -} - -.rst-content tt.xref, a .rst-content tt, .rst-content tt.xref, -.rst-content code.xref, a .rst-content tt, a .rst-content code { - color: #404040; -} - -/* Change link colors (except for the menu) */ - -a { - color: #F05732; -} - -a:hover { - color: #F05732; -} - - -a:visited { - color: #D44D2C; -} - -.wy-menu a { - color: #b3b3b3; -} - -.wy-menu a:hover { - color: #b3b3b3; -} - -/* Default footer text is quite big */ -footer { - font-size: 80%; -} - -footer .rst-footer-buttons { - font-size: 125%; /* revert footer settings - 1/80% = 125% */ -} - -footer p { - font-size: 100%; -} - -/* For hidden headers that appear in TOC tree */ -/* see http://stackoverflow.com/a/32363545/3343043 */ -.rst-content .hidden-section { - display: none; -} - -nav .hidden-section { - display: inherit; -} - -.wy-side-nav-search>div.version { - color: #000; -} diff --git a/docs/source/_static/img/pytorch-logo-flame.svg b/docs/source/_static/img/pytorch-logo-flame.svg index 22d7228b4fa..5f2fb76be77 100644 --- a/docs/source/_static/img/pytorch-logo-flame.svg +++ b/docs/source/_static/img/pytorch-logo-flame.svg @@ -30,4 +30,4 @@ style="fill:#9e529f" id="path4698" d="m 24.075479,-7.6293945e-7 c -0.5,0 -1.8,2.49999996293945 -1.8,3.59999996293945 0,1.5 1,2 1.8,2 0.8,0 1.8,-0.5 1.8,-2 -0.1,-1.1 -1.4,-3.59999996293945 -1.8,-3.59999996293945 z" - class="st1" /> \ No newline at end of file + class="st1" /> diff --git a/docs/source/_templates/class.rst b/docs/source/_templates/class.rst new file mode 100644 index 00000000000..eeb823a961f --- /dev/null +++ b/docs/source/_templates/class.rst @@ -0,0 +1,9 @@ +.. role:: hidden + :class: hidden-section +.. currentmodule:: {{ module }} + + +{{ name | underline}} + +.. 
autoclass:: {{ name }} + :members: diff --git a/docs/source/_templates/class_dataset.rst b/docs/source/_templates/class_dataset.rst new file mode 100644 index 00000000000..c559c6dc9b0 --- /dev/null +++ b/docs/source/_templates/class_dataset.rst @@ -0,0 +1,12 @@ +.. role:: hidden + :class: hidden-section +.. currentmodule:: {{ module }} + + +{{ name | underline}} + +.. autoclass:: {{ name }} + :members: + __getitem__, + {% if "category_name" in methods %} category_name {% endif %} + :special-members: diff --git a/docs/source/_templates/function.rst b/docs/source/_templates/function.rst new file mode 100644 index 00000000000..72abc4f50fe --- /dev/null +++ b/docs/source/_templates/function.rst @@ -0,0 +1,8 @@ +.. role:: hidden + :class: hidden-section +.. currentmodule:: {{ module }} + + +{{ name | underline}} + +.. autofunction:: {{ name }} diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html new file mode 100644 index 00000000000..aaa15d56e02 --- /dev/null +++ b/docs/source/_templates/layout.html @@ -0,0 +1,8 @@ +{% extends "!layout.html" %} + +{% block sidebartitle %} + + {% include "searchbox.html" %} +{% endblock %} diff --git a/docs/source/beta_status.py b/docs/source/beta_status.py new file mode 100644 index 00000000000..8871f6debbb --- /dev/null +++ b/docs/source/beta_status.py @@ -0,0 +1,21 @@ +from docutils import nodes +from docutils.parsers.rst import Directive + + +class BetaStatus(Directive): + has_content = True + text = "The {api_name} is in Beta stage, and backward compatibility is not guaranteed." 
+ node = nodes.warning + + def run(self): + text = self.text.format(api_name=" ".join(self.content)) + return [self.node("", nodes.paragraph("", "", nodes.Text(text)))] + + +def setup(app): + app.add_directive("betastatus", BetaStatus) + return { + "version": "0.1", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/docs/source/conf.py b/docs/source/conf.py index 3c277168a70..df6cca3856a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # PyTorch documentation build configuration file, created by # sphinx-quickstart on Fri Dec 23 13:31:47 2016. @@ -20,72 +19,147 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) -import torch + +import os +import sys +import textwrap +from copy import copy +from pathlib import Path + +import pytorch_sphinx_theme import torchvision -import sphinx_rtd_theme +import torchvision.models as M +from sphinx_gallery.sorting import ExplicitOrder +from tabulate import tabulate +sys.path.append(os.path.abspath(".")) # -- General configuration ------------------------------------------------ -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' +# Required version of sphinx is set from docs/requirements.txt # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinxcontrib.googleanalytics', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.duration", + "sphinx_gallery.gen_gallery", + "sphinx_copybutton", + "beta_status", ] +# We override sphinx-gallery's example header to prevent sphinx-gallery from +# creating a note at the top of the rendered notebook. +# https://github.com/sphinx-gallery/sphinx-gallery/blob/451ccba1007cc523f39cbcc960ebc21ca39f7b75/sphinx_gallery/gen_rst.py#L1267-L1271 +# This is because we also want to add a link to google Colab, so we write our own note in each example. +from sphinx_gallery import gen_rst + +gen_rst.EXAMPLE_HEADER = """ +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "{0}" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. rst-class:: sphx-glr-example-title + +.. 
_sphx_glr_{1}: + +""" + + +class CustomGalleryExampleSortKey: + # See https://sphinx-gallery.github.io/stable/configuration.html#sorting-gallery-examples + # and https://github.com/sphinx-gallery/sphinx-gallery/blob/master/sphinx_gallery/sorting.py + def __init__(self, src_dir): + self.src_dir = src_dir + + transforms_subsection_order = [ + "plot_transforms_getting_started.py", + "plot_transforms_illustrations.py", + "plot_transforms_e2e.py", + "plot_cutmix_mixup.py", + "plot_custom_transforms.py", + "plot_tv_tensors.py", + "plot_custom_tv_tensors.py", + ] + + def __call__(self, filename): + if "gallery/transforms" in self.src_dir: + try: + return self.transforms_subsection_order.index(filename) + except ValueError as e: + raise ValueError( + "Looks like you added an example in gallery/transforms? " + "You need to specify its order in docs/source/conf.py. Look for CustomGalleryExampleSortKey." + ) from e + else: + # For other subsections we just sort alphabetically by filename + return filename + + +sphinx_gallery_conf = { + "examples_dirs": "../../gallery/", # path to your example scripts + "gallery_dirs": "auto_examples", # path to where to save gallery generated output + "subsection_order": ExplicitOrder(["../../gallery/transforms", "../../gallery/others"]), + "backreferences_dir": "gen_modules/backreferences", + "doc_module": ("torchvision",), + "remove_config_comments": True, + "ignore_pattern": "helpers.py", + "within_subsection_order": CustomGalleryExampleSortKey, +} + napoleon_use_ivar = True +napoleon_numpy_docstring = False +napoleon_google_docstring = True -googleanalytics_id = 'UA-90545585-1' -googleanalytics_enabled = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string: # -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = { + ".rst": "restructuredtext", +} # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'Torchvision' -copyright = '2017, Torch Contributors' -author = 'Torch Contributors' +project = "Torchvision" +copyright = "2017-present, Torch Contributors" +author = "Torch Contributors" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. -# -# The short X.Y version. -# TODO: change to [:2] at v1.0 -version = 'master (' + torchvision.__version__ + ' )' -# The full version, including alpha/beta/rc tags. -# TODO: verify this works as expected -release = 'master' +# version: The short X.Y version. +# release: The full version, including alpha/beta/rc tags. +if os.environ.get("TORCHVISION_SANITIZE_VERSION_STR_IN_DOCS", None): + # Turn 1.11.0aHASH into 1.11 (major.minor only) + version = release = ".".join(torchvision.__version__.split(".")[:2]) + html_title = " ".join((project, version, "documentation")) +else: + version = f"main ({torchvision.__version__})" + release = "main" + # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -93,7 +167,7 @@ exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = True @@ -104,67 +178,65 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_theme = "pytorch_sphinx_theme" +html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { - 'collapse_navigation': False, - 'display_version': True, - 'logo_only': True, + "collapse_navigation": False, + "display_version": True, + "logo_only": True, + "pytorch_project": "docs", + "navigation_with_keys": True, + "analytics_id": "GTM-T8XT4PS", } -html_logo = '_static/img/pytorch-logo-dark.svg' +html_logo = "_static/img/pytorch-logo-dark.svg" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# html_style_path = 'css/pytorch_theme.css' -html_context = { - 'css_files': [ - 'https://fonts.googleapis.com/css?family=Lato', - '_static/css/pytorch_theme.css' - ], -} +html_static_path = ["_static"] +# TODO: remove this once https://github.com/pytorch/pytorch_sphinx_theme/issues/125 is fixed +html_css_files = [ + "css/custom_torchvision.css", +] # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'PyTorchdoc' +htmlhelp_basename = "PyTorchdoc" -# -- Options for LaTeX output --------------------------------------------- +autosummary_generate = True + +# -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). 
# # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', } + # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'pytorch.tex', 'torchvision Documentation', - 'Torch Contributors', 'manual'), + (master_doc, "pytorch.tex", "torchvision Documentation", "Torch Contributors", "manual"), ] @@ -172,10 +244,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'torchvision', 'torchvision Documentation', - [author], 1) -] +man_pages = [(master_doc, "torchvision", "torchvision Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -184,67 +253,272 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'torchvision', 'torchvision Documentation', - author, 'torchvision', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "torchvision", + "torchvision Documentation", + author, + "torchvision", + "One line description of project.", + "Miscellaneous", + ), ] # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - 'python': ('https://docs.python.org/', None), - 'numpy': ('http://docs.scipy.org/doc/numpy/', None), + "python": ("https://docs.python.org/3/", None), + "torch": ("https://pytorch.org/docs/stable/", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "PIL": ("https://pillow.readthedocs.io/en/stable/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), } # -- A patch that prevents Sphinx from cross-referencing ivar tags ------- # See http://stackoverflow.com/a/41184353/3343043 from docutils import nodes -from sphinx.util.docfields import TypedField from sphinx import addnodes +from sphinx.util.docfields import TypedField def patched_make_field(self, types, domain, items, **kw): # `kw` catches `env=None` needed for newer sphinx while maintaining # backwards compatibility when passed along further down! - # type: (list, unicode, tuple) -> nodes.field + # type: (list, unicode, tuple) -> nodes.field # noqa: F821 def handle_item(fieldarg, content): par = nodes.paragraph() - par += addnodes.literal_strong('', fieldarg) # Patch: this line added + par += addnodes.literal_strong("", fieldarg) # Patch: this line added # par.extend(self.make_xrefs(self.rolename, domain, fieldarg, # addnodes.literal_strong)) if fieldarg in types: - par += nodes.Text(' (') + par += nodes.Text(" (") # NOTE: using .pop() here to prevent a single type node to be # inserted twice into the doctree, which leads to # inconsistencies later when references are resolved fieldtype = types.pop(fieldarg) if len(fieldtype) == 1 and isinstance(fieldtype[0], nodes.Text): - typename = u''.join(n.astext() for n in fieldtype) - typename = typename.replace('int', 'python:int') - typename = typename.replace('long', 'python:long') - typename = typename.replace('float', 'python:float') - typename = typename.replace('type', 'python:type') - par.extend(self.make_xrefs(self.typerolename, domain, typename, - addnodes.literal_emphasis, **kw)) + typename = "".join(n.astext() 
for n in fieldtype) + typename = typename.replace("int", "python:int") + typename = typename.replace("long", "python:long") + typename = typename.replace("float", "python:float") + typename = typename.replace("type", "python:type") + par.extend(self.make_xrefs(self.typerolename, domain, typename, addnodes.literal_emphasis, **kw)) else: par += fieldtype - par += nodes.Text(')') - par += nodes.Text(' -- ') + par += nodes.Text(")") + par += nodes.Text(" -- ") par += content return par - fieldname = nodes.field_name('', self.label) + fieldname = nodes.field_name("", self.label) if len(items) == 1 and self.can_collapse: fieldarg, content = items[0] bodynode = handle_item(fieldarg, content) else: bodynode = self.list_type() for fieldarg, content in items: - bodynode += nodes.list_item('', handle_item(fieldarg, content)) - fieldbody = nodes.field_body('', bodynode) - return nodes.field('', fieldname, fieldbody) + bodynode += nodes.list_item("", handle_item(fieldarg, content)) + fieldbody = nodes.field_body("", bodynode) + return nodes.field("", fieldname, fieldbody) TypedField.make_field = patched_make_field + + +def inject_minigalleries(app, what, name, obj, options, lines): + """Inject a minigallery into a docstring. + + This avoids having to manually write the .. minigallery directive for every item we want a minigallery for, + as it would be easy to miss some. + + This callback is called after the .. auto directives (like ..autoclass) have been processed, + and modifies the lines parameter inplace to add the .. minigallery that will show which examples + are using which object. 
+ + It's a bit hacky, but not *that* hacky when you consider that the recommended way is to do pretty much the same, + but instead with templates using autosummary (which we don't want to use): + (https://sphinx-gallery.github.io/stable/configuration.html#auto-documenting-your-api-with-links-to-examples) + + For docs on autodoc-process-docstring, see the autodoc docs: + https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html + """ + + if what in ("class", "function"): + lines.append(f".. minigallery:: {name}") + lines.append(f" :add-heading: Examples using ``{name.split('.')[-1]}``:") + # avoid heading entirely to avoid warning. As a bonus it actually renders better + lines.append(" :heading-level: 9") + lines.append("\n") + + +def inject_weight_metadata(app, what, name, obj, options, lines): + """This hook is used to generate docs for the models weights. + + Objects like ResNet18_Weights are enums with fields, where each field is a Weight object. + Enums aren't easily documented in Python so the solution we're going for is to: + + - add an autoclass directive in the model's builder docstring, e.g. + + ``` + .. autoclass:: torchvision.models.ResNet34_Weights + :members: + ``` + + (see resnet.py for an example) + - then this hook is called automatically when building the docs, and it generates the text that gets + used within the autoclass directive. + """ + + if getattr(obj, "__name__", "").endswith(("_Weights", "_QuantizedWeights")): + + if len(obj) == 0: + lines[:] = ["There are no available pre-trained weights."] + return + + lines[:] = [ + "The model builder above accepts the following values as the ``weights`` parameter.", + f"``{obj.__name__}.DEFAULT`` is equivalent to ``{obj.DEFAULT}``. You can also use strings, e.g. " + f"``weights='DEFAULT'`` or ``weights='{str(list(obj)[0]).split('.')[1]}'``.", + ] + + if obj.__doc__ is not None and obj.__doc__ != "An enumeration.": + # We only show the custom enum doc if it was overridden. 
The default one from Python is "An enumeration" + lines.append("") + lines.append(obj.__doc__) + + lines.append("") + + for field in obj: + meta = copy(field.meta) + + lines += [f"**{str(field)}**:", ""] + lines += [meta.pop("_docs")] + + if field == obj.DEFAULT: + lines += [f"Also available as ``{obj.__name__}.DEFAULT``."] + lines += [""] + + table = [] + metrics = meta.pop("_metrics") + for dataset, dataset_metrics in metrics.items(): + for metric_name, metric_value in dataset_metrics.items(): + table.append((f"{metric_name} (on {dataset})", str(metric_value))) + + for k, v in meta.items(): + if k in {"recipe", "license"}: + v = f"`link <{v}>`__" + elif k == "min_size": + v = f"height={v[0]}, width={v[1]}" + elif k in {"categories", "keypoint_names"} and isinstance(v, list): + max_visible = 3 + v_sample = ", ".join(v[:max_visible]) + v = f"{v_sample}, ... ({len(v)-max_visible} omitted)" if len(v) > max_visible else v_sample + elif k == "_ops": + v = f"{v:.2f}" + k = "GIPS" if obj.__name__.endswith("_QuantizedWeights") else "GFLOPS" + elif k == "_file_size": + k = "File size" + v = f"{v:.1f} MB" + + table.append((str(k), str(v))) + table = tabulate(table, tablefmt="rst") + lines += [".. rst-class:: table-weights"] # Custom CSS class, see custom_torchvision.css + lines += [".. 
table::", ""] + lines += textwrap.indent(table, " " * 4).split("\n") + lines.append("") + lines.append( + f"The inference transforms are available at ``{str(field)}.transforms`` and " + f"perform the following preprocessing operations: {field.transforms().describe()}" + ) + lines.append("") + + +def generate_weights_table(module, table_name, metrics, dataset, include_patterns=None, exclude_patterns=None): + weights_endswith = "_QuantizedWeights" if module.__name__.split(".")[-1] == "quantization" else "_Weights" + weight_enums = [getattr(module, name) for name in dir(module) if name.endswith(weights_endswith)] + weights = [w for weight_enum in weight_enums for w in weight_enum] + + if include_patterns is not None: + weights = [w for w in weights if any(p in str(w) for p in include_patterns)] + if exclude_patterns is not None: + weights = [w for w in weights if all(p not in str(w) for p in exclude_patterns)] + + ops_name = "GIPS" if "QuantizedWeights" in weights_endswith else "GFLOPS" + + metrics_keys, metrics_names = zip(*metrics) + column_names = ["Weight"] + list(metrics_names) + ["Params"] + [ops_name, "Recipe"] # Final column order + column_names = [f"**{name}**" for name in column_names] # Add bold + + content = [] + for w in weights: + row = [ + f":class:`{w} <{type(w).__name__}>`", + *(w.meta["_metrics"][dataset][metric] for metric in metrics_keys), + f"{w.meta['num_params']/1e6:.1f}M", + f"{w.meta['_ops']:.2f}", + f"`link <{w.meta['recipe']}>`__", + ] + + content.append(row) + + column_widths = ["110"] + ["18"] * len(metrics_names) + ["18"] * 2 + ["10"] + widths_table = " ".join(column_widths) + + table = tabulate(content, headers=column_names, tablefmt="rst") + + generated_dir = Path("generated") + generated_dir.mkdir(exist_ok=True) + with open(generated_dir / f"{table_name}_table.rst", "w+") as table_file: + table_file.write(".. rst-class:: table-weights\n") # Custom CSS class, see custom_torchvision.css + table_file.write(".. 
table::\n") + table_file.write(f" :widths: {widths_table} \n\n") + table_file.write(f"{textwrap.indent(table, ' ' * 4)}\n\n") + + +generate_weights_table( + module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet-1K" +) +generate_weights_table( + module=M.quantization, + table_name="classification_quant", + metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], + dataset="ImageNet-1K", +) +generate_weights_table( + module=M.detection, + table_name="detection", + metrics=[("box_map", "Box MAP")], + exclude_patterns=["Mask", "Keypoint"], + dataset="COCO-val2017", +) +generate_weights_table( + module=M.detection, + table_name="instance_segmentation", + metrics=[("box_map", "Box MAP"), ("mask_map", "Mask MAP")], + dataset="COCO-val2017", + include_patterns=["Mask"], +) +generate_weights_table( + module=M.detection, + table_name="detection_keypoint", + metrics=[("box_map", "Box MAP"), ("kp_map", "Keypoint MAP")], + dataset="COCO-val2017", + include_patterns=["Keypoint"], +) +generate_weights_table( + module=M.segmentation, + table_name="segmentation", + metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")], + dataset="COCO-val2017-VOC-labels", +) +generate_weights_table( + module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="Kinetics-400" +) + + +def setup(app): + + app.connect("autodoc-process-docstring", inject_minigalleries) + app.connect("autodoc-process-docstring", inject_weight_metadata) diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 040962edc6a..3caa7434e20 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -1,10 +1,18 @@ -torchvision.datasets -==================== +.. _datasets: + +Datasets +======== + +Torchvision provides many built-in datasets in the ``torchvision.datasets`` +module, as well as utility classes for building your own datasets. 
+ +Built-in datasets +----------------- All datasets are subclasses of :class:`torch.utils.data.Dataset` i.e, they have ``__getitem__`` and ``__len__`` methods implemented. Hence, they can all be passed to a :class:`torch.utils.data.DataLoader` -which can load multiple samples parallelly using ``torch.multiprocessing`` workers. +which can load multiple samples in parallel using ``torch.multiprocessing`` workers. For example: :: imagenet_data = torchvision.datasets.ImageNet('path/to/imagenet_root/') @@ -13,214 +21,172 @@ For example: :: shuffle=True, num_workers=args.nThreads) -The following datasets are available: - -.. contents:: Datasets - :local: +.. currentmodule:: torchvision.datasets All the datasets have almost similar API. They all have two common arguments: ``transform`` and ``target_transform`` to transform the input and target respectively. +You can also create your own datasets using the provided :ref:`base classes <base_classes_datasets>`. + +.. warning:: + + When a dataset object is created with ``download=True``, the files are first + downloaded and extracted in the root directory. This download logic is not + multi-process safe, so it may lead to conflicts / race conditions if it is + run within a distributed setting. In distributed mode, we recommend creating + a dummy dataset object to trigger the download logic *before* setting up + distributed mode. + +Image classification +~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + :template: class_dataset.rst + + Caltech101 + Caltech256 + CelebA + CIFAR10 + CIFAR100 + Country211 + DTD + EMNIST + EuroSAT + FakeData + FashionMNIST + FER2013 + FGVCAircraft + Flickr8k + Flickr30k + Flowers102 + Food101 + GTSRB + INaturalist + ImageNet + Imagenette + KMNIST + LFWPeople + LSUN + MNIST + Omniglot + OxfordIIITPet + Places365 + PCAM + QMNIST + RenderedSST2 + SEMEION + SBU + StanfordCars + STL10 + SUN397 + SVHN + USPS + +Image detection or segmentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
autosummary:: + :toctree: generated/ + :template: class_dataset.rst + + CocoDetection + CelebA + Cityscapes + Kitti + OxfordIIITPet + SBDataset + VOCSegmentation + VOCDetection + WIDERFace + +Optical Flow +~~~~~~~~~~~~ - -.. currentmodule:: torchvision.datasets - - -MNIST -~~~~~ - -.. autoclass:: MNIST - -Fashion-MNIST -~~~~~~~~~~~~~ - -.. autoclass:: FashionMNIST - -KMNIST -~~~~~~~~~~~~~ - -.. autoclass:: KMNIST - -EMNIST -~~~~~~ - -.. autoclass:: EMNIST - -QMNIST -~~~~~~ - -.. autoclass:: QMNIST - -FakeData -~~~~~~~~ - -.. autoclass:: FakeData - -COCO -~~~~ - -.. note :: - These require the `COCO API to be installed`_ - -.. _COCO API to be installed: https://github.com/pdollar/coco/tree/master/PythonAPI - - -Captions -^^^^^^^^ - -.. autoclass:: CocoCaptions - :members: __getitem__ - :special-members: - - -Detection -^^^^^^^^^ - -.. autoclass:: CocoDetection - :members: __getitem__ - :special-members: - -LSUN -~~~~ - -.. autoclass:: LSUN - :members: __getitem__ - :special-members: - -ImageFolder -~~~~~~~~~~~ - -.. autoclass:: ImageFolder - :members: __getitem__ - :special-members: - -DatasetFolder -~~~~~~~~~~~~~ - -.. autoclass:: DatasetFolder - :members: __getitem__ - :special-members: - - - -ImageNet +.. autosummary:: + :toctree: generated/ + :template: class_dataset.rst + + FlyingChairs + FlyingThings3D + HD1K + KittiFlow + Sintel + +Stereo Matching +~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + :template: class_dataset.rst + + CarlaStereo + Kitti2012Stereo + Kitti2015Stereo + CREStereo + FallingThingsStereo + SceneFlowStereo + SintelStereo + InStereo2k + ETH3DStereo + Middlebury2014Stereo + +Image pairs ~~~~~~~~~~~ -.. autoclass:: ImageNet - -.. note :: - This requires `scipy` to be installed - - -CIFAR -~~~~~ - -.. autoclass:: CIFAR10 - :members: __getitem__ - :special-members: - -.. autoclass:: CIFAR100 - -STL10 -~~~~~ - - -.. autoclass:: STL10 - :members: __getitem__ - :special-members: - -SVHN -~~~~~ - +.. 
autosummary:: + :toctree: generated/ + :template: class_dataset.rst -.. autoclass:: SVHN - :members: __getitem__ - :special-members: + LFWPairs + PhotoTour -PhotoTour -~~~~~~~~~ +Image captioning +~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + :template: class_dataset.rst -.. autoclass:: PhotoTour - :members: __getitem__ - :special-members: + CocoCaptions -SBU -~~~ +Video classification +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + :template: class_dataset.rst -.. autoclass:: SBU - :members: __getitem__ - :special-members: + HMDB51 + Kinetics + UCF101 -Flickr -~~~~~~ +Video prediction +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + :template: class_dataset.rst -.. autoclass:: Flickr8k - :members: __getitem__ - :special-members: - -.. autoclass:: Flickr30k - :members: __getitem__ - :special-members: - -VOC -~~~~~~ - - -.. autoclass:: VOCSegmentation - :members: __getitem__ - :special-members: - -.. autoclass:: VOCDetection - :members: __getitem__ - :special-members: - -Cityscapes -~~~~~~~~~~ - -.. note :: - Requires Cityscape to be downloaded. - -.. autoclass:: Cityscapes - :members: __getitem__ - :special-members: - -SBD -~~~~~~ - - -.. autoclass:: SBDataset - :members: __getitem__ - :special-members: - -USPS -~~~~~ - -.. autoclass:: USPS - :members: __getitem__ - :special-members: - - -Kinetics-400 -~~~~~~~~~~~~ + MovingMNIST -.. autoclass:: Kinetics400 - :members: __getitem__ - :special-members: +.. _base_classes_datasets: +Base classes for custom datasets +-------------------------------- -HMDB51 -~~~~~~~ +.. autosummary:: + :toctree: generated/ + :template: class.rst -.. autoclass:: HMDB51 - :members: __getitem__ - :special-members: + DatasetFolder + ImageFolder + VisionDataset +Transforms v2 +------------- -UCF101 -~~~~~~~ +.. autosummary:: + :toctree: generated/ + :template: function.rst -.. 
autoclass:: UCF101 - :members: __getitem__ - :special-members: + wrap_dataset_for_transforms_v2 diff --git a/docs/source/docutils.conf b/docs/source/docutils.conf new file mode 100644 index 00000000000..e2bef654a4a --- /dev/null +++ b/docs/source/docutils.conf @@ -0,0 +1,3 @@ +# Necessary for the table generated by autosummary to look decent +[html writers] +table_style: colwidths-auto diff --git a/docs/source/feature_extraction.rst b/docs/source/feature_extraction.rst new file mode 100644 index 00000000000..e83bc2fe4bc --- /dev/null +++ b/docs/source/feature_extraction.rst @@ -0,0 +1,166 @@ +Feature extraction for model inspection +======================================= + +.. currentmodule:: torchvision.models.feature_extraction + +The ``torchvision.models.feature_extraction`` package contains +feature extraction utilities that let us tap into our models to access intermediate +transformations of our inputs. This could be useful for a variety of +applications in computer vision. Just a few examples are: + +- Visualizing feature maps. +- Extracting features to compute image descriptors for tasks like facial + recognition, copy-detection, or image retrieval. +- Passing selected features to downstream sub-networks for end-to-end training + with a specific task in mind. For example, passing a hierarchy of features + to a Feature Pyramid Network with object detection heads. + +Torchvision provides :func:`create_feature_extractor` for this purpose. +It works by following roughly these steps: + +1. Symbolically tracing the model to get a graphical representation of + how it transforms the input, step by step. +2. Setting the user-selected graph nodes as outputs. +3. Removing all redundant nodes (anything downstream of the output nodes). +4. Generating python code from the resulting graph and bundling that into a + PyTorch module together with the graph itself. 
+ +| + +The `torch.fx documentation <https://pytorch.org/docs/stable/fx.html>`_ +provides a more general and detailed explanation of the above procedure and +the inner workings of the symbolic tracing. + +.. _about-node-names: + +**About Node Names** + +In order to specify which nodes should be output nodes for extracted +features, one should be familiar with the node naming convention used here +(which differs slightly from that used in ``torch.fx``). A node name is +specified as a ``.`` separated path walking the module hierarchy from top level +module down to leaf operation or leaf module. For instance ``"layer4.2.relu"`` +in ResNet-50 represents the output of the ReLU of the 2nd block of the 4th +layer of the ``ResNet`` module. Here are some finer points to keep in mind: + +- When specifying node names for :func:`create_feature_extractor`, you may + provide a truncated version of a node name as a shortcut. To see how this + works, try creating a ResNet-50 model and printing the node names with + ``train_nodes, _ = get_graph_node_names(model) print(train_nodes)`` and + observe that the last node pertaining to ``layer4`` is + ``"layer4.2.relu_2"``. One may specify ``"layer4.2.relu_2"`` as the return + node, or just ``"layer4"`` as this, by convention, refers to the last node + (in order of execution) of ``layer4``. +- If a certain module or operation is repeated more than once, node names get + an additional ``_{int}`` postfix to disambiguate. For instance, maybe the + addition (``+``) operation is used three times in the same ``forward`` + method. Then there would be ``"path.to.module.add"``, + ``"path.to.module.add_1"``, ``"path.to.module.add_2"``. The counter is + maintained within the scope of the direct parent. So in ResNet-50 there is + a ``"layer4.1.add"`` and a ``"layer4.2.add"``. Because the addition + operations reside in different blocks, there is no need for a postfix to + disambiguate. + + +**An Example** + +Here is an example of how we might extract features for MaskRCNN: + +.. 
code-block:: python + + import torch + from torchvision.models import resnet50 + from torchvision.models.feature_extraction import get_graph_node_names + from torchvision.models.feature_extraction import create_feature_extractor + from torchvision.models.detection.mask_rcnn import MaskRCNN + from torchvision.models.detection.backbone_utils import LastLevelMaxPool + from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork + + + # To assist you in designing the feature extractor you may want to print out + # the available nodes for resnet50. + m = resnet50() + train_nodes, eval_nodes = get_graph_node_names(resnet50()) + + # The returned lists are the names of all the graph nodes (in order of + # execution) for the input model traced in train mode and in eval mode + # respectively. You'll find that `train_nodes` and `eval_nodes` are the same + # for this example. But if the model contains control flow that's dependent + # on the training mode, they may be different. + + # To specify the nodes you want to extract, you could select the final node + # that appears in each of the main layers: + return_nodes = { + # node_name: user-specified key for output dict + 'layer1.2.relu_2': 'layer1', + 'layer2.3.relu_2': 'layer2', + 'layer3.5.relu_2': 'layer3', + 'layer4.2.relu_2': 'layer4', + } + + # But `create_feature_extractor` can also accept truncated node specifications + # like "layer1", as it will just pick the last node that's a descendent + # of the specification. (Tip: be careful with this, especially when a layer + # has multiple outputs. It's not always guaranteed that the last operation + # performed is the one that corresponds to the output you desire. You should + # consult the source code for the input model to confirm.) + return_nodes = { + 'layer1': 'layer1', + 'layer2': 'layer2', + 'layer3': 'layer3', + 'layer4': 'layer4', + } + + # Now you can build the feature extractor. 
This returns a module whose forward + # method returns a dictionary like: + # { + # 'layer1': output of layer 1, + # 'layer2': output of layer 2, + # 'layer3': output of layer 3, + # 'layer4': output of layer 4, + # } + create_feature_extractor(m, return_nodes=return_nodes) + + # Let's put all that together to wrap resnet50 with MaskRCNN + + # MaskRCNN requires a backbone with an attached FPN + class Resnet50WithFPN(torch.nn.Module): + def __init__(self): + super(Resnet50WithFPN, self).__init__() + # Get a resnet50 backbone + m = resnet50() + # Extract 4 main layers (note: MaskRCNN needs this particular name + # mapping for return nodes) + self.body = create_feature_extractor( + m, return_nodes={f'layer{k}': str(v) + for v, k in enumerate([1, 2, 3, 4])}) + # Dry run to get number of channels for FPN + inp = torch.randn(2, 3, 224, 224) + with torch.no_grad(): + out = self.body(inp) + in_channels_list = [o.shape[1] for o in out.values()] + # Build FPN + self.out_channels = 256 + self.fpn = FeaturePyramidNetwork( + in_channels_list, out_channels=self.out_channels, + extra_blocks=LastLevelMaxPool()) + + def forward(self, x): + x = self.body(x) + x = self.fpn(x) + return x + + + # Now we can build our model! + model = MaskRCNN(Resnet50WithFPN(), num_classes=91).eval() + + +API Reference +------------- + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + create_feature_extractor + get_graph_node_names diff --git a/docs/source/index.rst b/docs/source/index.rst index 9de82b6e7fc..dc5fdefaefb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,5 +1,28 @@ torchvision =========== +This library is part of the `PyTorch +`_ project. PyTorch is an open source +machine learning framework. + +Features described in this documentation are classified by release status: + + *Stable:* These features will be maintained long-term and there should generally + be no major performance limitations or gaps in documentation. 
+ We also expect to maintain backwards compatibility (although + breaking changes can happen and notice will be given one release ahead + of time). + + *Beta:* Features are tagged as Beta because the API may change based on + user feedback, because the performance needs to improve, or because + coverage across operators is not yet complete. For Beta features, we are + committing to seeing the feature through to the Stable classification. + We are not, however, committing to backwards compatibility. + + *Prototype:* These features are typically not available as part of + binary distributions like PyPI or Conda, except sometimes behind run-time + flags, and are at an early stage for feedback and testing. + + The :mod:`torchvision` package consists of popular datasets, model architectures, and common image transformations for computer vision. @@ -8,12 +31,39 @@ architectures, and common image transformations for computer vision. :maxdepth: 2 :caption: Package Reference - datasets - io - models - ops transforms + tv_tensors + models + datasets utils + ops + io + feature_extraction + +.. toctree:: + :maxdepth: 1 + :caption: Examples and training references + + auto_examples/index + training_references .. automodule:: torchvision :members: + +.. toctree:: + :maxdepth: 1 + :caption: PyTorch Libraries + + PyTorch + torchaudio + torchtext + torchvision + TorchElastic + TorchServe + PyTorch on XLA Devices + + +Indices +------- + +* :ref:`genindex` diff --git a/docs/source/io.rst b/docs/source/io.rst index e7aeedc0716..c3f2d658014 100644 --- a/docs/source/io.rst +++ b/docs/source/io.rst @@ -1,16 +1,120 @@ -torchvision.io -============== +Decoding / Encoding images and videos +===================================== .. currentmodule:: torchvision.io -The :mod:`torchvision.io` package provides functions for performing IO -operations. They are currently specific to reading and writing video. 
+The :mod:`torchvision.io` module provides utilities for decoding and encoding +images and videos. + +Image Decoding +-------------- + +Torchvision currently supports decoding JPEG, PNG, WEBP, GIF, AVIF, and HEIC +images. JPEG decoding can also be done on CUDA GPUs. + +The main entry point is the :func:`~torchvision.io.decode_image` function, which +you can use as an alternative to ``PIL.Image.open()``. It will decode images +straight into image Tensors, thus saving you the conversion and allowing you to +run transforms/preproc natively on tensors. + +.. code:: + + from torchvision.io import decode_image + + img = decode_image("path_to_image", mode="RGB") + img.dtype # torch.uint8 + + # Or + raw_encoded_bytes = ... # read encoded bytes from your file system + img = decode_image(raw_encoded_bytes, mode="RGB") + + +:func:`~torchvision.io.decode_image` will automatically detect the image format, +and call the corresponding decoder (except for HEIC and AVIF images, see details +in :func:`~torchvision.io.decode_avif` and :func:`~torchvision.io.decode_heic`). +You can also use the lower-level format-specific decoders which can be more +powerful, e.g. if you want to encode/decode JPEGs on CUDA. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + decode_image + decode_jpeg + encode_png + decode_webp + decode_avif + decode_heic + decode_gif + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + ImageReadMode + +Obsolete decoding function: + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + read_image + +Image Encoding +-------------- + +For encoding, JPEG (cpu and CUDA) and PNG are supported. + + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + encode_jpeg + write_jpeg + encode_png + write_png + +IO operations +------------- + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + read_file + write_file Video ----- -.. autofunction:: read_video +.. 
warning:: + + Torchvision supports video decoding through different APIs listed below, + some of which are still in BETA stage. In the near future, we intend to + centralize PyTorch's video decoding capabilities within the `torchcodec + <https://github.com/pytorch/torchcodec>`_ project. We encourage you to try + it out and share your feedback, as the torchvision video decoders will + eventually be deprecated. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + read_video + read_video_timestamps + write_video + + +**Fine-grained video API** + +In addition to the :func:`read_video` function, we provide a high-performance +lower-level API for more fine-grained control compared to the :func:`read_video` function. +It does all this whilst fully supporting torchscript. -.. autofunction:: read_video_timestamps +.. autosummary:: + :toctree: generated/ + :template: class.rst -.. autofunction:: write_video + VideoReader diff --git a/docs/source/models.rst b/docs/source/models.rst index e1a141092dc..d0096aaf854 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -1,445 +1,577 @@ -torchvision.models -################## +.. _models: +Models and pre-trained weights +############################## -The models subpackage contains definitions of models for addressing +The ``torchvision.models`` subpackage contains definitions of models for addressing different tasks, including: image classification, pixelwise semantic segmentation, object detection, instance segmentation, person -keypoint detection and video classification. +keypoint detection, video classification, and optical flow. +General information on pre-trained weights +========================================== -Classification -============== +TorchVision offers pre-trained weights for every provided architecture, using +the PyTorch :mod:`torch.hub`. Instancing a pre-trained model will download its +weights to a cache directory. This directory can be set using the `TORCH_HOME` +environment variable. 
See :func:`torch.hub.load_state_dict_from_url` for details. + +.. note:: + + The pre-trained models provided in this library may have their own licenses or + terms and conditions derived from the dataset used for training. It is your + responsibility to determine whether you have permission to use the models for + your use case. + +.. note :: + Backward compatibility is guaranteed for loading a serialized + ``state_dict`` to the model created using old PyTorch version. + On the contrary, loading entire saved models or serialized + ``ScriptModules`` (serialized using older versions of PyTorch) + may not preserve the historic behaviour. Refer to the following + `documentation + `_ + + +Initializing pre-trained models +------------------------------- + +As of v0.13, TorchVision offers a new `Multi-weight support API +`_ +for loading different weights to the existing model builder methods: + +.. code:: python + + from torchvision.models import resnet50, ResNet50_Weights + + # Old weights with accuracy 76.130% + resnet50(weights=ResNet50_Weights.IMAGENET1K_V1) + + # New weights with accuracy 80.858% + resnet50(weights=ResNet50_Weights.IMAGENET1K_V2) -The models subpackage contains definitions for the following model -architectures for image classification: + # Best available weights (currently alias for IMAGENET1K_V2) + # Note that these weights may change across versions + resnet50(weights=ResNet50_Weights.DEFAULT) -- `AlexNet`_ -- `VGG`_ -- `ResNet`_ -- `SqueezeNet`_ -- `DenseNet`_ -- `Inception`_ v3 -- `GoogLeNet`_ -- `ShuffleNet`_ v2 -- `MobileNet`_ v2 -- `ResNeXt`_ -- `Wide ResNet`_ -- `MNASNet`_ + # Strings are also supported + resnet50(weights="IMAGENET1K_V2") -You can construct a model with random weights by calling its constructor: + # No weights - random initialization + resnet50(weights=None) + + +Migrating to the new API is very straightforward. The following method calls between the 2 APIs are all equivalent: .. 
code:: python - import torchvision.models as models - resnet18 = models.resnet18() - alexnet = models.alexnet() - vgg16 = models.vgg16() - squeezenet = models.squeezenet1_0() - densenet = models.densenet161() - inception = models.inception_v3() - googlenet = models.googlenet() - shufflenet = models.shufflenet_v2_x1_0() - mobilenet = models.mobilenet_v2() - resnext50_32x4d = models.resnext50_32x4d() - wide_resnet50_2 = models.wide_resnet50_2() - mnasnet = models.mnasnet1_0() - -We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`. -These can be constructed by passing ``pretrained=True``: + from torchvision.models import resnet50, ResNet50_Weights + + # Using pretrained weights: + resnet50(weights=ResNet50_Weights.IMAGENET1K_V1) + resnet50(weights="IMAGENET1K_V1") + resnet50(pretrained=True) # deprecated + resnet50(True) # deprecated + + # Using no weights: + resnet50(weights=None) + resnet50() + resnet50(pretrained=False) # deprecated + resnet50(False) # deprecated + +Note that the ``pretrained`` parameter is now deprecated, using it will emit warnings and will be removed on v0.15. + +Using the pre-trained models +---------------------------- + +Before using the pre-trained models, one must preprocess the image +(resize with right resolution/interpolation, apply inference transforms, +rescale the values etc). There is no standard way to do this as it depends on +how a given model was trained. It can vary across model families, variants or +even weight versions. Using the correct preprocessing method is critical and +failing to do so may lead to decreased accuracy or incorrect outputs. + +All the necessary information for the inference transforms of each pre-trained +model is provided on its weights documentation. To simplify inference, TorchVision +bundles the necessary preprocessing transforms into each model weight. These are +accessible via the ``weight.transforms`` attribute: .. 
code:: python - import torchvision.models as models - resnet18 = models.resnet18(pretrained=True) - alexnet = models.alexnet(pretrained=True) - squeezenet = models.squeezenet1_0(pretrained=True) - vgg16 = models.vgg16(pretrained=True) - densenet = models.densenet161(pretrained=True) - inception = models.inception_v3(pretrained=True) - googlenet = models.googlenet(pretrained=True) - shufflenet = models.shufflenet_v2_x1_0(pretrained=True) - mobilenet = models.mobilenet_v2(pretrained=True) - resnext50_32x4d = models.resnext50_32x4d(pretrained=True) - wide_resnet50_2 = models.wide_resnet50_2(pretrained=True) - mnasnet = models.mnasnet1_0(pretrained=True) - -Instancing a pre-trained model will download its weights to a cache directory. -This directory can be set using the `TORCH_MODEL_ZOO` environment variable. See -:func:`torch.utils.model_zoo.load_url` for details. + # Initialize the Weight Transforms + weights = ResNet50_Weights.DEFAULT + preprocess = weights.transforms() + + # Apply it to the input image + img_transformed = preprocess(img) + Some models use modules which have different training and evaluation behavior, such as batch normalization. To switch between these modes, use ``model.train()`` or ``model.eval()`` as appropriate. See -:meth:`~torch.nn.Module.train` or :meth:`~torch.nn.Module.eval` for details. - -All pre-trained models expect input images normalized in the same way, -i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), -where H and W are expected to be at least 224. -The images have to be loaded in to a range of [0, 1] and then normalized -using ``mean = [0.485, 0.456, 0.406]`` and ``std = [0.229, 0.224, 0.225]``. 
-You can use the following transform to normalize:: - - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - -An example of such normalization can be found in the imagenet example -`here `_ - -ImageNet 1-crop error rates (224x224) - -================================ ============= ============= -Network Top-1 error Top-5 error -================================ ============= ============= -AlexNet 43.45 20.91 -VGG-11 30.98 11.37 -VGG-13 30.07 10.75 -VGG-16 28.41 9.62 -VGG-19 27.62 9.12 -VGG-11 with batch normalization 29.62 10.19 -VGG-13 with batch normalization 28.45 9.63 -VGG-16 with batch normalization 26.63 8.50 -VGG-19 with batch normalization 25.76 8.15 -ResNet-18 30.24 10.92 -ResNet-34 26.70 8.58 -ResNet-50 23.85 7.13 -ResNet-101 22.63 6.44 -ResNet-152 21.69 5.94 -SqueezeNet 1.0 41.90 19.58 -SqueezeNet 1.1 41.81 19.38 -Densenet-121 25.35 7.83 -Densenet-169 24.00 7.00 -Densenet-201 22.80 6.43 -Densenet-161 22.35 6.20 -Inception v3 22.55 6.44 -GoogleNet 30.22 10.47 -ShuffleNet V2 30.64 11.68 -MobileNet V2 28.12 9.71 -ResNeXt-50-32x4d 22.38 6.30 -ResNeXt-101-32x8d 20.69 5.47 -Wide ResNet-50-2 21.49 5.91 -Wide ResNet-101-2 21.16 5.72 -MNASNet 1.0 26.49 8.456 -================================ ============= ============= - - -.. _AlexNet: https://arxiv.org/abs/1404.5997 -.. _VGG: https://arxiv.org/abs/1409.1556 -.. _ResNet: https://arxiv.org/abs/1512.03385 -.. _SqueezeNet: https://arxiv.org/abs/1602.07360 -.. _DenseNet: https://arxiv.org/abs/1608.06993 -.. _Inception: https://arxiv.org/abs/1512.00567 -.. _GoogLeNet: https://arxiv.org/abs/1409.4842 -.. _ShuffleNet: https://arxiv.org/abs/1807.11164 -.. _MobileNet: https://arxiv.org/abs/1801.04381 -.. _ResNeXt: https://arxiv.org/abs/1611.05431 -.. _MNASNet: https://arxiv.org/abs/1807.11626 +:meth:`~torch.nn.Module.train` or :meth:`~torch.nn.Module.eval` for details. + +.. 
code:: python + + # Initialize model + weights = ResNet50_Weights.DEFAULT + model = resnet50(weights=weights) + + # Set model to eval mode + model.eval() + +Listing and retrieving available models +--------------------------------------- + +As of v0.14, TorchVision offers a new mechanism which allows listing and +retrieving models and weights by their names. Here are a few examples on how to +use them: + +.. code:: python + + # List available models + all_models = list_models() + classification_models = list_models(module=torchvision.models) + + # Initialize models + m1 = get_model("mobilenet_v3_large", weights=None) + m2 = get_model("quantized_mobilenet_v3_large", weights="DEFAULT") + + # Fetch weights + weights = get_weight("MobileNet_V3_Large_QuantizedWeights.DEFAULT") + assert weights == MobileNet_V3_Large_QuantizedWeights.DEFAULT + + weights_enum = get_model_weights("quantized_mobilenet_v3_large") + assert weights_enum == MobileNet_V3_Large_QuantizedWeights + + weights_enum2 = get_model_weights(torchvision.models.quantization.mobilenet_v3_large) + assert weights_enum == weights_enum2 + +Here are the available public functions to retrieve models and their corresponding weights: .. currentmodule:: torchvision.models +.. autosummary:: + :toctree: generated/ + :template: function.rst -Alexnet -------- + get_model + get_model_weights + get_weight + list_models -.. autofunction:: alexnet +Using models from Hub +--------------------- -VGG ---- +Most pre-trained models can be accessed directly via PyTorch Hub without having TorchVision installed: -.. autofunction:: vgg11 -.. autofunction:: vgg11_bn -.. autofunction:: vgg13 -.. autofunction:: vgg13_bn -.. autofunction:: vgg16 -.. autofunction:: vgg16_bn -.. autofunction:: vgg19 -.. autofunction:: vgg19_bn +.. code:: python + import torch -ResNet ------- + # Option 1: passing weights param as string + model = torch.hub.load("pytorch/vision", "resnet50", weights="IMAGENET1K_V2") -.. autofunction:: resnet18 -.. 
autofunction:: resnet34 -.. autofunction:: resnet50 -.. autofunction:: resnet101 -.. autofunction:: resnet152 + # Option 2: passing weights param as enum + weights = torch.hub.load( + "pytorch/vision", + "get_weight", + weights="ResNet50_Weights.IMAGENET1K_V2", + ) + model = torch.hub.load("pytorch/vision", "resnet50", weights=weights) -SqueezeNet ----------- +You can also retrieve all the available weights of a specific model via PyTorch Hub by doing: -.. autofunction:: squeezenet1_0 -.. autofunction:: squeezenet1_1 +.. code:: python -DenseNet ---------- + import torch -.. autofunction:: densenet121 -.. autofunction:: densenet169 -.. autofunction:: densenet161 -.. autofunction:: densenet201 + weight_enum = torch.hub.load("pytorch/vision", "get_model_weights", name="resnet50") + print([weight for weight in weight_enum]) -Inception v3 ------------- +The only exception to the above are the detection models included on +:mod:`torchvision.models.detection`. These models require TorchVision +to be installed because they depend on custom C++ operators. -.. autofunction:: inception_v3 +Classification +============== + +.. currentmodule:: torchvision.models + +The following classification models are available, with or without pre-trained +weights: + +.. toctree:: + :maxdepth: 1 + + models/alexnet + models/convnext + models/densenet + models/efficientnet + models/efficientnetv2 + models/googlenet + models/inception + models/maxvit + models/mnasnet + models/mobilenetv2 + models/mobilenetv3 + models/regnet + models/resnet + models/resnext + models/shufflenetv2 + models/squeezenet + models/swin_transformer + models/vgg + models/vision_transformer + models/wide_resnet + +| + +Here is an example of how to use the pre-trained image classification models: + +.. code:: python + + from torchvision.io import decode_image + from torchvision.models import resnet50, ResNet50_Weights -GoogLeNet ------------- + img = decode_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg") -.. 
autofunction:: googlenet + # Step 1: Initialize model with the best available weights + weights = ResNet50_Weights.DEFAULT + model = resnet50(weights=weights) + model.eval() -ShuffleNet v2 -------------- + # Step 2: Initialize the inference transforms + preprocess = weights.transforms() -.. autofunction:: shufflenet_v2_x0_5 -.. autofunction:: shufflenet_v2_x1_0 -.. autofunction:: shufflenet_v2_x1_5 -.. autofunction:: shufflenet_v2_x2_0 + # Step 3: Apply inference preprocessing transforms + batch = preprocess(img).unsqueeze(0) -MobileNet v2 -------------- + # Step 4: Use the model and print the predicted category + prediction = model(batch).squeeze(0).softmax(0) + class_id = prediction.argmax().item() + score = prediction[class_id].item() + category_name = weights.meta["categories"][class_id] + print(f"{category_name}: {100 * score:.1f}%") -.. autofunction:: mobilenet_v2 +The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``. -ResNext -------- +Table of all available classification weights +--------------------------------------------- -.. autofunction:: resnext50_32x4d -.. autofunction:: resnext101_32x8d +Accuracies are reported on ImageNet-1K using single crops: -Wide ResNet ------------ +.. include:: generated/classification_table.rst -.. autofunction:: wide_resnet50_2 -.. autofunction:: wide_resnet101_2 +Quantized models +---------------- -MNASNet --------- +.. currentmodule:: torchvision.models.quantization -.. autofunction:: mnasnet0_5 -.. autofunction:: mnasnet0_75 -.. autofunction:: mnasnet1_0 -.. autofunction:: mnasnet1_3 +The following architectures provide support for INT8 quantized models, with or without +pre-trained weights: + +.. 
toctree:: + :maxdepth: 1 + + models/googlenet_quant + models/inception_quant + models/mobilenetv2_quant + models/mobilenetv3_quant + models/resnet_quant + models/resnext_quant + models/shufflenetv2_quant + +| + +Here is an example of how to use the pre-trained quantized image classification models: + +.. code:: python + from torchvision.io import decode_image + from torchvision.models.quantization import resnet50, ResNet50_QuantizedWeights + + img = decode_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg") + + # Step 1: Initialize model with the best available weights + weights = ResNet50_QuantizedWeights.DEFAULT + model = resnet50(weights=weights, quantize=True) + model.eval() + + # Step 2: Initialize the inference transforms + preprocess = weights.transforms() + + # Step 3: Apply inference preprocessing transforms + batch = preprocess(img).unsqueeze(0) + + # Step 4: Use the model and print the predicted category + prediction = model(batch).squeeze(0).softmax(0) + class_id = prediction.argmax().item() + score = prediction[class_id].item() + category_name = weights.meta["categories"][class_id] + print(f"{category_name}: {100 * score}%") + +The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``. + + +Table of all available quantized classification weights +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Accuracies are reported on ImageNet-1K using single crops: + +.. include:: generated/classification_quant_table.rst Semantic Segmentation ===================== -The models subpackage contains definitions for the following model -architectures for semantic segmentation: +.. currentmodule:: torchvision.models.segmentation -- `FCN ResNet101 `_ -- `DeepLabV3 ResNet101 `_ +.. betastatus:: segmentation module -As with image classification models, all pre-trained models expect input images normalized in the same way. 
-The images have to be loaded in to a range of ``[0, 1]`` and then normalized using -``mean = [0.485, 0.456, 0.406]`` and ``std = [0.229, 0.224, 0.225]``. -They have been trained on images resized such that their minimum size is 520. +The following semantic segmentation models are available, with or without +pre-trained weights: -The pre-trained models have been trained on a subset of COCO train2017, on the 20 categories that are -present in the Pascal VOC dataset. You can see more information on how the subset has been selected in -``references/segmentation/coco_utils.py``. The classes that the pre-trained model outputs are the following, -in order: +.. toctree:: + :maxdepth: 1 - .. code-block:: python + models/deeplabv3 + models/fcn + models/lraspp - ['__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', - 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', - 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] +| -The accuracies of the pre-trained models evaluated on COCO val2017 are as follows +Here is an example of how to use the pre-trained semantic segmentation models: -================================ ============= ==================== -Network mean IoU global pixelwise acc -================================ ============= ==================== -FCN ResNet101 63.7 91.9 -DeepLabV3 ResNet101 67.4 92.4 -================================ ============= ==================== +.. code:: python + from torchvision.io.image import decode_image + from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights + from torchvision.transforms.functional import to_pil_image -Fully Convolutional Networks ----------------------------- + img = decode_image("gallery/assets/dog1.jpg") -.. autofunction:: torchvision.models.segmentation.fcn_resnet50 -.. 
autofunction:: torchvision.models.segmentation.fcn_resnet101 + # Step 1: Initialize model with the best available weights + weights = FCN_ResNet50_Weights.DEFAULT + model = fcn_resnet50(weights=weights) + model.eval() + # Step 2: Initialize the inference transforms + preprocess = weights.transforms() -DeepLabV3 ---------- + # Step 3: Apply inference preprocessing transforms + batch = preprocess(img).unsqueeze(0) -.. autofunction:: torchvision.models.segmentation.deeplabv3_resnet50 -.. autofunction:: torchvision.models.segmentation.deeplabv3_resnet101 + # Step 4: Use the model and visualize the prediction + prediction = model(batch)["out"] + normalized_masks = prediction.softmax(dim=1) + class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])} + mask = normalized_masks[0, class_to_idx["dog"]] + to_pil_image(mask).show() +The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``. +The output format of the models is illustrated in :ref:`semantic_seg_output`. -Object Detection, Instance Segmentation and Person Keypoint Detection -===================================================================== -The models subpackage contains definitions for the following model -architectures for detection: +Table of all available semantic segmentation weights +---------------------------------------------------- -- `Faster R-CNN ResNet-50 FPN `_ -- `Mask R-CNN ResNet-50 FPN `_ +All models are evaluated on a subset of COCO val2017, on the 20 categories that are present in the Pascal VOC dataset: + +.. include:: generated/segmentation_table.rst + + +.. _object_det_inst_seg_pers_keypoint_det: + +Object Detection, Instance Segmentation and Person Keypoint Detection +===================================================================== The pre-trained models for detection, instance segmentation and keypoint detection are initialized with the classification models -in torchvision. 
- -The models expect a list of ``Tensor[C, H, W]``, in the range ``0-1``. -The models internally resize the images so that they have a minimum size -of ``800``. This option can be changed by passing the option ``min_size`` -to the constructor of the models. - - -For object detection and instance segmentation, the pre-trained -models return the predictions of the following classes: +in torchvision. The models expect a list of ``Tensor[C, H, W]``. +Check the constructor of the models for more information. + +.. betastatus:: detection module + +Object Detection +---------------- + +.. currentmodule:: torchvision.models.detection + +The following object detection models are available, with or without pre-trained +weights: + +.. toctree:: + :maxdepth: 1 + + models/faster_rcnn + models/fcos + models/retinanet + models/ssd + models/ssdlite + +| - .. code-block:: python +Here is an example of how to use the pre-trained object detection models: - COCO_INSTANCE_CATEGORY_NAMES = [ - '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', - 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', - 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', - 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', - 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', - 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', - 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', - 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' - ] +.. 
code:: python + + + from torchvision.io.image import decode_image + from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights + from torchvision.utils import draw_bounding_boxes + from torchvision.transforms.functional import to_pil_image + + img = decode_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg") + + # Step 1: Initialize model with the best available weights + weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT + model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9) + model.eval() + + # Step 2: Initialize the inference transforms + preprocess = weights.transforms() + + # Step 3: Apply inference preprocessing transforms + batch = [preprocess(img)] + # Step 4: Use the model and visualize the prediction + prediction = model(batch)[0] + labels = [weights.meta["categories"][i] for i in prediction["labels"]] + box = draw_bounding_boxes(img, boxes=prediction["boxes"], + labels=labels, + colors="red", + width=4, font_size=30) + im = to_pil_image(box.detach()) + im.show() -Here are the summary of the accuracies for the models trained on -the instances set of COCO train2017 and evaluated on COCO val2017. +The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``. +For details on how to plot the bounding boxes of the models, you may refer to :ref:`instance_seg_output`. 
-================================ ======= ======== =========== -Network box AP mask AP keypoint AP -================================ ======= ======== =========== -Faster R-CNN ResNet-50 FPN 37.0 - - -Mask R-CNN ResNet-50 FPN 37.9 34.6 - -================================ ======= ======== =========== +Table of all available Object detection weights +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For person keypoint detection, the accuracies for the pre-trained -models are as follows +Box MAPs are reported on COCO val2017: -================================ ======= ======== =========== -Network box AP mask AP keypoint AP -================================ ======= ======== =========== -Keypoint R-CNN ResNet-50 FPN 54.6 - 65.0 -================================ ======= ======== =========== +.. include:: generated/detection_table.rst -For person keypoint detection, the pre-trained model return the -keypoints in the following order: - .. code-block:: python +Instance Segmentation +--------------------- - COCO_PERSON_KEYPOINT_NAMES = [ - 'nose', - 'left_eye', - 'right_eye', - 'left_ear', - 'right_ear', - 'left_shoulder', - 'right_shoulder', - 'left_elbow', - 'right_elbow', - 'left_wrist', - 'right_wrist', - 'left_hip', - 'right_hip', - 'left_knee', - 'right_knee', - 'left_ankle', - 'right_ankle' - ] +.. currentmodule:: torchvision.models.detection -Runtime characteristics ------------------------ +The following instance segmentation models are available, with or without pre-trained +weights: -The implementations of the models for object detection, instance segmentation -and keypoint detection are efficient. +.. toctree:: + :maxdepth: 1 -In the following table, we use 8 V100 GPUs, with CUDA 10.0 and CUDNN 7.4 to -report the results. During training, we use a batch size of 2 per GPU, and -during testing a batch size of 1 is used. 
+ models/mask_rcnn -For test time, we report the time for the model evaluation and postprocessing -(including mask pasting in image), but not the time for computing the -precision-recall. +| -============================== =================== ================== =========== -Network train time (s / it) test time (s / it) memory (GB) -============================== =================== ================== =========== -Faster R-CNN ResNet-50 FPN 0.2288 0.0590 5.2 -Mask R-CNN ResNet-50 FPN 0.2728 0.0903 5.4 -Keypoint R-CNN ResNet-50 FPN 0.3789 0.1242 6.8 -============================== =================== ================== =========== +For details on how to plot the masks of the models, you may refer to :ref:`instance_seg_output`. -Faster R-CNN ------------- +Table of all available Instance segmentation weights +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. autofunction:: torchvision.models.detection.fasterrcnn_resnet50_fpn +Box and Mask MAPs are reported on COCO val2017: +.. include:: generated/instance_segmentation_table.rst -Mask R-CNN ----------- +Keypoint Detection +------------------ -.. autofunction:: torchvision.models.detection.maskrcnn_resnet50_fpn +.. currentmodule:: torchvision.models.detection +The following person keypoint detection models are available, with or without +pre-trained weights: -Keypoint R-CNN --------------- +.. toctree:: + :maxdepth: 1 -.. autofunction:: torchvision.models.detection.keypointrcnn_resnet50_fpn + models/keypoint_rcnn - -Video classification +| + +The classes of the pre-trained model outputs can be found at ``weights.meta["keypoint_names"]``. +For details on how to plot the bounding boxes of the models, you may refer to :ref:`keypoint_output`. + +Table of all available Keypoint detection weights +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Box and Keypoint MAPs are reported on COCO val2017: + +.. 
include:: generated/detection_keypoint_table.rst + + +Video Classification ==================== -We provide models for action recognition pre-trained on Kinetics-400. -They have all been trained with the scripts provided in ``references/video_classification``. +.. currentmodule:: torchvision.models.video -All pre-trained models expect input images normalized in the same way, -i.e. mini-batches of 3-channel RGB videos of shape (3 x T x H x W), -where H and W are expected to be 112, and T is a number of video frames in a clip. -The images have to be loaded in to a range of [0, 1] and then normalized -using ``mean = [0.43216, 0.394666, 0.37645]`` and ``std = [0.22803, 0.22145, 0.216989]``. +.. betastatus:: video module +The following video classification models are available, with or without +pre-trained weights: -.. note:: - The normalization parameters are different from the image classification ones, and correspond - to the mean and std from Kinetics-400. +.. toctree:: + :maxdepth: 1 -.. note:: - For now, normalization code can be found in ``references/video_classification/transforms.py``, - see the ``Normalize`` function there. Note that it differs from standard normalization for - images because it assumes the video is 4d. + models/video_mvit + models/video_resnet + models/video_s3d + models/video_swin_transformer + +| + +Here is an example of how to use the pre-trained video classification models: + +.. 
code:: python + + + from torchvision.io.video import read_video + from torchvision.models.video import r3d_18, R3D_18_Weights + + vid, _, _ = read_video("test/assets/videos/v_SoccerJuggling_g23_c01.avi", output_format="TCHW") + vid = vid[:32] # optionally shorten duration + + # Step 1: Initialize model with the best available weights + weights = R3D_18_Weights.DEFAULT + model = r3d_18(weights=weights) + model.eval() + + # Step 2: Initialize the inference transforms + preprocess = weights.transforms() + + # Step 3: Apply inference preprocessing transforms + batch = preprocess(vid).unsqueeze(0) + + # Step 4: Use the model and print the predicted category + prediction = model(batch).squeeze(0).softmax(0) + label = prediction.argmax().item() + score = prediction[label].item() + category_name = weights.meta["categories"][label] + print(f"{category_name}: {100 * score}%") + +The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``. -Kinetics 1-crop accuracies for clip length 16 (16x112x112) -================================ ============= ============= -Network Clip acc@1 Clip acc@5 -================================ ============= ============= -ResNet 3D 18 52.75 75.45 -ResNet MC 18 53.90 76.29 -ResNet (2+1)D 57.50 78.81 -================================ ============= ============= +Table of all available video classification weights +--------------------------------------------------- +Accuracies are reported on Kinetics-400 using single crops for clip length 16: -ResNet 3D ----------- +.. include:: generated/video_table.rst -.. autofunction:: torchvision.models.video.r3d_18 +Optical Flow +============ -ResNet Mixed Convolution ------------------------- +.. currentmodule:: torchvision.models.optical_flow -.. autofunction:: torchvision.models.video.mc3_18 +The following Optical Flow models are available, with or without pre-trained weights: -ResNet (2+1)D -------------- +.. toctree:: + :maxdepth: 1 -.. 
autofunction:: torchvision.models.video.r2plus1d_18 + models/raft diff --git a/docs/source/models/alexnet.rst b/docs/source/models/alexnet.rst new file mode 100644 index 00000000000..8e94b4eeed9 --- /dev/null +++ b/docs/source/models/alexnet.rst @@ -0,0 +1,28 @@ +AlexNet +======= + +.. currentmodule:: torchvision.models + +The AlexNet model was originally introduced in the +`ImageNet Classification with Deep Convolutional Neural Networks +`__ +paper. The implemented architecture is slightly different from the original one, +and is based on `One weird trick for parallelizing convolutional neural networks +`__. + + +Model builders +-------------- + +The following model builders can be used to instantiate an AlexNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.alexnet.AlexNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + alexnet diff --git a/docs/source/models/convnext.rst b/docs/source/models/convnext.rst new file mode 100644 index 00000000000..f484bf63d94 --- /dev/null +++ b/docs/source/models/convnext.rst @@ -0,0 +1,26 @@ +ConvNeXt +======== + +.. currentmodule:: torchvision.models + +The ConvNeXt model is based on the `A ConvNet for the 2020s +`_ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a ConvNeXt model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.convnext.ConvNeXt`` base class. Please refer to the `source code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + convnext_tiny + convnext_small + convnext_base + convnext_large diff --git a/docs/source/models/deeplabv3.rst b/docs/source/models/deeplabv3.rst new file mode 100644 index 00000000000..e6f21686081 --- /dev/null +++ b/docs/source/models/deeplabv3.rst @@ -0,0 +1,28 @@ +DeepLabV3 +========= + +.. currentmodule:: torchvision.models.segmentation + +The DeepLabV3 model is based on the `Rethinking Atrous Convolution for Semantic +Image Segmentation `__ paper. + +.. betastatus:: segmentation module + + +Model builders +-------------- + +The following model builders can be used to instantiate a DeepLabV3 model with +different backbones, with or without pre-trained weights. All the model builders +internally rely on the ``torchvision.models.segmentation.deeplabv3.DeepLabV3`` base class. Please +refer to the `source code +`_ +for more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + deeplabv3_mobilenet_v3_large + deeplabv3_resnet50 + deeplabv3_resnet101 diff --git a/docs/source/models/densenet.rst b/docs/source/models/densenet.rst new file mode 100644 index 00000000000..ee984886925 --- /dev/null +++ b/docs/source/models/densenet.rst @@ -0,0 +1,27 @@ +DenseNet +======== + +.. currentmodule:: torchvision.models + +The DenseNet model is based on the `Densely Connected Convolutional Networks +`_ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a DenseNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.densenet.DenseNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + densenet121 + densenet161 + densenet169 + densenet201 diff --git a/docs/source/models/efficientnet.rst b/docs/source/models/efficientnet.rst new file mode 100644 index 00000000000..cbc9718959a --- /dev/null +++ b/docs/source/models/efficientnet.rst @@ -0,0 +1,31 @@ +EfficientNet +============ + +.. currentmodule:: torchvision.models + +The EfficientNet model is based on the `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate an EfficientNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.efficientnet.EfficientNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + efficientnet_b0 + efficientnet_b1 + efficientnet_b2 + efficientnet_b3 + efficientnet_b4 + efficientnet_b5 + efficientnet_b6 + efficientnet_b7 diff --git a/docs/source/models/efficientnetv2.rst b/docs/source/models/efficientnetv2.rst new file mode 100644 index 00000000000..3066c28ebd4 --- /dev/null +++ b/docs/source/models/efficientnetv2.rst @@ -0,0 +1,26 @@ +EfficientNetV2 +============== + +.. currentmodule:: torchvision.models + +The EfficientNetV2 model is based on the `EfficientNetV2: Smaller Models and Faster Training `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate an EfficientNetV2 model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.efficientnet.EfficientNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + efficientnet_v2_s + efficientnet_v2_m + efficientnet_v2_l diff --git a/docs/source/models/faster_rcnn.rst b/docs/source/models/faster_rcnn.rst new file mode 100644 index 00000000000..19ec9227886 --- /dev/null +++ b/docs/source/models/faster_rcnn.rst @@ -0,0 +1,31 @@ +Faster R-CNN +============ + +.. currentmodule:: torchvision.models.detection + + +The Faster R-CNN model is based on the `Faster R-CNN: Towards Real-Time Object Detection +with Region Proposal Networks `__ +paper. + +.. betastatus:: detection module + +Model builders +-------------- + +The following model builders can be used to instantiate a Faster R-CNN model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.detection.faster_rcnn.FasterRCNN`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + fasterrcnn_resnet50_fpn + fasterrcnn_resnet50_fpn_v2 + fasterrcnn_mobilenet_v3_large_fpn + fasterrcnn_mobilenet_v3_large_320_fpn + diff --git a/docs/source/models/fcn.rst b/docs/source/models/fcn.rst new file mode 100644 index 00000000000..efcdb37c0d5 --- /dev/null +++ b/docs/source/models/fcn.rst @@ -0,0 +1,28 @@ +FCN +=== + +.. currentmodule:: torchvision.models.segmentation + +The FCN model is based on the `Fully Convolutional Networks for Semantic +Segmentation `__ +paper. + +.. betastatus:: segmentation module + + +Model builders +-------------- + +The following model builders can be used to instantiate a FCN model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.segmentation.FCN`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + fcn_resnet50 + fcn_resnet101 diff --git a/docs/source/models/fcos.rst b/docs/source/models/fcos.rst new file mode 100644 index 00000000000..085f26549b8 --- /dev/null +++ b/docs/source/models/fcos.rst @@ -0,0 +1,24 @@ +FCOS +========= + +.. currentmodule:: torchvision.models.detection + +The FCOS model is based on the `FCOS: Fully Convolutional One-Stage Object Detection +`__ paper. + +.. betastatus:: detection module + +Model builders +-------------- + +The following model builders can be used to instantiate a FCOS model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.detection.fcos.FCOS`` base class. Please refer to the `source code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + fcos_resnet50_fpn diff --git a/docs/source/models/googlenet.rst b/docs/source/models/googlenet.rst new file mode 100644 index 00000000000..91ea03ddf3d --- /dev/null +++ b/docs/source/models/googlenet.rst @@ -0,0 +1,24 @@ +GoogLeNet +========= + +.. currentmodule:: torchvision.models + +The GoogLeNet model is based on the `Going Deeper with Convolutions `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a GoogLeNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.googlenet.GoogLeNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + googlenet diff --git a/docs/source/models/googlenet_quant.rst b/docs/source/models/googlenet_quant.rst new file mode 100644 index 00000000000..4358389b3e5 --- /dev/null +++ b/docs/source/models/googlenet_quant.rst @@ -0,0 +1,24 @@ +Quantized GoogLeNet +=================== + +.. 
currentmodule:: torchvision.models.quantization + +The Quantized GoogLeNet model is based on the `Going Deeper with Convolutions `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a quantized GoogLeNet +model, with or without pre-trained weights. All the model builders internally +rely on the ``torchvision.models.quantization.googlenet.QuantizableGoogLeNet`` +base class. Please refer to the `source code +`_ +for more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + googlenet diff --git a/docs/source/models/inception.rst b/docs/source/models/inception.rst new file mode 100644 index 00000000000..e162eef5d30 --- /dev/null +++ b/docs/source/models/inception.rst @@ -0,0 +1,23 @@ +Inception V3 +============ + +.. currentmodule:: torchvision.models + +The InceptionV3 model is based on the `Rethinking the Inception Architecture for +Computer Vision `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate an InceptionV3 model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.inception.Inception3`` base class. Please refer to the `source +code `_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + inception_v3 diff --git a/docs/source/models/inception_quant.rst b/docs/source/models/inception_quant.rst new file mode 100644 index 00000000000..d26f1ab09da --- /dev/null +++ b/docs/source/models/inception_quant.rst @@ -0,0 +1,24 @@ +Quantized InceptionV3 +===================== + +.. currentmodule:: torchvision.models.quantization + +The Quantized Inception model is based on the `Rethinking the Inception Architecture for +Computer Vision `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a quantized Inception +model, with or without pre-trained weights. 
All the model builders internally +rely on the ``torchvision.models.quantization.inception.QuantizableInception3`` +base class. Please refer to the `source code +`_ +for more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + inception_v3 diff --git a/docs/source/models/keypoint_rcnn.rst b/docs/source/models/keypoint_rcnn.rst new file mode 100644 index 00000000000..ba677c7f8f3 --- /dev/null +++ b/docs/source/models/keypoint_rcnn.rst @@ -0,0 +1,26 @@ +Keypoint R-CNN +============== + +.. currentmodule:: torchvision.models.detection + +The Keypoint R-CNN model is based on the `Mask R-CNN +`__ paper. + +.. betastatus:: detection module + + +Model builders +-------------- + +The following model builders can be used to instantiate a Keypoint R-CNN model, +with or without pre-trained weights. All the model builders internally rely on +the ``torchvision.models.detection.KeypointRCNN`` base class. Please refer to the `source +code +`__ +for more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + keypointrcnn_resnet50_fpn diff --git a/docs/source/models/lraspp.rst b/docs/source/models/lraspp.rst new file mode 100644 index 00000000000..312249c53e1 --- /dev/null +++ b/docs/source/models/lraspp.rst @@ -0,0 +1,24 @@ +LRASPP +====== + +.. currentmodule:: torchvision.models.segmentation + +The LRASPP model is based on the `Searching for MobileNetV3 `_ paper. + +.. betastatus:: segmentation module + +Model builders +-------------- + +The following model builders can be used to instantiate an LRASPP model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.segmentation.LRASPP`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + lraspp_mobilenet_v3_large diff --git a/docs/source/models/mask_rcnn.rst b/docs/source/models/mask_rcnn.rst new file mode 100644 index 00000000000..5887b6c71a6 --- /dev/null +++ b/docs/source/models/mask_rcnn.rst @@ -0,0 +1,27 @@ +Mask R-CNN +========== + +.. currentmodule:: torchvision.models.detection + +The Mask R-CNN model is based on the `Mask R-CNN `__ +paper. + +.. betastatus:: detection module + + +Model builders +-------------- + +The following model builders can be used to instantiate a Mask R-CNN model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.detection.mask_rcnn.MaskRCNN`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + maskrcnn_resnet50_fpn + maskrcnn_resnet50_fpn_v2 diff --git a/docs/source/models/maxvit.rst b/docs/source/models/maxvit.rst new file mode 100644 index 00000000000..29aaaaab334 --- /dev/null +++ b/docs/source/models/maxvit.rst @@ -0,0 +1,23 @@ +MaxVit +=============== + +.. currentmodule:: torchvision.models + +The MaxVit transformer models are based on the `MaxViT: Multi-Axis Vision Transformer `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a MaxVit model, with or without pre-trained weights. +All the model builders internally rely on the ``torchvision.models.maxvit.MaxVit`` +base class. Please refer to the `source code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + maxvit_t diff --git a/docs/source/models/mnasnet.rst b/docs/source/models/mnasnet.rst new file mode 100644 index 00000000000..fd9ea511585 --- /dev/null +++ b/docs/source/models/mnasnet.rst @@ -0,0 +1,28 @@ +MNASNet +======= + +.. 
currentmodule:: torchvision.models + + +The MNASNet model is based on the `MnasNet: Platform-Aware Neural Architecture +Search for Mobile `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate an MNASNet model. +All the model builders internally rely on the +``torchvision.models.mnasnet.MNASNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + mnasnet0_5 + mnasnet0_75 + mnasnet1_0 + mnasnet1_3 diff --git a/docs/source/models/mobilenetv2.rst b/docs/source/models/mobilenetv2.rst new file mode 100644 index 00000000000..666dcce57ce --- /dev/null +++ b/docs/source/models/mobilenetv2.rst @@ -0,0 +1,24 @@ +MobileNet V2 +============ + +.. currentmodule:: torchvision.models + +The MobileNet V2 model is based on the `MobileNetV2: Inverted Residuals and Linear +Bottlenecks `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a MobileNetV2 model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.mobilenetv2.MobileNetV2`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + mobilenet_v2 diff --git a/docs/source/models/mobilenetv2_quant.rst b/docs/source/models/mobilenetv2_quant.rst new file mode 100644 index 00000000000..e5397378fab --- /dev/null +++ b/docs/source/models/mobilenetv2_quant.rst @@ -0,0 +1,24 @@ +Quantized MobileNet V2 +====================== + +.. currentmodule:: torchvision.models.quantization + +The Quantized MobileNet V2 model is based on the `MobileNetV2: Inverted Residuals and Linear +Bottlenecks `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a quantized MobileNetV2 +model, with or without pre-trained weights. 
All the model builders internally +rely on the ``torchvision.models.quantization.mobilenetv2.QuantizableMobileNetV2`` +base class. Please refer to the `source code +`_ +for more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + mobilenet_v2 diff --git a/docs/source/models/mobilenetv3.rst b/docs/source/models/mobilenetv3.rst new file mode 100644 index 00000000000..4322470286d --- /dev/null +++ b/docs/source/models/mobilenetv3.rst @@ -0,0 +1,24 @@ +MobileNet V3 +============ + +.. currentmodule:: torchvision.models + +The MobileNet V3 model is based on the `Searching for MobileNetV3 `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a MobileNetV3 model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.mobilenetv3.MobileNetV3`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + mobilenet_v3_large + mobilenet_v3_small diff --git a/docs/source/models/mobilenetv3_quant.rst b/docs/source/models/mobilenetv3_quant.rst new file mode 100644 index 00000000000..fe385b493e5 --- /dev/null +++ b/docs/source/models/mobilenetv3_quant.rst @@ -0,0 +1,23 @@ +Quantized MobileNet V3 +====================== + +.. currentmodule:: torchvision.models.quantization + +The Quantized MobileNet V3 model is based on the `Searching for MobileNetV3 `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a quantized MobileNetV3 +model, with or without pre-trained weights. All the model builders internally +rely on the ``torchvision.models.quantization.mobilenetv3.QuantizableMobileNetV3`` +base class. Please refer to the `source code +`_ +for more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + mobilenet_v3_large diff --git a/docs/source/models/raft.rst b/docs/source/models/raft.rst new file mode 100644 index 00000000000..7ea477698b4 --- /dev/null +++ b/docs/source/models/raft.rst @@ -0,0 +1,25 @@ +RAFT +==== + +.. currentmodule:: torchvision.models.optical_flow + +The RAFT model is based on the `RAFT: Recurrent All-Pairs Field Transforms for +Optical Flow `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a RAFT model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.optical_flow.RAFT`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + raft_large + raft_small diff --git a/docs/source/models/regnet.rst b/docs/source/models/regnet.rst new file mode 100644 index 00000000000..aef4abd2544 --- /dev/null +++ b/docs/source/models/regnet.rst @@ -0,0 +1,37 @@ +RegNet +====== + +.. currentmodule:: torchvision.models + +The RegNet model is based on the `Designing Network Design Spaces +`_ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a RegNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.regnet.RegNet`` base class. Please refer to the `source code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + regnet_y_400mf + regnet_y_800mf + regnet_y_1_6gf + regnet_y_3_2gf + regnet_y_8gf + regnet_y_16gf + regnet_y_32gf + regnet_y_128gf + regnet_x_400mf + regnet_x_800mf + regnet_x_1_6gf + regnet_x_3_2gf + regnet_x_8gf + regnet_x_16gf + regnet_x_32gf diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst new file mode 100644 index 00000000000..9d777f2f6b1 --- /dev/null +++ b/docs/source/models/resnet.rst @@ -0,0 +1,33 @@ +ResNet +====== + +.. currentmodule:: torchvision.models + +The ResNet model is based on the `Deep Residual Learning for Image Recognition +`_ paper. + +.. note:: + The bottleneck of TorchVision places the stride for downsampling to the second 3x3 + convolution while the original paper places it to the first 1x1 convolution. + This variant improves the accuracy and is known as `ResNet V1.5 + `_. + +Model builders +-------------- + +The following model builders can be used to instantiate a ResNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.resnet.ResNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + resnet18 + resnet34 + resnet50 + resnet101 + resnet152 diff --git a/docs/source/models/resnet_quant.rst b/docs/source/models/resnet_quant.rst new file mode 100644 index 00000000000..5609990646c --- /dev/null +++ b/docs/source/models/resnet_quant.rst @@ -0,0 +1,25 @@ +Quantized ResNet +================ + +.. currentmodule:: torchvision.models.quantization + +The Quantized ResNet model is based on the `Deep Residual Learning for Image Recognition +`_ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a quantized ResNet +model, with or without pre-trained weights. 
All the model builders internally +rely on the ``torchvision.models.quantization.resnet.QuantizableResNet`` +base class. Please refer to the `source code +`_ +for more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + resnet18 + resnet50 diff --git a/docs/source/models/resnext.rst b/docs/source/models/resnext.rst new file mode 100644 index 00000000000..5d8325d9b4b --- /dev/null +++ b/docs/source/models/resnext.rst @@ -0,0 +1,26 @@ +ResNeXt +======= + +.. currentmodule:: torchvision.models + +The ResNext model is based on the `Aggregated Residual Transformations for Deep Neural Networks `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a ResNext model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.resnet.ResNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + resnext50_32x4d + resnext101_32x8d + resnext101_64x4d diff --git a/docs/source/models/resnext_quant.rst b/docs/source/models/resnext_quant.rst new file mode 100644 index 00000000000..916b9e4a39a --- /dev/null +++ b/docs/source/models/resnext_quant.rst @@ -0,0 +1,25 @@ +Quantized ResNeXt +================= + +.. currentmodule:: torchvision.models.quantization + +The quantized ResNext model is based on the `Aggregated Residual Transformations for Deep Neural Networks `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a quantized ResNeXt +model, with or without pre-trained weights. All the model builders internally +rely on the ``torchvision.models.quantization.resnet.QuantizableResNet`` +base class. Please refer to the `source code +`_ +for more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + resnext101_32x8d + resnext101_64x4d diff --git a/docs/source/models/retinanet.rst b/docs/source/models/retinanet.rst new file mode 100644 index 00000000000..910692ef3a5 --- /dev/null +++ b/docs/source/models/retinanet.rst @@ -0,0 +1,25 @@ +RetinaNet +========= + +.. currentmodule:: torchvision.models.detection + +The RetinaNet model is based on the `Focal Loss for Dense Object Detection +`__ paper. + +.. betastatus:: detection module + +Model builders +-------------- + +The following model builders can be used to instantiate a RetinaNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.detection.retinanet.RetinaNet`` base class. Please refer to the `source code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + retinanet_resnet50_fpn + retinanet_resnet50_fpn_v2 diff --git a/docs/source/models/shufflenetv2.rst b/docs/source/models/shufflenetv2.rst new file mode 100644 index 00000000000..2cbe328ca8b --- /dev/null +++ b/docs/source/models/shufflenetv2.rst @@ -0,0 +1,27 @@ +ShuffleNet V2 +============= + +.. currentmodule:: torchvision.models + +The ShuffleNet V2 model is based on the `ShuffleNet V2: Practical Guidelines for Efficient +CNN Architecture Design `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a ShuffleNetV2 model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.shufflenetv2.ShuffleNetV2`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + shufflenet_v2_x0_5 + shufflenet_v2_x1_0 + shufflenet_v2_x1_5 + shufflenet_v2_x2_0 diff --git a/docs/source/models/shufflenetv2_quant.rst b/docs/source/models/shufflenetv2_quant.rst new file mode 100644 index 00000000000..4fa236d2565 --- /dev/null +++ b/docs/source/models/shufflenetv2_quant.rst @@ -0,0 +1,27 @@ +Quantized ShuffleNet V2 +======================= + +.. currentmodule:: torchvision.models.quantization + +The Quantized ShuffleNet V2 model is based on the `ShuffleNet V2: Practical Guidelines for Efficient +CNN Architecture Design `__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a quantized ShuffleNetV2 +model, with or without pre-trained weights. All the model builders internally rely +on the ``torchvision.models.quantization.shufflenetv2.QuantizableShuffleNetV2`` +base class. Please refer to the `source code +`_ +for more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + shufflenet_v2_x0_5 + shufflenet_v2_x1_0 + shufflenet_v2_x1_5 + shufflenet_v2_x2_0 diff --git a/docs/source/models/squeezenet.rst b/docs/source/models/squeezenet.rst new file mode 100644 index 00000000000..9771e5c623a --- /dev/null +++ b/docs/source/models/squeezenet.rst @@ -0,0 +1,26 @@ +SqueezeNet +========== + +.. currentmodule:: torchvision.models + +The SqueezeNet model is based on the `SqueezeNet: AlexNet-level accuracy with +50x fewer parameters and <0.5MB model size `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a SqueezeNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.squeezenet.SqueezeNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + squeezenet1_0 + squeezenet1_1 diff --git a/docs/source/models/ssd.rst b/docs/source/models/ssd.rst new file mode 100644 index 00000000000..68b0bb224df --- /dev/null +++ b/docs/source/models/ssd.rst @@ -0,0 +1,26 @@ +SSD +=== + +.. currentmodule:: torchvision.models.detection + +The SSD model is based on the `SSD: Single Shot MultiBox Detector +`__ paper. + +.. betastatus:: detection module + + +Model builders +-------------- + +The following model builders can be used to instantiate a SSD model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.detection.SSD`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + ssd300_vgg16 diff --git a/docs/source/models/ssdlite.rst b/docs/source/models/ssdlite.rst new file mode 100644 index 00000000000..7701d1c9f9f --- /dev/null +++ b/docs/source/models/ssdlite.rst @@ -0,0 +1,27 @@ +SSDlite +======= + +.. currentmodule:: torchvision.models.detection + +The SSDLite model is based on the `SSD: Single Shot MultiBox Detector +`__, `Searching for MobileNetV3 +`__ and `MobileNetV2: Inverted Residuals and Linear +Bottlenecks `__ papers. + +.. betastatus:: detection module + +Model builders +-------------- + +The following model builders can be used to instantiate a SSD Lite model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.detection.ssd.SSD`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + ssdlite320_mobilenet_v3_large diff --git a/docs/source/models/swin_transformer.rst b/docs/source/models/swin_transformer.rst new file mode 100644 index 00000000000..b302f5bd79d --- /dev/null +++ b/docs/source/models/swin_transformer.rst @@ -0,0 +1,32 @@ +SwinTransformer +=============== + +.. currentmodule:: torchvision.models + +The SwinTransformer models are based on the `Swin Transformer: Hierarchical Vision +Transformer using Shifted Windows `__ +paper. +SwinTransformer V2 models are based on the `Swin Transformer V2: Scaling Up Capacity +and Resolution `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a SwinTransformer model (original and V2), with or without pre-trained weights. +All the model builders internally rely on the ``torchvision.models.swin_transformer.SwinTransformer`` +base class. Please refer to the `source code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + swin_t + swin_s + swin_b + swin_v2_t + swin_v2_s + swin_v2_b diff --git a/docs/source/models/vgg.rst b/docs/source/models/vgg.rst new file mode 100644 index 00000000000..77b5686927c --- /dev/null +++ b/docs/source/models/vgg.rst @@ -0,0 +1,30 @@ +VGG +=== + +.. currentmodule:: torchvision.models + +The VGG model is based on the `Very Deep Convolutional Networks for Large-Scale +Image Recognition `_ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a VGG model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.vgg.VGG`` base class. Please refer to the `source code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + vgg11 + vgg11_bn + vgg13 + vgg13_bn + vgg16 + vgg16_bn + vgg19 + vgg19_bn diff --git a/docs/source/models/video_mvit.rst b/docs/source/models/video_mvit.rst new file mode 100644 index 00000000000..cd23754b7bb --- /dev/null +++ b/docs/source/models/video_mvit.rst @@ -0,0 +1,27 @@ +Video MViT +========== + +.. currentmodule:: torchvision.models.video + +The MViT model is based on the +`MViTv2: Improved Multiscale Vision Transformers for Classification and Detection +`__ and `Multiscale Vision Transformers +`__ papers. + + +Model builders +-------------- + +The following model builders can be used to instantiate a MViT v1 or v2 model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.video.MViT`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + mvit_v1_b + mvit_v2_s diff --git a/docs/source/models/video_resnet.rst b/docs/source/models/video_resnet.rst new file mode 100644 index 00000000000..ecb707b4eeb --- /dev/null +++ b/docs/source/models/video_resnet.rst @@ -0,0 +1,28 @@ +Video ResNet +============ + +.. currentmodule:: torchvision.models.video + +The VideoResNet model is based on the `A Closer Look at Spatiotemporal +Convolutions for Action Recognition `__ paper. + +.. betastatus:: video module + + +Model builders +-------------- + +The following model builders can be used to instantiate a VideoResNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.video.resnet.VideoResNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + r3d_18 + mc3_18 + r2plus1d_18 diff --git a/docs/source/models/video_s3d.rst b/docs/source/models/video_s3d.rst new file mode 100644 index 00000000000..0d66c55487c --- /dev/null +++ b/docs/source/models/video_s3d.rst @@ -0,0 +1,25 @@ +Video S3D +========= + +.. currentmodule:: torchvision.models.video + +The S3D model is based on the +`Rethinking Spatiotemporal Feature Learning: Speed-Accuracy Trade-offs in Video Classification +`__ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate an S3D model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.video.S3D`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + s3d diff --git a/docs/source/models/video_swin_transformer.rst b/docs/source/models/video_swin_transformer.rst new file mode 100644 index 00000000000..e31e69759b4 --- /dev/null +++ b/docs/source/models/video_swin_transformer.rst @@ -0,0 +1,27 @@ +Video SwinTransformer +===================== + +.. currentmodule:: torchvision.models.video + +The Video SwinTransformer model is based on the `Video Swin Transformer `__ paper. + +.. betastatus:: video module + + +Model builders +-------------- + +The following model builders can be used to instantiate a Video SwinTransformer model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.video.swin_transformer.SwinTransformer3d`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + swin3d_t + swin3d_s + swin3d_b diff --git a/docs/source/models/vision_transformer.rst b/docs/source/models/vision_transformer.rst new file mode 100644 index 00000000000..914caa9311e --- /dev/null +++ b/docs/source/models/vision_transformer.rst @@ -0,0 +1,28 @@ +VisionTransformer +================= + +.. currentmodule:: torchvision.models + +The VisionTransformer model is based on the `An Image is Worth 16x16 Words: +Transformers for Image Recognition at Scale `_ paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a VisionTransformer model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.vision_transformer.VisionTransformer`` base class. +Please refer to the `source code +`_ for +more details about this class. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + vit_b_16 + vit_b_32 + vit_l_16 + vit_l_32 + vit_h_14 diff --git a/docs/source/models/wide_resnet.rst b/docs/source/models/wide_resnet.rst new file mode 100644 index 00000000000..9768355c77e --- /dev/null +++ b/docs/source/models/wide_resnet.rst @@ -0,0 +1,25 @@ +Wide ResNet +=========== + +.. currentmodule:: torchvision.models + +The Wide ResNet model is based on the `Wide Residual Networks `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a Wide ResNet model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.resnet.ResNet`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + wide_resnet50_2 + wide_resnet101_2 diff --git a/docs/source/ops.rst b/docs/source/ops.rst index ec87d02556e..7124c85bb79 100644 --- a/docs/source/ops.rst +++ b/docs/source/ops.rst @@ -1,17 +1,103 @@ -torchvision.ops -=============== +.. _ops: + +Operators +========= .. currentmodule:: torchvision.ops -:mod:`torchvision.ops` implements operators that are specific for Computer Vision. +:mod:`torchvision.ops` implements operators, losses and layers that are specific for Computer Vision. .. note:: - Those operators currently do not support TorchScript. + All operators have native support for TorchScript. + + +Detection and Segmentation Operators +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The below operators perform pre-processing as well as post-processing required in object detection and segmentation models. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + batched_nms + masks_to_boxes + nms + roi_align + roi_pool + ps_roi_align + ps_roi_pool + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + FeaturePyramidNetwork + MultiScaleRoIAlign + RoIAlign + RoIPool + PSRoIAlign + PSRoIPool + + +Box Operators +~~~~~~~~~~~~~ + +These utility functions perform various operations on bounding boxes. + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + box_area + box_convert + box_iou + clip_boxes_to_image + complete_box_iou + distance_box_iou + generalized_box_iou + remove_small_boxes + +Losses +~~~~~~ + +The following vision-specific loss functions are implemented: + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + complete_box_iou_loss + distance_box_iou_loss + generalized_box_iou_loss + sigmoid_focal_loss + + +Layers +~~~~~~ + +TorchVision provides commonly used building blocks as layers: + +.. 
autosummary:: + :toctree: generated/ + :template: class.rst + Conv2dNormActivation + Conv3dNormActivation + DeformConv2d + DropBlock2d + DropBlock3d + FrozenBatchNorm2d + MLP + Permute + SqueezeExcitation + StochasticDepth -.. autofunction:: nms -.. autofunction:: roi_align -.. autofunction:: roi_pool +.. autosummary:: + :toctree: generated/ + :template: function.rst -.. autoclass:: RoIAlign -.. autoclass:: RoIPool + deform_conv2d + drop_block2d + drop_block3d + stochastic_depth diff --git a/docs/source/training_references.rst b/docs/source/training_references.rst new file mode 100644 index 00000000000..fc22ac5eba6 --- /dev/null +++ b/docs/source/training_references.rst @@ -0,0 +1,29 @@ +Training references +=================== + +On top of the many models, datasets, and image transforms, Torchvision also +provides training reference scripts. These are the scripts that we use to train +the :ref:`models ` which are then available with pre-trained weights. + +These scripts are not part of the core package and are instead available `on +GitHub `_. We currently +provide references for +`classification `_, +`detection `_, +`segmentation `_, +`similarity learning `_, +and `video classification `_. + +While these scripts are largely stable, they do not offer backward compatibility +guarantees. + +In general, these scripts rely on the latest (not yet released) pytorch version +or the latest torchvision version. This means that to use them, **you might need +to install the latest pytorch and torchvision versions**, with e.g.:: + + conda install pytorch torchvision -c pytorch-nightly + +If you need to rely on an older stable version of pytorch or torchvision, e.g. +torchvision 0.10, then it's safer to use the scripts from that corresponding +release on GitHub, namely +https://github.com/pytorch/vision/tree/v0.10.0/references. 
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 2e0c6cefb8d..d2fed552c4f 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -1,132 +1,676 @@ -torchvision.transforms -====================== +.. _transforms: + +Transforming and augmenting images +================================== .. currentmodule:: torchvision.transforms -Transforms are common image transformations. They can be chained together using :class:`Compose`. -Additionally, there is the :mod:`torchvision.transforms.functional` module. -Functional transforms give fine-grained control over the transformations. -This is useful if you have to build a more complex transformation pipeline -(e.g. in the case of segmentation tasks). +Torchvision supports common computer vision transformations in the +``torchvision.transforms`` and ``torchvision.transforms.v2`` modules. Transforms +can be used to transform or augment data for training or inference of different +tasks (image classification, detection, segmentation, video classification). -.. autoclass:: Compose +.. code:: python -Transforms on PIL Image ------------------------ + # Image Classification + import torch + from torchvision.transforms import v2 -.. autoclass:: CenterCrop + H, W = 32, 32 + img = torch.randint(0, 256, size=(3, H, W), dtype=torch.uint8) -.. autoclass:: ColorJitter + transforms = v2.Compose([ + v2.RandomResizedCrop(size=(224, 224), antialias=True), + v2.RandomHorizontalFlip(p=0.5), + v2.ToDtype(torch.float32, scale=True), + v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + img = transforms(img) -.. autoclass:: FiveCrop +.. code:: python -.. 
autoclass:: Grayscale + # Detection (re-using imports and transforms from above) + from torchvision import tv_tensors + + img = torch.randint(0, 256, size=(3, H, W), dtype=torch.uint8) + boxes = torch.randint(0, H // 2, size=(3, 4)) + boxes[:, 2:] += boxes[:, :2] + boxes = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=(H, W)) + + # The same transforms can be used! + img, boxes = transforms(img, boxes) + # And you can pass arbitrary input structures + output_dict = transforms({"image": img, "boxes": boxes}) + +Transforms are typically passed as the ``transform`` or ``transforms`` argument +to the :ref:`Datasets `. -.. autoclass:: Pad +Start here +---------- -.. autoclass:: RandomAffine +Whether you're new to Torchvision transforms, or you're already experienced with +them, we encourage you to start with +:ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py` in +order to learn more about what can be done with the new v2 transforms. -.. autoclass:: RandomApply +Then, browse the sections below on this page for general information and +performance tips. The available transforms and functionals are listed in the +:ref:`API reference `. -.. autoclass:: RandomChoice +More information and tutorials can also be found in our :ref:`example gallery +`, e.g. :ref:`sphx_glr_auto_examples_transforms_plot_transforms_e2e.py` +or :ref:`sphx_glr_auto_examples_transforms_plot_custom_transforms.py`. -.. autoclass:: RandomCrop +.. _conventions: -.. autoclass:: RandomGrayscale +Supported input types and conventions +------------------------------------- -.. autoclass:: RandomHorizontalFlip +Most transformations accept both `PIL `_ images +and tensor inputs. Both CPU and CUDA tensors are supported. +The result of both backends (PIL or Tensors) should be very +close. In general, we recommend relying on the tensor backend :ref:`for +performance `. 
The :ref:`conversion transforms +` may be used to convert to and from PIL images, or for +converting dtypes and ranges. -.. autoclass:: RandomOrder +Tensor images are expected to be of shape ``(C, H, W)``, where ``C`` is the +number of channels, and ``H`` and ``W`` refer to height and width. Most +transforms support batched tensor input. A batch of Tensor images is a tensor of +shape ``(N, C, H, W)``, where ``N`` is the number of images in the batch. The +:ref:`v2 ` transforms generally accept an arbitrary number of leading +dimensions ``(..., C, H, W)`` and can handle batched images or batched videos. -.. autoclass:: RandomPerspective +.. _range_and_dtype: -.. autoclass:: RandomResizedCrop +Dtype and expected value range +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: RandomRotation +The expected range of the values of a tensor image is implicitly defined by +the tensor dtype. Tensor images with a float dtype are expected to have +values in ``[0, 1]``. Tensor images with an integer dtype are expected to +have values in ``[0, MAX_DTYPE]`` where ``MAX_DTYPE`` is the largest value +that can be represented in that dtype. Typically, images of dtype +``torch.uint8`` are expected to have values in ``[0, 255]``. -.. autoclass:: RandomSizedCrop +Use :class:`~torchvision.transforms.v2.ToDtype` to convert both the dtype and +range of the inputs. -.. autoclass:: RandomVerticalFlip +.. _v1_or_v2: -.. autoclass:: Resize +V1 or V2? Which one should I use? +--------------------------------- -.. autoclass:: Scale +**TL;DR** We recommend using the ``torchvision.transforms.v2`` transforms +instead of those in ``torchvision.transforms``. They're faster and they can do +more things. Just change the import and you should be good to go. Moving +forward, new features and improvements will only be considered for the v2 +transforms. -.. autoclass:: TenCrop +In Torchvision 0.15 (March 2023), we released a new set of transforms available +in the ``torchvision.transforms.v2`` namespace. 
These transforms have a lot of +advantages compared to the v1 ones (in ``torchvision.transforms``): -Transforms on torch.\*Tensor ----------------------------- +- They can transform images **but also** bounding boxes, masks, or videos. This + provides support for tasks beyond image classification: detection, segmentation, + video classification, etc. See + :ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py` + and :ref:`sphx_glr_auto_examples_transforms_plot_transforms_e2e.py`. +- They support more transforms like :class:`~torchvision.transforms.v2.CutMix` + and :class:`~torchvision.transforms.v2.MixUp`. See + :ref:`sphx_glr_auto_examples_transforms_plot_cutmix_mixup.py`. +- They're :ref:`faster `. +- They support arbitrary input structures (dicts, lists, tuples, etc.). +- Future improvements and features will be added to the v2 transforms only. -.. autoclass:: LinearTransformation +These transforms are **fully backward compatible** with the v1 ones, so if +you're already using transforms from ``torchvision.transforms``, all you need to +do is update the import to ``torchvision.transforms.v2``. In terms of +output, there might be negligible differences due to implementation differences. -.. autoclass:: Normalize - :members: __call__ - :special-members: +.. _transforms_perf: + +Performance considerations +-------------------------- + +We recommend the following guidelines to get the best performance out of the +transforms: + +- Rely on the v2 transforms from ``torchvision.transforms.v2`` +- Use tensors instead of PIL images +- Use ``torch.uint8`` dtype, especially for resizing +- Resize with bilinear or bicubic mode -.. autoclass:: RandomErasing +This is what a typical transform pipeline could look like: -Conversion Transforms ---------------------- +.. code:: python -.. 
autoclass:: ToPILImage - :members: __call__ - :special-members: + from torchvision.transforms import v2 + transforms = v2.Compose([ + v2.ToImage(), # Convert to tensor, only needed if you had a PIL image + v2.ToDtype(torch.uint8, scale=True), # optional, most inputs are already uint8 at this point + # ... + v2.RandomResizedCrop(size=(224, 224), antialias=True), # Or Resize(antialias=True) + # ... + v2.ToDtype(torch.float32, scale=True), # Normalize expects float input + v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + +The above should give you the best performance in a typical training environment +that relies on the :class:`torch.utils.data.DataLoader` with ``num_workers > +0``. + +Transforms tend to be sensitive to the input strides / memory format. Some +transforms will be faster with channels-first images while others prefer +channels-last. Like ``torch`` operators, most transforms will preserve the +memory format of the input, but this may not always be respected due to +implementation details. You may want to experiment a bit if you're chasing the +very best performance. Using :func:`torch.compile` on individual transforms may +also help factor out the memory format variable (e.g. on +:class:`~torchvision.transforms.v2.Normalize`). Note that we're talking about +**memory format**, not :ref:`tensor shape `. + +Note that resize transforms like :class:`~torchvision.transforms.v2.Resize` +and :class:`~torchvision.transforms.v2.RandomResizedCrop` typically prefer +channels-last input and tend **not** to benefit from :func:`torch.compile` at +this time. + +.. _functional_transforms: + +Transform classes, functionals, and kernels +------------------------------------------- + +Transforms are available as classes like +:class:`~torchvision.transforms.v2.Resize`, but also as functionals like +:func:`~torchvision.transforms.v2.functional.resize` in the +``torchvision.transforms.v2.functional`` namespace. 
+This is very much like the :mod:`torch.nn` package which defines both classes +and functional equivalents in :mod:`torch.nn.functional`. + +The functionals support PIL images, pure tensors, or :ref:`TVTensors +<tv_tensors>`, e.g. both ``resize(image_tensor)`` and ``resize(boxes)`` are +valid. + +.. note:: + + Random transforms like :class:`~torchvision.transforms.v2.RandomCrop` will + randomly sample some parameter each time they're called. Their functional + counterpart (:func:`~torchvision.transforms.v2.functional.crop`) does not do + any kind of random sampling and thus has a slightly different + parametrization. The ``get_params()`` class method of the transforms class + can be used to perform parameter sampling when using the functional APIs. + + +The ``torchvision.transforms.v2.functional`` namespace also contains what we +call the "kernels". These are the low-level functions that implement the +core functionalities for specific types, e.g. ``resize_bounding_boxes`` or +``resized_crop_mask``. They are public, although not documented. Check the +`code +`_ +to see which ones are available (note that those starting with a leading +underscore are **not** public!). Kernels are only really useful if you want +:ref:`torchscript support <transforms_torchscript>` for types like bounding +boxes or masks. + +.. _transforms_torchscript: + +Torchscript support +------------------- + +Most transform classes and functionals support torchscript. For composing +transforms, use :class:`torch.nn.Sequential` instead of +:class:`~torchvision.transforms.v2.Compose`: -.. autoclass:: ToTensor - :members: __call__ - :special-members: +.. code:: python -Generic Transforms ------------------- + transforms = torch.nn.Sequential( + CenterCrop(10), + Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ) + scripted_transforms = torch.jit.script(transforms) -.. autoclass:: Lambda +.. 
warning:: + v2 transforms support torchscript, but if you call ``torch.jit.script()`` on + a v2 **class** transform, you'll actually end up with its (scripted) v1 + equivalent. This may lead to slightly different results between the + scripted and eager executions due to implementation differences between v1 + and v2. -Functional Transforms ---------------------- + If you really need torchscript support for the v2 transforms, we recommend + scripting the **functionals** from the + ``torchvision.transforms.v2.functional`` namespace to avoid surprises. -Functional transforms give you fine-grained control of the transformation pipeline. -As opposed to the transformations above, functional transforms don't contain a random number -generator for their parameters. -That means you have to specify/generate all parameters, but you can reuse the functional transform. -Example: -you can apply a functional transform with the same parameters to multiple images like this: +Also note that the functionals only support torchscript for pure tensors, which +are always treated as images. If you need torchscript support for other types +like bounding boxes or masks, you can rely on the :ref:`low-level kernels +<functional_transforms>`. -.. code:: python +For any custom transformations to be used with ``torch.jit.script``, they should +be derived from ``torch.nn.Module``. - import torchvision.transforms.functional as TF - import random +See also: :ref:`sphx_glr_auto_examples_others_plot_scripted_tensor_transforms.py`. - def my_segmentation_transforms(image, segmentation): - if random.random() > 0.5: - angle = random.randint(-30, 30) - image = TF.rotate(image, angle) - segmentation = TF.rotate(segmentation, angle) - # more transforms ... - return image, segmentation +.. _v2_api_ref: + +V2 API reference - Recommended +------------------------------ + +Geometry +^^^^^^^^ +Resizing +"""""""" + +.. 
autosummary:: + :toctree: generated/ + :template: class.rst + + v2.Resize + v2.ScaleJitter + v2.RandomShortestSize + v2.RandomResize + +Functionals + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + v2.functional.resize + +Cropping +"""""""" + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.RandomCrop + v2.RandomResizedCrop + v2.RandomIoUCrop + v2.CenterCrop + v2.FiveCrop + v2.TenCrop + +Functionals + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + v2.functional.crop + v2.functional.resized_crop + v2.functional.ten_crop + v2.functional.center_crop + v2.functional.five_crop + +Others +"""""" + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.RandomHorizontalFlip + v2.RandomVerticalFlip + v2.Pad + v2.RandomZoomOut + v2.RandomRotation + v2.RandomAffine + v2.RandomPerspective + v2.ElasticTransform + +Functionals + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + v2.functional.horizontal_flip + v2.functional.vertical_flip + v2.functional.pad + v2.functional.rotate + v2.functional.affine + v2.functional.perspective + v2.functional.elastic + +Color +^^^^^ + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.ColorJitter + v2.RandomChannelPermutation + v2.RandomPhotometricDistort + v2.Grayscale + v2.RGB + v2.RandomGrayscale + v2.GaussianBlur + v2.GaussianNoise + v2.RandomInvert + v2.RandomPosterize + v2.RandomSolarize + v2.RandomAdjustSharpness + v2.RandomAutocontrast + v2.RandomEqualize + +Functionals + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + v2.functional.permute_channels + v2.functional.rgb_to_grayscale + v2.functional.grayscale_to_rgb + v2.functional.to_grayscale + v2.functional.gaussian_blur + v2.functional.gaussian_noise + v2.functional.invert + v2.functional.posterize + v2.functional.solarize + v2.functional.adjust_sharpness + v2.functional.autocontrast + v2.functional.adjust_contrast + v2.functional.equalize + v2.functional.adjust_brightness + v2.functional.adjust_saturation + v2.functional.adjust_hue + v2.functional.adjust_gamma + + +Composition +^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.Compose + v2.RandomApply + v2.RandomChoice + v2.RandomOrder + +Miscellaneous +^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.LinearTransformation + v2.Normalize + v2.RandomErasing + v2.Lambda + v2.SanitizeBoundingBoxes + v2.ClampBoundingBoxes + v2.UniformTemporalSubsample + v2.JPEG + +Functionals + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + v2.functional.normalize + v2.functional.erase + v2.functional.sanitize_bounding_boxes + v2.functional.clamp_bounding_boxes + v2.functional.uniform_temporal_subsample + v2.functional.jpeg + +.. _conversion_transforms: + +Conversion +^^^^^^^^^^ + +.. note:: + Beware, some of these conversion transforms below will scale the values + while performing the conversion, while some may not do any scaling. By + scaling, we mean e.g. that a ``uint8`` -> ``float32`` would map the [0, + 255] range into [0, 1] (and vice-versa). See :ref:`range_and_dtype`. + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.ToImage + v2.ToPureTensor + v2.PILToTensor + v2.ToPILImage + v2.ToDtype + v2.ConvertBoundingBoxFormat + +functionals + +.. 
autosummary:: + :toctree: generated/ + :template: functional.rst + + v2.functional.to_image + v2.functional.pil_to_tensor + v2.functional.to_pil_image + v2.functional.to_dtype + v2.functional.convert_bounding_box_format + + +Deprecated + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.ToTensor + v2.functional.to_tensor + v2.ConvertImageDtype + v2.functional.convert_image_dtype + +Auto-Augmentation +^^^^^^^^^^^^^^^^^ + +`AutoAugment `_ is a common Data Augmentation technique that can improve the accuracy of Image Classification models. +Though the data augmentation policies are directly linked to their trained dataset, empirical studies show that +ImageNet policies provide significant improvements when applied to other datasets. +In TorchVision we implemented 3 policies learned on the following datasets: ImageNet, CIFAR10 and SVHN. +The new transform can be used standalone or mixed-and-matched with existing transforms: + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.AutoAugment + v2.RandAugment + v2.TrivialAugmentWide + v2.AugMix + + +CutMix - MixUp +^^^^^^^^^^^^^^ + +CutMix and MixUp are special transforms that +are meant to be used on batches rather than on individual images, because they +are combining pairs of images together. These can be used after the dataloader +(once the samples are batched), or part of a collation function. See +:ref:`sphx_glr_auto_examples_transforms_plot_cutmix_mixup.py` for detailed usage examples. + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.CutMix + v2.MixUp + +Developer tools +^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + v2.Transform + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + v2.functional.register_kernel + v2.query_size + v2.query_chw + v2.get_bounding_boxes + + +V1 API Reference +---------------- + +Geometry +^^^^^^^^ + +.. 
autosummary:: + :toctree: generated/ + :template: class.rst -Example: -you can use a functional transform to build transform classes with custom behavior: + Resize + RandomCrop + RandomResizedCrop + CenterCrop + FiveCrop + TenCrop + Pad + RandomRotation + RandomAffine + RandomPerspective + ElasticTransform + RandomHorizontalFlip + RandomVerticalFlip -.. code:: python - import torchvision.transforms.functional as TF - import random +Color +^^^^^ - class MyRotationTransform: - """Rotate by one of the given angles.""" +.. autosummary:: + :toctree: generated/ + :template: class.rst - def __init__(self, angles): - self.angles = angles + ColorJitter + Grayscale + RandomGrayscale + GaussianBlur + RandomInvert + RandomPosterize + RandomSolarize + RandomAdjustSharpness + RandomAutocontrast + RandomEqualize + +Composition +^^^^^^^^^^^ - def __call__(self, x): - angle = random.choice(self.angles) - return TF.rotate(x, angle) +.. autosummary:: + :toctree: generated/ + :template: class.rst + + Compose + RandomApply + RandomChoice + RandomOrder - rotation_transform = MyRotationTransform(angles=[-30, -15, 0, 15, 30]) +Miscellaneous +^^^^^^^^^^^^^ +.. autosummary:: + :toctree: generated/ + :template: class.rst -.. automodule:: torchvision.transforms.functional - :members: + LinearTransformation + Normalize + RandomErasing + Lambda + +Conversion +^^^^^^^^^^ + +.. note:: + Beware, some of these conversion transforms below will scale the values + while performing the conversion, while some may not do any scaling. By + scaling, we mean e.g. that a ``uint8`` -> ``float32`` would map the [0, + 255] range into [0, 1] (and vice-versa). See :ref:`range_and_dtype`. + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + ToPILImage + ToTensor + PILToTensor + ConvertImageDtype + +Auto-Augmentation +^^^^^^^^^^^^^^^^^ + +`AutoAugment `_ is a common Data Augmentation technique that can improve the accuracy of Image Classification models. 
+Though the data augmentation policies are directly linked to their trained dataset, empirical studies show that +ImageNet policies provide significant improvements when applied to other datasets. +In TorchVision we implemented 3 policies learned on the following datasets: ImageNet, CIFAR10 and SVHN. +The new transform can be used standalone or mixed-and-matched with existing transforms: + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + AutoAugmentPolicy + AutoAugment + RandAugment + TrivialAugmentWide + AugMix + + + +Functional Transforms +^^^^^^^^^^^^^^^^^^^^^ + +.. currentmodule:: torchvision.transforms.functional + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + adjust_brightness + adjust_contrast + adjust_gamma + adjust_hue + adjust_saturation + adjust_sharpness + affine + autocontrast + center_crop + convert_image_dtype + crop + equalize + erase + five_crop + gaussian_blur + get_dimensions + get_image_num_channels + get_image_size + hflip + invert + normalize + pad + perspective + pil_to_tensor + posterize + resize + resized_crop + rgb_to_grayscale + rotate + solarize + ten_crop + to_grayscale + to_pil_image + to_tensor + vflip diff --git a/docs/source/tv_tensors.rst b/docs/source/tv_tensors.rst new file mode 100644 index 00000000000..cb8a3c45fa9 --- /dev/null +++ b/docs/source/tv_tensors.rst @@ -0,0 +1,29 @@ +.. _tv_tensors: + +TVTensors +========== + +.. currentmodule:: torchvision.tv_tensors + +TVTensors are :class:`torch.Tensor` subclasses which the v2 :ref:`transforms +` use under the hood to dispatch their inputs to the appropriate +lower-level kernels. Most users do not need to manipulate TVTensors directly. + +Refer to +:ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py` for +an introduction to TVTensors, or +:ref:`sphx_glr_auto_examples_transforms_plot_tv_tensors.py` for more advanced +info. + +.. 
autosummary:: + :toctree: generated/ + :template: class.rst + + Image + Video + BoundingBoxFormat + BoundingBoxes + Mask + TVTensor + set_return_type + wrap diff --git a/docs/source/utils.rst b/docs/source/utils.rst index ad2fc91c897..cda04de900a 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -1,9 +1,20 @@ -torchvision.utils -================= +.. _utils: -.. currentmodule:: torchvision.utils +Utils +===== + +The ``torchvision.utils`` module contains various utilities, mostly :ref:`for +visualization `. -.. autofunction:: make_grid +.. currentmodule:: torchvision.utils -.. autofunction:: save_image +.. autosummary:: + :toctree: generated/ + :template: function.rst + draw_bounding_boxes + draw_segmentation_masks + draw_keypoints + flow_to_image + make_grid + save_image diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt new file mode 100644 index 00000000000..a1329b0c968 --- /dev/null +++ b/examples/cpp/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.10) +project(run_model) + +option(USE_TORCHVISION "Whether to link to torchvision" OFF) + +find_package(Torch REQUIRED) +if(USE_TORCHVISION) + find_package(TorchVision REQUIRED) +endif() + +add_executable(run_model run_model.cpp) + +target_link_libraries(run_model "${TORCH_LIBRARIES}") +if(USE_TORCHVISION) + target_link_libraries(run_model TorchVision::TorchVision) +endif() + +set_property(TARGET run_model PROPERTY CXX_STANDARD 17) diff --git a/examples/cpp/README.md b/examples/cpp/README.md new file mode 100644 index 00000000000..b2a9174c8ba --- /dev/null +++ b/examples/cpp/README.md @@ -0,0 +1,101 @@ +Using torchvision models in C++ +=============================== + +This is a minimal example of getting TorchVision models to work in C++ with +Torchscript. The model is first scripted in Python and exported to a file, and +then loaded in C++. For a similar tutorial, see [this +tutorial](https://pytorch.org/tutorials/advanced/cpp_export.html). 
+ +In order to successfully compile this example, make sure you have ``LibTorch`` +installed. You can either: + +- Install PyTorch normally +- Or download the LibTorch C++ distribution. + +In both cases refer [here](https://pytorch.org/get-started/locally/) for the +corresponding install or download instructions. + +Some torchvision models only depend on PyTorch operators, and can be used in C++ +without depending on the torchvision lib. Other models rely on torchvision's C++ +operators like NMS, RoiAlign (typically the detection models) and those need to +be linked against the torchvision lib. + +We'll first see the simpler case of running a model without the torchvision lib +dependency. + +Running a model that doesn't need torchvision lib +------------------------------------------------- + +Create a ``build`` directory inside the current one. + +```bash +mkdir build +cd build +``` + +Then run `python ../script_model.py` which should create a `resnet18.pt` file in +the build directory. This is the scripted model that will be used in the C++ +code. + +We can now start building with CMake. We have to tell CMake where it can find +the necessary PyTorch resources. If you installed PyTorch normally, you can do: + +```bash +TORCH_PATH=$(python -c "import pathlib, torch; print(pathlib.Path(torch.__path__[0]))") +Torch_DIR="${TORCH_PATH}/share/cmake/Torch" # there should be .cmake files in there + +cmake .. -DTorch_DIR=$Torch_DIR +``` + +If instead you downloaded the LibTorch somewhere, you can do: + +```bash +cmake .. -DCMAKE_PREFIX_PATH=/path/to/libtorch +``` + +Then `cmake --build .` and you should now be able to run + +```bash +./run_model resnet18.pt +``` + +If you try to run the executable with a model that depends on the torchvision lib, like +`./run_model fasterrcnn_resnet50_fpn.pt`, you should get a runtime error. This is +because the executable wasn't linked against the torchvision lib. 
+ + +Running a model that needs torchvision lib +------------------------------------------ + +First, we need to build the torchvision lib. To build the torchvision lib go to +the root of the torchvision project and run: + +```bash +mkdir build +cd build +cmake .. -DCMAKE_PREFIX_PATH=/path/to/libtorch # or -DTorch_DIR= if you installed PyTorch normally, see above +cmake --build . +cmake --install . +``` + +You may want to pass `-DCMAKE_INSTALL_PREFIX=/path/to/libtorchvision` for +cmake to copy/install the files to a specific location (e.g. `$CONDA_PREFIX`). + +**DISCLAIMER**: the `libtorchvision` library includes the torchvision +custom ops as well as most of the C++ torchvision APIs. Those APIs do not come +with any backward-compatibility guarantees and may change from one version to +the next. Only the Python APIs are stable and with backward-compatibility +guarantees. So, if you need stability within a C++ environment, your best bet is +to export the Python APIs via torchscript. + +Now that libtorchvision is built and installed we can tell our project to use +and link to it via the `-DUSE_TORCHVISION` flag. We also need to tell CMake +where to find it, just like we did with LibTorch, e.g.: + +```bash +cmake .. -DTorch_DIR=$Torch_DIR -DTorchVision_DIR=path/to/libtorchvision -DUSE_TORCHVISION=ON +cmake --build . +``` + +Now the `run_model` executable should be able to run the +`fasterrcnn_resnet50_fpn.pt` file. 
diff --git a/examples/cpp/run_model.cpp b/examples/cpp/run_model.cpp new file mode 100644 index 00000000000..11faead5dac --- /dev/null +++ b/examples/cpp/run_model.cpp @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#endif // _WIN32 + +int main(int argc, const char* argv[]) { + if (argc != 2) { + std::cout << "Usage: run_model \n"; + return -1; + } + torch::DeviceType device_type; + device_type = torch::kCPU; + + torch::jit::script::Module model; + try { + std::cout << "Loading model\n"; + // Deserialize the ScriptModule from a file using torch::jit::load(). + model = torch::jit::load(argv[1]); + std::cout << "Model loaded\n"; + } catch (const torch::Error&) { + std::cout << "error loading the model.\n"; + return -1; + } catch (const std::exception& e) { + std::cout << "Other error: " << e.what() << "\n"; + return -1; + } + + // TorchScript models require a List[IValue] as input + std::vector inputs; + + if (std::strstr(argv[1], "fasterrcnn") != NULL) { + // Faster RCNN accepts a List[Tensor] as main input + std::vector images; + images.push_back(torch::rand({3, 256, 275})); + images.push_back(torch::rand({3, 256, 275})); + inputs.push_back(images); + } else { + inputs.push_back(torch::rand({1, 3, 10, 10})); + } + auto out = model.forward(inputs); + std::cout << out << "\n"; + + if (torch::cuda::is_available()) { + // Move model and inputs to GPU + model.to(torch::kCUDA); + + // Add GPU inputs + inputs.clear(); + torch::TensorOptions options = torch::TensorOptions{torch::kCUDA}; + if (std::strstr(argv[1], "fasterrcnn") != NULL) { + // Faster RCNN accepts a List[Tensor] as main input + std::vector images; + images.push_back(torch::rand({3, 256, 275}, options)); + images.push_back(torch::rand({3, 256, 275}, options)); + inputs.push_back(images); + } else { + inputs.push_back(torch::rand({1, 3, 10, 10}, options)); + } + + auto gpu_out = model.forward(inputs); + std::cout << gpu_out << "\n"; + } +} diff --git 
a/examples/cpp/script_model.py b/examples/cpp/script_model.py new file mode 100644 index 00000000000..e91e888e7be --- /dev/null +++ b/examples/cpp/script_model.py @@ -0,0 +1,10 @@ +import torch +from torchvision import models + +for model, name in ( + (models.resnet18(weights=None), "resnet18"), + (models.detection.fasterrcnn_resnet50_fpn(weights=None, weights_backbone=None), "fasterrcnn_resnet50_fpn"), +): + model.eval() + traced_model = torch.jit.script(model) + traced_model.save(f"{name}.pt") diff --git a/examples/python/README.md b/examples/python/README.md new file mode 100644 index 00000000000..b6597959e37 --- /dev/null +++ b/examples/python/README.md @@ -0,0 +1,4 @@ +# Python examples + +The examples in this directory have been moved online in our [gallery +page](https://pytorch.org/vision/stable/auto_examples/index.html). diff --git a/gallery/README.rst b/gallery/README.rst new file mode 100644 index 00000000000..8dfea355276 --- /dev/null +++ b/gallery/README.rst @@ -0,0 +1,4 @@ +.. 
_gallery: + +Examples and tutorials +====================== diff --git a/gallery/assets/FudanPed00054.png b/gallery/assets/FudanPed00054.png new file mode 100644 index 00000000000..951682abb93 Binary files /dev/null and b/gallery/assets/FudanPed00054.png differ diff --git a/gallery/assets/FudanPed00054_mask.png b/gallery/assets/FudanPed00054_mask.png new file mode 100644 index 00000000000..4d5aa4e4020 Binary files /dev/null and b/gallery/assets/FudanPed00054_mask.png differ diff --git a/gallery/assets/astronaut.jpg b/gallery/assets/astronaut.jpg new file mode 100644 index 00000000000..9716f656269 Binary files /dev/null and b/gallery/assets/astronaut.jpg differ diff --git a/gallery/assets/basketball.mp4 b/gallery/assets/basketball.mp4 new file mode 100644 index 00000000000..16d62366068 Binary files /dev/null and b/gallery/assets/basketball.mp4 differ diff --git a/gallery/assets/coco/images/000000000001.jpg b/gallery/assets/coco/images/000000000001.jpg new file mode 120000 index 00000000000..9be80c7c273 --- /dev/null +++ b/gallery/assets/coco/images/000000000001.jpg @@ -0,0 +1 @@ +../../astronaut.jpg \ No newline at end of file diff --git a/gallery/assets/coco/images/000000000002.jpg b/gallery/assets/coco/images/000000000002.jpg new file mode 120000 index 00000000000..9f8efef9928 --- /dev/null +++ b/gallery/assets/coco/images/000000000002.jpg @@ -0,0 +1 @@ +../../dog2.jpg \ No newline at end of file diff --git a/gallery/assets/coco/instances.json b/gallery/assets/coco/instances.json new file mode 100644 index 00000000000..fe0e09270bf --- /dev/null +++ b/gallery/assets/coco/instances.json @@ -0,0 +1 @@ +{"images": [{"file_name": "000000000001.jpg", "height": 512, "width": 512, "id": 1}, {"file_name": "000000000002.jpg", "height": 500, "width": 500, "id": 2}], "annotations": [{"segmentation": [[40.0, 511.0, 26.0, 487.0, 28.0, 438.0, 17.0, 397.0, 24.0, 346.0, 38.0, 306.0, 61.0, 250.0, 111.0, 206.0, 111.0, 187.0, 120.0, 183.0, 136.0, 159.0, 159.0, 150.0, 181.0, 148.0, 
182.0, 132.0, 175.0, 132.0, 168.0, 120.0, 154.0, 102.0, 153.0, 62.0, 188.0, 35.0, 191.0, 29.0, 208.0, 20.0, 210.0, 22.0, 227.0, 16.0, 240.0, 16.0, 276.0, 31.0, 285.0, 39.0, 301.0, 88.0, 297.0, 108.0, 281.0, 128.0, 273.0, 138.0, 266.0, 138.0, 264.0, 153.0, 257.0, 162.0, 256.0, 174.0, 284.0, 197.0, 300.0, 221.0, 303.0, 236.0, 337.0, 258.0, 357.0, 306.0, 361.0, 351.0, 358.0, 511.0]], "iscrowd": 0, "image_id": 1, "bbox": [17.0, 16.0, 344.0, 495.0], "category_id": 1, "id": 1}, {"segmentation": [[0.0, 411.0, 43.0, 401.0, 99.0, 395.0, 105.0, 351.0, 124.0, 326.0, 181.0, 294.0, 227.0, 280.0, 245.0, 262.0, 259.0, 234.0, 262.0, 207.0, 271.0, 140.0, 283.0, 139.0, 301.0, 162.0, 309.0, 181.0, 341.0, 175.0, 362.0, 139.0, 369.0, 139.0, 377.0, 163.0, 378.0, 203.0, 381.0, 212.0, 380.0, 220.0, 382.0, 242.0, 404.0, 264.0, 392.0, 293.0, 384.0, 295.0, 385.0, 316.0, 399.0, 343.0, 391.0, 448.0, 452.0, 475.0, 457.0, 494.0, 436.0, 498.0, 402.0, 491.0, 369.0, 488.0, 366.0, 496.0, 319.0, 496.0, 302.0, 485.0, 226.0, 469.0, 128.0, 456.0, 74.0, 458.0, 29.0, 439.0, 0.0, 445.0]], "iscrowd": 0, "image_id": 2, "bbox": [0.0, 139.0, 457.0, 359.0], "category_id": 18, "id": 2}]} diff --git a/gallery/assets/dog1.jpg b/gallery/assets/dog1.jpg new file mode 100644 index 00000000000..df29f9d9704 Binary files /dev/null and b/gallery/assets/dog1.jpg differ diff --git a/gallery/assets/dog2.jpg b/gallery/assets/dog2.jpg new file mode 100644 index 00000000000..528dfec7209 Binary files /dev/null and b/gallery/assets/dog2.jpg differ diff --git a/gallery/assets/imagenet_class_index.json b/gallery/assets/imagenet_class_index.json new file mode 100644 index 00000000000..2ebd2961e1d --- /dev/null +++ b/gallery/assets/imagenet_class_index.json @@ -0,0 +1 @@ +{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", 
"cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", 
"Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", 
"fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", 
"Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], 
"222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", 
"African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", 
"Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", 
"eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", 
"birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], 
"506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", 
"forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": 
["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], 
"677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", 
"police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], 
"792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], 
"849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", 
"Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": 
["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} diff --git a/gallery/assets/person1.jpg b/gallery/assets/person1.jpg new file mode 100644 index 00000000000..83251c84a79 Binary files /dev/null and b/gallery/assets/person1.jpg differ diff --git a/gallery/assets/repurposing_annotations_thumbnail.png b/gallery/assets/repurposing_annotations_thumbnail.png new file mode 100644 index 00000000000..367eb4ec128 Binary files /dev/null and b/gallery/assets/repurposing_annotations_thumbnail.png differ diff --git a/gallery/assets/transforms_thumbnail.png b/gallery/assets/transforms_thumbnail.png new file mode 100644 index 00000000000..f9df96c9066 Binary files /dev/null and b/gallery/assets/transforms_thumbnail.png differ diff --git a/gallery/assets/visualization_utils_thumbnail2.png b/gallery/assets/visualization_utils_thumbnail2.png new file 
mode 100644 index 00000000000..cf057e04207 Binary files /dev/null and b/gallery/assets/visualization_utils_thumbnail2.png differ diff --git a/gallery/others/README.rst b/gallery/others/README.rst new file mode 100644 index 00000000000..fafb007d985 --- /dev/null +++ b/gallery/others/README.rst @@ -0,0 +1,2 @@ +Others +------ diff --git a/gallery/others/plot_optical_flow.py b/gallery/others/plot_optical_flow.py new file mode 100644 index 00000000000..6296c8e667e --- /dev/null +++ b/gallery/others/plot_optical_flow.py @@ -0,0 +1,198 @@ +""" +===================================================== +Optical Flow: Predicting movement with the RAFT model +===================================================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +Optical flow is the task of predicting movement between two images, usually two +consecutive frames of a video. Optical flow models take two images as input, and +predict a flow: the flow indicates the displacement of every single pixel in the +first image, and maps it to its corresponding pixel in the second image. Flows +are (2, H, W)-dimensional tensors, where the first axis corresponds to the +predicted horizontal and vertical displacements. + +The following example illustrates how torchvision can be used to predict flows +using our implementation of the RAFT model. We will also see how to convert the +predicted flows to RGB images for visualization. 
+""" + +import numpy as np +import torch +import matplotlib.pyplot as plt +import torchvision.transforms.functional as F + + +plt.rcParams["savefig.bbox"] = "tight" +# sphinx_gallery_thumbnail_number = 2 + + +def plot(imgs, **imshow_kwargs): + if not isinstance(imgs[0], list): + # Make a 2d grid even if there's just 1 row + imgs = [imgs] + + num_rows = len(imgs) + num_cols = len(imgs[0]) + _, axs = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False) + for row_idx, row in enumerate(imgs): + for col_idx, img in enumerate(row): + ax = axs[row_idx, col_idx] + img = F.to_pil_image(img.to("cpu")) + ax.imshow(np.asarray(img), **imshow_kwargs) + ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) + + plt.tight_layout() + +# %% +# Reading Videos Using Torchvision +# -------------------------------- +# We will first read a video using :func:`~torchvision.io.read_video`. +# Alternatively one can use the new :class:`~torchvision.io.VideoReader` API (if +# torchvision is built from source). +# The video we will use here is free of use from `pexels.com +# `_, +# credits go to `Pavel Danilyuk `_. + + +import tempfile +from pathlib import Path +from urllib.request import urlretrieve + + +video_url = "https://download.pytorch.org/tutorial/pexelscom_pavel_danilyuk_basketball_hd.mp4" +video_path = Path(tempfile.mkdtemp()) / "basketball.mp4" +_ = urlretrieve(video_url, video_path) + +# %% +# :func:`~torchvision.io.read_video` returns the video frames, audio frames and +# the metadata associated with the video. In our case, we only need the video +# frames. +# +# Here we will just make 2 predictions between 2 pre-selected pairs of frames, +# namely frames (100, 101) and (150, 151). Each of these pairs corresponds to a +# single model input. 
+ +from torchvision.io import read_video +frames, _, _ = read_video(str(video_path), output_format="TCHW") + +img1_batch = torch.stack([frames[100], frames[150]]) +img2_batch = torch.stack([frames[101], frames[151]]) + +plot(img1_batch) + +# %% +# The RAFT model accepts RGB images. We first get the frames from +# :func:`~torchvision.io.read_video` and resize them to ensure their dimensions +# are divisible by 8. Note that we explicitly use ``antialias=False``, because +# this is how those models were trained. Then we use the transforms bundled into +# the weights in order to preprocess the input and rescale its values to the +# required ``[-1, 1]`` interval. + +from torchvision.models.optical_flow import Raft_Large_Weights + +weights = Raft_Large_Weights.DEFAULT +transforms = weights.transforms() + + +def preprocess(img1_batch, img2_batch): + img1_batch = F.resize(img1_batch, size=[520, 960], antialias=False) + img2_batch = F.resize(img2_batch, size=[520, 960], antialias=False) + return transforms(img1_batch, img2_batch) + + +img1_batch, img2_batch = preprocess(img1_batch, img2_batch) + +print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}") + + +# %% +# Estimating Optical flow using RAFT +# ---------------------------------- +# We will use our RAFT implementation from +# :func:`~torchvision.models.optical_flow.raft_large`, which follows the same +# architecture as the one described in the `original paper `_. +# We also provide the :func:`~torchvision.models.optical_flow.raft_small` model +# builder, which is smaller and faster to run, sacrificing a bit of accuracy. + +from torchvision.models.optical_flow import raft_large + +# If you can, run this example on a GPU, it will be a lot faster. 
+device = "cuda" if torch.cuda.is_available() else "cpu" + +model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device) +model = model.eval() + +list_of_flows = model(img1_batch.to(device), img2_batch.to(device)) +print(f"type = {type(list_of_flows)}") +print(f"length = {len(list_of_flows)} = number of iterations of the model") + +# %% +# The RAFT model outputs lists of predicted flows where each entry is a +# (N, 2, H, W) batch of predicted flows that corresponds to a given "iteration" +# in the model. For more details on the iterative nature of the model, please +# refer to the `original paper `_. Here, we +# are only interested in the final predicted flows (they are the most accurate +# ones), so we will just retrieve the last item in the list. +# +# As described above, a flow is a tensor with dimensions (2, H, W) (or (N, 2, H, +# W) for batches of flows) where each entry corresponds to the horizontal and +# vertical displacement of each pixel from the first image to the second image. +# Note that the predicted flows are in "pixel" unit, they are not normalized +# w.r.t. the dimensions of the images. +predicted_flows = list_of_flows[-1] +print(f"dtype = {predicted_flows.dtype}") +print(f"shape = {predicted_flows.shape} = (N, 2, H, W)") +print(f"min = {predicted_flows.min()}, max = {predicted_flows.max()}") + + +# %% +# Visualizing predicted flows +# --------------------------- +# Torchvision provides the :func:`~torchvision.utils.flow_to_image` utility to +# convert a flow into an RGB image. It also supports batches of flows. +# each "direction" in the flow will be mapped to a given RGB color. In the +# images below, pixels with similar colors are assumed by the model to be moving +# in similar directions. The model is properly able to predict the movement of +# the ball and the player. Note in particular the different predicted direction +# of the ball in the first image (going to the left) and in the second image +# (going up). 
+ +from torchvision.utils import flow_to_image + +flow_imgs = flow_to_image(predicted_flows) + +# The images have been mapped into [-1, 1] but for plotting we want them in [0, 1] +img1_batch = [(img1 + 1) / 2 for img1 in img1_batch] + +grid = [[img1, flow_img] for (img1, flow_img) in zip(img1_batch, flow_imgs)] +plot(grid) + +# %% +# Bonus: Creating GIFs of predicted flows +# --------------------------------------- +# In the example above we have only shown the predicted flows of 2 pairs of +# frames. A fun way to apply the Optical Flow models is to run the model on an +# entire video, and create a new video from all the predicted flows. Below is a +# snippet that can get you started with this. We comment out the code, because +# this example is being rendered on a machine without a GPU, and it would take +# too long to run it. + +# from torchvision.io import write_jpeg +# for i, (img1, img2) in enumerate(zip(frames, frames[1:])): +# # Note: it would be faster to predict batches of flows instead of individual flows +# img1, img2 = preprocess(img1, img2) + +# list_of_flows = model(img1.to(device), img2.to(device)) +# predicted_flow = list_of_flows[-1][0] +# flow_img = flow_to_image(predicted_flow).to("cpu") +# output_folder = "/tmp/" # Update this to the folder of your choice +# write_jpeg(flow_img, output_folder + f"predicted_flow_{i}.jpg") + +# %% +# Once the .jpg flow images are saved, you can convert them into a video or a +# GIF using ffmpeg with e.g.: +# +# ffmpeg -f image2 -framerate 30 -i predicted_flow_%d.jpg -loop -1 flow.gif diff --git a/gallery/others/plot_repurposing_annotations.py b/gallery/others/plot_repurposing_annotations.py new file mode 100644 index 00000000000..2c2e10ffb2a --- /dev/null +++ b/gallery/others/plot_repurposing_annotations.py @@ -0,0 +1,211 @@ +""" +===================================== +Repurposing masks into bounding boxes +===================================== + +.. 
note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +The following example illustrates the operations available in +the :ref:`torchvision.ops ` module for repurposing +segmentation masks into object localization annotations for different tasks +(e.g. transforming masks used by instance and panoptic segmentation +methods into bounding boxes used by object detection methods). +""" + +# sphinx_gallery_thumbnail_path = "../../gallery/assets/repurposing_annotations_thumbnail.png" + +import os +import numpy as np +import torch +import matplotlib.pyplot as plt + +import torchvision.transforms.functional as F + + +ASSETS_DIRECTORY = "../assets" + +plt.rcParams["savefig.bbox"] = "tight" + + +# Show one image tensor, or a list of them, side by side in a single row of axes with ticks hidden. +def show(imgs): + if not isinstance(imgs, list): + imgs = [imgs] + fig, axs = plt.subplots(ncols=len(imgs), squeeze=False) + for i, img in enumerate(imgs): + img = img.detach() + img = F.to_pil_image(img) + axs[0, i].imshow(np.asarray(img)) + axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) + + +# %% +# Masks +# ----- +# In tasks like instance and panoptic segmentation, masks are commonly defined, and are defined by this package, +# as a multi-dimensional array (e.g. a NumPy array or a PyTorch tensor) with the following shape: +# +# (num_objects, height, width) +# +# Where num_objects is the number of annotated objects in the image. Each (height, width) slice corresponds to exactly +# one object. For example, if your input image has the dimensions 224 x 224 and has four annotated objects, +# your masks annotation has the following shape: +# +# (4, 224, 224). +# +# A nice property of masks is that they can be easily repurposed to be used in methods to solve a variety of object +# localization tasks.
+ +# %% +# Converting Masks to Bounding Boxes +# ----------------------------------------------- +# For example, the :func:`~torchvision.ops.masks_to_boxes` operation can be used to +# transform masks into bounding boxes that can be +# used as input to detection models such as FasterRCNN and RetinaNet. +# We will take images and masks from the `PennFudan Dataset `_. + + +from torchvision.io import decode_image + +img_path = os.path.join(ASSETS_DIRECTORY, "FudanPed00054.png") +mask_path = os.path.join(ASSETS_DIRECTORY, "FudanPed00054_mask.png") +img = decode_image(img_path) +mask = decode_image(mask_path) + + +# %% +# Here the masks are represented as a PNG Image, with integer values. +# Each object is encoded as a different color, with 0 being background. +# Notice that the spatial dimensions of image and mask match. + +print(mask.size()) +print(img.size()) +print(mask) + +# %% + +# We get the unique colors, as these would be the object ids. +obj_ids = torch.unique(mask) + +# first id is the background, so remove it. +obj_ids = obj_ids[1:] + +# split the color-encoded mask into a set of boolean masks. +# Note that this snippet would work as well if the masks were float values instead of ints. +masks = mask == obj_ids[:, None, None] + +# %% +# Now the masks are a boolean tensor. +# The first dimension is in this case 3 and denotes the number of instances: there are 3 people in the image. +# The other two dimensions are height and width, which are equal to the dimensions of the image. +# For each instance, the boolean tensor represents whether a particular pixel +# belongs to the segmentation mask of that instance. + +print(masks.size()) +print(masks) + +# %% +# Let us visualize an image and plot its corresponding segmentation masks. +# We will use the :func:`~torchvision.utils.draw_segmentation_masks` to draw the segmentation masks.
+ +from torchvision.utils import draw_segmentation_masks + +drawn_masks = [] +for mask in masks: + drawn_masks.append(draw_segmentation_masks(img, mask, alpha=0.8, colors="blue")) + +show(drawn_masks) + +# %% +# To convert the boolean masks into bounding boxes, +# we will use the :func:`~torchvision.ops.masks_to_boxes` from the torchvision.ops module. +# It returns the boxes in ``(xmin, ymin, xmax, ymax)`` format. + +from torchvision.ops import masks_to_boxes + +boxes = masks_to_boxes(masks) +print(boxes.size()) +print(boxes) + +# %% +# As the shape denotes, there are 3 boxes and they are in ``(xmin, ymin, xmax, ymax)`` format. +# These can be visualized very easily with the :func:`~torchvision.utils.draw_bounding_boxes` utility +# provided in :ref:`torchvision.utils `. + +from torchvision.utils import draw_bounding_boxes + +drawn_boxes = draw_bounding_boxes(img, boxes, colors="red") +show(drawn_boxes) + +# %% +# These boxes can now directly be used by detection models in torchvision. +# Here is a demo with a Faster R-CNN model loaded from +# :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` + +from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights + +weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT +model = fasterrcnn_resnet50_fpn(weights=weights, progress=False) +print(img.size()) + +transforms = weights.transforms() +img = transforms(img) +target = {} +target["boxes"] = boxes +target["labels"] = labels = torch.ones((masks.size(0),), dtype=torch.int64) +detection_outputs = model(img.unsqueeze(0), [target]) + + +# %% +# Converting Segmentation Dataset to Detection Dataset +# ---------------------------------------------------- +# +# With this utility it becomes very simple to convert a segmentation dataset to a detection dataset. +# With this we can now use a segmentation dataset to train a detection model. +# One can similarly convert a panoptic dataset to a detection dataset.
+# Here is an example where we re-purpose the dataset from the +# `PennFudan Detection Tutorial `_. + +# Dataset that loads an image and its color-encoded instance mask and returns the image with box/label targets derived from the mask. +class SegmentationToDetectionDataset(torch.utils.data.Dataset): + def __init__(self, root, transforms): + self.root = root + self.transforms = transforms + # load all image files, sorting them to + # ensure that they are aligned + self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages")))) + self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks")))) + + def __getitem__(self, idx): + # load images and masks + img_path = os.path.join(self.root, "PNGImages", self.imgs[idx]) + mask_path = os.path.join(self.root, "PedMasks", self.masks[idx]) + + img = decode_image(img_path) + mask = decode_image(mask_path) + + img = F.convert_image_dtype(img, dtype=torch.float) + mask = F.convert_image_dtype(mask, dtype=torch.float) + + # We get the unique colors, as these would be the object ids. + obj_ids = torch.unique(mask) + + # first id is the background, so remove it. + obj_ids = obj_ids[1:] + + # split the color-encoded mask into a set of boolean masks. + masks = mask == obj_ids[:, None, None] + + boxes = masks_to_boxes(masks) + + # there is only one class + labels = torch.ones((masks.shape[0],), dtype=torch.int64) + + target = {} + target["boxes"] = boxes + target["labels"] = labels + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target diff --git a/gallery/others/plot_scripted_tensor_transforms.py b/gallery/others/plot_scripted_tensor_transforms.py new file mode 100644 index 00000000000..da2213347e3 --- /dev/null +++ b/gallery/others/plot_scripted_tensor_transforms.py @@ -0,0 +1,136 @@ +""" +=================== +Torchscript support +=================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +This example illustrates `torchscript +`_ support of the torchvision +:ref:`transforms ` on Tensor images.
+""" + +# %% +from pathlib import Path + +import matplotlib.pyplot as plt + +import torch +import torch.nn as nn + +import torchvision.transforms as v1 +from torchvision.io import decode_image + +plt.rcParams["savefig.bbox"] = 'tight' +torch.manual_seed(1) + +# If you're trying to run that on Colab, you can download the assets and the +# helpers from https://github.com/pytorch/vision/tree/main/gallery/ +import sys +sys.path += ["../transforms"] +from helpers import plot +ASSETS_PATH = Path('../assets') + + +# %% +# Most transforms support torchscript. For composing transforms, we use +# :class:`torch.nn.Sequential` instead of +# :class:`~torchvision.transforms.v2.Compose`: + +dog1 = decode_image(str(ASSETS_PATH / 'dog1.jpg')) +dog2 = decode_image(str(ASSETS_PATH / 'dog2.jpg')) + +transforms = torch.nn.Sequential( + v1.RandomCrop(224), + v1.RandomHorizontalFlip(p=0.3), +) + +scripted_transforms = torch.jit.script(transforms) + +plot([dog1, scripted_transforms(dog1), dog2, scripted_transforms(dog2)]) + + +# %% +# .. warning:: +# +# Above we have used transforms from the ``torchvision.transforms`` +# namespace, i.e. the "v1" transforms. The v2 transforms from the +# ``torchvision.transforms.v2`` namespace are the :ref:`recommended +# ` way to use transforms in your code. +# +# The v2 transforms also support torchscript, but if you call +# ``torch.jit.script()`` on a v2 **class** transform, you'll actually end up +# with its (scripted) v1 equivalent. This may lead to slightly different +# results between the scripted and eager executions due to implementation +# differences between v1 and v2. +# +# If you really need torchscript support for the v2 transforms, **we +# recommend scripting the functionals** from the +# ``torchvision.transforms.v2.functional`` namespace to avoid surprises. +# +# Below we now show how to combine image transformations and a model forward +# pass, while using ``torch.jit.script`` to obtain a single scripted module. 
+# +# Let's define a ``Predictor`` module that transforms the input tensor and then +# applies an ImageNet model on it. + +from torchvision.models import resnet18, ResNet18_Weights + + +class Predictor(nn.Module): + + def __init__(self): + super().__init__() + weights = ResNet18_Weights.DEFAULT + self.resnet18 = resnet18(weights=weights, progress=False).eval() + self.transforms = weights.transforms(antialias=True) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + with torch.no_grad(): + x = self.transforms(x) + y_pred = self.resnet18(x) + return y_pred.argmax(dim=1) + + +# %% +# Now, let's define scripted and non-scripted instances of ``Predictor`` and +# apply it on multiple tensor images of the same size + +device = "cuda" if torch.cuda.is_available() else "cpu" + +predictor = Predictor().to(device) +scripted_predictor = torch.jit.script(predictor).to(device) + +batch = torch.stack([dog1, dog2]).to(device) + +res = predictor(batch) +res_scripted = scripted_predictor(batch) + +# %% +# We can verify that the prediction of the scripted and non-scripted models are +# the same: + +import json + +with open(Path('../assets') / 'imagenet_class_index.json') as labels_file: + labels = json.load(labels_file) + +for i, (pred, pred_scripted) in enumerate(zip(res, res_scripted)): + assert pred == pred_scripted + print(f"Prediction for Dog {i + 1}: {labels[str(pred.item())]}") + +# %% +# Since the model is scripted, it can be easily dumped on disk and re-used + +import tempfile + +with tempfile.NamedTemporaryFile() as f: + scripted_predictor.save(f.name) + + dumped_scripted_predictor = torch.jit.load(f.name) + res_scripted_dumped = dumped_scripted_predictor(batch) +assert (res_scripted_dumped == res_scripted).all() + +# %% diff --git a/gallery/others/plot_video_api.py b/gallery/others/plot_video_api.py new file mode 100644 index 00000000000..3a67e4d86d0 --- /dev/null +++ b/gallery/others/plot_video_api.py @@ -0,0 +1,346 @@ +""" +========= +Video API +========= + +.. 
note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +This example illustrates some of the APIs that torchvision offers for +videos, together with the examples on how to build datasets and more. +""" + +# %% +# 1. Introduction: building a new video object and examining the properties +# ------------------------------------------------------------------------- +# First we select a video to test the object out. For the sake of argument +# we're using one from kinetics400 dataset. +# To create it, we need to define the path and the stream we want to use. + +# %% +# Chosen video statistics: +# +# - WUzgd7C1pWA.mp4 +# - source: +# - kinetics-400 +# - video: +# - H-264 +# - MPEG-4 AVC (part 10) (avc1) +# - fps: 29.97 +# - audio: +# - MPEG AAC audio (mp4a) +# - sample rate: 48K Hz +# + +import torch +import torchvision +from torchvision.datasets.utils import download_url +torchvision.set_video_backend("video_reader") + +# Download the sample video +download_url( + "https://github.com/pytorch/vision/blob/main/test/assets/videos/WUzgd7C1pWA.mp4?raw=true", + ".", + "WUzgd7C1pWA.mp4" +) +video_path = "./WUzgd7C1pWA.mp4" + +# %% +# Streams are defined in a similar fashion as torch devices. We encode them as strings in a form +# of ``stream_type:stream_id`` where ``stream_type`` is a string and ``stream_id`` a long int. +# The constructor accepts passing a ``stream_type`` only, in which case the stream is auto-discovered. +# Firstly, let's get the metadata for our particular video: + +stream = "video" +video = torchvision.io.VideoReader(video_path, stream) +video.get_metadata() + +# %% +# Here we can see that video has two streams - a video and an audio stream. +# Currently available stream types include ['video', 'audio']. +# Each descriptor consists of two parts: stream type (e.g. 'video') and a unique stream id +# (which are determined by video encoding). 
+# In this way, if the video container contains multiple streams of the same type, +# users can access the one they want. +# If only stream type is passed, the decoder auto-detects the first stream of that type and returns it. + +# %% +# Let's read all the frames from the audio stream. By default, the return value of +# ``next(video_reader)`` is a dict containing the following fields. +# +# The return fields are: +# +# - ``data``: containing a torch.tensor +# - ``pts``: containing a float timestamp of this particular frame + +metadata = video.get_metadata() +video.set_current_stream("audio") + +frames = [] # we are going to save the frames here. +ptss = [] # pts is a presentation timestamp in seconds (float) of each frame +for frame in video: + frames.append(frame['data']) + ptss.append(frame['pts']) + +print("PTS for first five frames ", ptss[:5]) +print("Total number of frames: ", len(frames)) +approx_nf = metadata['audio']['duration'][0] * metadata['audio']['framerate'][0] +print("Approx total number of datapoints we can expect: ", approx_nf) +print("Read data size: ", frames[0].size(0) * len(frames)) + +# %% +# But what if we only want to read a certain time segment of the video? +# That can be done easily using the combination of our ``seek`` function, and the fact that each call +# to next returns the presentation timestamp of the returned frame in seconds. +# +# Given that our implementation relies on python iterators, +# we can leverage itertools to simplify the process and make it more pythonic. +# +# For example, if we wanted to read ten frames starting at the 2-second mark: + + +import itertools +video.set_current_stream("video") + +frames = [] # we are going to save the frames here.
+ +# We seek to the 2-second mark of the video and use islice to take 10 frames (each item is a dict; we keep its 'data' tensor) +for frame in itertools.islice(video.seek(2), 10): + frames.append(frame['data']) + +print("Total number of frames: ", len(frames)) + +# %% +# Or if we wanted to read from 2nd to 5th second, +# we seek to the 2-second mark of the video, +# then we utilize the itertools takewhile to get the +# correct number of frames: + +video.set_current_stream("video") +frames = [] # we are going to save the frames here. +video = video.seek(2) + +for frame in itertools.takewhile(lambda x: x['pts'] <= 5, video): + frames.append(frame['data']) + +print("Total number of frames: ", len(frames)) +approx_nf = (5 - 2) * video.get_metadata()['video']['fps'][0] +print("We can expect approx: ", approx_nf) +print("Tensor size: ", frames[0].size()) + +# %% +# 2. Building a sample read_video function +# ---------------------------------------------------------------------------------------- +# We can utilize the methods above to build the read video function that follows +# the same API as the existing ``read_video`` function.
+ + +def example_read_video(video_object, start=0, end=None, read_video=True, read_audio=True): + if end is None: + end = float("inf") + if end < start: + raise ValueError( + "end time should be larger than start time, got " + f"start time={start} and end time={end}" + ) + + video_frames = torch.empty(0) + video_pts = [] + if read_video: + video_object.set_current_stream("video") + frames = [] + for frame in itertools.takewhile(lambda x: x['pts'] <= end, video_object.seek(start)): + frames.append(frame['data']) + video_pts.append(frame['pts']) + if len(frames) > 0: + video_frames = torch.stack(frames, 0) + + audio_frames = torch.empty(0) + audio_pts = [] + if read_audio: + video_object.set_current_stream("audio") + frames = [] + for frame in itertools.takewhile(lambda x: x['pts'] <= end, video_object.seek(start)): + frames.append(frame['data']) + audio_pts.append(frame['pts']) + if len(frames) > 0: + audio_frames = torch.cat(frames, 0) + + return video_frames, audio_frames, (video_pts, audio_pts), video_object.get_metadata() + + +# Total number of frames should be 327 for video and 523264 datapoints for audio +vf, af, info, meta = example_read_video(video) +print(vf.size(), af.size()) + +# %% +# 3. Building an example randomly sampled dataset (can be applied to training dataset of kinetics400) +# ------------------------------------------------------------------------------------------------------- +# Cool, so now we can use the same principle to make the sample dataset. +# We suggest trying out iterable dataset for this purpose. +# Here, we are going to build an example dataset that reads randomly selected 10 frames of video. 
+ +# %% +# Make sample dataset +import os +os.makedirs("./dataset", exist_ok=True) +os.makedirs("./dataset/1", exist_ok=True) +os.makedirs("./dataset/2", exist_ok=True) + +# %% +# Download the videos +from torchvision.datasets.utils import download_url +download_url( + "https://github.com/pytorch/vision/blob/main/test/assets/videos/WUzgd7C1pWA.mp4?raw=true", + "./dataset/1", "WUzgd7C1pWA.mp4" +) +download_url( + "https://github.com/pytorch/vision/blob/main/test/assets/videos/RATRACE_wave_f_nm_np1_fr_goo_37.avi?raw=true", + "./dataset/1", + "RATRACE_wave_f_nm_np1_fr_goo_37.avi" +) +download_url( + "https://github.com/pytorch/vision/blob/main/test/assets/videos/SOX5yA1l24A.mp4?raw=true", + "./dataset/2", + "SOX5yA1l24A.mp4" +) +download_url( + "https://github.com/pytorch/vision/blob/main/test/assets/videos/v_SoccerJuggling_g23_c01.avi?raw=true", + "./dataset/2", + "v_SoccerJuggling_g23_c01.avi" +) +download_url( + "https://github.com/pytorch/vision/blob/main/test/assets/videos/v_SoccerJuggling_g24_c01.avi?raw=true", + "./dataset/2", + "v_SoccerJuggling_g24_c01.avi" +) + +# %% +# Housekeeping and utilities +import os +import random + +from torchvision.datasets.folder import make_dataset +from torchvision import transforms as t + + +def _find_classes(dir): + classes = [d.name for d in os.scandir(dir) if d.is_dir()] + classes.sort() + class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} + return classes, class_to_idx + + +def get_samples(root, extensions=(".mp4", ".avi")): + _, class_to_idx = _find_classes(root) + return make_dataset(root, class_to_idx, extensions=extensions) + +# %% +# We are going to define the dataset and some basic arguments. +# We assume the structure of the FolderDataset, and add the following parameters: +# +# - ``clip_len``: length of a clip in frames +# - ``frame_transform``: transform for every frame individually +# - ``video_transform``: transform on a video sequence +# +# .. 
note:: +# We actually add epoch size as using :func:`~torch.utils.data.IterableDataset` +# class allows us to naturally oversample clips or images from each video if needed. + + +# Iterable dataset that yields ``epoch_size`` randomly chosen clips of up to ``clip_len`` frames, one dict per clip. +class RandomDataset(torch.utils.data.IterableDataset): + def __init__(self, root, epoch_size=None, frame_transform=None, video_transform=None, clip_len=16): + super().__init__() + + self.samples = get_samples(root) + + # Allow for temporal jittering + if epoch_size is None: + epoch_size = len(self.samples) + self.epoch_size = epoch_size + + self.clip_len = clip_len + self.frame_transform = frame_transform + self.video_transform = video_transform + + def __iter__(self): + for i in range(self.epoch_size): + # Get random sample + path, target = random.choice(self.samples) + # Get video object + vid = torchvision.io.VideoReader(path, "video") + metadata = vid.get_metadata() + video_frames = [] # video frame buffer + + # Seek and return frames + max_seek = metadata["video"]['duration'][0] - (self.clip_len / metadata["video"]['fps'][0]) + start = random.uniform(0., max_seek) + for frame in itertools.islice(vid.seek(start), self.clip_len): + # frame_transform is optional (defaults to None): only apply it when one was given + data = frame['data'] + video_frames.append(self.frame_transform(data) if self.frame_transform is not None else data) + current_pts = frame['pts'] + # Stack it into a tensor + video = torch.stack(video_frames, 0) + if self.video_transform: + video = self.video_transform(video) + output = { + 'path': path, + 'video': video, + 'target': target, + 'start': start, + 'end': current_pts} + yield output + +# %% +# Given a path of videos in a folder structure, i.e.: +# +# - dataset +# - class 1 +# - file 0 +# - file 1 +# - ... +# - class 2 +# - file 0 +# - file 1 +# - ... +# - ... +# +# We can generate a dataloader and test the dataset.
+ + +transforms = [t.Resize((112, 112))] +frame_transform = t.Compose(transforms) + +dataset = RandomDataset("./dataset", epoch_size=None, frame_transform=frame_transform) + +# %% +from torch.utils.data import DataLoader +loader = DataLoader(dataset, batch_size=12) +data = {"video": [], 'start': [], 'end': [], 'tensorsize': []} +for batch in loader: + for i in range(len(batch['path'])): + data['video'].append(batch['path'][i]) + data['start'].append(batch['start'][i].item()) + data['end'].append(batch['end'][i].item()) + data['tensorsize'].append(batch['video'][i].size()) +print(data) + +# %% +# 4. Data Visualization +# ---------------------------------- +# Example of visualized video + +import matplotlib.pyplot as plt + +plt.figure(figsize=(12, 12)) +for i in range(16): + plt.subplot(4, 4, i + 1) + plt.imshow(batch["video"][0, i, ...].permute(1, 2, 0)) + plt.axis("off") + +# %% +# Cleanup the video and dataset: +import os +import shutil +os.remove("./WUzgd7C1pWA.mp4") +shutil.rmtree("./dataset") diff --git a/gallery/others/plot_visualization_utils.py b/gallery/others/plot_visualization_utils.py new file mode 100644 index 00000000000..72c35b53717 --- /dev/null +++ b/gallery/others/plot_visualization_utils.py @@ -0,0 +1,522 @@ +""" +======================= +Visualization utilities +======================= + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +This example illustrates some of the utilities that torchvision offers for +visualizing images, bounding boxes, segmentation masks and keypoints. 
+""" + +# sphinx_gallery_thumbnail_path = "../../gallery/assets/visualization_utils_thumbnail2.png" + +import torch +import numpy as np +import matplotlib.pyplot as plt + +import torchvision.transforms.functional as F + + +plt.rcParams["savefig.bbox"] = 'tight' + + +def show(imgs): + if not isinstance(imgs, list): + imgs = [imgs] + fig, axs = plt.subplots(ncols=len(imgs), squeeze=False) + for i, img in enumerate(imgs): + img = img.detach() + img = F.to_pil_image(img) + axs[0, i].imshow(np.asarray(img)) + axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) + + +# %% +# Visualizing a grid of images +# ---------------------------- +# The :func:`~torchvision.utils.make_grid` function can be used to create a +# tensor that represents multiple images in a grid. This util requires a single +# image of dtype ``uint8`` as input. + +from torchvision.utils import make_grid +from torchvision.io import decode_image +from pathlib import Path + +dog1_int = decode_image(str(Path('../assets') / 'dog1.jpg')) +dog2_int = decode_image(str(Path('../assets') / 'dog2.jpg')) +dog_list = [dog1_int, dog2_int] + +grid = make_grid(dog_list) +show(grid) + +# %% +# Visualizing bounding boxes +# -------------------------- +# We can use :func:`~torchvision.utils.draw_bounding_boxes` to draw boxes on an +# image. We can set the colors, labels, width as well as font and font size. +# The boxes are in ``(xmin, ymin, xmax, ymax)`` format. + +from torchvision.utils import draw_bounding_boxes + + +boxes = torch.tensor([[50, 50, 100, 200], [210, 150, 350, 430]], dtype=torch.float) +colors = ["blue", "yellow"] +result = draw_bounding_boxes(dog1_int, boxes, colors=colors, width=5) +show(result) + + +# %% +# Naturally, we can also plot bounding boxes produced by torchvision detection +# models. Here is a demo with a Faster R-CNN model loaded from +# :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` +# model. 
For more details on the output of such models, you may +# refer to :ref:`instance_seg_output`. + +from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights + + +weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT +transforms = weights.transforms() + +images = [transforms(d) for d in dog_list] + +model = fasterrcnn_resnet50_fpn(weights=weights, progress=False) +model = model.eval() + +outputs = model(images) +print(outputs) + +# %% +# Let's plot the boxes detected by our model. We will only plot the boxes with a +# score greater than a given threshold. + +score_threshold = .8 +dogs_with_boxes = [ + draw_bounding_boxes(dog_int, boxes=output['boxes'][output['scores'] > score_threshold], width=4) + for dog_int, output in zip(dog_list, outputs) +] +show(dogs_with_boxes) + +# %% +# Visualizing segmentation masks +# ------------------------------ +# The :func:`~torchvision.utils.draw_segmentation_masks` function can be used to +# draw segmentation masks on images. Semantic segmentation and instance +# segmentation models have different outputs, so we will treat each +# independently. +# +# .. _semantic_seg_output: +# +# Semantic segmentation models +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# We will see how to use it with torchvision's FCN Resnet-50, loaded with +# :func:`~torchvision.models.segmentation.fcn_resnet50`. Let's start by looking +# at the output of the model. + +from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights + +weights = FCN_ResNet50_Weights.DEFAULT +transforms = weights.transforms(resize_size=None) + +model = fcn_resnet50(weights=weights, progress=False) +model = model.eval() + +batch = torch.stack([transforms(d) for d in dog_list]) +output = model(batch)['out'] +print(output.shape, output.min().item(), output.max().item()) + +# %% +# As we can see above, the output of the segmentation model is a tensor of shape +# ``(batch_size, num_classes, H, W)``. 
Each value is a non-normalized score, and +# we can normalize them into ``[0, 1]`` by using a softmax. After the softmax, +# we can interpret each value as a probability indicating how likely a given +# pixel is to belong to a given class. +# +# Let's plot the masks that have been detected for the dog class and for the +# boat class: + +sem_class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])} + +normalized_masks = torch.nn.functional.softmax(output, dim=1) + +dog_and_boat_masks = [ + normalized_masks[img_idx, sem_class_to_idx[cls]] + for img_idx in range(len(dog_list)) + for cls in ('dog', 'boat') +] + +show(dog_and_boat_masks) + +# %% +# As expected, the model is confident about the dog class, but not so much for +# the boat class. +# +# The :func:`~torchvision.utils.draw_segmentation_masks` function can be used to +# plots those masks on top of the original image. This function expects the +# masks to be boolean masks, but our masks above contain probabilities in ``[0, +# 1]``. To get boolean masks, we can do the following: + +class_dim = 1 +boolean_dog_masks = (normalized_masks.argmax(class_dim) == sem_class_to_idx['dog']) +print(f"shape = {boolean_dog_masks.shape}, dtype = {boolean_dog_masks.dtype}") +show([m.float() for m in boolean_dog_masks]) + + +# %% +# The line above where we define ``boolean_dog_masks`` is a bit cryptic, but you +# can read it as the following query: "For which pixels is 'dog' the most likely +# class?" +# +# .. note:: +# While we're using the ``normalized_masks`` here, we would have +# gotten the same result by using the non-normalized scores of the model +# directly (as the softmax operation preserves the order). 
+# +# Now that we have boolean masks, we can use them with +# :func:`~torchvision.utils.draw_segmentation_masks` to plot them on top of the +# original images: + +from torchvision.utils import draw_segmentation_masks + +dogs_with_masks = [ + draw_segmentation_masks(img, masks=mask, alpha=0.7) + for img, mask in zip(dog_list, boolean_dog_masks) +] +show(dogs_with_masks) + +# %% +# We can plot more than one mask per image! Remember that the model returned as +# many masks as there are classes. Let's ask the same query as above, but this +# time for *all* classes, not just the dog class: "For each pixel and each class +# C, is class C the most likely class?" +# +# This one is a bit more involved, so we'll first show how to do it with a +# single image, and then we'll generalize to the batch + +num_classes = normalized_masks.shape[1] +dog1_masks = normalized_masks[0] +class_dim = 0 +dog1_all_classes_masks = dog1_masks.argmax(class_dim) == torch.arange(num_classes)[:, None, None] + +print(f"dog1_masks shape = {dog1_masks.shape}, dtype = {dog1_masks.dtype}") +print(f"dog1_all_classes_masks = {dog1_all_classes_masks.shape}, dtype = {dog1_all_classes_masks.dtype}") + +dog_with_all_masks = draw_segmentation_masks(dog1_int, masks=dog1_all_classes_masks, alpha=.6) +show(dog_with_all_masks) + +# %% +# We can see in the image above that only 2 masks were drawn: the mask for the +# background and the mask for the dog. This is because the model thinks that +# only these 2 classes are the most likely ones across all the pixels. If the +# model had detected another class as the most likely among other pixels, we +# would have seen its mask above. +# +# Removing the background mask is as simple as passing +# ``masks=dog1_all_classes_masks[1:]``, because the background class is the +# class with index 0. +# +# Let's now do the same but for an entire batch of images. The code is similar +# but involves a bit more juggling with the dimensions. 
+ +class_dim = 1 +all_classes_masks = normalized_masks.argmax(class_dim) == torch.arange(num_classes)[:, None, None, None] +print(f"shape = {all_classes_masks.shape}, dtype = {all_classes_masks.dtype}") +# The first dimension is the classes now, so we need to swap it +all_classes_masks = all_classes_masks.swapaxes(0, 1) + +dogs_with_masks = [ + draw_segmentation_masks(img, masks=mask, alpha=.6) + for img, mask in zip(dog_list, all_classes_masks) +] +show(dogs_with_masks) + + +# %% +# .. _instance_seg_output: +# +# Instance segmentation models +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Instance segmentation models have a significantly different output from the +# semantic segmentation models. We will see here how to plot the masks for such +# models. Let's start by analyzing the output of a Mask-RCNN model. Note that +# these models don't require the images to be normalized, so we don't need to +# use the normalized batch. +# +# .. note:: +# +# We will here describe the output of a Mask-RCNN model. The models in +# :ref:`object_det_inst_seg_pers_keypoint_det` all have a similar output +# format, but some of them may have extra info like keypoints for +# :func:`~torchvision.models.detection.keypointrcnn_resnet50_fpn`, and some +# of them may not have masks, like +# :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`. + +from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights + +weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT +transforms = weights.transforms() + +images = [transforms(d) for d in dog_list] + +model = maskrcnn_resnet50_fpn(weights=weights, progress=False) +model = model.eval() + +output = model(images) +print(output) + +# %% +# Let's break this down. For each image in the batch, the model outputs some +# detections (or instances). The number of detections varies for each input +# image. Each instance is described by its bounding box, its label, its score +# and its mask. 
+# +# The way the output is organized is as follows: the output is a list of length +# ``batch_size``. Each entry in the list corresponds to an input image, and it +# is a dict with keys 'boxes', 'labels', 'scores', and 'masks'. Each value +# associated to those keys has ``num_instances`` elements in it. In our case +# above there are 3 instances detected in the first image, and 2 instances in +# the second one. +# +# The boxes can be plotted with :func:`~torchvision.utils.draw_bounding_boxes` +# as above, but here we're more interested in the masks. These masks are quite +# different from the masks that we saw above for the semantic segmentation +# models. + +dog1_output = output[0] +dog1_masks = dog1_output['masks'] +print(f"shape = {dog1_masks.shape}, dtype = {dog1_masks.dtype}, " + f"min = {dog1_masks.min()}, max = {dog1_masks.max()}") + +# %% +# Here the masks correspond to probabilities indicating, for each pixel, how +# likely it is to belong to the predicted label of that instance. Those +# predicted labels correspond to the 'labels' element in the same output dict. +# Let's see which labels were predicted for the instances of the first image. + +print("For the first dog, the following instances were detected:") +print([weights.meta["categories"][label] for label in dog1_output['labels']]) + +# %% +# Interestingly, the model detects two persons in the image. Let's go ahead and +# plot those masks. Since :func:`~torchvision.utils.draw_segmentation_masks` +# expects boolean masks, we need to convert those probabilities into boolean +# values. Remember that the semantic of those masks is "How likely is this pixel +# to belong to the predicted class?". As a result, a natural way of converting +# those masks into boolean values is to threshold them with the 0.5 probability +# (one could also choose a different threshold). 
+ +proba_threshold = 0.5 +dog1_bool_masks = dog1_output['masks'] > proba_threshold +print(f"shape = {dog1_bool_masks.shape}, dtype = {dog1_bool_masks.dtype}") + +# There's an extra dimension (1) to the masks. We need to remove it +dog1_bool_masks = dog1_bool_masks.squeeze(1) + +show(draw_segmentation_masks(dog1_int, dog1_bool_masks, alpha=0.9)) + +# %% +# The model seems to have properly detected the dog, but it also confused trees +# with people. Looking more closely at the scores will help us plot more +# relevant masks: + +print(dog1_output['scores']) + +# %% +# Clearly the model is more confident about the dog detection than it is about +# the people detections. That's good news. When plotting the masks, we can ask +# for only those that have a good score. Let's use a score threshold of .75 +# here, and also plot the masks of the second dog. + +score_threshold = .75 + +boolean_masks = [ + out['masks'][out['scores'] > score_threshold] > proba_threshold + for out in output +] + +dogs_with_masks = [ + draw_segmentation_masks(img, mask.squeeze(1)) + for img, mask in zip(dog_list, boolean_masks) +] +show(dogs_with_masks) + +# %% +# The two 'people' masks in the first image were not selected because they have +# a lower score than the score threshold. Similarly, in the second image, the +# instance with class 15 (which corresponds to 'bench') was not selected. + +# %% +# .. _keypoint_output: +# +# Visualizing keypoints +# ------------------------------ +# The :func:`~torchvision.utils.draw_keypoints` function can be used to +# draw keypoints on images. We will see how to use it with +# torchvision's KeypointRCNN loaded with :func:`~torchvision.models.detection.keypointrcnn_resnet50_fpn`. +# We will first have a look at the output of the model.
+# + +from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights +from torchvision.io import decode_image + +person_int = decode_image(str(Path("../assets") / "person1.jpg")) + +weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT +transforms = weights.transforms() + +person_float = transforms(person_int) + +model = keypointrcnn_resnet50_fpn(weights=weights, progress=False) +model = model.eval() + +outputs = model([person_float]) +print(outputs) + +# %% +# As we see the output contains a list of dictionaries. +# The output list is of length batch_size. +# We currently have just a single image so length of list is 1. +# Each entry in the list corresponds to an input image, +# and it is a dict with keys `boxes`, `labels`, `scores`, `keypoints` and `keypoint_scores`. +# Each value associated to those keys has `num_instances` elements in it. +# In our case above there are 2 instances detected in the image. + +kpts = outputs[0]['keypoints'] +scores = outputs[0]['scores'] + +print(kpts) +print(scores) + +# %% +# The KeypointRCNN model detects there are two instances in the image. +# If you plot the boxes by using :func:`~draw_bounding_boxes` +# you would recognize they are the person and the surfboard. +# If we look at the scores, we will realize that the model is much more confident about the person than surfboard. +# We could now set a threshold confidence and plot instances which we are confident enough. +# Let us set a threshold of 0.75 and filter out the keypoints corresponding to the person. + +detect_threshold = 0.75 +idx = torch.where(scores > detect_threshold) +keypoints = kpts[idx] + +print(keypoints) + +# %% +# Great, now we have the keypoints corresponding to the person. +# Each keypoint is represented by x, y coordinates and the visibility. +# We can now use the :func:`~torchvision.utils.draw_keypoints` function to draw keypoints. +# Note that the utility expects uint8 images. 
+ +from torchvision.utils import draw_keypoints + +res = draw_keypoints(person_int, keypoints, colors="blue", radius=3) +show(res) + +# %% +# As we see, the keypoints appear as colored circles over the image. +# The coco keypoints for a person are ordered and represent the following list: + +coco_keypoints = [ + "nose", "left_eye", "right_eye", "left_ear", "right_ear", + "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", + "left_wrist", "right_wrist", "left_hip", "right_hip", + "left_knee", "right_knee", "left_ankle", "right_ankle", +] + +# %% +# What if we are interested in joining the keypoints? +# This is especially useful in creating pose detection or action recognition. +# We can join the keypoints easily using the `connectivity` parameter. +# A close observation would reveal that we would need to join the points in the +# order below to construct a human skeleton. +# +# nose -> left_eye -> left_ear. (0, 1), (1, 3) +# +# nose -> right_eye -> right_ear. (0, 2), (2, 4) +# +# nose -> left_shoulder -> left_elbow -> left_wrist. (0, 5), (5, 7), (7, 9) +# +# nose -> right_shoulder -> right_elbow -> right_wrist. (0, 6), (6, 8), (8, 10) +# +# left_shoulder -> left_hip -> left_knee -> left_ankle. (5, 11), (11, 13), (13, 15) +# +# right_shoulder -> right_hip -> right_knee -> right_ankle. (6, 12), (12, 14), (14, 16) +# +# We will create a list containing these keypoint ids to be connected. + +connect_skeleton = [ + (0, 1), (0, 2), (1, 3), (2, 4), (0, 5), (0, 6), (5, 7), (6, 8), + (7, 9), (8, 10), (5, 11), (6, 12), (11, 13), (12, 14), (13, 15), (14, 16) +] + +# %% +# We pass the above list to the connectivity parameter to connect the keypoints. +# + +res = draw_keypoints(person_int, keypoints, connectivity=connect_skeleton, colors="blue", radius=4, width=3) +show(res) + +# %% +# That looks pretty good. +# +# ..
_draw_keypoints_with_visibility: +# +# Drawing Keypoints with Visibility +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Let's have a look at the results, another keypoint prediction module produced, and show the connectivity: + +prediction = torch.tensor( + [[[208.0176, 214.2409, 1.0000], + [000.0000, 000.0000, 0.0000], + [197.8246, 210.6392, 1.0000], + [000.0000, 000.0000, 0.0000], + [178.6378, 217.8425, 1.0000], + [221.2086, 253.8591, 1.0000], + [160.6502, 269.4662, 1.0000], + [243.9929, 304.2822, 1.0000], + [138.4654, 328.8935, 1.0000], + [277.5698, 340.8990, 1.0000], + [153.4551, 374.5145, 1.0000], + [000.0000, 000.0000, 0.0000], + [226.0053, 370.3125, 1.0000], + [221.8081, 455.5516, 1.0000], + [273.9723, 448.9486, 1.0000], + [193.6275, 546.1933, 1.0000], + [273.3727, 545.5930, 1.0000]]] +) + +res = draw_keypoints(person_int, prediction, connectivity=connect_skeleton, colors="blue", radius=4, width=3) +show(res) + +# %% +# What happened there? +# The model, which predicted the new keypoints, +# can't detect the three points that are hidden on the upper left body of the skateboarder. +# More precisely, the model predicted that `(x, y, vis) = (0, 0, 0)` for the left_eye, left_ear, and left_hip. +# So we definitely don't want to display those keypoints and connections, and you don't have to. +# Looking at the parameters of :func:`~torchvision.utils.draw_keypoints`, +# we can see that we can pass a visibility tensor as an additional argument. +# Given the models' prediction, we have the visibility as the third keypoint dimension, we just need to extract it. +# Let's split the ``prediction`` into the keypoint coordinates and their respective visibility, +# and pass both of them as arguments to :func:`~torchvision.utils.draw_keypoints`. 
+ +coordinates, visibility = prediction.split([2, 1], dim=-1) +visibility = visibility.bool() + +res = draw_keypoints( + person_int, coordinates, visibility=visibility, connectivity=connect_skeleton, colors="blue", radius=4, width=3 +) +show(res) + +# %% +# We can see that the undetected keypoints are not drawn and the invisible keypoint connections were skipped. +# This can reduce the noise on images with multiple detections, or in cases like ours, +# when the keypoint-prediction model missed some detections. +# Most torch keypoint-prediction models return the visibility for every prediction, ready for you to use it. +# The :func:`~torchvision.models.detection.keypointrcnn_resnet50_fpn` model, +# which we used in the first case, does so too. diff --git a/gallery/transforms/README.rst b/gallery/transforms/README.rst new file mode 100644 index 00000000000..1b8b1b08155 --- /dev/null +++ b/gallery/transforms/README.rst @@ -0,0 +1,4 @@ +.. _transforms_gallery: + +Transforms +---------- diff --git a/gallery/transforms/helpers.py b/gallery/transforms/helpers.py new file mode 100644 index 00000000000..e94d717eb7d --- /dev/null +++ b/gallery/transforms/helpers.py @@ -0,0 +1,50 @@ +import matplotlib.pyplot as plt +import torch +from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks +from torchvision import tv_tensors +from torchvision.transforms.v2 import functional as F + + +def plot(imgs, row_title=None, **imshow_kwargs): + if not isinstance(imgs[0], list): + # Make a 2d grid even if there's just 1 row + imgs = [imgs] + + num_rows = len(imgs) + num_cols = len(imgs[0]) + _, axs = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False) + for row_idx, row in enumerate(imgs): + for col_idx, img in enumerate(row): + boxes = None + masks = None + if isinstance(img, tuple): + img, target = img + if isinstance(target, dict): + boxes = target.get("boxes") + masks = target.get("masks") + elif isinstance(target, tv_tensors.BoundingBoxes): + boxes = target +
else: + raise ValueError(f"Unexpected target type: {type(target)}") + img = F.to_image(img) + if img.dtype.is_floating_point and img.min() < 0: + # Poor man's re-normalization for the colors to be OK-ish. This + # is useful for images coming out of Normalize() + img -= img.min() + img /= img.max() + + img = F.to_dtype(img, torch.uint8, scale=True) + if boxes is not None: + img = draw_bounding_boxes(img, boxes, colors="yellow", width=3) + if masks is not None: + img = draw_segmentation_masks(img, masks.to(torch.bool), colors=["green"] * masks.shape[0], alpha=.65) + + ax = axs[row_idx, col_idx] + ax.imshow(img.permute(1, 2, 0).numpy(), **imshow_kwargs) + ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) + + if row_title is not None: + for row_idx in range(num_rows): + axs[row_idx, 0].set(ylabel=row_title[row_idx]) + + plt.tight_layout() diff --git a/gallery/transforms/plot_custom_transforms.py b/gallery/transforms/plot_custom_transforms.py new file mode 100644 index 00000000000..d1bd9455bfb --- /dev/null +++ b/gallery/transforms/plot_custom_transforms.py @@ -0,0 +1,200 @@ +""" +=================================== +How to write your own v2 transforms +=================================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +This guide explains how to write transforms that are compatible with the +torchvision transforms V2 API. +""" + +# %% +from typing import Any, Dict, List + +import torch +from torchvision import tv_tensors +from torchvision.transforms import v2 + + +# %% +# Just create a ``nn.Module`` and override the ``forward`` method +# =============================================================== +# +# In most cases, this is all you're going to need, as long as you already know +# the structure of the input that your transform will expect. For example if +# you're just doing image classification, your transform will typically accept a +# single image as input, or a ``(img, label)`` input. 
So you can just hard-code +# your ``forward`` method to accept just that, e.g. +# +# .. code:: python +# +# class MyCustomTransform(torch.nn.Module): +# def forward(self, img, label): +# # Do some transformations +# return new_img, new_label +# +# .. note:: +# +# This means that if you have a custom transform that is already compatible +# with the V1 transforms (those in ``torchvision.transforms``), it will +# still work with the V2 transforms without any change! +# +# We will illustrate this more completely below with a typical detection case, +# where our samples are just images, bounding boxes and labels: + +class MyCustomTransform(torch.nn.Module): + def forward(self, img, bboxes, label): # we assume inputs are always structured like this + print( + f"I'm transforming an image of shape {img.shape} " + f"with bboxes = {bboxes}\n{label = }" + ) + # Do some transformations. Here, we're just passing through the input + return img, bboxes, label + + +transforms = v2.Compose([ + MyCustomTransform(), + v2.RandomResizedCrop((224, 224), antialias=True), + v2.RandomHorizontalFlip(p=1), + v2.Normalize(mean=[0, 0, 0], std=[1, 1, 1]) +]) + +H, W = 256, 256 +img = torch.rand(3, H, W) +bboxes = tv_tensors.BoundingBoxes( + torch.tensor([[0, 10, 10, 20], [50, 50, 70, 70]]), + format="XYXY", + canvas_size=(H, W) +) +label = 3 + +out_img, out_bboxes, out_label = transforms(img, bboxes, label) +# %% +print(f"Output image shape: {out_img.shape}\nout_bboxes = {out_bboxes}\n{out_label = }") +# %% +# .. note:: +# While working with TVTensor classes in your code, make sure to +# familiarize yourself with this section: +# :ref:`tv_tensor_unwrapping_behaviour` +# +# Supporting arbitrary input structures +# ===================================== +# +# In the section above, we have assumed that you already know the structure of +# your inputs and that you're OK with hard-coding this expected structure in +# your code.
If you want your custom transforms to be as flexible as possible, +# this can be a bit limiting. +# +# A key feature of the builtin Torchvision V2 transforms is that they can accept +# arbitrary input structure and return the same structure as output (with +# transformed entries). For example, transforms can accept a single image, or a +# tuple of ``(img, label)``, or an arbitrary nested dictionary as input. Here's +# an example on the built-in transform :class:`~torchvision.transforms.v2.RandomHorizontalFlip`: + +structured_input = { + "img": img, + "annotations": (bboxes, label), + "something that will be ignored": (1, "hello"), + "another tensor that is ignored": torch.arange(10), +} +structured_output = v2.RandomHorizontalFlip(p=1)(structured_input) + +assert isinstance(structured_output, dict) +assert structured_output["something that will be ignored"] == (1, "hello") +assert (structured_output["another tensor that is ignored"] == torch.arange(10)).all() +print(f"The input bboxes are:\n{structured_input['annotations'][0]}") +print(f"The transformed bboxes are:\n{structured_output['annotations'][0]}") + +# %% +# Basics: override the `transform()` method +# ----------------------------------------- +# +# In order to support arbitrary inputs in your custom transform, you will need +# to inherit from :class:`~torchvision.transforms.v2.Transform` and override the +# `.transform()` method (not the `forward()` method!). Below is a basic example: + + +class MyCustomTransform(v2.Transform): + def transform(self, inpt: Any, params: Dict[str, Any]): + if type(inpt) == torch.Tensor: + print(f"I'm transforming an image of shape {inpt.shape}") + return inpt + 1 # dummy transformation + elif isinstance(inpt, tv_tensors.BoundingBoxes): + print(f"I'm transforming bounding boxes! 
{inpt.canvas_size = }") + return tv_tensors.wrap(inpt + 100, like=inpt) # dummy transformation + + +my_custom_transform = MyCustomTransform() +structured_output = my_custom_transform(structured_input) + +assert isinstance(structured_output, dict) +assert structured_output["something that will be ignored"] == (1, "hello") +assert (structured_output["another tensor that is ignored"] == torch.arange(10)).all() +print(f"The input bboxes are:\n{structured_input['annotations'][0]}") +print(f"The transformed bboxes are:\n{structured_output['annotations'][0]}") + +# %% +# An important thing to note is that when we call ``my_custom_transform`` on +# ``structured_input``, the input is flattened and then each individual part is +# passed to ``transform()``. That is, ``transform()`` receives the input image, +# then the bounding boxes, etc. Within ``transform()``, you can decide how to +# transform each input, based on their type. +# +# If you're curious why the other tensor (``torch.arange()``) didn't get passed +# to ``transform()``, see :ref:`this note ` for more +# details. +# +# Advanced: The ``make_params()`` method +# -------------------------------------- +# +# The ``make_params()`` method is called internally before calling +# ``transform()`` on each input. This is typically useful to generate random +# parameter values.
In the example below, we use it to randomly apply the +# transformation with a probability of 0.5 + + +class MyRandomTransform(MyCustomTransform): + def __init__(self, p=0.5): + self.p = p + super().__init__() + + def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + apply_transform = (torch.rand(size=(1,)) < self.p).item() + params = dict(apply_transform=apply_transform) + return params + + def transform(self, inpt: Any, params: Dict[str, Any]): + if not params["apply_transform"]: + print("Not transforming anything!") + return inpt + else: + return super().transform(inpt, params) + + +my_random_transform = MyRandomTransform() + +torch.manual_seed(0) +_ = my_random_transform(structured_input) # transforms +_ = my_random_transform(structured_input) # doesn't transform + +# %% +# +# .. note:: +# +# It's important for such random parameter generation to happen within +# ``make_params()`` and not within ``transform()``, so that for a given +# transform call, the same RNG applies to all the inputs in the same way. If +# we were to perform the RNG within ``transform()``, we would risk e.g. +# transforming the image while *not* transforming the bounding boxes. +# +# The ``make_params()`` method takes the list of all the inputs as parameter +# (each of the elements in this list will later be passed to ``transform()``). +# You can use ``flat_inputs`` to e.g. figure out the dimensions on the input, +# using :func:`~torchvision.transforms.v2.query_chw` or +# :func:`~torchvision.transforms.v2.query_size`. +# +# ``make_params()`` should return a dict (or actually, anything you want) that +# will then be passed to ``transform()``.
diff --git a/gallery/transforms/plot_custom_tv_tensors.py b/gallery/transforms/plot_custom_tv_tensors.py new file mode 100644 index 00000000000..9b113901461 --- /dev/null +++ b/gallery/transforms/plot_custom_tv_tensors.py @@ -0,0 +1,119 @@ +""" +==================================== +How to write your own TVTensor class +==================================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +This guide is intended for advanced users and downstream library maintainers. We explain how to +write your own TVTensor class, and how to make it compatible with the built-in +Torchvision v2 transforms. Before continuing, make sure you have read +:ref:`sphx_glr_auto_examples_transforms_plot_tv_tensors.py`. +""" + +# %% +import torch +from torchvision import tv_tensors +from torchvision.transforms import v2 + +# %% +# We will create a very simple class that just inherits from the base +# :class:`~torchvision.tv_tensors.TVTensor` class. It will be enough to cover +# what you need to know to implement your more elaborate use-cases. If you need +# to create a class that carries meta-data, take a look at how the +# :class:`~torchvision.tv_tensors.BoundingBoxes` class is `implemented +# `_. + + +class MyTVTensor(tv_tensors.TVTensor): + pass + + +my_dp = MyTVTensor([1, 2, 3]) +my_dp + +# %% +# Now that we have defined our custom TVTensor class, we want it to be +# compatible with the built-in torchvision transforms, and the functional API. +# For that, we need to implement a kernel which performs the core of the +# transformation, and then "hook" it to the functional that we want to support +# via :func:`~torchvision.transforms.v2.functional.register_kernel`. +# +# We illustrate this process below: we create a kernel for the "horizontal flip" +# operation of our MyTVTensor class, and register it to the functional API.
+ +from torchvision.transforms.v2 import functional as F + + +@F.register_kernel(functional="hflip", tv_tensor_cls=MyTVTensor) +def hflip_my_tv_tensor(my_dp, *args, **kwargs): + print("Flipping!") + out = my_dp.flip(-1) + return tv_tensors.wrap(out, like=my_dp) + + +# %% +# To understand why :func:`~torchvision.tv_tensors.wrap` is used, see +# :ref:`tv_tensor_unwrapping_behaviour`. Ignore the ``*args, **kwargs`` for now, +# we will explain it below in :ref:`param_forwarding`. +# +# .. note:: +# +# In our call to ``register_kernel`` above we used a string +# ``functional="hflip"`` to refer to the functional we want to hook into. We +# could also have used the functional *itself*, i.e. +# ``@register_kernel(functional=F.hflip, ...)``. +# +# Now that we have registered our kernel, we can call the functional API on a +# ``MyTVTensor`` instance: + +my_dp = MyTVTensor(torch.rand(3, 256, 256)) +_ = F.hflip(my_dp) + +# %% +# And we can also use the +# :class:`~torchvision.transforms.v2.RandomHorizontalFlip` transform, since it relies on :func:`~torchvision.transforms.v2.functional.hflip` internally: +t = v2.RandomHorizontalFlip(p=1) +_ = t(my_dp) + +# %% +# .. note:: +# +# We cannot register a kernel for a transform class, we can only register a +# kernel for a **functional**. The reason we can't register a transform +# class is because one transform may internally rely on more than one +# functional, so in general we can't register a single kernel for a given +# class. +# +# .. _param_forwarding: +# +# Parameter forwarding, and ensuring future compatibility of your kernels +# ----------------------------------------------------------------------- +# +# The functional API that you're hooking into is public and therefore +# **backward** compatible: we guarantee that the parameters of these functionals +# won't be removed or renamed without a proper deprecation cycle. However, we +# don't guarantee **forward** compatibility, and we may add new parameters in +# the future. 
+# +# Imagine that in a future version, Torchvision adds a new ``inplace`` parameter +# to its :func:`~torchvision.transforms.v2.functional.hflip` functional. If you +# already defined and registered your own kernel as + +def hflip_my_tv_tensor(my_dp): # noqa + print("Flipping!") + out = my_dp.flip(-1) + return tv_tensors.wrap(out, like=my_dp) + + +# %% +# then calling ``F.hflip(my_dp)`` will **fail**, because ``hflip`` will try to +# pass the new ``inplace`` parameter to your kernel, but your kernel doesn't +# accept it. +# +# For this reason, we recommend to always define your kernels with +# ``*args, **kwargs`` in their signature, as done above. This way, your kernel +# will be able to accept any new parameter that we may add in the future. +# (Technically, adding `**kwargs` only should be enough). diff --git a/gallery/transforms/plot_cutmix_mixup.py b/gallery/transforms/plot_cutmix_mixup.py new file mode 100644 index 00000000000..222be0ff359 --- /dev/null +++ b/gallery/transforms/plot_cutmix_mixup.py @@ -0,0 +1,150 @@ + +""" +=========================== +How to use CutMix and MixUp +=========================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +:class:`~torchvision.transforms.v2.CutMix` and +:class:`~torchvision.transforms.v2.MixUp` are popular augmentation strategies +that can improve classification accuracy. + +These transforms are slightly different from the rest of the Torchvision +transforms, because they expect +**batches** of samples as input, not individual images. In this example we'll +explain how to use them: after the ``DataLoader``, or as part of a collation +function. 
+""" + +# %% +import torch +from torchvision.datasets import FakeData +from torchvision.transforms import v2 + + +NUM_CLASSES = 100 + +# %% +# Pre-processing pipeline +# ----------------------- +# +# We'll use a simple but typical image classification pipeline: + +preproc = v2.Compose([ + v2.PILToTensor(), + v2.RandomResizedCrop(size=(224, 224), antialias=True), + v2.RandomHorizontalFlip(p=0.5), + v2.ToDtype(torch.float32, scale=True), # to float32 in [0, 1] + v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), # typically from ImageNet +]) + +dataset = FakeData(size=1000, num_classes=NUM_CLASSES, transform=preproc) + +img, label = dataset[0] +print(f"{type(img) = }, {img.dtype = }, {img.shape = }, {label = }") + +# %% +# +# One important thing to note is that neither CutMix nor MixUp are part of this +# pre-processing pipeline. We'll add them a bit later once we define the +# DataLoader. Just as a refresher, this is what the DataLoader and training loop +# would look like if we weren't using CutMix or MixUp: + +from torch.utils.data import DataLoader + +dataloader = DataLoader(dataset, batch_size=4, shuffle=True) + +for images, labels in dataloader: + print(f"{images.shape = }, {labels.shape = }") + print(labels.dtype) + # + break +# %% + +# %% +# Where to use MixUp and CutMix +# ----------------------------- +# +# After the DataLoader +# ^^^^^^^^^^^^^^^^^^^^ +# +# Now let's add CutMix and MixUp. 
The simplest way to do this is right after the +# DataLoader: the DataLoader has already batched the images and labels for us, +# and this is exactly what these transforms expect as input: + +dataloader = DataLoader(dataset, batch_size=4, shuffle=True) + +cutmix = v2.CutMix(num_classes=NUM_CLASSES) +mixup = v2.MixUp(num_classes=NUM_CLASSES) +cutmix_or_mixup = v2.RandomChoice([cutmix, mixup]) + +for images, labels in dataloader: + print(f"Before CutMix/MixUp: {images.shape = }, {labels.shape = }") + images, labels = cutmix_or_mixup(images, labels) + print(f"After CutMix/MixUp: {images.shape = }, {labels.shape = }") + + # + break +# %% +# +# Note how the labels were also transformed: we went from a batched label of +# shape (batch_size,) to a tensor of shape (batch_size, num_classes). The +# transformed labels can still be passed as-is to a loss function like +# :func:`torch.nn.functional.cross_entropy`. +# +# As part of the collation function +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Passing the transforms after the DataLoader is the simplest way to use CutMix +# and MixUp, but one disadvantage is that it does not take advantage of the +# DataLoader multi-processing. For that, we can pass those transforms as part of +# the collation function (refer to the `PyTorch docs +# `_ to learn +# more about collation). + +from torch.utils.data import default_collate + + +def collate_fn(batch): + return cutmix_or_mixup(*default_collate(batch)) + + +dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2, collate_fn=collate_fn) + +for images, labels in dataloader: + print(f"{images.shape = }, {labels.shape = }") + # No need to call cutmix_or_mixup, it's already been called as part of the DataLoader! + # + break + +# %% +# Non-standard input format +# ------------------------- +# +# So far we've used a typical sample structure where we pass ``(images, +# labels)`` as inputs.
MixUp and CutMix will magically work by default with most +# common sample structures: tuples where the second parameter is a tensor label, +# or dict with a "label[s]" key. Look at the documentation of the +# ``labels_getter`` parameter for more details. +# +# If your samples have a different structure, you can still use CutMix and MixUp +# by passing a callable to the ``labels_getter`` parameter. For example: + +batch = { + "imgs": torch.rand(4, 3, 224, 224), + "target": { + "classes": torch.randint(0, NUM_CLASSES, size=(4,)), + "some_other_key": "this is going to be passed-through" + } +} + + +def labels_getter(batch): + return batch["target"]["classes"] + + +out = v2.CutMix(num_classes=NUM_CLASSES, labels_getter=labels_getter)(batch) +print(f"{out['imgs'].shape = }, {out['target']['classes'].shape = }") diff --git a/gallery/transforms/plot_transforms_e2e.py b/gallery/transforms/plot_transforms_e2e.py new file mode 100644 index 00000000000..765d7ad51e5 --- /dev/null +++ b/gallery/transforms/plot_transforms_e2e.py @@ -0,0 +1,181 @@ +""" +=============================================================== +Transforms v2: End-to-end object detection/segmentation example +=============================================================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +Object detection and segmentation tasks are natively supported: +``torchvision.transforms.v2`` enables jointly transforming images, videos, +bounding boxes, and masks. + +This example showcases an end-to-end instance segmentation training case using +Torchvision utils from ``torchvision.datasets``, ``torchvision.models`` and +``torchvision.transforms.v2``. Everything covered here can be applied similarly +to object detection or semantic segmentation tasks. 
+""" + +# %% +import pathlib + +import torch +import torch.utils.data + +from torchvision import models, datasets, tv_tensors +from torchvision.transforms import v2 + +torch.manual_seed(0) + +# This loads fake data for illustration purposes of this example. In practice, you'll have +# to replace this with the proper data. +# If you're trying to run that on Colab, you can download the assets and the +# helpers from https://github.com/pytorch/vision/tree/main/gallery/ +ROOT = pathlib.Path("../assets") / "coco" +IMAGES_PATH = str(ROOT / "images") +ANNOTATIONS_PATH = str(ROOT / "instances.json") +from helpers import plot + + +# %% +# Dataset preparation +# ------------------- +# +# We start off by loading the :class:`~torchvision.datasets.CocoDetection` dataset to have a look at what it currently +# returns. + +dataset = datasets.CocoDetection(IMAGES_PATH, ANNOTATIONS_PATH) + +sample = dataset[0] +img, target = sample +print(f"{type(img) = }\n{type(target) = }\n{type(target[0]) = }\n{target[0].keys() = }") + + +# %% +# Torchvision datasets preserve the data structure and types as it was intended +# by the datasets authors. So by default, the output structure may not always be +# compatible with the models or the transforms. +# +# To overcome that, we can use the +# :func:`~torchvision.datasets.wrap_dataset_for_transforms_v2` function. For +# :class:`~torchvision.datasets.CocoDetection`, this changes the target +# structure to a single dictionary of lists: + +dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys=("boxes", "labels", "masks")) + +sample = dataset[0] +img, target = sample +print(f"{type(img) = }\n{type(target) = }\n{target.keys() = }") +print(f"{type(target['boxes']) = }\n{type(target['labels']) = }\n{type(target['masks']) = }") + +# %% +# We used the ``target_keys`` parameter to specify the kind of output we're +# interested in. 
Our dataset now returns a target which is a dict where the values +# are :ref:`TVTensors <what_are_tv_tensors>` (all are :class:`torch.Tensor` +# subclasses). We've dropped all unnecessary keys from the previous output, but +# if you need any of the original keys e.g. "image_id", you can still ask for +# it. +# +# .. note:: +# +# If you just want to do detection, you don't need and shouldn't pass +# "masks" in ``target_keys``: if masks are present in the sample, they will +# be transformed, slowing down your transformations unnecessarily. +# +# As a baseline, let's have a look at a sample without transformations: + +plot([dataset[0], dataset[1]]) + + +# %% +# Transforms +# ---------- +# +# Let's now define our pre-processing transforms. All the transforms know how +# to handle images, bounding boxes and masks when relevant. +# +# Transforms are typically passed as the ``transforms`` parameter of the +# dataset so that they can leverage multi-processing from the +# :class:`torch.utils.data.DataLoader`. + +transforms = v2.Compose( + [ + v2.ToImage(), + v2.RandomPhotometricDistort(p=1), + v2.RandomZoomOut(fill={tv_tensors.Image: (123, 117, 104), "others": 0}), + v2.RandomIoUCrop(), + v2.RandomHorizontalFlip(p=1), + v2.SanitizeBoundingBoxes(), + v2.ToDtype(torch.float32, scale=True), + ] +) + +dataset = datasets.CocoDetection(IMAGES_PATH, ANNOTATIONS_PATH, transforms=transforms) +dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys=["boxes", "labels", "masks"]) + +# %% +# A few things are worth noting here: +# +# - We're converting the PIL image into a +# :class:`~torchvision.tv_tensors.Image` object. This isn't strictly +# necessary, but relying on Tensors (here: a Tensor subclass) will +# :ref:`generally be faster <transforms_perf>`. +# - We are calling :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes` to +# make sure we remove degenerate bounding boxes, as well as their +# corresponding labels and masks.
+# :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes` should be placed +# at least once at the end of a detection pipeline; it is particularly +# critical if :class:`~torchvision.transforms.v2.RandomIoUCrop` was used. +# +# Let's look at what the sample looks like with our augmentation pipeline in place: + +# sphinx_gallery_thumbnail_number = 2 +plot([dataset[0], dataset[1]]) + + +# %% +# We can see that the images were color-distorted, zoomed in or out, and flipped. +# The bounding boxes and the masks were transformed accordingly. And without any further ado, we can start training. +# +# Data loading and training loop +# ------------------------------ +# +# Below we're using Mask-RCNN which is an instance segmentation model, but +# everything we've covered in this tutorial also applies to object detection and +# semantic segmentation tasks. + +data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=2, + # We need a custom collation function here, since the object detection + # models expect a sequence of images and target dictionaries. The default + # collation function tries to torch.stack() the individual elements, + # which fails in general for object detection, because the number of bounding + # boxes varies between the images of the same batch. + collate_fn=lambda batch: tuple(zip(*batch)), +) + +model = models.get_model("maskrcnn_resnet50_fpn_v2", weights=None, weights_backbone=None).train() + +for imgs, targets in data_loader: + loss_dict = model(imgs, targets) + # Put your training logic here + + print(f"{[img.shape for img in imgs] = }") + print(f"{[type(target) for target in targets] = }") + for name, loss_val in loss_dict.items(): + print(f"{name:<20}{loss_val:.3f}") + +# %% +# Training References +# ------------------- +# +# From there, you can check out the `torchvision references +# <https://github.com/pytorch/vision/tree/main/references>`_ where you'll find +# the actual training scripts we use to train our models.
+# +# **Disclaimer** The code in our references is more complex than what you'll +# need for your own use-cases: this is because we're supporting different +# backends (PIL, tensors, TVTensors) and different transforms namespaces (v1 and +# v2). So don't be afraid to simplify and only keep what you need. diff --git a/gallery/transforms/plot_transforms_getting_started.py b/gallery/transforms/plot_transforms_getting_started.py new file mode 100644 index 00000000000..2696a9e57e7 --- /dev/null +++ b/gallery/transforms/plot_transforms_getting_started.py @@ -0,0 +1,266 @@ +""" +================================== +Getting started with transforms v2 +================================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +This example illustrates all of what you need to know to get started with the +new :mod:`torchvision.transforms.v2` API. We'll cover simple tasks like +image classification, and more advanced ones like object detection / +segmentation. 
+""" + +# %% +# First, a bit of setup +from pathlib import Path +import torch +import matplotlib.pyplot as plt +plt.rcParams["savefig.bbox"] = 'tight' + +from torchvision.transforms import v2 +from torchvision.io import decode_image + +torch.manual_seed(1) + +# If you're trying to run that on Colab, you can download the assets and the +# helpers from https://github.com/pytorch/vision/tree/main/gallery/ +from helpers import plot +img = decode_image(str(Path('../assets') / 'astronaut.jpg')) +print(f"{type(img) = }, {img.dtype = }, {img.shape = }") + +# %% +# The basics +# ---------- +# +# The Torchvision transforms behave like a regular :class:`torch.nn.Module` (in +# fact, most of them are): instantiate a transform, pass an input, get a +# transformed output: + +transform = v2.RandomCrop(size=(224, 224)) +out = transform(img) + +plot([img, out]) + +# %% +# I just want to do image classification +# -------------------------------------- +# +# If you just care about image classification, things are very simple. A basic +# classification pipeline may look like this: + +transforms = v2.Compose([ + v2.RandomResizedCrop(size=(224, 224), antialias=True), + v2.RandomHorizontalFlip(p=0.5), + v2.ToDtype(torch.float32, scale=True), + v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +]) +out = transforms(img) + +plot([img, out]) + +# %% +# Such transformation pipeline is typically passed as the ``transform`` argument +# to the :ref:`Datasets `, e.g. ``ImageNet(..., +# transform=transforms)``. +# +# That's pretty much all there is. From there, read through our :ref:`main docs +# ` to learn more about recommended practices and conventions, or +# explore more :ref:`examples ` e.g. how to use augmentation +# transforms like :ref:`CutMix and MixUp +# `. +# +# .. note:: +# +# If you're already relying on the ``torchvision.transforms`` v1 API, +# we recommend to :ref:`switch to the new v2 transforms`. 
It's +# very easy: the v2 transforms are fully compatible with the v1 API, so you +# only need to change the import! +# +# Detection, Segmentation, Videos +# ------------------------------- +# +# The new Torchvision transforms in the ``torchvision.transforms.v2`` namespace +# support tasks beyond image classification: they can also transform bounding +# boxes, segmentation / detection masks, or videos. +# +# Let's briefly look at a detection example with bounding boxes. + +from torchvision import tv_tensors # we'll describe this a bit later, bear with us + +boxes = tv_tensors.BoundingBoxes( + [ + [15, 10, 370, 510], + [275, 340, 510, 510], + [130, 345, 210, 425] + ], + format="XYXY", canvas_size=img.shape[-2:]) + +transforms = v2.Compose([ + v2.RandomResizedCrop(size=(224, 224), antialias=True), + v2.RandomPhotometricDistort(p=1), + v2.RandomHorizontalFlip(p=1), +]) +out_img, out_boxes = transforms(img, boxes) +print(type(boxes), type(out_boxes)) + +plot([(img, boxes), (out_img, out_boxes)]) + +# %% +# +# The example above focuses on object detection. But if we had masks +# (:class:`torchvision.tv_tensors.Mask`) for object segmentation or semantic +# segmentation, or videos (:class:`torchvision.tv_tensors.Video`), we could have +# passed them to the transforms in exactly the same way. +# +# By now you likely have a few questions: what are these TVTensors, how do we +# use them, and what is the expected input/output of those transforms? We'll +# answer these in the next sections. + +# %% +# +# .. _what_are_tv_tensors: +# +# What are TVTensors? +# -------------------- +# +# TVTensors are :class:`torch.Tensor` subclasses. The available TVTensors are +# :class:`~torchvision.tv_tensors.Image`, +# :class:`~torchvision.tv_tensors.BoundingBoxes`, +# :class:`~torchvision.tv_tensors.Mask`, and +# :class:`~torchvision.tv_tensors.Video`. +# +# TVTensors look and feel just like regular tensors - they **are** tensors.
+# Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()`` +# or any ``torch.*`` operator will also work on a TVTensor: + +img_dp = tv_tensors.Image(torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8)) + +print(f"{isinstance(img_dp, torch.Tensor) = }") +print(f"{img_dp.dtype = }, {img_dp.shape = }, {img_dp.sum() = }") + +# %% +# These TVTensor classes are at the core of the transforms: in order to +# transform a given input, the transforms first look at the **class** of the +# object, and dispatch to the appropriate implementation accordingly. +# +# You don't need to know much more about TVTensors at this point, but advanced +# users who want to learn more can refer to +# :ref:`sphx_glr_auto_examples_transforms_plot_tv_tensors.py`. +# +# What do I pass as input? +# ------------------------ +# +# Above, we've seen two examples: one where we passed a single image as input +# i.e. ``out = transforms(img)``, and one where we passed both an image and +# bounding boxes, i.e. ``out_img, out_boxes = transforms(img, boxes)``. +# +# In fact, transforms support **arbitrary input structures**. The input can be a +# single image, a tuple, an arbitrarily nested dictionary... pretty much +# anything. The same structure will be returned as output. Below, we use the +# same detection transforms, but pass a tuple (image, target_dict) as input and +# we're getting the same structure as output: + +target = { + "boxes": boxes, + "labels": torch.arange(boxes.shape[0]), + "this_is_ignored": ("arbitrary", {"structure": "!"}) +} + +# Re-using the transforms and definitions from above. +out_img, out_target = transforms(img, target) + +# sphinx_gallery_thumbnail_number = 4 +plot([(img, target["boxes"]), (out_img, out_target["boxes"])]) +print(f"{out_target['this_is_ignored']}") + +# %% +# We passed a tuple so we get a tuple back, and the second element is the +# transformed target dict.
Transforms don't really care about the structure of +# the input; as mentioned above, they only care about the **type** of the +# objects and transforms them accordingly. +# +# *Foreign* objects like strings or ints are simply passed-through. This can be +# useful e.g. if you want to associate a path with every single sample when +# debugging! +# +# .. _passthrough_heuristic: +# +# .. note:: +# +# **Disclaimer** This note is slightly advanced and can be safely skipped on +# a first read. +# +# Pure :class:`torch.Tensor` objects are, in general, treated as images (or +# as videos for video-specific transforms). Indeed, you may have noticed +# that in the code above we haven't used the +# :class:`~torchvision.tv_tensors.Image` class at all, and yet our images +# got transformed properly. Transforms follow the following logic to +# determine whether a pure Tensor should be treated as an image (or video), +# or just ignored: +# +# * If there is an :class:`~torchvision.tv_tensors.Image`, +# :class:`~torchvision.tv_tensors.Video`, +# or :class:`PIL.Image.Image` instance in the input, all other pure +# tensors are passed-through. +# * If there is no :class:`~torchvision.tv_tensors.Image` or +# :class:`~torchvision.tv_tensors.Video` instance, only the first pure +# :class:`torch.Tensor` will be transformed as image or video, while all +# others will be passed-through. Here "first" means "first in a depth-wise +# traversal". +# +# This is what happened in the detection example above: the first pure +# tensor was the image so it got transformed properly, and all other pure +# tensor instances like the ``labels`` were passed-through (although labels +# can still be transformed by some transforms like +# :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes`!). +# +# .. 
_transforms_datasets_intercompatibility: +# +# Transforms and Datasets intercompatibility +# ------------------------------------------ +# +# Roughly speaking, the output of the datasets must correspond to the input of +# the transforms. How to do that depends on whether you're using the torchvision +# :ref:`built-in datasets <datasets>`, or your own custom datasets. +# +# Using built-in datasets +# ^^^^^^^^^^^^^^^^^^^^^^^ +# +# If you're just doing image classification, you don't need to do anything. Just +# use the ``transform`` argument of the dataset e.g. ``ImageNet(..., +# transform=transforms)`` and you're good to go. +# +# Torchvision also supports datasets for object detection or segmentation like +# :class:`torchvision.datasets.CocoDetection`. Those datasets predate +# the existence of the :mod:`torchvision.transforms.v2` module and of the +# TVTensors, so they don't return TVTensors out of the box. +# +# An easy way to force those datasets to return TVTensors and to make them +# compatible with v2 transforms is to use the +# :func:`torchvision.datasets.wrap_dataset_for_transforms_v2` function: +# +# .. code-block:: python +# +# from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 +# +# dataset = CocoDetection(..., transforms=my_transforms) +# dataset = wrap_dataset_for_transforms_v2(dataset) +# # Now the dataset returns TVTensors! +# +# Using your own datasets +# ^^^^^^^^^^^^^^^^^^^^^^^ +# +# If you have a custom dataset, then you'll need to convert your objects into +# the appropriate TVTensor classes. Creating TVTensor instances is very easy, +# refer to :ref:`tv_tensor_creation` for more details. +# +# There are two main places where you can implement that conversion logic: +# +# - At the end of the dataset's ``__getitem__`` method, before returning the +# sample (or by sub-classing the dataset). +# - As the very first step of your transforms pipeline. +# +# Either way, the logic will depend on your specific dataset.
diff --git a/gallery/transforms/plot_transforms_illustrations.py b/gallery/transforms/plot_transforms_illustrations.py new file mode 100644 index 00000000000..0c1f3b40021 --- /dev/null +++ b/gallery/transforms/plot_transforms_illustrations.py @@ -0,0 +1,331 @@ +""" +========================== +Illustration of transforms +========================== + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + +This example illustrates some of the various transforms available in :ref:`the +torchvision.transforms.v2 module `. +""" +# %% + +# sphinx_gallery_thumbnail_path = "../../gallery/assets/transforms_thumbnail.png" + +from PIL import Image +from pathlib import Path +import matplotlib.pyplot as plt + +import torch +from torchvision.transforms import v2 + +plt.rcParams["savefig.bbox"] = 'tight' + +# if you change the seed, make sure that the randomly-applied transforms +# properly show that the image can be both transformed and *not* transformed! +torch.manual_seed(0) + +# If you're trying to run that on Colab, you can download the assets and the +# helpers from https://github.com/pytorch/vision/tree/main/gallery/ +from helpers import plot +orig_img = Image.open(Path('../assets') / 'astronaut.jpg') + +# %% +# Geometric Transforms +# -------------------- +# Geometric image transformation refers to the process of altering the geometric properties of an image, +# such as its shape, size, orientation, or position. +# It involves applying mathematical operations to the image pixels or coordinates to achieve the desired transformation. +# +# Pad +# ~~~ +# The :class:`~torchvision.transforms.Pad` transform +# (see also :func:`~torchvision.transforms.functional.pad`) +# pads all image borders with some pixel values. 
+padded_imgs = [v2.Pad(padding=padding)(orig_img) for padding in (3, 10, 30, 50)] +plot([orig_img] + padded_imgs) + +# %% +# Resize +# ~~~~~~ +# The :class:`~torchvision.transforms.Resize` transform +# (see also :func:`~torchvision.transforms.functional.resize`) +# resizes an image. +resized_imgs = [v2.Resize(size=size)(orig_img) for size in (30, 50, 100, orig_img.size)] +plot([orig_img] + resized_imgs) + +# %% +# CenterCrop +# ~~~~~~~~~~ +# The :class:`~torchvision.transforms.CenterCrop` transform +# (see also :func:`~torchvision.transforms.functional.center_crop`) +# crops the given image at the center. +center_crops = [v2.CenterCrop(size=size)(orig_img) for size in (30, 50, 100, orig_img.size)] +plot([orig_img] + center_crops) + +# %% +# FiveCrop +# ~~~~~~~~ +# The :class:`~torchvision.transforms.FiveCrop` transform +# (see also :func:`~torchvision.transforms.functional.five_crop`) +# crops the given image into four corners and the central crop. +(top_left, top_right, bottom_left, bottom_right, center) = v2.FiveCrop(size=(100, 100))(orig_img) +plot([orig_img] + [top_left, top_right, bottom_left, bottom_right, center]) + +# %% +# RandomPerspective +# ~~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomPerspective` transform +# (see also :func:`~torchvision.transforms.functional.perspective`) +# performs random perspective transform on an image. +perspective_transformer = v2.RandomPerspective(distortion_scale=0.6, p=1.0) +perspective_imgs = [perspective_transformer(orig_img) for _ in range(4)] +plot([orig_img] + perspective_imgs) + +# %% +# RandomRotation +# ~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomRotation` transform +# (see also :func:`~torchvision.transforms.functional.rotate`) +# rotates an image with random angle. 
+rotater = v2.RandomRotation(degrees=(0, 180)) +rotated_imgs = [rotater(orig_img) for _ in range(4)] +plot([orig_img] + rotated_imgs) + +# %% +# RandomAffine +# ~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomAffine` transform +# (see also :func:`~torchvision.transforms.functional.affine`) +# performs random affine transform on an image. +affine_transfomer = v2.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)) +affine_imgs = [affine_transfomer(orig_img) for _ in range(4)] +plot([orig_img] + affine_imgs) + +# %% +# ElasticTransform +# ~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.ElasticTransform` transform +# (see also :func:`~torchvision.transforms.functional.elastic_transform`) +# Randomly transforms the morphology of objects in images and produces a +# see-through-water-like effect. +elastic_transformer = v2.ElasticTransform(alpha=250.0) +transformed_imgs = [elastic_transformer(orig_img) for _ in range(2)] +plot([orig_img] + transformed_imgs) + +# %% +# RandomCrop +# ~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomCrop` transform +# (see also :func:`~torchvision.transforms.functional.crop`) +# crops an image at a random location. +cropper = v2.RandomCrop(size=(128, 128)) +crops = [cropper(orig_img) for _ in range(4)] +plot([orig_img] + crops) + +# %% +# RandomResizedCrop +# ~~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomResizedCrop` transform +# (see also :func:`~torchvision.transforms.functional.resized_crop`) +# crops an image at a random location, and then resizes the crop to a given +# size. +resize_cropper = v2.RandomResizedCrop(size=(32, 32)) +resized_crops = [resize_cropper(orig_img) for _ in range(4)] +plot([orig_img] + resized_crops) + +# %% +# Photometric Transforms +# ---------------------- +# Photometric image transformation refers to the process of modifying the photometric properties of an image, +# such as its brightness, contrast, color, or tone. 
+# These transformations are applied to change the visual appearance of an image +# while preserving its geometric structure. +# +# Except :class:`~torchvision.transforms.Grayscale`, the following transforms are random, +# which means that the same transform +# instance will produce different result each time it transforms a given image. +# +# Grayscale +# ~~~~~~~~~ +# The :class:`~torchvision.transforms.Grayscale` transform +# (see also :func:`~torchvision.transforms.functional.to_grayscale`) +# converts an image to grayscale +gray_img = v2.Grayscale()(orig_img) +plot([orig_img, gray_img], cmap='gray') + +# %% +# ColorJitter +# ~~~~~~~~~~~ +# The :class:`~torchvision.transforms.ColorJitter` transform +# randomly changes the brightness, contrast, saturation, hue, and other properties of an image. +jitter = v2.ColorJitter(brightness=.5, hue=.3) +jittered_imgs = [jitter(orig_img) for _ in range(4)] +plot([orig_img] + jittered_imgs) + +# %% +# GaussianBlur +# ~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.GaussianBlur` transform +# (see also :func:`~torchvision.transforms.functional.gaussian_blur`) +# performs gaussian blur transform on an image. +blurrer = v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.)) +blurred_imgs = [blurrer(orig_img) for _ in range(4)] +plot([orig_img] + blurred_imgs) + +# %% +# RandomInvert +# ~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomInvert` transform +# (see also :func:`~torchvision.transforms.functional.invert`) +# randomly inverts the colors of the given image. +inverter = v2.RandomInvert() +invertered_imgs = [inverter(orig_img) for _ in range(4)] +plot([orig_img] + invertered_imgs) + +# %% +# RandomPosterize +# ~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomPosterize` transform +# (see also :func:`~torchvision.transforms.functional.posterize`) +# randomly posterizes the image by reducing the number of bits +# of each color channel. 
+posterizer = v2.RandomPosterize(bits=2) +posterized_imgs = [posterizer(orig_img) for _ in range(4)] +plot([orig_img] + posterized_imgs) + +# %% +# RandomSolarize +# ~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomSolarize` transform +# (see also :func:`~torchvision.transforms.functional.solarize`) +# randomly solarizes the image by inverting all pixel values above +# the threshold. +solarizer = v2.RandomSolarize(threshold=192.0) +solarized_imgs = [solarizer(orig_img) for _ in range(4)] +plot([orig_img] + solarized_imgs) + +# %% +# RandomAdjustSharpness +# ~~~~~~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomAdjustSharpness` transform +# (see also :func:`~torchvision.transforms.functional.adjust_sharpness`) +# randomly adjusts the sharpness of the given image. +sharpness_adjuster = v2.RandomAdjustSharpness(sharpness_factor=2) +sharpened_imgs = [sharpness_adjuster(orig_img) for _ in range(4)] +plot([orig_img] + sharpened_imgs) + +# %% +# RandomAutocontrast +# ~~~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomAutocontrast` transform +# (see also :func:`~torchvision.transforms.functional.autocontrast`) +# randomly applies autocontrast to the given image. +autocontraster = v2.RandomAutocontrast() +autocontrasted_imgs = [autocontraster(orig_img) for _ in range(4)] +plot([orig_img] + autocontrasted_imgs) + +# %% +# RandomEqualize +# ~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomEqualize` transform +# (see also :func:`~torchvision.transforms.functional.equalize`) +# randomly equalizes the histogram of the given image. +equalizer = v2.RandomEqualize() +equalized_imgs = [equalizer(orig_img) for _ in range(4)] +plot([orig_img] + equalized_imgs) + +# %% +# JPEG +# ~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.v2.JPEG` transform +# (see also :func:`~torchvision.transforms.v2.functional.jpeg`) +# applies JPEG compression to the given image with random +# degree of compression. 
+jpeg = v2.JPEG((5, 50)) +jpeg_imgs = [jpeg(orig_img) for _ in range(4)] +plot([orig_img] + jpeg_imgs) + +# %% +# Augmentation Transforms +# ----------------------- +# The following transforms are combinations of multiple transforms, +# either geometric or photometric, or both. +# +# AutoAugment +# ~~~~~~~~~~~ +# The :class:`~torchvision.transforms.AutoAugment` transform +# automatically augments data based on a given auto-augmentation policy. +# See :class:`~torchvision.transforms.AutoAugmentPolicy` for the available policies. +policies = [v2.AutoAugmentPolicy.CIFAR10, v2.AutoAugmentPolicy.IMAGENET, v2.AutoAugmentPolicy.SVHN] +augmenters = [v2.AutoAugment(policy) for policy in policies] +imgs = [ + [augmenter(orig_img) for _ in range(4)] + for augmenter in augmenters +] +row_title = [str(policy).split('.')[-1] for policy in policies] +plot([[orig_img] + row for row in imgs], row_title=row_title) + +# %% +# RandAugment +# ~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandAugment` is an alternate version of AutoAugment. +augmenter = v2.RandAugment() +imgs = [augmenter(orig_img) for _ in range(4)] +plot([orig_img] + imgs) + +# %% +# TrivialAugmentWide +# ~~~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.TrivialAugmentWide` is an alternate implementation of AutoAugment. +# However, instead of transforming an image multiple times, it transforms an image only once +# using a random transform from a given list with a random strength number. +augmenter = v2.TrivialAugmentWide() +imgs = [augmenter(orig_img) for _ in range(4)] +plot([orig_img] + imgs) + +# %% +# AugMix +# ~~~~~~ +# The :class:`~torchvision.transforms.AugMix` transform interpolates between augmented versions of an image. +augmenter = v2.AugMix() +imgs = [augmenter(orig_img) for _ in range(4)] +plot([orig_img] + imgs) + +# %% +# Randomly-applied Transforms +# --------------------------- +# +# The following transforms are randomly-applied given a probability ``p``. 
That is, given ``p = 0.5``, +# there is a 50% chance to return the original image, and a 50% chance to return the transformed image, +# even when called with the same transform instance! +# +# RandomHorizontalFlip +# ~~~~~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomHorizontalFlip` transform +# (see also :func:`~torchvision.transforms.functional.hflip`) +# performs horizontal flip of an image, with a given probability. +hflipper = v2.RandomHorizontalFlip(p=0.5) +transformed_imgs = [hflipper(orig_img) for _ in range(4)] +plot([orig_img] + transformed_imgs) + +# %% +# RandomVerticalFlip +# ~~~~~~~~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomVerticalFlip` transform +# (see also :func:`~torchvision.transforms.functional.vflip`) +# performs vertical flip of an image, with a given probability. +vflipper = v2.RandomVerticalFlip(p=0.5) +transformed_imgs = [vflipper(orig_img) for _ in range(4)] +plot([orig_img] + transformed_imgs) + +# %% +# RandomApply +# ~~~~~~~~~~~ +# The :class:`~torchvision.transforms.RandomApply` transform +# randomly applies a list of transforms, with a given probability. +applier = v2.RandomApply(transforms=[v2.RandomCrop(size=(64, 64))], p=0.5) +transformed_imgs = [applier(orig_img) for _ in range(4)] +plot([orig_img] + transformed_imgs) diff --git a/gallery/transforms/plot_tv_tensors.py b/gallery/transforms/plot_tv_tensors.py new file mode 100644 index 00000000000..5bce37aa374 --- /dev/null +++ b/gallery/transforms/plot_tv_tensors.py @@ -0,0 +1,224 @@ +""" +============= +TVTensors FAQ +============= + +.. note:: + Try on `Colab `_ + or :ref:`go to the end ` to download the full example code. + + +TVTensors are Tensor subclasses introduced together with +``torchvision.transforms.v2``. This example showcases what these TVTensors are +and how they behave. + +.. warning:: + + **Intended Audience** Unless you're writing your own transforms or your own TVTensors, you + probably do not need to read this guide. 
This is a fairly low-level topic + that most users will not need to worry about: you do not need to understand + the internals of TVTensors to efficiently rely on + ``torchvision.transforms.v2``. It may however be useful for advanced users + trying to implement their own datasets, transforms, or work directly with + the TVTensors. +""" + +# %% +import PIL.Image + +import torch +from torchvision import tv_tensors + + +# %% +# What are TVTensors? +# ------------------- +# +# TVTensors are zero-copy tensor subclasses: + +tensor = torch.rand(3, 256, 256) +image = tv_tensors.Image(tensor) + +assert isinstance(image, torch.Tensor) +assert image.data_ptr() == tensor.data_ptr() + +# %% +# Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function +# for the input data. +# +# :mod:`torchvision.tv_tensors` supports four types of TVTensors: +# +# * :class:`~torchvision.tv_tensors.Image` +# * :class:`~torchvision.tv_tensors.Video` +# * :class:`~torchvision.tv_tensors.BoundingBoxes` +# * :class:`~torchvision.tv_tensors.Mask` +# +# What can I do with a TVTensor? +# ------------------------------ +# +# TVTensors look and feel just like regular tensors - they **are** tensors. +# Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()`` or +# any ``torch.*`` operator will also work on TVTensors. See +# :ref:`tv_tensor_unwrapping_behaviour` for a few gotchas. + +# %% +# .. _tv_tensor_creation: +# +# How do I construct a TVTensor? +# ------------------------------ +# +# Using the constructor +# ^^^^^^^^^^^^^^^^^^^^^ +# +# Each TVTensor class takes any tensor-like data that can be turned into a :class:`~torch.Tensor`: + +image = tv_tensors.Image([[[[0, 1], [1, 0]]]]) +print(image) + + +# %% +# Similar to other PyTorch creation ops, the constructor also takes the ``dtype``, ``device``, and ``requires_grad`` +# parameters.
+ +float_image = tv_tensors.Image([[[0, 1], [1, 0]]], dtype=torch.float32, requires_grad=True) +print(float_image) + + +# %% +# In addition, :class:`~torchvision.tv_tensors.Image` and :class:`~torchvision.tv_tensors.Mask` can also take a +# :class:`PIL.Image.Image` directly: + +image = tv_tensors.Image(PIL.Image.open("../assets/astronaut.jpg")) +print(image.shape, image.dtype) + +# %% +# Some TVTensors require additional metadata to be passed in order to be constructed. For example, +# :class:`~torchvision.tv_tensors.BoundingBoxes` requires the coordinate format as well as the size of the +# corresponding image (``canvas_size``) alongside the actual values. These +# metadata are required to properly transform the bounding boxes. + +bboxes = tv_tensors.BoundingBoxes( + [[17, 16, 344, 495], [0, 10, 0, 10]], + format=tv_tensors.BoundingBoxFormat.XYXY, + canvas_size=image.shape[-2:] +) +print(bboxes) + +# %% +# Using ``tv_tensors.wrap()`` +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# You can also use the :func:`~torchvision.tv_tensors.wrap` function to wrap a tensor object +# into a TVTensor. This is useful when you already have an object of the +# desired type, which typically happens when writing transforms: you just want +# to wrap the output like the input. + +new_bboxes = torch.tensor([0, 20, 30, 40]) +new_bboxes = tv_tensors.wrap(new_bboxes, like=bboxes) +assert isinstance(new_bboxes, tv_tensors.BoundingBoxes) +assert new_bboxes.canvas_size == bboxes.canvas_size + +# %% +# The metadata of ``new_bboxes`` is the same as ``bboxes``, but you could pass +# it as a parameter to override it. +# +# .. _tv_tensor_unwrapping_behaviour: +# +# I had a TVTensor but now I have a Tensor. Help! 
+# ----------------------------------------------- +# +# By default, operations on :class:`~torchvision.tv_tensors.TVTensor` objects +# will return a pure Tensor: + + +assert isinstance(bboxes, tv_tensors.BoundingBoxes) + +# Shift bboxes by 3 pixels in both H and W +new_bboxes = bboxes + 3 + +assert isinstance(new_bboxes, torch.Tensor) +assert not isinstance(new_bboxes, tv_tensors.BoundingBoxes) + +# %% +# .. note:: +# +# This behavior only affects native ``torch`` operations. If you are using +# the built-in ``torchvision`` transforms or functionals, you will always get +# as output the same type that you passed as input (pure ``Tensor`` or +# ``TVTensor``). + +# %% +# But I want a TVTensor back! +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# You can re-wrap a pure tensor into a TVTensor by just calling the TVTensor +# constructor, or by using the :func:`~torchvision.tv_tensors.wrap` function +# (see more details above in :ref:`tv_tensor_creation`): + +new_bboxes = bboxes + 3 +new_bboxes = tv_tensors.wrap(new_bboxes, like=bboxes) +assert isinstance(new_bboxes, tv_tensors.BoundingBoxes) + +# %% +# Alternatively, you can use the :func:`~torchvision.tv_tensors.set_return_type` +# as a global config setting for the whole program, or as a context manager +# (read its docs to learn more about caveats): + +with tv_tensors.set_return_type("TVTensor"): + new_bboxes = bboxes + 3 +assert isinstance(new_bboxes, tv_tensors.BoundingBoxes) + +# %% +# Why is this happening? +# ^^^^^^^^^^^^^^^^^^^^^^ +# +# **For performance reasons**. :class:`~torchvision.tv_tensors.TVTensor` +# classes are Tensor subclasses, so any operation involving a +# :class:`~torchvision.tv_tensors.TVTensor` object will go through the +# `__torch_function__ +# `_ +# protocol. This induces a small overhead, which we want to avoid when possible. +# This doesn't matter for built-in ``torchvision`` transforms because we can +# avoid the overhead there, but it could be a problem in your model's +# ``forward``. 
+# +# **The alternative isn't much better anyway.** For every operation where +# preserving the :class:`~torchvision.tv_tensors.TVTensor` type makes +# sense, there are just as many operations where returning a pure Tensor is +# preferable: for example, is ``img.sum()`` still an :class:`~torchvision.tv_tensors.Image`? +# If we were to preserve :class:`~torchvision.tv_tensors.TVTensor` types all +# the way, even model's logits or the output of the loss function would end up +# being of type :class:`~torchvision.tv_tensors.Image`, and surely that's not +# desirable. +# +# .. note:: +# +# This behaviour is something we're actively seeking feedback on. If you find this surprising or if you +# have any suggestions on how to better support your use-cases, please reach out to us via this issue: +# https://github.com/pytorch/vision/issues/7319 +# +# Exceptions +# ^^^^^^^^^^ +# +# There are a few exceptions to this "unwrapping" rule: +# :meth:`~torch.Tensor.clone`, :meth:`~torch.Tensor.to`, +# :meth:`torch.Tensor.detach`, and :meth:`~torch.Tensor.requires_grad_` retain +# the TVTensor type. +# +# Inplace operations on TVTensors like ``obj.add_()`` will preserve the type of +# ``obj``. However, the **returned** value of inplace operations will be a pure +# tensor: + +image = tv_tensors.Image([[[0, 1], [1, 0]]]) + +new_image = image.add_(1).mul_(2) + +# image got transformed in-place and is still a TVTensor Image, but new_image +# is a Tensor. They share the same underlying data and they're equal, just +# different classes. 
+assert isinstance(image, tv_tensors.Image) +print(image) + +assert isinstance(new_image, torch.Tensor) and not isinstance(new_image, tv_tensors.Image) +assert (new_image == image).all() +assert new_image.data_ptr() == image.data_ptr() diff --git a/hubconf.py b/hubconf.py index f43f922e89c..637827127ca 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,14 +1,85 @@ # Optional list of dependencies required by the package -dependencies = ['torch'] +dependencies = ["torch"] +from torchvision.models import get_model_weights, get_weight from torchvision.models.alexnet import alexnet -from torchvision.models.densenet import densenet121, densenet169, densenet201, densenet161 +from torchvision.models.convnext import convnext_base, convnext_large, convnext_small, convnext_tiny +from torchvision.models.densenet import densenet121, densenet161, densenet169, densenet201 +from torchvision.models.efficientnet import ( + efficientnet_b0, + efficientnet_b1, + efficientnet_b2, + efficientnet_b3, + efficientnet_b4, + efficientnet_b5, + efficientnet_b6, + efficientnet_b7, + efficientnet_v2_l, + efficientnet_v2_m, + efficientnet_v2_s, +) +from torchvision.models.googlenet import googlenet from torchvision.models.inception import inception_v3 -from torchvision.models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152,\ - resnext50_32x4d, resnext101_32x8d, wide_resnet50_2, wide_resnet101_2 +from torchvision.models.maxvit import maxvit_t +from torchvision.models.mnasnet import mnasnet0_5, mnasnet0_75, mnasnet1_0, mnasnet1_3 +from torchvision.models.mobilenetv2 import mobilenet_v2 +from torchvision.models.mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small +from torchvision.models.optical_flow import raft_large, raft_small +from torchvision.models.regnet import ( + regnet_x_16gf, + regnet_x_1_6gf, + regnet_x_32gf, + regnet_x_3_2gf, + regnet_x_400mf, + regnet_x_800mf, + regnet_x_8gf, + regnet_y_128gf, + regnet_y_16gf, + regnet_y_1_6gf, + regnet_y_32gf, + regnet_y_3_2gf, + 
regnet_y_400mf, + regnet_y_800mf, + regnet_y_8gf, +) +from torchvision.models.resnet import ( + resnet101, + resnet152, + resnet18, + resnet34, + resnet50, + resnext101_32x8d, + resnext101_64x4d, + resnext50_32x4d, + wide_resnet101_2, + wide_resnet50_2, +) +from torchvision.models.segmentation import ( + deeplabv3_mobilenet_v3_large, + deeplabv3_resnet101, + deeplabv3_resnet50, + fcn_resnet101, + fcn_resnet50, + lraspp_mobilenet_v3_large, +) +from torchvision.models.shufflenetv2 import ( + shufflenet_v2_x0_5, + shufflenet_v2_x1_0, + shufflenet_v2_x1_5, + shufflenet_v2_x2_0, +) from torchvision.models.squeezenet import squeezenet1_0, squeezenet1_1 -from torchvision.models.vgg import vgg11, vgg13, vgg16, vgg19, vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn -from torchvision.models.segmentation import fcn_resnet101, deeplabv3_resnet101 -from torchvision.models.googlenet import googlenet -from torchvision.models.shufflenetv2 import shufflenet_v2_x0_5, shufflenet_v2_x1_0 -from torchvision.models.mobilenet import mobilenet_v2 +from torchvision.models.swin_transformer import swin_b, swin_s, swin_t, swin_v2_b, swin_v2_s, swin_v2_t +from torchvision.models.vgg import vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn +from torchvision.models.video import ( + mc3_18, + mvit_v1_b, + mvit_v2_s, + r2plus1d_18, + r3d_18, + s3d, + swin3d_b, + swin3d_s, + swin3d_t, +) +from torchvision.models.vision_transformer import vit_b_16, vit_b_32, vit_h_14, vit_l_16, vit_l_32 diff --git a/ios/CMakeLists.txt b/ios/CMakeLists.txt new file mode 100644 index 00000000000..4201240a427 --- /dev/null +++ b/ios/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.4.1) +set(TARGET torchvision_ops) +project(${TARGET} CXX) +set(CMAKE_CXX_STANDARD 17) +set(LIBTORCH_HEADER_ROOT ${LIBTORCH_HEADER_ROOT}) +set(LIBRARY_OUTPUT_PATH ../lib) + +file(GLOB VISION_SRCS + ../torchvision/csrc/ops/cpu/*.h + ../torchvision/csrc/ops/cpu/*.cpp + ../torchvision/csrc/ops/*.h + 
../torchvision/csrc/ops/*.cpp) + +add_library(${TARGET} STATIC + ${VISION_SRCS} +) + +file(GLOB PYTORCH_HEADERS "${LIBTORCH_HEADER_ROOT}") +file(GLOB PYTORCH_HEADERS_CSRC "${LIBTORCH_HEADER_ROOT}/torch/csrc/api/include") +target_include_directories(${TARGET} PRIVATE + ${PYTORCH_HEADERS} + ${PYTORCH_HEADERS_CSRC} +) diff --git a/ios/LibTorchvision.podspec b/ios/LibTorchvision.podspec new file mode 100644 index 00000000000..b88fb70ac40 --- /dev/null +++ b/ios/LibTorchvision.podspec @@ -0,0 +1,24 @@ +pytorch_version = '2.0.0' + +Pod::Spec.new do |s| + s.name = 'LibTorchvision' + s.version = '0.15.1' + s.authors = 'PyTorch Team' + s.license = { :type => 'BSD' } + s.homepage = 'https://github.com/pytorch/vision' + s.source = { :http => "https://ossci-ios.s3.amazonaws.com/libtorchvision_ops_ios_#{s.version}.zip" } + s.summary = '"The C++ library of TorchVision ops for iOS' + s.description = <<-DESC + The C++ library of TorchVision ops for iOS. + This version (#{s.version}) requires the installation of LibTorch #{pytorch_version} or LibTorch-Lite #{pytorch_version}. + DESC + s.ios.deployment_target = '12.0' + s.vendored_libraries = 'install/lib/*.a' + s.user_target_xcconfig = { + 'VALID_ARCHS' => 'x86_64 arm64', + 'OTHER_LDFLAGS' => '$(inherited) -force_load "$(PODS_ROOT)/LibTorchvision/install/lib/libtorchvision_ops.a"', + 'CLANG_CXX_LANGUAGE_STANDARD' => 'c++14', + 'CLANG_CXX_LIBRARY' => 'libc++' + } + s.library = ['c++', 'stdc++'] +end diff --git a/ios/README.md b/ios/README.md new file mode 100644 index 00000000000..0b50245f1ee --- /dev/null +++ b/ios/README.md @@ -0,0 +1,3 @@ +## Status + +The iOS demo of TorchVision is currently unmaintained, untested and likely out-of-date. diff --git a/ios/VisionTestApp/VisionTestApp.xcodeproj/project.pbxproj b/ios/VisionTestApp/VisionTestApp.xcodeproj/project.pbxproj new file mode 100644 index 00000000000..1c25d9d350e --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp.xcodeproj/project.pbxproj @@ -0,0 +1,411 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 0C12EF7626163B7600B66C86 /* frcnn_mnetv3.pt in Resources */ = {isa = PBXBuildFile; fileRef = 0C12EF7526163B7600B66C86 /* frcnn_mnetv3.pt */; }; + 0CDCAE46274ED8FA006F9077 /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0CDCAE45274ED8FA006F9077 /* CoreML.framework */; }; + 0CDCAE48274ED902006F9077 /* MetalPerformanceShaders.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0CDCAE47274ED902006F9077 /* MetalPerformanceShaders.framework */; }; + 0CDCAE4A274ED909006F9077 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0CDCAE49274ED909006F9077 /* Accelerate.framework */; }; + 0CEB0AC026151A8800F1F7D5 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 0CEB0ABF26151A8800F1F7D5 /* AppDelegate.m */; }; + 0CEB0AC626151A8800F1F7D5 /* ViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 0CEB0AC526151A8800F1F7D5 /* ViewController.mm */; }; + 0CEB0AC926151A8800F1F7D5 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 0CEB0AC726151A8800F1F7D5 /* Main.storyboard */; }; + 0CEB0ACB26151A8900F1F7D5 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0CEB0ACA26151A8900F1F7D5 /* Assets.xcassets */; }; + 0CEB0ACE26151A8900F1F7D5 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 0CEB0ACC26151A8900F1F7D5 /* LaunchScreen.storyboard */; }; + 0CEB0AD126151A8900F1F7D5 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 0CEB0AD026151A8900F1F7D5 /* main.m */; }; + 0CEB0B3A26152ED900F1F7D5 /* ModelRunner.mm in Sources */ = {isa = PBXBuildFile; fileRef = 0CEB0B3926152ED900F1F7D5 /* ModelRunner.mm */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 0C12EF7526163B7600B66C86 /* frcnn_mnetv3.pt */ = {isa = PBXFileReference; lastKnownFileType = file; path = frcnn_mnetv3.pt; sourceTree = ""; }; + 
0CDCAE45274ED8FA006F9077 /* CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = System/Library/Frameworks/CoreML.framework; sourceTree = SDKROOT; }; + 0CDCAE47274ED902006F9077 /* MetalPerformanceShaders.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShaders.framework; path = System/Library/Frameworks/MetalPerformanceShaders.framework; sourceTree = SDKROOT; }; + 0CDCAE49274ED909006F9077 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; }; + 0CEB0ABB26151A8800F1F7D5 /* VisionTestApp.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = VisionTestApp.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 0CEB0ABE26151A8800F1F7D5 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; + 0CEB0ABF26151A8800F1F7D5 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = ""; }; + 0CEB0AC426151A8800F1F7D5 /* ViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ViewController.h; sourceTree = ""; }; + 0CEB0AC526151A8800F1F7D5 /* ViewController.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ViewController.mm; sourceTree = ""; }; + 0CEB0AC826151A8800F1F7D5 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; + 0CEB0ACA26151A8900F1F7D5 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 0CEB0ACD26151A8900F1F7D5 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = 
Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; + 0CEB0ACF26151A8900F1F7D5 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 0CEB0AD026151A8900F1F7D5 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; + 0CEB0B3826152ED900F1F7D5 /* ModelRunner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ModelRunner.h; sourceTree = ""; }; + 0CEB0B3926152ED900F1F7D5 /* ModelRunner.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ModelRunner.mm; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 0CEB0AB826151A8800F1F7D5 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 0CDCAE4A274ED909006F9077 /* Accelerate.framework in Frameworks */, + 0CDCAE48274ED902006F9077 /* MetalPerformanceShaders.framework in Frameworks */, + 0CDCAE46274ED8FA006F9077 /* CoreML.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 0C12EF6F26163A4C00B66C86 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 0CDCAE49274ED909006F9077 /* Accelerate.framework */, + 0CDCAE47274ED902006F9077 /* MetalPerformanceShaders.framework */, + 0CDCAE45274ED8FA006F9077 /* CoreML.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; + 0CEB0AB226151A8800F1F7D5 = { + isa = PBXGroup; + children = ( + 0CEB0ABD26151A8800F1F7D5 /* VisionTestApp */, + 0CEB0ABC26151A8800F1F7D5 /* Products */, + 0C12EF6F26163A4C00B66C86 /* Frameworks */, + ); + sourceTree = ""; + }; + 0CEB0ABC26151A8800F1F7D5 /* Products */ = { + isa = PBXGroup; + children = ( + 0CEB0ABB26151A8800F1F7D5 /* VisionTestApp.app */, + ); + name = Products; + sourceTree = ""; + }; + 0CEB0ABD26151A8800F1F7D5 /* VisionTestApp */ = { + isa = PBXGroup; + children 
= ( + 0CEB0B3826152ED900F1F7D5 /* ModelRunner.h */, + 0CEB0B3926152ED900F1F7D5 /* ModelRunner.mm */, + 0CEB0ABE26151A8800F1F7D5 /* AppDelegate.h */, + 0CEB0ABF26151A8800F1F7D5 /* AppDelegate.m */, + 0CEB0AC426151A8800F1F7D5 /* ViewController.h */, + 0CEB0AC526151A8800F1F7D5 /* ViewController.mm */, + 0CEB0AC726151A8800F1F7D5 /* Main.storyboard */, + 0CEB0ACA26151A8900F1F7D5 /* Assets.xcassets */, + 0CEB0ACC26151A8900F1F7D5 /* LaunchScreen.storyboard */, + 0CEB0ACF26151A8900F1F7D5 /* Info.plist */, + 0CEB0AD026151A8900F1F7D5 /* main.m */, + 0C12EF7526163B7600B66C86 /* frcnn_mnetv3.pt */, + ); + path = VisionTestApp; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 0CEB0ABA26151A8800F1F7D5 /* VisionTestApp */ = { + isa = PBXNativeTarget; + buildConfigurationList = 0CEB0AEA26151A8900F1F7D5 /* Build configuration list for PBXNativeTarget "VisionTestApp" */; + buildPhases = ( + 0CEB0AB726151A8800F1F7D5 /* Sources */, + 0CEB0AB826151A8800F1F7D5 /* Frameworks */, + 0CEB0AB926151A8800F1F7D5 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = VisionTestApp; + productName = VisionTestApp; + productReference = 0CEB0ABB26151A8800F1F7D5 /* VisionTestApp.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 0CEB0AB326151A8800F1F7D5 /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 1240; + TargetAttributes = { + 0CEB0ABA26151A8800F1F7D5 = { + CreatedOnToolsVersion = 12.4; + }; + }; + }; + buildConfigurationList = 0CEB0AB626151A8800F1F7D5 /* Build configuration list for PBXProject "VisionTestApp" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 0CEB0AB226151A8800F1F7D5; + productRefGroup = 0CEB0ABC26151A8800F1F7D5 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 
0CEB0ABA26151A8800F1F7D5 /* VisionTestApp */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 0CEB0AB926151A8800F1F7D5 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 0CEB0ACE26151A8900F1F7D5 /* LaunchScreen.storyboard in Resources */, + 0C12EF7626163B7600B66C86 /* frcnn_mnetv3.pt in Resources */, + 0CEB0ACB26151A8900F1F7D5 /* Assets.xcassets in Resources */, + 0CEB0AC926151A8800F1F7D5 /* Main.storyboard in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 0CEB0AB726151A8800F1F7D5 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 0CEB0AC626151A8800F1F7D5 /* ViewController.mm in Sources */, + 0CEB0AC026151A8800F1F7D5 /* AppDelegate.m in Sources */, + 0CEB0AD126151A8900F1F7D5 /* main.m in Sources */, + 0CEB0B3A26152ED900F1F7D5 /* ModelRunner.mm in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXVariantGroup section */ + 0CEB0AC726151A8800F1F7D5 /* Main.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 0CEB0AC826151A8800F1F7D5 /* Base */, + ); + name = Main.storyboard; + sourceTree = ""; + }; + 0CEB0ACC26151A8900F1F7D5 /* LaunchScreen.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 0CEB0ACD26151A8900F1F7D5 /* Base */, + ); + name = LaunchScreen.storyboard; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + 0CEB0AE826151A8900F1F7D5 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + 
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_BITCODE = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ""; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; + LIBRARY_SEARCH_PATHS = ""; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + OTHER_LDFLAGS = ""; + SDKROOT = iphoneos; + }; + name = Debug; + }; + 0CEB0AE926151A8900F1F7D5 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + 
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_BITCODE = NO; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ""; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; + LIBRARY_SEARCH_PATHS = ""; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + OTHER_LDFLAGS = ""; + SDKROOT = iphoneos; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 0CEB0AEB26151A8900F1F7D5 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; 
+ CODE_SIGN_STYLE = Automatic; + ENABLE_BITCODE = NO; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/install/include", + ); + INFOPLIST_FILE = VisionTestApp/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/VisionTestApp", + "$(PROJECT_DIR)", + "$(PROJECT_DIR)/install/lib", + ); + OTHER_LDFLAGS = ( + "$(inherited)", + "-ObjC", + "-all_load", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.pytorch.ios.VisionTestApp.VisionTestApp; + PRODUCT_NAME = "$(TARGET_NAME)"; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 0CEB0AEC26151A8900F1F7D5 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Automatic; + ENABLE_BITCODE = NO; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/install/include", + ); + INFOPLIST_FILE = VisionTestApp/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/VisionTestApp", + "$(PROJECT_DIR)", + "$(PROJECT_DIR)/install/lib", + ); + OTHER_LDFLAGS = ( + "$(inherited)", + "-ObjC", + "-all_load", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.pytorch.ios.VisionTestApp.VisionTestApp; + PRODUCT_NAME = "$(TARGET_NAME)"; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 0CEB0AB626151A8800F1F7D5 /* Build configuration list for PBXProject "VisionTestApp" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 0CEB0AE826151A8900F1F7D5 /* Debug */, + 0CEB0AE926151A8900F1F7D5 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 0CEB0AEA26151A8900F1F7D5 /* Build configuration list for PBXNativeTarget "VisionTestApp" */ = { + 
isa = XCConfigurationList; + buildConfigurations = ( + 0CEB0AEB26151A8900F1F7D5 /* Debug */, + 0CEB0AEC26151A8900F1F7D5 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 0CEB0AB326151A8800F1F7D5 /* Project object */; +} diff --git a/ios/VisionTestApp/VisionTestApp.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/ios/VisionTestApp/VisionTestApp.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000000..919434a6254 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/ios/VisionTestApp/VisionTestApp.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/ios/VisionTestApp/VisionTestApp.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 00000000000..18d981003d6 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/ios/VisionTestApp/VisionTestApp/AppDelegate.h b/ios/VisionTestApp/VisionTestApp/AppDelegate.h new file mode 100644 index 00000000000..27716f4b6ab --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/AppDelegate.h @@ -0,0 +1,7 @@ +#import + +@interface AppDelegate : UIResponder + +@property(strong, nonatomic) UIWindow* window; + +@end diff --git a/ios/VisionTestApp/VisionTestApp/AppDelegate.m b/ios/VisionTestApp/VisionTestApp/AppDelegate.m new file mode 100644 index 00000000000..a20d3987c80 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/AppDelegate.m @@ -0,0 +1,44 @@ + +#import "AppDelegate.h" + +@interface AppDelegate () + +@end + +@implementation AppDelegate + + +- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + // Override point for customization after 
application launch. + return YES; +} + + +- (void)applicationWillResignActive:(UIApplication *)application { + // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. + // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. +} + + +- (void)applicationDidEnterBackground:(UIApplication *)application { + // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. + // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. +} + + +- (void)applicationWillEnterForeground:(UIApplication *)application { + // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. +} + + +- (void)applicationDidBecomeActive:(UIApplication *)application { + // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. +} + + +- (void)applicationWillTerminate:(UIApplication *)application { + // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. 
+} + + +@end diff --git a/ios/VisionTestApp/VisionTestApp/Assets.xcassets/AccentColor.colorset/Contents.json b/ios/VisionTestApp/VisionTestApp/Assets.xcassets/AccentColor.colorset/Contents.json new file mode 100644 index 00000000000..eb878970081 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/Assets.xcassets/AccentColor.colorset/Contents.json @@ -0,0 +1,11 @@ +{ + "colors" : [ + { + "idiom" : "universal" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/ios/VisionTestApp/VisionTestApp/Assets.xcassets/AppIcon.appiconset/Contents.json b/ios/VisionTestApp/VisionTestApp/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 00000000000..9221b9bb1a3 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,98 @@ +{ + "images" : [ + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "20x20" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "20x20" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "29x29" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "29x29" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "40x40" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "40x40" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "60x60" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "60x60" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "20x20" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "20x20" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "29x29" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "29x29" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "40x40" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "40x40" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "76x76" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "76x76" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "83.5x83.5" + }, + { + "idiom" 
: "ios-marketing", + "scale" : "1x", + "size" : "1024x1024" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/ios/VisionTestApp/VisionTestApp/Assets.xcassets/Contents.json b/ios/VisionTestApp/VisionTestApp/Assets.xcassets/Contents.json new file mode 100644 index 00000000000..73c00596a7f --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/ios/VisionTestApp/VisionTestApp/Base.lproj/LaunchScreen.storyboard b/ios/VisionTestApp/VisionTestApp/Base.lproj/LaunchScreen.storyboard new file mode 100644 index 00000000000..0b64f641701 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/Base.lproj/LaunchScreen.storyboard @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/ios/VisionTestApp/VisionTestApp/Base.lproj/Main.storyboard b/ios/VisionTestApp/VisionTestApp/Base.lproj/Main.storyboard new file mode 100644 index 00000000000..b20f277b049 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/Base.lproj/Main.storyboard @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/ios/VisionTestApp/VisionTestApp/Info.plist b/ios/VisionTestApp/VisionTestApp/Info.plist new file mode 100644 index 00000000000..5bae3d0ded5 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/Info.plist @@ -0,0 +1,45 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSRequiresIPhoneOS + + UIApplicationSupportsIndirectInputEvents + + UILaunchStoryboardName + LaunchScreen + UIMainStoryboardFile + Main + 
UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/ios/VisionTestApp/VisionTestApp/ModelRunner.h b/ios/VisionTestApp/VisionTestApp/ModelRunner.h new file mode 100644 index 00000000000..cfef3a3f347 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/ModelRunner.h @@ -0,0 +1,13 @@ + +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface ModelRunner : NSObject + ++ (NSString*)run; ++ (BOOL)setUp; + +@end + +NS_ASSUME_NONNULL_END diff --git a/ios/VisionTestApp/VisionTestApp/ModelRunner.mm b/ios/VisionTestApp/VisionTestApp/ModelRunner.mm new file mode 100644 index 00000000000..dea3822df26 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/ModelRunner.mm @@ -0,0 +1,73 @@ + +#import "ModelRunner.h" +#include +#include +#include "ATen/ATen.h" +#include "caffe2/core/timer.h" +#include "caffe2/utils/string_utils.h" +#include "torch/csrc/autograd/grad_mode.h" +#include "torch/csrc/jit/serialization/import.h" +#include "torch/script.h" + +static NSString *model_name = @"frcnn_mnetv3"; +static NSString *model_suffix = @"pt"; +static NSString *model_path = nil; +static int warmup = 5; +static int iter = 20; + +@implementation ModelRunner + ++ (NSString *)run { + std::vector logs; +#define UI_LOG(fmt, ...) 
\ + { \ + NSString* log = [NSString stringWithFormat:fmt, __VA_ARGS__]; \ + NSLog(@"%@", log); \ + logs.push_back(log.UTF8String); \ + } + + auto module = torch::jit::load(std::string(model_path.UTF8String)); + module.eval(); + + std::vector inputs; + auto img_tensor = torch::ones({3, 224, 224}, at::ScalarType::Float); + inputs.push_back(c10::List(img_tensor)); + torch::autograd::AutoGradMode guard(false); + at::InferenceMode nonVarTypeModeGuard(true); + + UI_LOG(@"Running warmup runs...", nil); + for (int i = 0; i < warmup; ++i) { + module.forward(inputs); + } + UI_LOG(@"Warmup runs finished.\nMain runs...", nil); + caffe2::Timer timer; + auto millis = timer.MilliSeconds(); + for (int i = 0; i < iter; ++i) { + module.forward(inputs); + } + millis = timer.MilliSeconds(); + UI_LOG(@"Main run finished. \nMilliseconds per iter: %.3f", millis / iter, nil); + UI_LOG(@"Iters per second: : %.3f", 1000.0 * iter / millis, nil); + UI_LOG(@"Done.", nil); + + std::cout << module.forward(inputs) << std::endl; + + NSString* log_text = @""; + for (auto& msg : logs) { + log_text = [log_text stringByAppendingString:[NSString stringWithUTF8String:msg.c_str()]]; + log_text = [log_text stringByAppendingString:@"\n"]; + } + return log_text; +} + ++ (BOOL)setUp { + model_path = [[NSBundle mainBundle] pathForResource:model_name ofType:model_suffix]; + if (![[NSFileManager defaultManager] fileExistsAtPath:model_path]) { + NSLog(@"Invalid model path!"); + model_path = nil; + return NO; + } + return YES; +} + +@end diff --git a/ios/VisionTestApp/VisionTestApp/ViewController.h b/ios/VisionTestApp/VisionTestApp/ViewController.h new file mode 100644 index 00000000000..d29a133d373 --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/ViewController.h @@ -0,0 +1,6 @@ + +#import + +@interface ViewController : UIViewController + +@end diff --git a/ios/VisionTestApp/VisionTestApp/ViewController.mm b/ios/VisionTestApp/VisionTestApp/ViewController.mm new file mode 100644 index 00000000000..900005d3990 
--- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/ViewController.mm @@ -0,0 +1,44 @@ + +#import "ViewController.h" +#include +#import "ModelRunner.h" + +@interface ViewController () +@property (weak, nonatomic) IBOutlet UITextView *textView; +@end + +static NSString * const config_error_msg = @"Wrong model configurations... Please fix and click \"Redo\""; // const pointer (not pointer-to-const): the message can never be re-pointed + +@implementation ViewController + +- (void)viewDidLoad { + [super viewDidLoad]; + if ([ModelRunner setUp]) { + [self testModel]; + } else { + self.textView.text = [config_error_msg copy]; + } +} + + +- (IBAction)rerun:(id)sender { + self.textView.text = @""; + if (![ModelRunner setUp]) { + self.textView.text = [config_error_msg copy]; + return; + } + dispatch_async(dispatch_get_main_queue(), ^{ + [self testModel]; + }); +} + +- (void)testModel { + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ + NSString *text = [ModelRunner run]; + dispatch_async(dispatch_get_main_queue(), ^{ + self.textView.text = [self.textView.text stringByAppendingString:text]; + }); + }); +} + +@end diff --git a/ios/VisionTestApp/VisionTestApp/main.m b/ios/VisionTestApp/VisionTestApp/main.m new file mode 100644 index 00000000000..1a8b57c33bc --- /dev/null +++ b/ios/VisionTestApp/VisionTestApp/main.m @@ -0,0 +1,18 @@ +// +// main.m +// VisionTestApp +// +// Created by Yuchen Huang on 3/31/21. +// + +#import +#import "AppDelegate.h" + +int main(int argc, char * argv[]) { + NSString * appDelegateClassName; + @autoreleasepool { + // Setup code that might create autoreleased objects goes here. 
+ appDelegateClassName = NSStringFromClass([AppDelegate class]); + } + return UIApplicationMain(argc, argv, nil, appDelegateClassName); +} diff --git a/ios/VisionTestApp/clean.sh b/ios/VisionTestApp/clean.sh new file mode 100755 index 00000000000..20bedc784d9 --- /dev/null +++ b/ios/VisionTestApp/clean.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ex -o pipefail + +TEST_APP_PATH=$(dirname $(realpath $0)) +cd ${TEST_APP_PATH} + +rm -rf ./install +rm -f ./VisionTestApp/*.pt # -f: cleaning an already-clean tree must not fail under set -e diff --git a/ios/VisionTestApp/make_assets.py b/ios/VisionTestApp/make_assets.py new file mode 100644 index 00000000000..f14223e6a42 --- /dev/null +++ b/ios/VisionTestApp/make_assets.py @@ -0,0 +1,21 @@ +import torch +from torch.utils.mobile_optimizer import optimize_for_mobile +from torchvision.models.detection import ( + fasterrcnn_mobilenet_v3_large_320_fpn, + FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, +) + +print(torch.__version__) + +model = fasterrcnn_mobilenet_v3_large_320_fpn( + weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, + box_score_thresh=0.7, + rpn_post_nms_top_n_test=100, + rpn_score_thresh=0.4, + rpn_pre_nms_top_n_test=150, +) + +model.eval() +script_model = torch.jit.script(model) +opt_script_model = optimize_for_mobile(script_model) +opt_script_model.save("VisionTestApp/frcnn_mnetv3.pt") diff --git a/ios/VisionTestApp/setup.sh b/ios/VisionTestApp/setup.sh new file mode 100755 index 00000000000..3b3520d7052 --- /dev/null +++ b/ios/VisionTestApp/setup.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -ex -o pipefail + +echo "" +echo "DIR: $(pwd)" + +TEST_APP_PATH=$(dirname $(realpath $0)) +cd ${TEST_APP_PATH} + +PYTORCH_IOS_NIGHTLY_NAME=libtorch_ios_nightly_build.zip +VISION_IOS_NIGHTLY_NAME=libtorchvision_ops_ios_nightly_build.zip + +echo "Downloading torch libs and vision libs..." 
+wget https://ossci-ios-build.s3.amazonaws.com/${PYTORCH_IOS_NIGHTLY_NAME} +wget https://ossci-ios-build.s3.amazonaws.com/${VISION_IOS_NIGHTLY_NAME} + +mkdir -p ./library/torch +mkdir -p ./library/vision + +echo "Unziping torch libs and vision libs..." +unzip -d ./library/torch ./${PYTORCH_IOS_NIGHTLY_NAME} +unzip -d ./library/vision ./${VISION_IOS_NIGHTLY_NAME} + +cp ./library/vision/install/lib/*.a ./library/torch/install/lib +cp -r ./library/torch/install . + +rm -rf ./library +rm -rf ./*.zip + +echo "Generating the vision model..." +python ./make_assets.py + +echo "Finished project setups." diff --git a/ios/build_ios.sh b/ios/build_ios.sh new file mode 100755 index 00000000000..81ac2f2a218 --- /dev/null +++ b/ios/build_ios.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -ex -o pipefail +echo "" +echo "DIR: $(pwd)" +VISION_IOS_ROOT=$(dirname $(realpath $0)) + +if ! [ -n "${LIBTORCH_HEADER_ROOT:-}" ]; then + echo "Missing parameter: LIBTORCH_HEADER_ROOT" + exit 1 +fi + +if [ -n "${IOS_ARCH:-}" ]; then + if [ "${IOS_ARCH:-}" == "arm64" ]; then + IOS_PLATFORM="OS" + elif [ "${IOS_ARCH:-}" == "x86_64" ]; then + IOS_PLATFORM="SIMULATOR" + fi +fi + +mkdir -p ${VISION_IOS_ROOT}/lib +mkdir -p ${VISION_IOS_ROOT}/build +cd ${VISION_IOS_ROOT}/build +cmake -DLIBTORCH_HEADER_ROOT=${LIBTORCH_HEADER_ROOT} \ + -DCMAKE_TOOLCHAIN_FILE=${VISION_IOS_ROOT}/../cmake/iOS.cmake \ + -DIOS_ARCH=${IOS_ARCH} \ + -DIOS_PLATFORM=${IOS_PLATFORM} \ + .. +make +rm -rf ${VISION_IOS_ROOT}/build diff --git a/maintainer_guide.md b/maintainer_guide.md new file mode 100644 index 00000000000..3d66a701be1 --- /dev/null +++ b/maintainer_guide.md @@ -0,0 +1,76 @@ +# Torchvision maintainers guide + +This document aims at documenting user-facing policies / principles used when +developing and maintaining torchvision. Other maintainer info (e.g. release +process) can be found in the meta-internal wiki. + +### What is public and what is private? 
+ +For the Python API, torchvision largely follows the [PyTorch +policy](https://github.com/pytorch/pytorch/wiki/Public-API-definition-and-documentation) +which is consistent with other major packages +([numpy](https://numpy.org/neps/nep-0023-backwards-compatibility.html), +[scikit-learn](https://scikit-learn.org/dev/glossary.html#term-API) etc.). +We recognize that this policy is somewhat imperfect for some edge cases, and that +it's difficult to come up with an accurate technical definition. In broad terms, +which are usually well understood by users, the policy is that: + +- modules that can be accessed without a leading underscore are public +- objects in a public file that don't have a leading underscore are public +- class attributes are public iff they have no leading underscore +- the rest of the modules / objects / class attributes are considered private + +The public API has backward-compatible (BC) guarantees defined in our +deprecation policy (see below). The private API has no BC guarantees. + +For C++, code is private. For Meta employees: if a C++ change breaks fbcode, fix +fbcode or revert the change. We should be careful about models running in +production and relying on torchvision ops. + +The `test` folder is not importable and is **private.** Even meta-internal +projects should *not* rely on it (it has happened in the past and is now +programmatically impossible). + +The training references do not have BC guarantees. Breaking changes are +possible, but we should make sure that the tutorials are still running properly, +and that their intended narrative is preserved (by e.g. checking outputs, +etc.). + +The rest of the folders (build, android, ios, etc.) are private and have no BC +guarantees. + +### Deprecation policy. + +Because they're disruptive, **deprecations should only be used sparingly**. 
+ +We largely follow the [PyTorch +policy](https://github.com/pytorch/pytorch/wiki/PyTorch's-Python-Frontend-Backward-and-Forward-Compatibility-Policy): +breaking changes require a deprecation period of at least 2 versions. + +Deprecations should clearly indicate their deadline in the docs and warning +messages. Avoid leaving the deadline open, or keeping deprecated APIs for too +long: it gives no incentive for users to update their code, sends conflicting +messages ("why was this API removed while this other one is still around?"), and +accumulates debt in the project. + +### Should this attribute be public? Should this function be private? + +When designing an API it’s not always obvious what should be exposed as public, +and what should be kept as a private implementation detail. The following +guidelines can be useful: + +* Functional consistency throughout the library is a top priority, for users and + developers’ sake. When in doubt, and unless it’s clearly wrong, expose what other + similar classes expose. +* Think really hard about the users and their use-cases, and try to expose what + they would need to address those use-cases. Aggressively keep everything else + private. Remember that the “private -> public” direction is way smoother than + the “public -> private” one: when in doubt, keep it private. +* When thinking about use-cases, the general API motto applies: make what’s + simple and common easy, and make what’s complex possible (80% / 20% rule). + There might be a ~1% left that’s not addressed: that’s OK. Also, **make what’s + wrong very hard**, if not impossible. + +As a good practice, always create new files and even classes with a leading +underscore in their name. This way, everything is private by default and the +only public surface is explicitly present in an `__init__.py` file. 
diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000000..d8ab11d0d21 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,150 @@ +[mypy] + +files = torchvision +show_error_codes = True +pretty = True +allow_redefinition = True +no_implicit_optional = True +warn_redundant_casts = True + +[mypy-torchvision.prototype.datapoints.*] + +; untyped definitions and calls +disallow_untyped_defs = True + +; None and Optional handling +no_implicit_optional = True + +; warnings +warn_unused_ignores = True + +; miscellaneous strictness flags +allow_redefinition = True + +[mypy-torchvision.prototype.transforms.*] + +ignore_errors = True + +[mypy-torchvision.prototype.datasets.*] + +ignore_errors = True + +[mypy-torchvision.io.image.*] + +ignore_errors = True + +[mypy-torchvision.io.video.*] + +ignore_errors = True + +[mypy-torchvision.io.video_reader] + +ignore_errors = True + +[mypy-torchvision.models.densenet.*] + +ignore_errors=True + +[mypy-torchvision.models.maxvit.*] + +ignore_errors=True + +[mypy-torchvision.models.detection.anchor_utils] + +ignore_errors = True + +[mypy-torchvision.models.detection.transform] + +ignore_errors = True + +[mypy-torchvision.models.detection.roi_heads] + +ignore_errors = True + +[mypy-torchvision.models.detection.faster_rcnn] + +ignore_errors = True + +[mypy-torchvision.models.detection.mask_rcnn] + +ignore_errors = True + +[mypy-torchvision.models.detection.keypoint_rcnn] + +ignore_errors = True + +[mypy-torchvision.models.detection.retinanet] + +ignore_errors = True + +[mypy-torchvision.models.detection.ssd] + +ignore_errors = True + +[mypy-torchvision.models.detection.ssdlite] + +ignore_errors = True + +[mypy-torchvision.models.detection.fcos] + +ignore_errors = True + +[mypy-torchvision.ops.*] + +ignore_errors = True + +[mypy-torchvision.transforms._functional_pil] + +ignore_errors = True + +[mypy-torchvision.transforms.functional.*] + +ignore_errors = True + +[mypy-torchvision.transforms.transforms.*] + +ignore_errors = True + 
+[mypy-PIL.*] + +ignore_missing_imports = True + +[mypy-numpy.*] + +ignore_missing_imports = True + +[mypy-scipy.*] + +ignore_missing_imports = True + +[mypy-pycocotools.*] + +ignore_missing_imports = True + +[mypy-lmdb.*] + +ignore_missing_imports = True + +[mypy-accimage.*] + +ignore_missing_imports = True + +[mypy-av.*] + +ignore_missing_imports = True + +[mypy-defusedxml.*] + +ignore_missing_imports = True + +[mypy-torchdata.*] + +ignore_missing_imports = True + +[mypy-h5py.*] + +ignore_missing_imports = True + +[mypy-gdown.*] + +ignore_missing_imports = True diff --git a/packaging/README.md b/packaging/README.md deleted file mode 100644 index 7d3c5f7831b..00000000000 --- a/packaging/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Building torchvision packages for release - -## Anaconda packages - -### Linux - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/conda-cuda bash -pushd remote/conda - -./build_vision.sh 9.0 -./build_vision.sh 10.0 -./build_vision.sh cpu - -# copy packages over to /remote -# exit docker -# anaconda upload -u pytorch torchvision*.bz2 -``` - -### OSX - -```bash -# create a fresh anaconda environment / install and activate it -conda install -y conda-build anaconda-client -./build_vision.sh cpu - -# copy packages over to /remote -# exit docker -# anaconda upload -u pytorch torchvision*.bz2 -``` - -### Windows - -```bash -# Open `Git Bash` and change dir to `conda` -./build_vision.sh 9.0 -./build_vision.sh 10.0 -./build_vision.sh cpu - -# copy packages to a output directory -# anaconda upload -u pytorch torchvision*.bz2 -``` - -## Wheels - -### Linux - -pushd wheel - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda90:latest bash -cd remote -./linux_manywheel.sh cu90 - -rm -rf /usr/local/cuda* -./linux_manywheel.sh cpu -``` - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda100:latest bash -cd remote -./linux_manywheel.sh cu100 -``` - -wheels 
are in the folders `cpu`, `cu90`, `cu100`. - -You can upload the `cu90` wheels to twine with `twine upload *.whl`. -Which wheels we upload depends on which wheels PyTorch uploads as default, and right now, it's `cu90`. - -### OSX - -```bash -pushd wheel -./osx_wheel.sh -``` - -### Windows - -```cmd -set PYTORCH_REPO=pytorch - -pushd windows -call build_vision.bat 90 0.3.0 1 -call build_vision.bat 100 0.3.0 1 -call build_vision.bat cpu 0.3.0 1 -``` - -wheels are in the current folder. - -You can upload them to twine with `twine upload *.whl` diff --git a/packaging/build_conda.sh b/packaging/build_conda.sh deleted file mode 100755 index aaddf0710c8..00000000000 --- a/packaging/build_conda.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -ex - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. "$script_dir/pkg_helpers.bash" - -export BUILD_TYPE=conda -setup_env 0.5.0 -export SOURCE_ROOT_DIR="$PWD" -setup_conda_pytorch_constraint -setup_conda_cudatoolkit_constraint -setup_visual_studio_constraint -conda build $CONDA_CHANNEL_FLAGS -c defaults -c conda-forge --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchvision diff --git a/packaging/build_wheel.sh b/packaging/build_wheel.sh deleted file mode 100755 index 7d37239563d..00000000000 --- a/packaging/build_wheel.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -set -ex - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. 
"$script_dir/pkg_helpers.bash" - -export BUILD_TYPE=wheel -setup_env 0.5.0 -setup_wheel_python -pip_install numpy pyyaml future ninja -# TODO remove after https://github.com/pytorch/pytorch/pull/27282 gets merged -pip_install six -setup_pip_pytorch_version -python setup.py clean -IS_WHEEL=1 python setup.py bdist_wheel diff --git a/packaging/conda/build_vision.sh b/packaging/conda/build_vision.sh deleted file mode 100755 index 000f314670b..00000000000 --- a/packaging/conda/build_vision.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env bash -if [[ -x "/remote/anaconda_token" ]]; then - . /remote/anaconda_token || true -fi - -set -ex - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Parse arguments and determmine version -########################################################### -if [[ -n "$DESIRED_CUDA" && -n "$TORCHVISION_BUILD_VERSION" && -n "$TORCHVISION_BUILD_NUMBER" ]]; then - desired_cuda="$DESIRED_CUDA" - build_version="$PYTORCH_BUILD_VERSION" - build_number="$PYTORCH_BUILD_NUMBER" -else - if [ "$#" -ne 3 ]; then - echo "Illegal number of parameters. Pass cuda version, pytorch version, build number" - echo "CUDA version should be Mm with no dot, e.g. '80'" - echo "DESIRED_PYTHON should be M.m, e.g. '2.7'" - exit 1 - fi - - desired_cuda="$1" - build_version="$2" - build_number="$3" -fi -if [[ "$desired_cuda" != cpu ]]; then - desired_cuda="$(echo $desired_cuda | tr -d cuda. 
)" -fi -echo "Building cuda version $desired_cuda and torchvision version: $build_version build_number: $build_number" - -if [[ "$desired_cuda" == 'cpu' ]]; then - cpu_only=1 - cuver="cpu" -else - # Switch desired_cuda to be M.m to be consistent with other scripts in - # pytorch/builder - export FORCE_CUDA=1 - cuda_nodot="$desired_cuda" - - if [[ ${#cuda_nodot} -eq 2 ]]; then - desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}" - elif [[ ${#cuda_nodot} -eq 3 ]]; then - desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}" - else - echo "unknown cuda version $cuda_nodot" - exit 1 - fi - - cuver="cu$cuda_nodot" -fi - -export TORCHVISION_BUILD_VERSION=$build_version -export TORCHVISION_BUILD_NUMBER=$build_number - -if [[ -z "$DESIRED_PYTHON" ]]; then - DESIRED_PYTHON=('3.5' '3.6' '3.7') -fi - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then - WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)" -fi - -mkdir -p "$WIN_PACKAGE_WORK_DIR" || true -vision_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchvision-src" -git config --system core.longpaths true - -if [[ ! 
-d "$vision_rootdir" ]]; then - rm -rf "$vision_rootdir" - git clone "https://github.com/pytorch/vision" "$vision_rootdir" - pushd "$vision_rootdir" - git checkout $PYTORCH_BRANCH - popd -fi - -cd "$SOURCE_DIR" - -export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" -export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" -rm -rf "$tmp_conda" -rm -f "$miniconda_exe" -curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe" -"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" -pushd $tmp_conda -export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" -popd -retry conda install -yq conda-build - -ANACONDA_USER=pytorch-nightly -conda config --set anaconda_upload no - - -export TORCHVISION_PACKAGE_SUFFIX="" -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]" - export CUDA_VERSION="None" -else - export CONDA_CPUONLY_FEATURE="" - . 
./switch_cuda_version.sh $desired_cuda - if [[ "$desired_cuda" == "10.1" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - elif [[ "$desired_cuda" == "10.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - elif [[ "$desired_cuda" == "9.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" - elif [[ "$desired_cuda" == "9.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]" - elif [[ "$desired_cuda" == "8.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]" - else - echo "unhandled desired_cuda: $desired_cuda" - exit 1 - fi -fi - -if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = '$cuver'; \ - cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver in x['fn']) \ - and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON" - exit 1 - fi -else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" -fi -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" -else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" -fi - -# Loop through all Python versions to build a package for each -for py_ver in "${DESIRED_PYTHON[@]}"; do - build_string="py${py_ver}_${build_string_suffix}" - 
folder_tag="${build_string}_$(date +'%Y%m%d')" - - # Create the conda package into this temporary folder. This is so we can find - # the package afterwards, as there's no easy way to extract the final filename - # from conda-build - output_folder="out_$folder_tag" - rm -rf "$output_folder" - mkdir "$output_folder" - - export VSTOOLCHAIN_PACKAGE=vs2017 - - # We need to build the compiler activation scripts first on Windows - time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \ - conda build -c "$ANACONDA_USER" \ - --no-anaconda-upload \ - --output-folder "$output_folder" \ - ../$VSTOOLCHAIN_PACKAGE - - cp ../$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml ../torchvision/conda_build_config.yaml - - conda config --set anaconda_upload no - echo "Calling conda-build at $(date)" - if [[ "$desired_cuda" == "9.2" ]]; then - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHVISION_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$vision_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - -c "numba/label/dev" \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchvision - else - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHVISION_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$vision_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchvision - fi - echo "Finished conda-build at $(date)" - - # Extract the package for testing - ls -lah "$output_folder" - built_package="$(find $output_folder/ -name '*torchvision*.tar.bz2')" - - # Copy the built package to the host machine for persistence before testing - if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then - mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true - cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/" - fi -done - - -set +e diff --git 
a/packaging/conda/install_conda.bat b/packaging/conda/install_conda.bat deleted file mode 100644 index 6052ad08b10..00000000000 --- a/packaging/conda/install_conda.bat +++ /dev/null @@ -1 +0,0 @@ -start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% diff --git a/packaging/conda/switch_cuda_version.sh b/packaging/conda/switch_cuda_version.sh deleted file mode 100755 index 342def93899..00000000000 --- a/packaging/conda/switch_cuda_version.sh +++ /dev/null @@ -1,28 +0,0 @@ -if [[ "$OSTYPE" == "msys" ]]; then - CUDA_DIR="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v$1" -else - CUDA_DIR="/usr/local/cuda-$1" -fi - -if ! ls "$CUDA_DIR" -then - echo "folder $CUDA_DIR not found to switch" -fi - -echo "Switching symlink to $CUDA_DIR" -mkdir -p /usr/local -rm -fr /usr/local/cuda -ln -s "$CUDA_DIR" /usr/local/cuda - -if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_VERSION=`ls /usr/local/cuda/bin/cudart64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` - export CUDNN_VERSION=`ls /usr/local/cuda/bin/cudnn64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` -else - export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) - export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." 
-f -3 | rev) -fi - -ls -alh /usr/local/cuda - -echo "CUDA_VERSION=$CUDA_VERSION" -echo "CUDNN_VERSION=$CUDNN_VERSION" diff --git a/packaging/cut_release.sh b/packaging/cut_release.sh new file mode 100755 index 00000000000..91e0e5ff15d --- /dev/null +++ b/packaging/cut_release.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Usage (run from root of project): +# TEST_INFRA_BRANCH=release/2.1 RELEASE_BRANCH=release/2.1 RELEASE_VERSION=2.1.0 packaging/cut_release.sh +# +# TEST_INFRA_BRANCH: The release branch of test-infra that houses all reusable +# workflows +# +# RELEASE_BRANCH: The name of the release branch for this repo +# +# RELEASE_VERSION: Version of this current release + +set -eou pipefail + +# Create and Check out to Release Branch +git checkout -b "${RELEASE_BRANCH}" + +# Change all GitHub Actions to reference the test-infra release branch +# as opposed to main. +for i in .github/workflows/*.yml; do + if [[ "$OSTYPE" == "darwin"* ]]; then + sed -i '' -e s#@main#@"${TEST_INFRA_BRANCH}"# $i; + sed -i '' -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i; + else + sed -i -e s#@main#@"${TEST_INFRA_BRANCH}"# $i; + sed -i -e s#test-infra-ref:[[:space:]]main#"test-infra-ref: ${TEST_INFRA_BRANCH}"# $i; + fi +done + +# Update the Release Version in version.txt +echo "${RELEASE_VERSION}" >version.txt + +# Optional +# git add .github/workflows/*.yml version.txt +# git commit -m "[RELEASE-ONLY CHANGES] Branch Cut for Release ${RELEASE_VERSION}" +# git push origin "${RELEASE_BRANCH}" diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash deleted file mode 100644 index 5d7109efe93..00000000000 --- a/packaging/pkg_helpers.bash +++ /dev/null @@ -1,261 +0,0 @@ -# A set of useful bash functions for common functionality we need to do in -# many build scripts - - -# Setup CUDA environment variables, based on CU_VERSION -# -# Inputs: -# CU_VERSION (cpu, cu92, cu100) -# NO_CUDA_PACKAGE (bool) -# BUILD_TYPE (conda, wheel) -# -# Outputs: -# 
VERSION_SUFFIX (e.g., "") -# PYTORCH_VERSION_SUFFIX (e.g., +cpu) -# WHEEL_DIR (e.g., cu100/) -# CUDA_HOME (e.g., /usr/local/cuda-9.2, respected by torch.utils.cpp_extension) -# FORCE_CUDA (respected by torchvision setup.py) -# NVCC_FLAGS (respected by torchvision setup.py) -# -# Precondition: CUDA versions are installed in their conventional locations in -# /usr/local/cuda-* -# -# NOTE: Why VERSION_SUFFIX versus PYTORCH_VERSION_SUFFIX? If you're building -# a package with CUDA on a platform we support CUDA on, VERSION_SUFFIX == -# PYTORCH_VERSION_SUFFIX and everyone is happy. However, if you are building a -# package with only CPU bits (e.g., torchaudio), then VERSION_SUFFIX is always -# empty, but PYTORCH_VERSION_SUFFIX is +cpu (because that's how you get a CPU -# version of a Python package. But that doesn't apply if you're on OS X, -# since the default CU_VERSION on OS X is cpu. -setup_cuda() { - - # First, compute version suffixes. By default, assume no version suffixes - export VERSION_SUFFIX="" - export PYTORCH_VERSION_SUFFIX="" - export WHEEL_DIR="" - # Wheel builds need suffixes (but not if they're on OS X, which never has suffix) - if [[ "$BUILD_TYPE" == "wheel" ]] && [[ "$(uname)" != Darwin ]]; then - # The default CUDA has no suffix - if [[ "$CU_VERSION" != "cu101" ]]; then - export PYTORCH_VERSION_SUFFIX="+$CU_VERSION" - fi - # Match the suffix scheme of pytorch, unless this package does not have - # CUDA builds (in which case, use default) - if [[ -z "$NO_CUDA_PACKAGE" ]]; then - export VERSION_SUFFIX="$PYTORCH_VERSION_SUFFIX" - export WHEEL_DIR="$CU_VERSION/" - fi - fi - - # Now work out the CUDA settings - case "$CU_VERSION" in - cu101) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1" - else - export CUDA_HOME=/usr/local/cuda-10.1/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export 
NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu100) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0" - else - export CUDA_HOME=/usr/local/cuda-10.0/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu92) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2" - else - export CUDA_HOME=/usr/local/cuda-9.2/ - fi - export FORCE_CUDA=1 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50" - ;; - cpu) - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac -} - -# Populate build version if necessary, and add version suffix -# -# Inputs: -# BUILD_VERSION (e.g., 0.2.0 or empty) -# VERSION_SUFFIX (e.g., +cpu) -# -# Outputs: -# BUILD_VERSION (e.g., 0.2.0.dev20190807+cpu) -# -# Fill BUILD_VERSION if it doesn't exist already with a nightly string -# Usage: setup_build_version 0.2.0 -setup_build_version() { - if [[ -z "$BUILD_VERSION" ]]; then - export BUILD_VERSION="$1.dev$(date "+%Y%m%d")$VERSION_SUFFIX" - else - export BUILD_VERSION="$BUILD_VERSION$VERSION_SUFFIX" - fi -} - -# Set some useful variables for OS X, if applicable -setup_macos() { - if [[ "$(uname)" == Darwin ]]; then - export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ - fi -} 
- -# Top-level entry point for things every package will need to do -# -# Usage: setup_env 0.2.0 -setup_env() { - setup_cuda - setup_build_version "$1" - setup_macos -} - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Inputs: -# PYTHON_VERSION (2.7, 3.5, 3.6, 3.7) -# UNICODE_ABI (bool) -# -# Outputs: -# PATH modified to put correct Python version in PATH -# -# Precondition: If Linux, you are in a soumith/manylinux-cuda* Docker image -setup_wheel_python() { - if [[ "$(uname)" == Darwin ]]; then - eval "$(conda shell.bash hook)" - conda env remove -n "env$PYTHON_VERSION" || true - conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" - conda activate "env$PYTHON_VERSION" - else - case "$PYTHON_VERSION" in - 2.7) - if [[ -n "$UNICODE_ABI" ]]; then - python_abi=cp27-cp27mu - else - python_abi=cp27-cp27m - fi - ;; - 3.5) python_abi=cp35-cp35m ;; - 3.6) python_abi=cp36-cp36m ;; - 3.7) python_abi=cp37-cp37m ;; - *) - echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - ;; - esac - export PATH="/opt/python/$python_abi/bin:$PATH" - fi -} - -# Install with pip a bit more robustly than the default -pip_install() { - retry pip install --progress-bar off "$@" -} - -# Install torch with pip, respecting PYTORCH_VERSION, and record the installed -# version into PYTORCH_VERSION, if applicable -setup_pip_pytorch_version() { - if [[ -z "$PYTORCH_VERSION" ]]; then - # Install latest prerelease version of torch, per our nightlies, consistent - # with the requested cuda version - pip_install --pre torch -f "https://download.pytorch.org/whl/nightly/${WHEEL_DIR}torch_nightly.html" - if [[ "$CUDA_VERSION" == "cpu" ]]; then - # CUDA and CPU are ABI compatible on the CPU-only parts, so strip - # in this case - export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - export 
PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//')" - fi - else - pip_install "torch==$PYTORCH_VERSION$CUDA_SUFFIX" \ - -f https://download.pytorch.org/whl/torch_stable.html \ - -f https://download.pytorch.org/whl/nightly/torch_nightly.html - fi -} - -# Fill PYTORCH_VERSION with the latest conda nightly version, and -# CONDA_CHANNEL_FLAGS with appropriate flags to retrieve these versions -# -# You MUST have populated CUDA_SUFFIX before hand. -setup_conda_pytorch_constraint() { - if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \ - cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - cuver_2 = (cuver[:-1] + '.' + cuver[-1]).replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver_1 in x['fn'] or cuver_2 in x['fn']) \ - and 'py' + os.environ['PYTHON_VERSION'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for CU_VERSION=$CU_VERSION and PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - fi - else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" - fi - if [[ "$CU_VERSION" == cpu ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - fi -} - -# Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT -setup_conda_cudatoolkit_constraint() { - export CONDA_CPUONLY_FEATURE="" - if [[ "$(uname)" == Darwin ]]; then - 
export CONDA_CUDATOOLKIT_CONSTRAINT="" - else - case "$CU_VERSION" in - cu101) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - ;; - cu100) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - ;; - cu92) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" - ;; - cpu) - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly" - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac - fi -} - -# Build the proper compiler package before building the final package -setup_visual_studio_constraint() { - if [[ "$OSTYPE" == "msys" ]]; then - export VSTOOLCHAIN_PACKAGE=vs2019 - export VSDEVCMD_ARGS='' - conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE - cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/torchvision/conda_build_config.yaml - fi -} diff --git a/packaging/post_build_script.sh b/packaging/post_build_script.sh new file mode 100644 index 00000000000..253980b98c3 --- /dev/null +++ b/packaging/post_build_script.sh @@ -0,0 +1,4 @@ +#!/bin/bash +LD_LIBRARY_PATH="/usr/local/lib:$CUDA_HOME/lib64:$LD_LIBRARY_PATH" python packaging/wheel/relocate.py + +pip install torchvision-extra-decoders diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh new file mode 100644 index 00000000000..6bc3cdc703f --- /dev/null +++ b/packaging/pre_build_script.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +if [[ "$(uname)" == Darwin ]]; then + # Uninstall Conflicting jpeg brew formulae + jpeg_packages=$(brew list | grep jpeg) + echo "Existing Jpeg-related Brew libraries" + echo $jpeg_packages + for pkg in $jpeg_packages; do + brew uninstall --ignore-dependencies --force $pkg || true + done + + conda install -yq wget +fi + +if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then + conda install libpng libwebp -yq + # Installing webp also installs a non-turbo jpeg, so we uninstall jpeg stuff + # 
before re-installing them + conda uninstall libjpeg-turbo libjpeg -y + conda install -yq ffmpeg=4.2 libjpeg-turbo -c pytorch + + # Copy binaries to be included in the wheel distribution + if [[ "$OSTYPE" == "msys" ]]; then + python_exec="$(which python)" + bin_path=$(dirname $python_exec) + cp "$bin_path/Library/bin/libjpeg.dll" torchvision + fi +else + + if [[ "$ARCH" == "aarch64" ]]; then + conda install libpng -yq + conda install -yq ffmpeg=4.2 libjpeg-turbo -c pytorch-nightly + fi + + conda install libwebp -yq + conda install libjpeg-turbo -c pytorch + yum install -y freetype gnutls + pip install auditwheel +fi + +pip install numpy pyyaml future ninja +pip install --upgrade setuptools==72.1.0 diff --git a/packaging/torchvision/bld.bat b/packaging/torchvision/bld.bat deleted file mode 100644 index 73f217c2cf1..00000000000 --- a/packaging/torchvision/bld.bat +++ /dev/null @@ -1,26 +0,0 @@ -@echo on - -set TORCHVISION_BUILD_VERSION=%PKG_VERSION% -set TORCHVISION_BUILD_NUMBER=%PKG_BUILDNUM% - -set build_with_cuda= - -if "%CUDA_VERSION%" == "None" goto cuda_flags_end -if "%CUDA_VERSION%" == "cpu" goto cuda_flags_end -if "%CUDA_VERSION%" == "" goto cuda_flags_end - -set build_with_cuda=1 -set desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% - -set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% -set CUDA_BIN_PATH=%CUDA_PATH%\bin -set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -if "%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "9.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == 
"10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.1" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - -:cuda_flags_end - -python setup.py install --single-version-externally-managed --record=record.txt -if errorlevel 1 exit /b 1 diff --git a/packaging/torchvision/conda_build_config.yaml b/packaging/torchvision/conda_build_config.yaml index 5188bb0ebec..a7c25c6d534 100644 --- a/packaging/torchvision/conda_build_config.yaml +++ b/packaging/torchvision/conda_build_config.yaml @@ -1,3 +1,5 @@ +channel_sources: + - pytorch-nightly,pytorch,defaults blas_impl: - mkl # [x86_64] c_compiler: @@ -5,8 +7,7 @@ c_compiler: cxx_compiler: - vs2017 # [win] python: - - 3.5 - - 3.6 + - 3.8 # This differs from target_platform in that it determines what subdir the compiler # will target, not what subdir the compiler package will be itself. 
# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml index da075ff03cb..a847328a77e 100644 --- a/packaging/torchvision/meta.yaml +++ b/packaging/torchvision/meta.yaml @@ -1,3 +1,4 @@ +{% set build_variant = environ.get('CONDA_BUILD_VARIANT', 'cpu') %} package: name: torchvision version: "{{ environ.get('BUILD_VERSION') }}" @@ -8,31 +9,49 @@ source: requirements: build: - {{ compiler('c') }} # [win] + - libpng + - libjpeg-turbo + - libwebp + - ffmpeg >=4.2.2, <5.0.0 # [linux] host: - python - setuptools - {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - {{ environ.get('CONDA_CPUONLY_FEATURE') }} + - pytorch-mutex 1.0 {{ build_variant }} # [not osx ] + {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT', 'pytorch') }} + {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT', '') }} run: - python - - pillow >=4.1.1 - - numpy >=1.11 - - six - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} + - defaults::numpy >=1.11 # [py <= 310] + - numpy >=1.23.5 # [py >= 311] + - requests + - libpng + - ffmpeg >=4.2.2, <5.0.0 # [linux] + - libjpeg-turbo + - libwebp + - pillow >=5.3.0, !=8.3.* + - pytorch-mutex 1.0 {{ build_variant }} # [not osx ] + {{ environ.get('CONDA_PYTORCH_CONSTRAINT', 'pytorch') }} + {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT', '') }} + + {% if build_variant == 'cpu' %} + run_constrained: + - cpuonly + {% elif not osx %} + run_constrained: + - cpuonly <0 + {% endif %} build: string: py{{py}}_{{ environ['CU_VERSION'] }} - script: python setup.py install --single-version-externally-managed --record=record.txt # [not win] + script: python setup.py install --single-version-externally-managed --record=record.txt script_env: - CUDA_HOME - FORCE_CUDA - - NVCC_FLAGS - features: - {{ environ.get('CONDA_CPUONLY_FEATURE') }} + - BUILD_VERSION + - TORCH_CUDA_ARCH_LIST + - 
MACOSX_DEPLOYMENT_TARGET test: imports: @@ -44,12 +63,8 @@ test: requires: - pytest - scipy - - mock - - av + - libjpeg-turbo - ca-certificates - - typing - commands: - pytest . about: diff --git a/packaging/vs2017/activate.bat b/packaging/vs2017/activate.bat deleted file mode 100644 index ccecfc25442..00000000000 --- a/packaging/vs2017/activate.bat +++ /dev/null @@ -1,44 +0,0 @@ -:: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 - -SET "VS_VERSION=15.0" -SET "VS_MAJOR=15" -SET "VS_YEAR=2017" - -set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" -set "MSYS2_ENV_CONV_EXCL=CL" - -:: For Python 3.5+, ensure that we link with the dynamic runtime. See -:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VSINSTALLDIR=%%i\" - goto :vswhere - ) -) - -:vswhere - -:: Shorten PATH to avoid the `input line too long` error. -SET MyPath=%PATH% - -setlocal EnableDelayedExpansion - -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" - ) -) - -set "TempPath=!var:~1!" -endlocal & set "PATH=%TempPath%" - -:: Shorten current directory too -FOR %%A IN (.) 
DO CD "%%~sA" - -:: other things added by install_activate.bat at package build time diff --git a/packaging/vs2017/conda_build_config.yaml b/packaging/vs2017/conda_build_config.yaml deleted file mode 100644 index 5188bb0ebec..00000000000 --- a/packaging/vs2017/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2017 # [win] -cxx_compiler: - - vs2017 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/packaging/vs2017/install_activate.bat b/packaging/vs2017/install_activate.bat deleted file mode 100644 index de0e6ff3c52..00000000000 --- a/packaging/vs2017/install_activate.bat +++ /dev/null @@ -1,30 +0,0 @@ -set YEAR=2017 -set VER=15 - -mkdir "%PREFIX%\etc\conda\activate.d" -COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - -IF "%cross_compiler_target_platform%" == "win-64" ( - set "target_platform=amd64" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - IF "%VSDEVCMD_ARGS%" == "" ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> 
"%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) ELSE ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) else ( - set "target_platform=x86" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd - ) - diff --git a/packaging/vs2017/install_runtime.bat b/packaging/vs2017/install_runtime.bat deleted file mode 100644 index 5163c16cf24..00000000000 --- a/packaging/vs2017/install_runtime.bat +++ /dev/null @@ -1,49 +0,0 @@ -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2017 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2017 -set "VC_VER=141" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/packaging/vs2017/meta.yaml b/packaging/vs2017/meta.yaml deleted file mode 100644 index 34f4860ba85..00000000000 --- a/packaging/vs2017/meta.yaml +++ /dev/null @@ -1,45 +0,0 @@ -{% set vcver="14.1" %} -{% set vcfeature="14" %} -{% set vsyear="2017" %} -{% set fullver="15.4.27004.2010" %} - -package: - name: vs{{ vsyear }} - version: {{ fullver }} - -build: - skip: True [not win] - script_env: - - VSDEVCMD_ARGS # [win] - -outputs: - - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} - script: install_activate.bat - track_features: - # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141". 
- strong: - - vc{{ vcfeature }} - run_exports: - - vc {{ vcver }} - about: - summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler - license: BSD 3-clause - - name: vs{{ vsyear }}_runtime - script: install_runtime.bat - - name: vc - version: {{ vcver }} - track_features: - - vc{{ vcfeature }} - requirements: - run: - - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }} - about: - home: https://github.com/conda/conda/wiki/VC-features - license: Modified BSD License (3-clause) - license_family: BSD - summary: A meta-package to track VC features. - description: | - This metapackage is used to activate vc features without - depending on Python. - doc_url: https://github.com/conda/conda/wiki/VC-features - dev_url: https://github.com/conda/conda/wiki/VC-features diff --git a/packaging/vs2019/conda_build_config.yaml b/packaging/vs2019/conda_build_config.yaml index 358052ec012..b4dc99341d0 100644 --- a/packaging/vs2019/conda_build_config.yaml +++ b/packaging/vs2019/conda_build_config.yaml @@ -5,8 +5,7 @@ c_compiler: cxx_compiler: - vs2019 # [win] python: - - 3.5 - - 3.6 + - 3.8 # This differs from target_platform in that it determines what subdir the compiler # will target, not what subdir the compiler package will be itself. 
# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 diff --git a/packaging/vs2019/install_activate.bat b/packaging/vs2019/install_activate.bat index 3c38253aa5d..9e60ccfd2dc 100644 --- a/packaging/vs2019/install_activate.bat +++ b/packaging/vs2019/install_activate.bat @@ -27,4 +27,3 @@ IF "%cross_compiler_target_platform%" == "win-64" ( echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" echo popd ) - diff --git a/packaging/vs2019/install_runtime.bat b/packaging/vs2019/install_runtime.bat deleted file mode 100644 index e09a5ccfb0f..00000000000 --- a/packaging/vs2019/install_runtime.bat +++ /dev/null @@ -1,49 +0,0 @@ -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2019 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2019 -set "VC_VER=142" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/packaging/vs2019/meta.yaml b/packaging/vs2019/meta.yaml index e3f8b471481..94a0ed4db3e 100644 --- a/packaging/vs2019/meta.yaml +++ b/packaging/vs2019/meta.yaml @@ -19,27 +19,6 @@ outputs: # VS 2019 is binary-compatible with VS 2017/vc 14.1 and 2015/vc14. Tools are "v142". 
strong: - vc{{ vcfeature }} - run_exports: - - vc {{ vcver }} about: summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler license: BSD 3-clause - - name: vs{{ vsyear }}_runtime - script: install_runtime.bat - - name: vc - version: {{ vcver }} - track_features: - - vc{{ vcfeature }} - requirements: - run: - - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }} - about: - home: https://github.com/conda/conda/wiki/VC-features - license: Modified BSD License (3-clause) - license_family: BSD - summary: A meta-package to track VC features. - description: | - This metapackage is used to activate vc features without - depending on Python. - doc_url: https://github.com/conda/conda/wiki/VC-features - dev_url: https://github.com/conda/conda/wiki/VC-features diff --git a/packaging/wheel/linux_manywheel.sh b/packaging/wheel/linux_manywheel.sh deleted file mode 100644 index d04e334d237..00000000000 --- a/packaging/wheel/linux_manywheel.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -set -ex - -if [ "$#" -ne 1 ]; then - echo "Illegal number of parameters. 
Pass cuda version" - echo "CUDA version should be cu92, cu100 or cpu" - exit 1 -fi -export CUVER="$1" # cu92 cu100 cpu - -if [[ "$CUVER" == "cu101" ]]; then - cu_suffix="" -else - cu_suffix="+$CUVER" -fi - -export TORCHVISION_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")${cu_suffix}" -export TORCHVISION_BUILD_NUMBER="1" -export TORCHVISION_LOCAL_VERSION_LABEL="$CUVER" -export OUT_DIR="/remote/$CUVER" - -pushd /opt/python -DESIRED_PYTHON=(*/) -popd -for desired_py in "${DESIRED_PYTHON[@]}"; do - python_installations+=("/opt/python/$desired_py") -done - -OLD_PATH=$PATH -cd /tmp -rm -rf vision -git clone https://github.com/pytorch/vision - -cd /tmp/vision - -for PYDIR in "${python_installations[@]}"; do - export PATH=$PYDIR/bin:$OLD_PATH - pip install --upgrade pip - pip install numpy pyyaml future - - pip uninstall -y torch || true - pip uninstall -y torch_nightly || true - - export TORCHVISION_PYTORCH_DEPENDENCY_NAME=torch_nightly - pip install torch_nightly -f https://download.pytorch.org/whl/nightly/$CUVER/torch_nightly.html - # CPU/CUDA variants of PyTorch have ABI compatible PyTorch for - # the CPU only bits. Therefore, we - # strip off the local package qualifier, but ONLY if we're - # doing a CPU build. 
- if [[ "$CUVER" == "cpu" ]]; then - export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//' | sed 's/+.\+//')" - else - export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//')" - fi - echo "Building against ${TORCHVISION_PYTORCH_DEPENDENCY_VERSION}" - - pip install ninja - python setup.py clean - python setup.py bdist_wheel - mkdir -p $OUT_DIR - cp dist/*.whl $OUT_DIR/ -done diff --git a/packaging/wheel/osx_wheel.sh b/packaging/wheel/osx_wheel.sh deleted file mode 100644 index 900485d3199..00000000000 --- a/packaging/wheel/osx_wheel.sh +++ /dev/null @@ -1,52 +0,0 @@ -if [[ ":$PATH:" == *"conda"* ]]; then - echo "existing anaconda install in PATH, remove it and run script" - exit 1 -fi -# download and activate anaconda -rm -rf ~/minconda_wheel_env_tmp -wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh && \ - chmod +x Miniconda3-latest-MacOSX-x86_64.sh && \ - ./Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/minconda_wheel_env_tmp && \ - rm Miniconda3-latest-MacOSX-x86_64.sh - -. 
~/minconda_wheel_env_tmp/bin/activate - - -export TORCHVISION_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")" -export TORCHVISION_BUILD_NUMBER="1" -export OUT_DIR=~/torchvision_wheels - -export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ - -pushd /tmp -rm -rf vision -git clone https://github.com/pytorch/vision -pushd vision - -desired_pythons=( "2.7" "3.5" "3.6" "3.7" ) -# for each python -for desired_python in "${desired_pythons[@]}" -do - # create and activate python env - env_name="env$desired_python" - conda create -yn $env_name python="$desired_python" - conda activate $env_name - - pip uninstall -y torch || true - pip uninstall -y torch_nightly || true - - export TORCHVISION_PYTORCH_DEPENDENCY_NAME=torch_nightly - pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html - export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: *//')" - echo "Building against ${TORCHAUDIO_PYTORCH_DEPENDENCY_VERSION}" - - # install torchvision dependencies - pip install ninja scipy pytest - - python setup.py clean - python setup.py bdist_wheel - mkdir -p $OUT_DIR - cp dist/*.whl $OUT_DIR/ -done -popd -popd diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py new file mode 100644 index 00000000000..fb110abd873 --- /dev/null +++ b/packaging/wheel/relocate.py @@ -0,0 +1,380 @@ +"""Helper script to package wheels and relocate binaries.""" + +import glob +import hashlib + +# Standard library imports +import os +import os.path as osp +import platform +import shutil +import subprocess +import sys +import zipfile +from base64 import urlsafe_b64encode + +# Third party imports +if sys.platform == "linux": + from auditwheel.lddtree import lddtree + + +ALLOWLIST = { + "libgcc_s.so.1", + "libstdc++.so.6", + "libm.so.6", + "libdl.so.2", + "librt.so.1", + "libc.so.6", + "libnsl.so.1", + "libutil.so.1", + "libpthread.so.0", + "libresolv.so.2", + "libX11.so.6", + "libXext.so.6", + 
"libXrender.so.1", + "libICE.so.6", + "libSM.so.6", + "libGL.so.1", + "libgobject-2.0.so.0", + "libgthread-2.0.so.0", + "libglib-2.0.so.0", + "ld-linux-x86-64.so.2", + "ld-2.17.so", +} + +WINDOWS_ALLOWLIST = { + "MSVCP140.dll", + "KERNEL32.dll", + "VCRUNTIME140_1.dll", + "VCRUNTIME140.dll", + "api-ms-win-crt-heap-l1-1-0.dll", + "api-ms-win-crt-runtime-l1-1-0.dll", + "api-ms-win-crt-stdio-l1-1-0.dll", + "api-ms-win-crt-filesystem-l1-1-0.dll", + "api-ms-win-crt-string-l1-1-0.dll", + "api-ms-win-crt-environment-l1-1-0.dll", + "api-ms-win-crt-math-l1-1-0.dll", + "api-ms-win-crt-convert-l1-1-0.dll", +} + + +HERE = osp.dirname(osp.abspath(__file__)) +PACKAGE_ROOT = osp.dirname(osp.dirname(HERE)) +PLATFORM_ARCH = platform.machine() +PYTHON_VERSION = sys.version_info + + +def rehash(path, blocksize=1 << 20): + """Return (hash, length) for path using hashlib.sha256()""" + h = hashlib.sha256() + length = 0 + with open(path, "rb") as f: + while block := f.read(blocksize): + length += len(block) + h.update(block) + digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") + # unicode/str python2 issues + return (digest, str(length)) # type: ignore + + +def unzip_file(file, dest): + """Decompress zip `file` into directory `dest`.""" + with zipfile.ZipFile(file, "r") as zip_ref: + zip_ref.extractall(dest) + + +def is_program_installed(basename): + """ + Return program absolute path if installed in PATH. + Otherwise, return None + On macOS systems, a .app is considered installed if + it exists. 
+ """ + if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename): + return basename + + for path in os.environ["PATH"].split(os.pathsep): + abspath = osp.join(path, basename) + if osp.isfile(abspath): + return abspath + + +def find_program(basename): + """ + Find program in PATH and return absolute path + Try adding .exe or .bat to basename on Windows platforms + (return None if not found) + """ + names = [basename] + if os.name == "nt": + # Windows platforms + extensions = (".exe", ".bat", ".cmd", ".dll") + if not basename.endswith(extensions): + names = [basename + ext for ext in extensions] + [basename] + for name in names: + path = is_program_installed(name) + if path: + return path + + +def patch_new_path(library_path, new_dir): + library = osp.basename(library_path) + name, *rest = library.split(".") + rest = ".".join(rest) + hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8] + new_name = ".".join([name, hash_id, rest]) + return osp.join(new_dir, new_name) + + +def find_dll_dependencies(dumpbin, binary): + out = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE) + out = out.stdout.strip().decode("utf-8") + start_index = out.find("dependencies:") + len("dependencies:") + end_index = out.find("Summary") + dlls = out[start_index:end_index].strip() + dlls = dlls.split(os.linesep) + dlls = [dll.strip() for dll in dlls] + return dlls + + +def relocate_elf_library(patchelf, output_dir, output_library, binary): + """ + Relocate an ELF shared library to be packaged on a wheel. + + Given a shared library, find the transitive closure of its dependencies, + rename and copy them into the wheel while updating their respective rpaths. 
+ """ + + print(f"Relocating {binary}") + binary_path = osp.join(output_library, binary) + + ld_tree = lddtree(binary_path) + tree_libs = ld_tree["libs"] + + binary_queue = [(n, binary) for n in ld_tree["needed"]] + binary_paths = {binary: binary_path} + binary_dependencies = {} + + while binary_queue != []: + library, parent = binary_queue.pop(0) + library_info = tree_libs[library] + print(library) + + if library_info["path"] is None: + print(f"Omitting {library}") + continue + + if library in ALLOWLIST: + # Omit glibc/gcc/system libraries + print(f"Omitting {library}") + continue + + parent_dependencies = binary_dependencies.get(parent, []) + parent_dependencies.append(library) + binary_dependencies[parent] = parent_dependencies + + if library in binary_paths: + continue + + binary_paths[library] = library_info["path"] + binary_queue += [(n, library) for n in library_info["needed"]] + + print("Copying dependencies to wheel directory") + new_libraries_path = osp.join(output_dir, "torchvision.libs") + os.makedirs(new_libraries_path, exist_ok=True) + + new_names = {binary: binary_path} + + for library in binary_paths: + if library != binary: + library_path = binary_paths[library] + new_library_path = patch_new_path(library_path, new_libraries_path) + print(f"{library} -> {new_library_path}") + shutil.copyfile(library_path, new_library_path) + new_names[library] = new_library_path + + print("Updating dependency names by new files") + for library in binary_paths: + if library != binary: + if library not in binary_dependencies: + continue + library_dependencies = binary_dependencies[library] + new_library_name = new_names[library] + for dep in library_dependencies: + new_dep = osp.basename(new_names[dep]) + print(f"{library}: {dep} -> {new_dep}") + subprocess.check_output( + [patchelf, "--replace-needed", dep, new_dep, new_library_name], cwd=new_libraries_path + ) + + print("Updating library rpath") + subprocess.check_output([patchelf, "--set-rpath", "$ORIGIN", 
new_library_name], cwd=new_libraries_path) + + subprocess.check_output([patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path) + + print("Update library dependencies") + library_dependencies = binary_dependencies[binary] + for dep in library_dependencies: + new_dep = osp.basename(new_names[dep]) + print(f"{binary}: {dep} -> {new_dep}") + subprocess.check_output([patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library) + + print("Update library rpath") + subprocess.check_output( + [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchvision.libs", binary_path], cwd=output_library + ) + + +def relocate_dll_library(dumpbin, output_dir, output_library, binary): + """ + Relocate a DLL/PE shared library to be packaged on a wheel. + + Given a shared library, find the transitive closure of its dependencies, + rename and copy them into the wheel. + """ + print(f"Relocating {binary}") + binary_path = osp.join(output_library, binary) + + library_dlls = find_dll_dependencies(dumpbin, binary_path) + binary_queue = [(dll, binary) for dll in library_dlls] + binary_paths = {binary: binary_path} + binary_dependencies = {} + + while binary_queue != []: + library, parent = binary_queue.pop(0) + if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"): + print(f"Omitting {library}") + continue + + library_path = find_program(library) + if library_path is None: + print(f"{library} not found") + continue + + if osp.basename(osp.dirname(library_path)) == "system32": + continue + + print(f"{library}: {library_path}") + parent_dependencies = binary_dependencies.get(parent, []) + parent_dependencies.append(library) + binary_dependencies[parent] = parent_dependencies + + if library in binary_paths: + continue + + binary_paths[library] = library_path + downstream_dlls = find_dll_dependencies(dumpbin, library_path) + binary_queue += [(n, library) for n in downstream_dlls] + + print("Copying dependencies to wheel directory") + package_dir = 
osp.join(output_dir, "torchvision") + for library in binary_paths: + if library != binary: + library_path = binary_paths[library] + new_library_path = osp.join(package_dir, library) + print(f"{library} -> {new_library_path}") + shutil.copyfile(library_path, new_library_path) + + +def compress_wheel(output_dir, wheel, wheel_dir, wheel_name): + """Create RECORD file and compress wheel distribution.""" + print("Update RECORD file in wheel") + dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0] + record_file = osp.join(dist_info, "RECORD") + + with open(record_file, "w") as f: + for root, _, files in os.walk(output_dir): + for this_file in files: + full_file = osp.join(root, this_file) + rel_file = osp.relpath(full_file, output_dir) + if full_file == record_file: + f.write(f"{rel_file},,\n") + else: + digest, size = rehash(full_file) + f.write(f"{rel_file},{digest},{size}\n") + + print("Compressing wheel") + base_wheel_name = osp.join(wheel_dir, wheel_name) + shutil.make_archive(base_wheel_name, "zip", output_dir) + os.remove(wheel) + shutil.move(f"{base_wheel_name}.zip", wheel) + shutil.rmtree(output_dir) + + +def patch_linux(): + # Get patchelf location + patchelf = find_program("patchelf") + if patchelf is None: + raise FileNotFoundError("Patchelf was not found in the system, please make sure that is available on the PATH.") + + # Find wheel + print("Finding wheels...") + wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) + output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") + + image_binary = "image.so" + video_binary = "video_reader.so" + torchvision_binaries = [image_binary, video_binary] + for wheel in wheels: + if osp.exists(output_dir): + shutil.rmtree(output_dir) + + os.makedirs(output_dir) + + print("Unzipping wheel...") + wheel_file = osp.basename(wheel) + wheel_dir = osp.dirname(wheel) + print(f"{wheel_file}") + wheel_name, _ = osp.splitext(wheel_file) + unzip_file(wheel, output_dir) + + print("Finding ELF dependencies...") + 
output_library = osp.join(output_dir, "torchvision") + for binary in torchvision_binaries: + if osp.exists(osp.join(output_library, binary)): + relocate_elf_library(patchelf, output_dir, output_library, binary) + + compress_wheel(output_dir, wheel, wheel_dir, wheel_name) + + +def patch_win(): + # Get dumpbin location + dumpbin = find_program("dumpbin") + if dumpbin is None: + raise FileNotFoundError("Dumpbin was not found in the system, please make sure that is available on the PATH.") + + # Find wheel + print("Finding wheels...") + wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) + output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") + + image_binary = "image.pyd" + video_binary = "video_reader.pyd" + torchvision_binaries = [image_binary, video_binary] + for wheel in wheels: + if osp.exists(output_dir): + shutil.rmtree(output_dir) + + os.makedirs(output_dir) + + print("Unzipping wheel...") + wheel_file = osp.basename(wheel) + wheel_dir = osp.dirname(wheel) + print(f"{wheel_file}") + wheel_name, _ = osp.splitext(wheel_file) + unzip_file(wheel, output_dir) + + print("Finding DLL/PE dependencies...") + output_library = osp.join(output_dir, "torchvision") + for binary in torchvision_binaries: + if osp.exists(osp.join(output_library, binary)): + relocate_dll_library(dumpbin, output_dir, output_library, binary) + + compress_wheel(output_dir, wheel, wheel_dir, wheel_name) + + +if __name__ == "__main__": + if sys.platform == "linux": + patch_linux() + elif sys.platform == "win32": + patch_win() diff --git a/packaging/windows/azure-pipelines-ci.yml b/packaging/windows/azure-pipelines-ci.yml deleted file mode 100644 index 6f9f3468cfe..00000000000 --- a/packaging/windows/azure-pipelines-ci.yml +++ /dev/null @@ -1,11 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true diff --git a/packaging/windows/azure-pipelines.yml 
b/packaging/windows/azure-pipelines.yml deleted file mode 100644 index d0240570012..00000000000 --- a/packaging/windows/azure-pipelines.yml +++ /dev/null @@ -1,35 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/auth_task.yml - -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CPU' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CUDA' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CUDA' - msagent: true - -- template: templates/linux_build_task.yml - parameters: - msagent: $(ms.hosted.agent.cpu) diff --git a/packaging/windows/build_vision.bat b/packaging/windows/build_vision.bat deleted file mode 100644 index 995c43905cb..00000000000 --- a/packaging/windows/build_vision.bat +++ /dev/null @@ -1,145 +0,0 @@ -@echo off - -:: This script parses args, installs required libraries (miniconda, MKL, -:: Magma), and then delegates to cpu.bat, cuda80.bat, etc. - -IF NOT "%CUDA_VERSION%" == "" IF NOT "%TORCHVISION_BUILD_VERSION%" == "" if NOT "%TORCHVISION_BUILD_NUMBER%" == "" goto env_end -if "%~1"=="" goto arg_error -if "%~2"=="" goto arg_error -if "%~3"=="" goto arg_error -if NOT "%~4"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass cuda version, pytorch version, build number -echo CUDA version should be Mm with no dot, e.g. '80' -echo DESIRED_PYTHON should be M.m, e.g. 
'2.7' -exit /b 1 - -:arg_end - -set CUDA_VERSION=%~1 -set TORCHVISION_BUILD_VERSION=%~2 -set TORCHVISION_BUILD_NUMBER=%~3 - -set BUILD_VERSION=%TORCHVISION_BUILD_VERSION% - -:env_end - -if NOT "%CUDA_VERSION%" == "cpu" ( - set CUDA_PREFIX=cuda%CUDA_VERSION% - set CUVER=cu%CUDA_VERSION% - set FORCE_CUDA=1 -) else ( - set CUDA_PREFIX=cpu - set CUVER=cpu -) - -set BUILD_VISION=1 -REM set TORCH_WHEEL=torch -f https://download.pytorch.org/whl/%CUVER%/stable.html --no-index - -IF "%DESIRED_PYTHON%" == "" set DESIRED_PYTHON=3.5;3.6;3.7 -set DESIRED_PYTHON_PREFIX=%DESIRED_PYTHON:.=% -set DESIRED_PYTHON_PREFIX=py%DESIRED_PYTHON_PREFIX:;=;py% - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -:: Install Miniconda3 -set "CONDA_HOME=%CD%\conda" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q conda -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -call ..\conda\install_conda.bat -IF ERRORLEVEL 1 exit /b 1 -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -:: Create a new conda environment -setlocal EnableDelayedExpansion -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s - conda create -n py!PYTHON_VERSION_STR! -y -q -c defaults -c conda-forge numpy>=1.11 mkl>=2018 python=%%v ca-certificates scipy av -) - -:: Uncomment for stable releases -:: FOR %%v IN (%DESIRED_PYTHON%) DO ( -:: set PYTHON_VERSION_STR=%%v -:: set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! 
-:: set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - -:: if "%CUDA_VERSION%" == "100" ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) else ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0%%2B%CUVER%-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) -:: echo Installing !TORCH_WHEEL!... -:: pip install "!TORCH_WHEEL!" -:: ) - -:: Uncomment for nightly releases -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - - set TORCH_WHEEL=torch --pre -f https://download.pytorch.org/whl/nightly/%CUVER%/torch_nightly.html - echo Installing !TORCH_WHEEL!... - pip install !TORCH_WHEEL! 
-) - -endlocal - -if "%DEBUG%" == "1" ( - set BUILD_TYPE=debug -) ELSE ( - set BUILD_TYPE=release -) - -:: Install sccache -if "%USE_SCCACHE%" == "1" ( - mkdir %CD%\tmp_bin - curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\tmp_bin\sccache.exe - if not "%CUDA_VERSION%" == "" ( - copy %CD%\tmp_bin\sccache.exe %CD%\tmp_bin\nvcc.exe - - set CUDA_NVCC_EXECUTABLE=%CD%\tmp_bin\nvcc - set "PATH=%CD%\tmp_bin;%PATH%" - ) -) - -for %%v in (%DESIRED_PYTHON_PREFIX%) do ( - :: Activate Python Environment - set PYTHON_PREFIX=%%v - set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" - if defined INCLUDE ( - set "INCLUDE=%INCLUDE%;%CONDA_HOME%\envs\%%v\Library\include" - ) else ( - set "INCLUDE=%CONDA_HOME%\envs\%%v\Library\include" - ) - if defined LIB ( - set "LIB=%LIB%;%CONDA_HOME%\envs\%%v\Library\lib" - ) else ( - set "LIB=%CONDA_HOME%\envs\%%v\Library\lib" - ) - @setlocal - :: Set Flags - if NOT "%CUDA_VERSION%"=="cpu" ( - set CUDNN_VERSION=7 - ) - call %CUDA_PREFIX%.bat - IF ERRORLEVEL 1 exit /b 1 - call internal\test.bat - IF ERRORLEVEL 1 exit /b 1 - @endlocal -) - -set "PATH=%ORIG_PATH%" -popd - -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/cpu.bat b/packaging/windows/cpu.bat deleted file mode 100644 index 392a687f9dc..00000000000 --- a/packaging/windows/cpu.bat +++ /dev/null @@ -1,37 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -echo Disabling CUDA -set NO_CUDA=1 -set USE_CUDA=0 - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy_cpu.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda101.bat b/packaging/windows/cuda101.bat deleted file mode 100644 index db397d593c8..00000000000 --- a/packaging/windows/cuda101.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_1%"=="" ( - echo CUDA 10.1 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_1%" - set "PATH=%CUDA_PATH_V10_1%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF 
ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda92.bat b/packaging/windows/cuda92.bat deleted file mode 100644 index 0bfcdc8e463..00000000000 --- a/packaging/windows/cuda92.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set USE_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_2%"=="" ( - echo CUDA 9.2 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_2%" - set "PATH=%CUDA_PATH_V9_2%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/internal/auth.bat b/packaging/windows/internal/auth.bat deleted file mode 100644 index c874bce493c..00000000000 --- a/packaging/windows/internal/auth.bat +++ /dev/null @@ -1,46 +0,0 @@ -@echo off - -: From the following doc, the build won't be triggered if the users don't sign in daily. 
-: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?tabs=yaml&view=vsts#my-build-didnt-run-what-happened -: To avoid this problem, we can just go through the sign in process using the following command. - -:auth_start - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -for /f "usebackq tokens=*" %%i in (`curl -so NUL -w "%%{http_code}" -u %VSTS_AUTH% https://dev.azure.com/pytorch`) do ( - set STATUS_CODE=%%i -) - -IF NOT "%STATUS_CODE%" == "200" ( - echo Auth retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Auth failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto auth_start -) ELSE ( - echo Login Attempt Succeeded - goto auth_end -) - -:err - -: Throw a warning if it fails -powershell -c "Write-Warning 'Login Attempt Failed'" - -:auth_end - -set RETRY_TIMES= -set SLEEP_TIME= -set STATUS_CODE= - -exit /b 0 diff --git a/packaging/windows/internal/build_cmake.bat b/packaging/windows/internal/build_cmake.bat new file mode 100644 index 00000000000..a29160538d2 --- /dev/null +++ b/packaging/windows/internal/build_cmake.bat @@ -0,0 +1,3 @@ +@echo on +msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" torchvision.vcxproj -maxcpucount:%1 +msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" INSTALL.vcxproj -maxcpucount:%1 diff --git a/packaging/windows/internal/build_cpp_example.bat b/packaging/windows/internal/build_cpp_example.bat new file mode 100644 index 00000000000..129c574e391 --- /dev/null +++ b/packaging/windows/internal/build_cpp_example.bat @@ -0,0 +1,3 @@ +@echo on +set CL=/I"C:\Program Files (x86)\torchvision\include" +msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" 
"-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" run_model.vcxproj -maxcpucount:%1 diff --git a/packaging/windows/internal/check_deps.bat b/packaging/windows/internal/check_deps.bat deleted file mode 100644 index a159d4436d6..00000000000 --- a/packaging/windows/internal/check_deps.bat +++ /dev/null @@ -1,67 +0,0 @@ -@echo off - -REM Check for necessary components - -IF NOT "%PROCESSOR_ARCHITECTURE%"=="AMD64" ( - echo You should use 64 bits Windows to build and run PyTorch - exit /b 1 -) - -IF "%BUILD_VISION%" == "" ( - where /q cmake.exe - - IF ERRORLEVEL 1 ( - echo CMake is required to compile PyTorch on Windows - exit /b 1 - ) -) - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -set MSSdk=1 -set DISTUTILS_USE_SDK=1 - -where /q python.exe - -IF ERRORLEVEL 1 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) - -for /F "usebackq delims=" %%i in (`python -c "import sys; print('{0[0]}{0[1]}'.format(sys.version_info))"`) do ( - set /a PYVER=%%i -) - -if %PYVER% LSS 35 ( - echo Warning: PyTorch for Python 2 under Windows is experimental. 
- echo Python x64 3.5 or up is recommended to compile PyTorch on Windows - echo Maybe you can create a virual environment if you have conda installed: - echo ^> conda create -n test python=3.6 pyyaml mkl numpy - echo ^> activate test -) - -for /F "usebackq delims=" %%i in (`python -c "import struct;print( 8 * struct.calcsize('P'))"`) do ( - set /a PYSIZE=%%i -) - -if %PYSIZE% NEQ 64 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) diff --git a/packaging/windows/internal/check_opts.bat b/packaging/windows/internal/check_opts.bat deleted file mode 100644 index 003ad921328..00000000000 --- a/packaging/windows/internal/check_opts.bat +++ /dev/null @@ -1,33 +0,0 @@ -@echo off - -REM Check for optional components - -where /q ninja.exe - -IF NOT ERRORLEVEL 1 ( - echo Ninja found, using it to speed up builds - set CMAKE_GENERATOR=Ninja -) - -where /q clcache.exe - -IF NOT ERRORLEVEL 1 ( - echo clcache found, using it to speed up builds - set CC=clcache - set CXX=clcache -) - -where /q sccache.exe - -IF NOT ERRORLEVEL 1 ( - echo sccache found, using it to speed up builds - set CC=sccache cl - set CXX=sccache cl -) - -IF exist "%MKLProductDir%\mkl\lib\intel64_win" ( - echo MKL found, adding it to build - set "LIB=%MKLProductDir%\mkl\lib\intel64_win;%MKLProductDir%\compiler\lib\intel64_win;%LIB%"; -) - -exit /b 0 diff --git a/packaging/windows/internal/clean.bat b/packaging/windows/internal/clean.bat deleted file mode 100644 index 7489640f49a..00000000000 --- a/packaging/windows/internal/clean.bat +++ /dev/null @@ -1,5 +0,0 @@ -@echo off - -cd %MODULE_NAME% -python setup.py clean -cd .. diff --git a/packaging/windows/internal/clone.bat b/packaging/windows/internal/clone.bat deleted file mode 100644 index 4ba181fa804..00000000000 --- a/packaging/windows/internal/clone.bat +++ /dev/null @@ -1,56 +0,0 @@ -@echo off - -:: The conda and wheels jobs are seperated on Windows, so we don't need to clone again. 
-IF "%BUILD_VISION%" == "" ( - if exist "%NIGHTLIES_PYTORCH_ROOT%" ( - xcopy /E /Y /Q "%NIGHTLIES_PYTORCH_ROOT%" pytorch\ - cd pytorch - goto submodule - ) -) - -git clone https://github.com/%PYTORCH_REPO%/%MODULE_NAME% - -cd %MODULE_NAME% - -IF NOT "%BUILD_VISION%" == "" goto latest_end - -IF "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -IF "%PYTORCH_BRANCH%" == "" ( - set PYTORCH_BRANCH=v%TORCHVISION_BUILD_VERSION% -) -git checkout %PYTORCH_BRANCH% -IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH% - -:submodule - -git submodule update --init --recursive -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/internal/copy.bat b/packaging/windows/internal/copy.bat deleted file 
mode 100644 index b4aa397c6c1..00000000000 --- a/packaging/windows/internal/copy.bat +++ /dev/null @@ -1,13 +0,0 @@ -copy "%CUDA_PATH%\bin\cusparse64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cublas64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cudart64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\curand64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufft64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufftw64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "%CUDA_PATH%\bin\cudnn64_%CUDNN_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc64_%CUDA_VERSION%*.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc-builtins64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/copy_cpu.bat b/packaging/windows/internal/copy_cpu.bat deleted file mode 100644 index f5b9d11515f..00000000000 --- a/packaging/windows/internal/copy_cpu.bat +++ /dev/null @@ -1 +0,0 @@ -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat deleted file mode 100644 index cdd5a9ac206..00000000000 --- a/packaging/windows/internal/cuda_install.bat +++ /dev/null @@ -1,117 +0,0 @@ -@echo on - -if "%CUDA_VERSION%" == "cpu" ( - echo Skipping for CPU builds - exit /b 0 -) - -set SRC_DIR=%~dp0\.. 
- -if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build" - -set /a CUDA_VER=%CUDA_VERSION% -set CUDA_VER_MAJOR=%CUDA_VERSION:~0,-1% -set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1% -set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% - -if %CUDA_VER% EQU 92 goto cuda92 -if %CUDA_VER% EQU 100 goto cuda100 -if %CUDA_VER% EQU 101 goto cuda101 - -echo CUDA %CUDA_VERSION_STR% is not supported -exit /b 1 - -:cuda92 -if not exist "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_9.2.148_win10.exe --output "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" - set "ARGS=nvcc_9.2 cuobjdump_9.2 nvprune_9.2 cupti_9.2 cublas_9.2 cublas_dev_9.2 cudart_9.2 cufft_9.2 cufft_dev_9.2 curand_9.2 curand_dev_9.2 cusolver_9.2 cusolver_dev_9.2 cusparse_9.2 cusparse_dev_9.2 nvgraph_9.2 nvgraph_dev_9.2 npp_9.2 npp_dev_9.2 nvrtc_9.2 nvrtc_dev_9.2 nvml_dev_9.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-9.2-windows10-x64-v7.2.1.38.zip --output "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" -) - -goto cuda_common - -:cuda100 - -if not exist "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_10.0.130_411.31_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" - set "ARGS=nvcc_10.0 cuobjdump_10.0 nvprune_10.0 cupti_10.0 cublas_10.0 cublas_dev_10.0 cudart_10.0 cufft_10.0 cufft_dev_10.0 curand_10.0 curand_dev_10.0 cusolver_10.0 cusolver_dev_10.0 cusparse_10.0 cusparse_dev_10.0 nvgraph_10.0 nvgraph_dev_10.0 npp_10.0 
npp_dev_10.0 nvrtc_10.0 nvrtc_dev_10.0 nvml_dev_10.0" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-10.0-windows10-x64-v7.4.1.5.zip --output "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" -) - -goto cuda_common - -:cuda101 - -if not exist "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" - set "ARGS=nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip --output "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" -) - -goto cuda_common - -:cuda_common - -if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" ( - curl -k -L https://www.dropbox.com/s/9mcolalfdj4n979/NvToolsExt.7z?dl=1 --output "%SRC_DIR%\temp_build\NvToolsExt.7z" - if errorlevel 1 exit /b 1 -) - -echo Installing CUDA toolkit... -7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda" -pushd "%SRC_DIR%\temp_build\cuda" -start /wait setup.exe -s %ARGS% -popd - -echo Installing VS integration... 
-xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations" - -echo Installing NvToolsExt... -7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" - -echo Setting up environment... -set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%" -set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" -set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" -set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" - -if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" ( - echo CUDA %CUDA_VERSION_STR% installed failed. - exit /b 1 -) - -echo Installing cuDNN... 
-7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include" - -echo Cleaning temp files -rd /s /q "%SRC_DIR%\temp_build" || ver > nul diff --git a/packaging/windows/internal/dep_install.bat b/packaging/windows/internal/dep_install.bat deleted file mode 100644 index db665a99f26..00000000000 --- a/packaging/windows/internal/dep_install.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo off - -REM curl -k https://www.7-zip.org/a/7z1805-x64.exe -O -REM if errorlevel 1 exit /b 1 - -REM start /wait 7z1805-x64.exe /S -REM if errorlevel 1 exit /b 1 - -REM set "PATH=%ProgramFiles%\7-Zip;%PATH%" - -choco feature disable --name showDownloadProgress -choco feature enable --name allowGlobalConfirmation - -choco install curl 7zip diff --git a/packaging/windows/internal/env_fix.bat b/packaging/windows/internal/env_fix.bat deleted file mode 100644 index dd0aaf5f2d5..00000000000 --- a/packaging/windows/internal/env_fix.bat +++ /dev/null @@ -1,31 +0,0 @@ -@echo off - -:: Caution: Please don't use this script locally -:: It may destroy your build environment. 
- -setlocal - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere - -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -call "%VS15VCVARSALL%" x86_amd64 -for /f "usebackq tokens=*" %%i in (`where link.exe`) do move "%%i" "%%i.bak" - -endlocal diff --git a/packaging/windows/internal/nightly_defaults.bat b/packaging/windows/internal/nightly_defaults.bat deleted file mode 100644 index 1bba23209b1..00000000000 --- a/packaging/windows/internal/nightly_defaults.bat +++ /dev/null @@ -1,200 +0,0 @@ -@echo on - -if "%~1"=="" goto arg_error -if NOT "%~2"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass packge type `Conda` or `Wheels`. -exit /b 1 - -:arg_end - -echo "nightly_defaults.bat at %CD% starting at %DATE%" - -set SRC_DIR=%~dp0\.. - -:: NIGHTLIES_FOLDER -:: N.B. this is also defined in cron_start.sh -:: An arbitrary root folder to store all nightlies folders, each of which is a -:: parent level date folder with separate subdirs for logs, wheels, conda -:: packages, etc. This should be kept the same across all scripts called in a -:: cron job, so it only has a default value in the top-most script -:: build_cron.sh to avoid the default values from diverging. -if "%NIGHTLIES_FOLDER%" == "" set "NIGHTLIES_FOLDER=%SRC_DIR%" - -:: NIGHTLIES_DATE -:: N.B. this is also defined in cron_start.sh -:: The date in YYYY_mm_dd format that we are building for. 
If this is not -:: already set, then this will first try to find the date of the nightlies -:: folder that this builder repo exists in; e.g. if this script exists in -:: some_dir/2019_09_04/builder/cron/ then this will be set to 2019_09_04 (must -:: match YYYY_mm_dd). This is for convenience when debugging/uploading past -:: dates, so that you don't have to set NIGHTLIES_DATE yourself. If a date -:: folder cannot be found in that exact location, then this will default to -:: the current date. - - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Used in lots of places as the root dir to store all conda/wheel/manywheel -:: packages as well as logs for the day -set today=%NIGHTLIES_FOLDER%\%NIGHTLIES_DATE% -mkdir "%today%" || ver >nul - - -::############################################################################# -:: Add new configuration variables below this line. 'today' should always be -:: defined ASAP to avoid weird errors -::############################################################################# - - -:: List of people to email when things go wrong. 
This is passed directly to -:: `mail -t` -:: TODO: Not supported yet -if "%NIGHTLIES_EMAIL_LIST%" == "" set NIGHTLIES_EMAIL_LIST=peterghost86@gmail.com - -:: PYTORCH_CREDENTIALS_FILE -:: A bash file that exports credentials needed to upload to aws and anaconda. -:: Needed variables are PYTORCH_ANACONDA_USERNAME, PYTORCH_ANACONDA_PASSWORD, -:: AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Or it can just export the AWS -:: keys and then prepend a logged-in conda installation to the path. -:: TODO: Not supported yet -if "%PYTORCH_CREDENTIALS_FILE%" == "" set PYTORCH_CREDENTIALS_FILE=/c/Users/administrator/nightlies/credentials.sh - -:: Location of the temporary miniconda that is downloaded to install conda-build -:: and aws to upload finished packages TODO this is messy to install this in -:: upload.sh and later use it in upload_logs.sh -if "%CONDA_UPLOADER_INSTALLATION%" == "" set "CONDA_UPLOADER_INSTALLATION=%today%\miniconda" - -:: N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that -:: is the script that actually clones the builder repo that /this/ script is -:: running from. -pushd "%SRC_DIR%\.." -set NIGHTLIES_BUILDER_ROOT=%CD% -popd - -:: The shared pytorch repo to be used by all builds -if "%NIGHTLIES_PYTORCH_ROOT%" == "" set "NIGHTLIES_PYTORCH_ROOT=%today%\vision" - -:: PYTORCH_REPO -:: The Github org/user whose fork of Pytorch to check out (git clone -:: https://github.com//pytorch.git). This will always be cloned -:: fresh to build with. Default is 'pytorch' -if "%PYTORCH_REPO%" == "" set PYTORCH_REPO=pytorch - -:: PYTORCH_BRANCH -:: The branch of Pytorch to checkout for building (git checkout ). -:: This can either be the name of the branch (e.g. git checkout -:: my_branch_name) or can be a git commit (git checkout 4b2674n...). 
Default -:: is 'latest', which is a special term that signals to pull the last commit -:: before 0:00 midnight on the NIGHTLIES_DATE -if "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=latest - -:: Clone the requested pytorch checkout -if exist "%NIGHTLIES_PYTORCH_ROOT%" ( goto clone_end ) else ( goto clone_start ) - -:clone_start - -git clone --recursive "https://github.com/%PYTORCH_REPO%/vision.git" "%NIGHTLIES_PYTORCH_ROOT%" -pushd "%NIGHTLIES_PYTORCH_ROOT%" - -if "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -git checkout "%PYTORCH_BRANCH%" -git submodule update -popd - -:clone_end - -if "%CUDA_VERSION%" == "cpu" ( - set _DESIRED_CUDA=cpu -) else ( - set _DESIRED_CUDA=cu%CUDA_VERSION% -) - -:: PYTORCH_BUILD_VERSION -:: The actual version string. Used in conda like -:: pytorch-nightly==1.0.0.dev20180908 -:: or in manylinux like -:: torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl -if "%TORCHVISION_BUILD_VERSION%" == "" set TORCHVISION_BUILD_VERSION=0.5.0.dev%NIGHTLIES_DATE_COMPACT% - -if "%~1" == "Wheels" ( - if not "%CUDA_VERSION%" == "101" ( - set TORCHVISION_BUILD_VERSION=%TORCHVISION_BUILD_VERSION%+%_DESIRED_CUDA% - ) -) - -:: PYTORCH_BUILD_NUMBER -:: This is usually the number 1. If more than one build is uploaded for the -:: same version/date, then this can be incremented to 2,3 etc in which case -:: '.post2' will be appended to the version string of the package. 
This can -:: be set to '0' only if OVERRIDE_PACKAGE_VERSION is being used to bypass -:: all the version string logic in downstream scripts. Since we use the -:: override below, exporting this shouldn't actually matter. -if "%TORCHVISION_BUILD_NUMBER%" == "" set /a TORCHVISION_BUILD_NUMBER=1 -if %TORCHVISION_BUILD_NUMBER% GTR 1 set TORCHVISION_BUILD_VERSION=%TORCHVISION_BUILD_VERSION%%TORCHVISION_BUILD_NUMBER% - -:: The nightly builds use their own versioning logic, so we override whatever -:: logic is in setup.py or other scripts -:: TODO: Not supported yet -set OVERRIDE_PACKAGE_VERSION=%TORCHVISION_BUILD_VERSION% -set BUILD_VERSION=%TORCHVISION_BUILD_VERSION% - -:: Build folder for conda builds to use -if "%TORCH_CONDA_BUILD_FOLDER%" == "" set TORCH_CONDA_BUILD_FOLDER=torchvision - -:: TORCH_PACKAGE_NAME -:: The name of the package to upload. This should probably be pytorch or -:: pytorch-nightly. N.B. that pip will change all '-' to '_' but conda will -:: not. This is dealt with in downstream scripts. -:: TODO: Not supported yet -if "%TORCH_PACKAGE_NAME%" == "" set TORCH_PACKAGE_NAME=torchvision - -:: PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty -:: (when uploading to e.g. whl/cpu/) and also to handle nightlies (when -:: uploading to e.g. /whl/nightly/cpu) -:: TODO: Not supported yet -if "%PIP_UPLOAD_FOLDER%" == "" set "PIP_UPLOAD_FOLDER=nightly\" - -:: The location of the binary_sizes dir in s3 is hardcoded into -:: upload_binary_sizes.sh - -:: DAYS_TO_KEEP -:: How many days to keep around for clean.sh. Build folders older than this -:: will be purged at the end of cron jobs. '1' means to keep only the current -:: day. Values less than 1 are not allowed. The default is 5. -:: TODO: Not supported yet -if "%DAYS_TO_KEEP%" == "" set /a DAYS_TO_KEEP=5 -if %DAYS_TO_KEEP% LSS 1 ( - echo DAYS_TO_KEEP cannot be less than 1. 
- echo A value of 1 means to only keep the build for today - exit /b 1 -) diff --git a/packaging/windows/internal/publish.bat b/packaging/windows/internal/publish.bat deleted file mode 100644 index 7f118bbb6e3..00000000000 --- a/packaging/windows/internal/publish.bat +++ /dev/null @@ -1,89 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -if NOT "%CUDA_VERSION%" == "cpu" ( - set PACKAGE_SUFFIX=_cuda%CUDA_VERSION% -) else ( - set PACKAGE_SUFFIX= -) - -if "%PACKAGEFULLNAME%" == "Conda" ( - set PACKAGE=conda -) else ( - set PACKAGE=wheels -) - -if not defined PACKAGE_SUFFIX ( - set PUBLISH_BRANCH=vision_%PACKAGE%_%DESIRED_PYTHON% -) else ( - set PUBLISH_BRANCH=vision_%PACKAGE%_%DESIRED_PYTHON%%PACKAGE_SUFFIX% -) - -git clone %ARTIFACT_REPO_URL% -b %PUBLISH_BRANCH% --single-branch >nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Branch %PUBLISH_BRANCH% not exist, falling back to master - set NO_BRANCH=1 - git clone %ARTIFACT_REPO_URL% -b master --single-branch >nul 2>&1 -) - -IF ERRORLEVEL 1 ( - echo Clone failed - goto err -) - -cd pytorch_builder -attrib -s -h -r . /s /d - -:: Empty repo -rd /s /q . || ver >nul - -IF NOT EXIST %PACKAGE% mkdir %PACKAGE% - -xcopy /S /E /Y ..\..\output\*.* %PACKAGE%\ - -git config --global user.name "Azure DevOps" -git config --global user.email peterghost86@gmail.com -git init -git checkout --orphan %PUBLISH_BRANCH% -git remote add origin %ARTIFACT_REPO_URL% -git add . 
-git commit -m "Update artifacts" - -:push - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -git push origin %PUBLISH_BRANCH% -f > nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Git push retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Push failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto push -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) - -popd - -exit /b 0 - -:err - -popd - -exit /b 1 diff --git a/packaging/windows/internal/setup.bat b/packaging/windows/internal/setup.bat deleted file mode 100644 index d18dfb35023..00000000000 --- a/packaging/windows/internal/setup.bat +++ /dev/null @@ -1,44 +0,0 @@ -@echo off - -echo The flags after configuring: -echo NO_CUDA=%NO_CUDA% -echo CMAKE_GENERATOR=%CMAKE_GENERATOR% -if "%NO_CUDA%"=="" echo CUDA_PATH=%CUDA_PATH% -if NOT "%CC%"=="" echo CC=%CC% -if NOT "%CXX%"=="" echo CXX=%CXX% -if NOT "%DISTUTILS_USE_SDK%"=="" echo DISTUTILS_USE_SDK=%DISTUTILS_USE_SDK% - -set SRC_DIR=%~dp0\.. - -IF "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 -) ELSE ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% -) - -pushd %SRC_DIR% - -IF NOT exist "setup.py" ( - cd %MODULE_NAME% -) - -if "%CXX%"=="sccache cl" ( - sccache --stop-server - sccache --start-server - sccache --zero-stats -) - -:pytorch -:: This stores in e.g. D:/_work/1/s/windows/output/cpu -pip wheel -e . --no-deps --wheel-dir ../output/%CUDA_PREFIX% - -:build_end -IF ERRORLEVEL 1 exit /b 1 -IF NOT ERRORLEVEL 0 exit /b 1 - -if "%CXX%"=="sccache cl" ( - taskkill /im sccache.exe /f /t || ver > nul - taskkill /im nvcc.exe /f /t || ver > nul -) - -cd .. 
diff --git a/packaging/windows/internal/test.bat b/packaging/windows/internal/test.bat deleted file mode 100644 index a87fc1a2858..00000000000 --- a/packaging/windows/internal/test.bat +++ /dev/null @@ -1,79 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0\.. -pushd %SRC_DIR% - -set PYTHON_VERSION=%PYTHON_PREFIX:py=cp% - -if "%BUILD_VISION%" == "" ( - pip install future pytest coverage hypothesis protobuf -) ELSE ( - pip install future pytest "pillow>=4.1.1" mock -) - -for /F "delims=" %%i in ('where /R %SRC_DIR%\output\%CUDA_PREFIX% *%MODULE_NAME%*%PYTHON_VERSION%*.whl') do pip install "%%i" - -if ERRORLEVEL 1 exit /b 1 - -if NOT "%BUILD_VISION%" == "" ( - echo Smoke testing imports - python -c "import torchvision" - if ERRORLEVEL 1 exit /b 1 - goto smoke_test_end -) - -echo Smoke testing imports -python -c "import torch" -if ERRORLEVEL 1 exit /b 1 - -python -c "from caffe2.python import core" -if ERRORLEVEL 1 exit /b 1 - -echo Checking that MKL is available -python -c "import torch; exit(0 if torch.backends.mkl.is_available() else 1)" -if ERRORLEVEL 1 exit /b 1 - -setlocal EnableDelayedExpansion -set NVIDIA_GPU_EXISTS=0 -for /F "delims=" %%i in ('wmic path win32_VideoController get name') do ( - set GPUS=%%i - if not "x!GPUS:NVIDIA=!" == "x!GPUS!" ( - SET NVIDIA_GPU_EXISTS=1 - goto gpu_check_end - ) -) -:gpu_check_end -endlocal & set NVIDIA_GPU_EXISTS=%NVIDIA_GPU_EXISTS% - -if NOT "%CUDA_PREFIX%" == "cpu" if "%NVIDIA_GPU_EXISTS%" == "1" ( - echo Checking that CUDA archs are setup correctly - python -c "import torch; torch.randn([3,5]).cuda()" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that magma is available - python -c "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that CuDNN is available - python -c "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)" - if ERRORLEVEL 1 exit /b 1 -) -:smoke_test_end - -echo Not running unit tests. 
Hopefully these problems are caught by CI -goto test_end - -if "%BUILD_VISION%" == "" ( - cd pytorch\test - python run_test.py -v -) else ( - cd vision - pytest . -) - -if ERRORLEVEL 1 exit /b 1 - -:test_end - -popd -exit /b 0 diff --git a/packaging/windows/internal/upload.bat b/packaging/windows/internal/upload.bat deleted file mode 100644 index a23391a2935..00000000000 --- a/packaging/windows/internal/upload.bat +++ /dev/null @@ -1,96 +0,0 @@ -@echo off - -IF "%CONDA_UPLOADER_INSTALLATION%" == "" goto precheck_fail -IF "%PYTORCH_FINAL_PACKAGE_DIR%" == "" goto precheck_fail -IF "%today%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_USERNAME%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_PASSWORD%" == "" goto precheck_fail - -goto precheck_pass - -:precheck_fail - -echo Please run nightly_defaults.bat first. -echo And remember to set `PYTORCH_FINAL_PACKAGE_DIR` -echo Finally, don't forget to set anaconda tokens -exit /b 1 - -:precheck_pass - -pushd %today% - -:: Install anaconda client -set "CONDA_HOME=%CONDA_UPLOADER_INSTALLATION%" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q "%CONDA_HOME%" -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -popd - -IF ERRORLEVEL 1 ( - echo Conda download failed - exit /b 1 -) - -call %~dp0\..\..\conda\install_conda.bat - -IF ERRORLEVEL 1 ( - echo Conda installation failed - exit /b 1 -) - -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -REM conda install -y anaconda-client -pip install git+https://github.com/peterjc123/anaconda-client.git@log_more_meaningfull_errors -IF ERRORLEVEL 1 ( - echo Anaconda client installation failed - exit /b 1 -) - -set PYTORCH_FINAL_PACKAGE= -:: Upload all the packages under `PYTORCH_FINAL_PACKAGE_DIR` -FOR /F "delims=" %%i IN ('where /R %PYTORCH_FINAL_PACKAGE_DIR% *vision*.tar.bz2') DO ( - set "PYTORCH_FINAL_PACKAGE=%%i" -) - -IF 
"%PYTORCH_FINAL_PACKAGE%" == "" ( - echo No package to upload - exit /b 0 -) - -:upload - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -REM bash -c "yes | anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%"" -anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%" -IF ERRORLEVEL 1 ( - echo Anaconda client login failed - exit /b 1 -) - -echo Uploading %PYTORCH_FINAL_PACKAGE% to Anaconda Cloud -anaconda upload "%PYTORCH_FINAL_PACKAGE%" -u pytorch-nightly --label main --force --no-progress - -IF ERRORLEVEL 1 ( - echo Anaconda upload retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Upload failed - exit /b 1 - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto upload -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat new file mode 100644 index 00000000000..699876beb8a --- /dev/null +++ b/packaging/windows/internal/vc_env_helper.bat @@ -0,0 +1,49 @@ +@echo on + +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +if "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 +) +if "%VC_YEAR%" == "2017" ( + set VC_VERSION_LOWER=15 + set VC_VERSION_UPPER=16 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 
%VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +if "%CU_VERSION%" == "xpu" call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift +:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/packaging/windows/internal/vs_install.bat b/packaging/windows/internal/vs_install.bat deleted file mode 100644 index e6589092372..00000000000 --- a/packaging/windows/internal/vs_install.bat +++ /dev/null @@ -1,28 +0,0 @@ -@echo off - -set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_buildtools.exe -REM IF "%VS_LATEST%" == "1" ( -REM set VS_INSTALL_ARGS= --nocache --norestart --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools -REM set VSDEVCMD_ARGS= -REM ) ELSE ( -set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools ^ - --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^ - --add Microsoft.Component.MSBuild ^ - --add Microsoft.VisualStudio.Component.Roslyn.Compiler ^ - --add Microsoft.VisualStudio.Component.TextTemplating ^ - --add Microsoft.VisualStudio.Component.VC.CoreIde ^ - --add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^ - --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core ^ - --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ^ - --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^ - --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81 -set VSDEVCMD_ARGS=-vcvars_ver=14.11 -REM ) - -curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe -if errorlevel 1 exit /b 1 - -start /wait .\vs_installer.exe %VS_INSTALL_ARGS% -if not errorlevel 0 exit /b 1 -if errorlevel 1 if not errorlevel 3010 exit /b 1 -if errorlevel 3011 exit /b 1 diff --git a/packaging/windows/old/cuda100.bat b/packaging/windows/old/cuda100.bat deleted file mode 100644 index 
ac9be3c6907..00000000000 --- a/packaging/windows/old/cuda100.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_0%"=="" ( - echo CUDA 10.0 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_0%" - set "PATH=%CUDA_PATH_V10_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/old/cuda90.bat b/packaging/windows/old/cuda90.bat deleted file mode 100644 index fe0294812e2..00000000000 --- a/packaging/windows/old/cuda90.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_0%"=="" ( - echo CUDA 9 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_0%" - set "PATH=%CUDA_PATH_V9_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/templates/auth_task.yml b/packaging/windows/templates/auth_task.yml deleted file mode 100644 index ece66412ff4..00000000000 --- a/packaging/windows/templates/auth_task.yml +++ /dev/null @@ -1,17 +0,0 @@ -jobs: -- job: 'VSTS_Auth_Task' - timeoutInMinutes: 5 - cancelTimeoutInMinutes: 5 - variables: - - group: 'peterjc-vsts-token' - - pool: - vmImage: 'win1803' - - steps: - - checkout: self - clean: true - - - template: vsts_auth.yml - parameters: - auth: $(vsts_auth) diff --git a/packaging/windows/templates/build_conda.yml b/packaging/windows/templates/build_conda.yml deleted file mode 100644 index 2d88271ad33..00000000000 --- a/packaging/windows/templates/build_conda.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - msagent: false - -steps: -- bash: 'find . 
-name "*.sh" -exec dos2unix {} +' - displayName: Replace file endings - -- script: 'if not exist %PYTORCH_FINAL_PACKAGE_DIR% mkdir %PYTORCH_FINAL_PACKAGE_DIR%' - displayName: 'Create final package directory' - -- bash: './packaging/conda/build_vision.sh $CUDA_VERSION $TORCHVISION_BUILD_VERSION $TORCHVISION_BUILD_NUMBER' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/build_task.yml b/packaging/windows/templates/build_task.yml deleted file mode 100644 index e595662d313..00000000000 --- a/packaging/windows/templates/build_task.yml +++ /dev/null @@ -1,140 +0,0 @@ -parameters: - package: '' - spec: '' - jobDesc: '' - packageDesc: '' - msagent: true - cpuEnabled: true - cudaEnabled: true - condaEnabled: true - wheelsEnabled: true - override: false - -jobs: -- job: 'Windows_${{ parameters.spec }}_${{ parameters.package }}_Build' - timeoutInMinutes: 60 - cancelTimeoutInMinutes: 5 - condition: > - or(and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true'))) - variables: - - ${{ if eq(parameters.override, 'true') }}: - - name: TORCHVISION_BUILD_NUMBER - value: 1 - - name: PYTORCH_REPO - value: 'pytorch' - - name: PYTORCH_BRANCH - value: 'v0.4.0' - - ${{ if eq(parameters.msagent, 'true') }}: - - name: USE_SCCACHE - value: 0 - - ${{ if 
eq(parameters.msagent, 'false') }}: - - name: USE_SCCACHE - value: 1 - - ${{ if eq(parameters.package, 'Conda') }}: - - group: peterjc_anaconda_token - - name: PYTORCH_FINAL_PACKAGE_DIR - value: '$(Build.Repository.LocalPath)\packaging\windows\output' - - strategy: - maxParallel: 10 - matrix: - ${{ if eq(parameters.spec, 'CPU') }}: - PY3.5: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: cpu - PY3.6: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: cpu - PY3.7: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: cpu - ${{ if ne(parameters.spec, 'CPU') }}: - PY3.5_92: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 92 - PY3.6_92: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 92 - PY3.7_92: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 92 - PY3.5_101: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 101 - PY3.6_101: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 101 - PY3.7_101: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 101 - - pool: - ${{ if eq(parameters.msagent, 'true') }}: - vmImage: 'win1803' - ${{ if eq(parameters.msagent, 'false') }}: - name: 'release' - - steps: - - checkout: self - clean: true - - - template: setup_env_for_msagent.yml - parameters: - msagent: ${{ parameters.msagent }} - - # - ${{ if and(eq(parameters.override, 'true'), eq(parameters.package, 'Wheels')) }}: - # - template: override_pytorch_version.yml - - - template: setup_nightly_variables.yml - parameters: - package: ${{ parameters.package }} - - - ${{ if eq(parameters.package, 'Wheels') }}: - - template: build_wheels.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - - template: publish_test_results.yml - parameters: - msagent: ${{ parameters.msagent }} - - # If you want to upload binaries to S3 & Anaconda Cloud, please uncomment this section. 
- - ${{ if and(eq(parameters.package, 'Wheels'), eq(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: '$(CUDA_VERSION)' - cudaVer: '$(CUDA_VERSION)' - - - ${{ if and(eq(parameters.package, 'Wheels'), ne(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: 'cu$(CUDA_VERSION)' - cudaVer: 'cuda$(CUDA_VERSION)' - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: upload_to_conda.yml - parameters: - user: $(peterjc_conda_username) - pass: $(peterjc_conda_password) - - # If you want to upload binaries to Azure Git, please uncomment this section. - # - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - # - template: publish_test_results.yml - # parameters: - # msagent: ${{ parameters.msagent }} - # - template: publish_packages.yml - # parameters: - # package: ${{ parameters.package }} diff --git a/packaging/windows/templates/build_wheels.yml b/packaging/windows/templates/build_wheels.yml deleted file mode 100644 index 05c5712e334..00000000000 --- a/packaging/windows/templates/build_wheels.yml +++ /dev/null @@ -1,9 +0,0 @@ -parameters: - msagent: false - -steps: -- script: 'call packaging/windows/build_vision.bat %CUDA_VERSION% %TORCHVISION_BUILD_VERSION% %TORCHVISION_BUILD_NUMBER%' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/linux_build_task.yml b/packaging/windows/templates/linux_build_task.yml deleted file mode 100644 index 0b32892791a..00000000000 --- a/packaging/windows/templates/linux_build_task.yml +++ /dev/null @@ -1,38 +0,0 @@ -parameters: - msagent: true - enabled: false - -jobs: -- job: 'Linux_CPU_Conda_Build' - timeoutInMinutes: 0 - cancelTimeoutInMinutes: 5 - condition: ${{ eq(parameters.enabled, 'true') }} - variables: - CUDA_VERSION: cpu - TORCH_CONDA_BUILD_FOLDER: pytorch-nightly - PYTORCH_FINAL_PACKAGE_DIR: '$(Build.Repository.LocalPath)/output' - - strategy: - 
maxParallel: 10 - matrix: - PY3.5: - DESIRED_PYTHON: 3.5 - - pool: - vmImage: 'ubuntu-16.04' - - steps: - - checkout: self - clean: true - - - script: 'sudo apt-get install p7zip-full' - displayName: 'Install 7Zip' - - - task: CondaEnvironment@1 - displayName: 'Install conda-build' - inputs: - packageSpecs: 'conda-build' - - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} diff --git a/packaging/windows/templates/override_pytorch_version.yml b/packaging/windows/templates/override_pytorch_version.yml deleted file mode 100644 index 8af93ae43a4..00000000000 --- a/packaging/windows/templates/override_pytorch_version.yml +++ /dev/null @@ -1,6 +0,0 @@ -steps: -- script: 'windows/internal/override_pytorch_version.bat' - displayName: 'Override PyTorch Build Version for Wheels' - -- script: 'echo $(PYTORCH_BUILD_VERSION)' - displayName: 'Show PyTorch Build Version' diff --git a/packaging/windows/templates/publish_packages.yml b/packaging/windows/templates/publish_packages.yml deleted file mode 100644 index 51ce8247bf7..00000000000 --- a/packaging/windows/templates/publish_packages.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - package: '' - -steps: -- script: 'packaging/windows/internal/publish.bat' - displayName: 'Upload packages to Azure DevOps Repo' - env: - PACKAGEFULLNAME: ${{ parameters.package }} diff --git a/packaging/windows/templates/publish_test_results.yml b/packaging/windows/templates/publish_test_results.yml deleted file mode 100644 index 1e0dc0215d3..00000000000 --- a/packaging/windows/templates/publish_test_results.yml +++ /dev/null @@ -1,6 +0,0 @@ -steps: -- task: PublishTestResults@2 # No test results to publish - inputs: - testResultsFiles: 'windows/pytorch/test/**/*.xml' - testRunTitle: 'Publish test results' - enabled: false diff --git a/packaging/windows/templates/setup_env_for_msagent.yml b/packaging/windows/templates/setup_env_for_msagent.yml deleted file mode 100644 index 377734fa3db..00000000000 --- 
a/packaging/windows/templates/setup_env_for_msagent.yml +++ /dev/null @@ -1,25 +0,0 @@ -parameters: - msagent: false - -steps: -- ${{ if eq(parameters.msagent, 'true') }}: - - task: BatchScript@1 - displayName: 'Install 7Zip & cURL' - inputs: - filename: 'packaging/windows/internal/dep_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install Visual Studio 2017' - inputs: - filename: 'packaging/windows/internal/vs_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install CUDA' - inputs: - filename: 'packaging/windows/internal/cuda_install.bat' - - modifyEnvironment: true diff --git a/packaging/windows/templates/setup_nightly_variables.yml b/packaging/windows/templates/setup_nightly_variables.yml deleted file mode 100644 index 94b2fe934ce..00000000000 --- a/packaging/windows/templates/setup_nightly_variables.yml +++ /dev/null @@ -1,11 +0,0 @@ -parameters: - package: '' - -steps: -- task: BatchScript@1 - displayName: 'Setup nightly variables' - inputs: - filename: 'packaging/windows/internal/nightly_defaults.bat' - arguments: ${{ parameters.package }} - - modifyEnvironment: true diff --git a/packaging/windows/templates/upload_to_conda.yml b/packaging/windows/templates/upload_to_conda.yml deleted file mode 100644 index dc172bcf878..00000000000 --- a/packaging/windows/templates/upload_to_conda.yml +++ /dev/null @@ -1,10 +0,0 @@ -parameters: - user: '' - pass: '' - -steps: -- script: 'call packaging/windows/internal/upload.bat' - displayName: 'Upload packages to Anaconda Cloud' - env: - PYTORCH_ANACONDA_USERNAME: ${{ parameters.user }} - PYTORCH_ANACONDA_PASSWORD: ${{ parameters.pass }} diff --git a/packaging/windows/templates/upload_to_s3.yml b/packaging/windows/templates/upload_to_s3.yml deleted file mode 100644 index a31bcb15ae1..00000000000 --- a/packaging/windows/templates/upload_to_s3.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - cuVer: '' - cudaVer: '' - -steps: -- task: 
AmazonWebServices.aws-vsts-tools.S3Upload.S3Upload@1 - displayName: 'Upload ${{ parameters.cuVer }} wheel to S3' - inputs: - awsCredentials: 'Pytorch S3 bucket' - bucketName: 'pytorch' - sourceFolder: 'packaging/windows/output/${{ parameters.cudaVer }}' - globExpressions: '*.whl' - targetFolder: 'whl/nightly/${{ parameters.cuVer }}/' - filesAcl: 'public-read' - flattenFolders: 'true' diff --git a/packaging/windows/templates/vsts_auth.yml b/packaging/windows/templates/vsts_auth.yml deleted file mode 100644 index fde767d7f12..00000000000 --- a/packaging/windows/templates/vsts_auth.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - auth: '' - -steps: -- script: 'call packaging/windows/internal/auth.bat' - displayName: 'Sign in to Azure Pipelines' - env: - VSTS_AUTH: ${{ parameters.auth }} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000000..61e4a957fc5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[tool.usort] + +first_party_detection = false + +[tool.black] + +line-length = 120 +target-version = ["py38"] + +[tool.ufmt] + +excludes = [ + "gallery", +] + +[build-system] + +requires = ["setuptools", "torch", "wheel"] diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000000..8d52b55d5a6 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,13 @@ +[pytest] +addopts = + # show tests that (f)ailed, (E)rror, or (X)passed in the summary + -rfEX + # Make tracebacks shorter + --tb=short + # enable all warnings + -Wd + --ignore=test/test_datasets_download.py + --ignore-glob=test/test_prototype_*.py +testpaths = + test +xfail_strict = True diff --git a/references/classification/README.md b/references/classification/README.md index acc2b0b4ed0..bc481f421ed 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -4,58 +4,320 @@ This folder contains reference training scripts for image classification. 
They serve as a log of how to train specific models, as provide baseline training and evaluation scripts to quickly bootstrap research. -Except otherwise noted, all models have been trained on 8x V100 GPUs. +Except otherwise noted, all models have been trained on 8x V100 GPUs with +the following parameters: + +| Parameter | value | +| ------------------------ | ------ | +| `--batch_size` | `32` | +| `--epochs` | `90` | +| `--lr` | `0.1` | +| `--momentum` | `0.9` | +| `--wd`, `--weight-decay` | `1e-4` | +| `--lr-step-size` | `30` | +| `--lr-gamma` | `0.1` | + +### AlexNet and VGG + +Since `AlexNet` and the original `VGG` architectures do not include batch +normalization, the default initial learning rate `--lr 0.1` is too high. -### ResNext-50 32x4d ``` -python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ - --model resnext50_32x4d --epochs 100 +torchrun --nproc_per_node=8 train.py\ + --model $MODEL --lr 1e-2 ``` +Here `$MODEL` is one of `alexnet`, `vgg11`, `vgg13`, `vgg16` or `vgg19`. Note +that `vgg11_bn`, `vgg13_bn`, `vgg16_bn`, and `vgg19_bn` include batch +normalization and thus are trained with the default parameters. + +### GoogLeNet -### ResNext-101 32x8d +The weights of the GoogLeNet model are ported from the original paper rather than trained from scratch. + +### Inception V3 + +The weights of the Inception V3 model are ported from the original paper rather than trained from scratch. 
+ +Since it expects tensors with a size of N x 3 x 299 x 299, to validate the model use the following command: -On 8 nodes, each with 8 GPUs (for a total of 64 GPUS) ``` -python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ - --model resnext101_32x8d --epochs 100 +torchrun --nproc_per_node=8 train.py --model inception_v3\ + --test-only --weights Inception_V3_Weights.IMAGENET1K_V1 ``` +### ResNet +``` +torchrun --nproc_per_node=8 train.py --model $MODEL +``` + +Here `$MODEL` is one of `resnet18`, `resnet34`, `resnet50`, `resnet101` or `resnet152`. + +### ResNext +``` +torchrun --nproc_per_node=8 train.py\ + --model $MODEL --epochs 100 +``` + +Here `$MODEL` is one of `resnext50_32x4d` or `resnext101_32x8d`. +Note that the above command corresponds to a single node with 8 GPUs. If you use +a different number of GPUs and/or a different batch size, then the learning rate +should be scaled accordingly. For example, the pretrained model provided by +`torchvision` was trained on 8 nodes, each with 8 GPUs (for a total of 64 GPUs), +with `--batch_size 16` and `--lr 0.4`, instead of the current defaults +which are respectively batch_size=32 and lr=0.1 ### MobileNetV2 ``` -python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ +torchrun --nproc_per_node=8 train.py\ --model mobilenet_v2 --epochs 300 --lr 0.045 --wd 0.00004\ --lr-step-size 1 --lr-gamma 0.98 ``` + +### MobileNetV3 Large & Small +``` +torchrun --nproc_per_node=8 train.py\ + --model $MODEL --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\ + --wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2 +``` + +Here `$MODEL` is one of `mobilenet_v3_large` or `mobilenet_v3_small`. + +Then we averaged the parameters of the last 3 checkpoints that improved the Acc@1. See [#3182](https://github.com/pytorch/vision/pull/3182) +and [#3354](https://github.com/pytorch/vision/pull/3354) for details. 
+ + +### EfficientNet-V1 + +The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108). + +The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564). + +All models were trained using Bicubic interpolation and each have custom crop and resize sizes. To validate the models use the following commands: +``` +torchrun --nproc_per_node=8 train.py --model efficientnet_b0 --test-only --weights EfficientNet_B0_Weights.IMAGENET1K_V1 +torchrun --nproc_per_node=8 train.py --model efficientnet_b1 --test-only --weights EfficientNet_B1_Weights.IMAGENET1K_V1 +torchrun --nproc_per_node=8 train.py --model efficientnet_b2 --test-only --weights EfficientNet_B2_Weights.IMAGENET1K_V1 +torchrun --nproc_per_node=8 train.py --model efficientnet_b3 --test-only --weights EfficientNet_B3_Weights.IMAGENET1K_V1 +torchrun --nproc_per_node=8 train.py --model efficientnet_b4 --test-only --weights EfficientNet_B4_Weights.IMAGENET1K_V1 +torchrun --nproc_per_node=8 train.py --model efficientnet_b5 --test-only --weights EfficientNet_B5_Weights.IMAGENET1K_V1 +torchrun --nproc_per_node=8 train.py --model efficientnet_b6 --test-only --weights EfficientNet_B6_Weights.IMAGENET1K_V1 +torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --test-only --weights EfficientNet_B7_Weights.IMAGENET1K_V1 +``` + + +### EfficientNet-V2 +``` +torchrun --nproc_per_node=8 train.py \ +--model $MODEL --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr \ +--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \ +--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.00002 --norm-weight-decay 0.0 \ +--train-crop-size $TRAIN_SIZE 
--model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \ +--ra-sampler --ra-reps 4 +``` +Here `$MODEL` is one of `efficientnet_v2_s` and `efficientnet_v2_m`. +Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the Medium variant used `384` and `480` respectively. + +Note that the above command corresponds to training on a single node with 8 GPUs. +For generating the pre-trained weights, we trained with 4 nodes, each with 8 GPUs (for a total of 32 GPUs), +and `--batch_size 32`. + +The weights of the Large variant are ported from the original paper rather than trained from scratch. See the `EfficientNet_V2_L_Weights` entry for their exact preprocessing transforms. + + +### RegNet + +#### Small models +``` +torchrun --nproc_per_node=8 train.py\ + --model $MODEL --epochs 100 --batch-size 128 --wd 0.00005 --lr=0.8\ + --lr-scheduler=cosineannealinglr --lr-warmup-method=linear\ + --lr-warmup-epochs=5 --lr-warmup-decay=0.1 +``` +Here `$MODEL` is one of `regnet_x_400mf`, `regnet_x_800mf`, `regnet_x_1_6gf`, `regnet_y_400mf`, `regnet_y_800mf` and `regnet_y_1_6gf`. Please note we used learning rate 0.4 for `regnet_y_400mf` to get the same Acc@1 as [the paper](https://arxiv.org/abs/2003.13678). + +#### Medium models +``` +torchrun --nproc_per_node=8 train.py\ + --model $MODEL --epochs 100 --batch-size 64 --wd 0.00005 --lr=0.4\ + --lr-scheduler=cosineannealinglr --lr-warmup-method=linear\ + --lr-warmup-epochs=5 --lr-warmup-decay=0.1 +``` +Here `$MODEL` is one of `regnet_x_3_2gf`, `regnet_x_8gf`, `regnet_x_16gf`, `regnet_y_3_2gf` and `regnet_y_8gf`. + +#### Large models +``` +torchrun --nproc_per_node=8 train.py\ + --model $MODEL --epochs 100 --batch-size 32 --wd 0.00005 --lr=0.2\ + --lr-scheduler=cosineannealinglr --lr-warmup-method=linear\ + --lr-warmup-epochs=5 --lr-warmup-decay=0.1 +``` +Here `$MODEL` is one of `regnet_x_32gf`, `regnet_y_16gf` and `regnet_y_32gf`.
+ +### Vision Transformer + +#### vit_b_16 +``` +torchrun --nproc_per_node=8 train.py\ + --model vit_b_16 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\ + --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\ + --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\ + --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema +``` + +Note that the above command corresponds to training on a single node with 8 GPUs. +For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs), +and `--batch_size 64`. + +#### vit_b_32 +``` +torchrun --nproc_per_node=8 train.py\ + --model vit_b_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\ + --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\ + --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment imagenet\ + --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema +``` + +Note that the above command corresponds to training on a single node with 8 GPUs. +For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs), +and `--batch_size 256`. + +#### vit_l_16 +``` +torchrun --nproc_per_node=8 train.py\ + --model vit_l_16 --epochs 600 --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr\ + --lr-warmup-method linear --lr-warmup-epochs 5 --label-smoothing 0.1 --mixup-alpha 0.2\ + --auto-augment ta_wide --random-erase 0.1 --weight-decay 0.00002 --norm-weight-decay 0.0\ + --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema --val-resize-size 232 +``` + +Note that the above command corresponds to training on a single node with 8 GPUs. +For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs), +and `--batch_size 64`. 
+ +#### vit_l_32 +``` +torchrun --nproc_per_node=8 train.py\ + --model vit_l_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\ + --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\ + --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\ + --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema +``` + +Note that the above command corresponds to training on a single node with 8 GPUs. +For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs), +and `--batch_size 64`. + + +### ConvNeXt +``` +torchrun --nproc_per_node=8 train.py\ +--model $MODEL --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \ +--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \ +--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.05 --norm-weight-decay 0.0 \ +--train-crop-size 176 --model-ema --val-resize-size 232 --ra-sampler --ra-reps 4 +``` +Here `$MODEL` is one of `convnext_tiny`, `convnext_small`, `convnext_base` and `convnext_large`. Note that each variant had its `--val-resize-size` optimized in a post-training step, see their `Weights` entry for their exact value. + +Note that the above command corresponds to training on a single node with 8 GPUs. +For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs), +and `--batch_size 64`. 
+ + +### SwinTransformer +``` +torchrun --nproc_per_node=8 train.py\ +--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0 --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4 --val-resize-size 224 +``` +Here `$MODEL` is one of `swin_t`, `swin_s` or `swin_b`. +Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value. + + + + +### SwinTransformer V2 +``` +torchrun --nproc_per_node=8 train.py\ +--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0 --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4 --val-resize-size 256 --val-crop-size 256 --train-crop-size 256 +``` +Here `$MODEL` is one of `swin_v2_t`, `swin_v2_s` or `swin_v2_b`. +Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value. 
+ + +### MaxViT +``` +torchrun --nproc_per_node=8 --nnodes=4 train.py\ +--model $MODEL --epochs 400 --batch-size 128 --opt adamw --lr 3e-3 --weight-decay 0.05 --lr-scheduler cosineannealinglr --lr-min 1e-5 --lr-warmup-method linear --lr-warmup-epochs 32 --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 1.0 --interpolation bicubic --auto-augment ta_wide --policy-magnitude 15 --model-ema --val-resize-size 224\ +--val-crop-size 224 --train-crop-size 224 --amp --model-ema-steps 32 --transformer-embedding-decay 0 --sync-bn +``` +Here `$MODEL` is `maxvit_t`. +Note that `--val-resize-size` was not optimized in a post-training step. + + +### ShuffleNet V2 +``` +torchrun --nproc_per_node=8 train.py \ +--model=$MODEL --batch-size=128 \ +--lr=0.5 --lr-scheduler=cosineannealinglr --lr-warmup-epochs=5 --lr-warmup-method=linear \ +--auto-augment=ta_wide --epochs=600 --random-erase=0.1 --weight-decay=0.00002 \ +--norm-weight-decay=0.0 --label-smoothing=0.1 --mixup-alpha=0.2 --cutmix-alpha=1.0 \ +--train-crop-size=176 --model-ema --val-resize-size=232 --ra-sampler --ra-reps=4 +``` +Here `$MODEL` is either `shufflenet_v2_x1_5` or `shufflenet_v2_x2_0`. + +The models `shufflenet_v2_x0_5` and `shufflenet_v2_x1_0` were contributed by the community. See [PR-849](https://github.com/pytorch/vision/pull/849#issuecomment-483391686) for details. + + ## Mixed precision training -Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex). +Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp). -Mixed precision training makes use of both FP32 and FP16 precisions where appropriate. FP16 operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput, generally without loss in model accuracy.
Mixed precision training also often allows larger batch sizes. GPU automatic mixed precision training for Pytorch Vision can be enabled via the flag value `--apex=True`. +Mixed precision training makes use of both FP32 and FP16 precisions where appropriate. FP16 operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput, generally without loss in model accuracy. Mixed precision training also often allows larger batch sizes. GPU automatic mixed precision training for Pytorch Vision can be enabled via the flag value `--amp=True`. ``` -python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ - --model resnext50_32x4d --epochs 100 --apex +torchrun --nproc_per_node=8 train.py\ + --model resnext50_32x4d --epochs 100 --amp ``` ## Quantized -### Parameters used for generating quantized models: +### Post training quantized models -For all post training quantized models (All quantized models except mobilenet-v2), the settings are: +For all post training quantized models, the settings are: 1. num_calibration_batches: 32 2. num_workers: 16 3. batch_size: 32 4. eval_batch_size: 128 -5. backend: 'fbgemm' +5. qbackend: 'fbgemm' + +``` +python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' --model='$MODEL' +``` +Here `$MODEL` is one of `googlenet`, `inception_v3`, `resnet18`, `resnet50`, `resnext101_32x8d`, `shufflenet_v2_x0_5` and `shufflenet_v2_x1_0`. + +### Quantized ShuffleNet V2 + +Here are commands that we use to quantize the `shufflenet_v2_x1_5` and `shufflenet_v2_x2_0` models. 
+``` +# For shufflenet_v2_x1_5 +python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' \ + --model=shufflenet_v2_x1_5 --weights="ShuffleNet_V2_X1_5_Weights.IMAGENET1K_V1" \ + --train-crop-size 176 --val-resize-size 232 --data-path /datasets01_ontap/imagenet_full_size/061417/ + +# For shufflenet_v2_x2_0 +python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' \ + --model=shufflenet_v2_x2_0 --weights="ShuffleNet_V2_X2_0_Weights.IMAGENET1K_V1" \ + --train-crop-size 176 --val-resize-size 232 --data-path /datasets01_ontap/imagenet_full_size/061417/ +``` + +### QAT MobileNetV2 For Mobilenet-v2, the model was trained with quantization aware training, the settings used are: 1. num_workers: 16 2. batch_size: 32 3. eval_batch_size: 128 -4. backend: 'qnnpack' +4. qbackend: 'qnnpack' 5. learning-rate: 0.0001 6. num_epochs: 90 7. num_observer_update_epochs:4 @@ -63,28 +325,44 @@ For Mobilenet-v2, the model was trained with quantization aware training, the se 9. momentum: 0.9 10. lr_step_size:30 11. lr_gamma: 0.1 +12. weight-decay: 0.0001 + +``` +torchrun --nproc_per_node=8 train_quantization.py --model='mobilenet_v2' +``` Training converges at about 10 epochs. -For post training quant, device is set to CPU. For training, the device is set to CUDA +### QAT MobileNetV3 + +For Mobilenet-v3 Large, the model was trained with quantization aware training, the settings used are: +1. num_workers: 16 +2. batch_size: 32 +3. eval_batch_size: 128 +4. qbackend: 'qnnpack' +5. learning-rate: 0.001 +6. num_epochs: 90 +7. num_observer_update_epochs:4 +8. num_batch_norm_update_epochs:3 +9. momentum: 0.9 +10. lr_step_size:30 +11. lr_gamma: 0.1 +12. 
weight-decay: 0.00001 -### Command to evaluate quantized models using the pre-trained weights: -For all quantized models except inception_v3: ``` -python references/classification/train_quantization.py --data-path='imagenet_full_size/' \ - --device='cpu' --test-only --backend='fbgemm' --model='' +torchrun --nproc_per_node=8 train_quantization.py --model='mobilenet_v3_large' \ + --wd 0.00001 --lr 0.001 ``` -For inception_v3, since it expects tensors with a size of N x 3 x 299 x 299, before running above command, -need to change the input size of dataset_test in train.py to: +For post training quant, device is set to CPU. For training, the device is set to CUDA. + +### Command to evaluate quantized models using the pre-trained weights: + ``` -dataset_test = torchvision.datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.Resize(342), - transforms.CenterCrop(299), - transforms.ToTensor(), - normalize, - ])) +python train_quantization.py --device='cpu' --test-only --qbackend='' --model='' ``` +For inception_v3 you need to pass the following extra parameters: +``` +--val-resize-size 342 --val-crop-size 299 --train-crop-size 299 +``` diff --git a/references/classification/presets.py b/references/classification/presets.py new file mode 100644 index 00000000000..8653957a576 --- /dev/null +++ b/references/classification/presets.py @@ -0,0 +1,119 @@ +import torch +from torchvision.transforms.functional import InterpolationMode + + +def get_module(use_v2): + # We need a protected import to avoid the V2 warning in case just V1 is used + if use_v2: + import torchvision.transforms.v2 + + return torchvision.transforms.v2 + else: + import torchvision.transforms + + return torchvision.transforms + + +class ClassificationPresetTrain: + # Note: this transform assumes that the input to forward() are always PIL + # images, regardless of the backend parameter. We may change that in the + # future though, if we change the output type from the dataset. 
+ def __init__( + self, + *, + crop_size, + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + interpolation=InterpolationMode.BILINEAR, + hflip_prob=0.5, + auto_augment_policy=None, + ra_magnitude=9, + augmix_severity=3, + random_erase_prob=0.0, + backend="pil", + use_v2=False, + ): + T = get_module(use_v2) + + transforms = [] + backend = backend.lower() + if backend == "tensor": + transforms.append(T.PILToTensor()) + elif backend != "pil": + raise ValueError(f"backend can be 'tensor' or 'pil', but got {backend}") + + transforms.append(T.RandomResizedCrop(crop_size, interpolation=interpolation, antialias=True)) + if hflip_prob > 0: + transforms.append(T.RandomHorizontalFlip(hflip_prob)) + if auto_augment_policy is not None: + if auto_augment_policy == "ra": + transforms.append(T.RandAugment(interpolation=interpolation, magnitude=ra_magnitude)) + elif auto_augment_policy == "ta_wide": + transforms.append(T.TrivialAugmentWide(interpolation=interpolation)) + elif auto_augment_policy == "augmix": + transforms.append(T.AugMix(interpolation=interpolation, severity=augmix_severity)) + else: + aa_policy = T.AutoAugmentPolicy(auto_augment_policy) + transforms.append(T.AutoAugment(policy=aa_policy, interpolation=interpolation)) + + if backend == "pil": + transforms.append(T.PILToTensor()) + + transforms.extend( + [ + T.ToDtype(torch.float, scale=True) if use_v2 else T.ConvertImageDtype(torch.float), + T.Normalize(mean=mean, std=std), + ] + ) + if random_erase_prob > 0: + transforms.append(T.RandomErasing(p=random_erase_prob)) + + if use_v2: + transforms.append(T.ToPureTensor()) + + self.transforms = T.Compose(transforms) + + def __call__(self, img): + return self.transforms(img) + + +class ClassificationPresetEval: + def __init__( + self, + *, + crop_size, + resize_size=256, + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + interpolation=InterpolationMode.BILINEAR, + backend="pil", + use_v2=False, + ): + T = get_module(use_v2) + transforms = [] + backend 
= backend.lower() + if backend == "tensor": + transforms.append(T.PILToTensor()) + elif backend != "pil": + raise ValueError(f"backend can be 'tensor' or 'pil', but got {backend}") + + transforms += [ + T.Resize(resize_size, interpolation=interpolation, antialias=True), + T.CenterCrop(crop_size), + ] + + if backend == "pil": + transforms.append(T.PILToTensor()) + + transforms += [ + T.ToDtype(torch.float, scale=True) if use_v2 else T.ConvertImageDtype(torch.float), + T.Normalize(mean=mean, std=std), + ] + + if use_v2: + transforms.append(T.ToPureTensor()) + + self.transforms = T.Compose(transforms) + + def __call__(self, img): + return self.transforms(img) diff --git a/references/classification/sampler.py b/references/classification/sampler.py new file mode 100644 index 00000000000..e9dc1735a58 --- /dev/null +++ b/references/classification/sampler.py @@ -0,0 +1,62 @@ +import math + +import torch +import torch.distributed as dist + + +class RASampler(torch.utils.data.Sampler): + """Sampler that restricts data loading to a subset of the dataset for distributed, + with repeated augmentation. + It ensures that different each augmented version of a sample will be visible to a + different process (GPU). + Heavily based on 'torch.utils.data.DistributedSampler'. 
+ + This is borrowed from the DeiT Repo: + https://github.com/facebookresearch/deit/blob/main/samplers.py + """ + + def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0, repetitions=3): + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available!") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available!") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * float(repetitions) / self.num_replicas)) + self.total_size = self.num_samples * self.num_replicas + self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) + self.shuffle = shuffle + self.seed = seed + self.repetitions = repetitions + + def __iter__(self): + if self.shuffle: + # Deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.seed + self.epoch) + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = list(range(len(self.dataset))) + + # Add extra samples to make it evenly divisible + indices = [ele for ele in indices for i in range(self.repetitions)] + indices += indices[: (self.total_size - len(indices))] + assert len(indices) == self.total_size + + # Subsample + indices = indices[self.rank : self.total_size : self.num_replicas] + assert len(indices) == self.num_samples + + return iter(indices[: self.num_selected_samples]) + + def __len__(self): + return self.num_selected_samples + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/references/classification/train.py b/references/classification/train.py index 480092a0331..d52124fcf33 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -1,57 +1,71 @@ -from __future__ import print_function import 
datetime import os import time -import sys +import warnings +import presets import torch import torch.utils.data -from torch import nn import torchvision -from torchvision import transforms - +import torchvision.transforms import utils - -try: - from apex import amp -except ImportError: - amp = None +from sampler import RASampler +from torch import nn +from torch.utils.data.dataloader import default_collate +from torchvision.transforms.functional import InterpolationMode +from transforms import get_mixup_cutmix -def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, print_freq, apex=False): +def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args, model_ema=None, scaler=None): model.train() metric_logger = utils.MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) - metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}')) + metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value}")) + metric_logger.add_meter("img/s", utils.SmoothedValue(window_size=10, fmt="{value}")) - header = 'Epoch: [{}]'.format(epoch) - for image, target in metric_logger.log_every(data_loader, print_freq, header): + header = f"Epoch: [{epoch}]" + for i, (image, target) in enumerate(metric_logger.log_every(data_loader, args.print_freq, header)): start_time = time.time() image, target = image.to(device), target.to(device) - output = model(image) - loss = criterion(output, target) + with torch.cuda.amp.autocast(enabled=scaler is not None): + output = model(image) + loss = criterion(output, target) optimizer.zero_grad() - if apex: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() + if scaler is not None: + scaler.scale(loss).backward() + if args.clip_grad_norm is not None: + # we should unscale the gradients of optimizer's assigned params if do gradient clipping + scaler.unscale_(optimizer) + 
nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad_norm) + scaler.step(optimizer) + scaler.update() else: loss.backward() - optimizer.step() + if args.clip_grad_norm is not None: + nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad_norm) + optimizer.step() + + if model_ema and i % args.model_ema_steps == 0: + model_ema.update_parameters(model) + if epoch < args.lr_warmup_epochs: + # Reset ema buffer to keep copying weights during warmup period + model_ema.n_averaged.fill_(0) acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) batch_size = image.shape[0] metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) - metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time)) + metric_logger.meters["acc1"].update(acc1.item(), n=batch_size) + metric_logger.meters["acc5"].update(acc5.item(), n=batch_size) + metric_logger.meters["img/s"].update(batch_size / (time.time() - start_time)) -def evaluate(model, criterion, data_loader, device, print_freq=100): +def evaluate(model, criterion, data_loader, device, print_freq=100, log_suffix=""): model.eval() metric_logger = utils.MetricLogger(delimiter=" ") - header = 'Test:' - with torch.no_grad(): + header = f"Test: {log_suffix}" + + num_processed_samples = 0 + with torch.inference_mode(): for image, target in metric_logger.log_every(data_loader, print_freq, header): image = image.to(device, non_blocking=True) target = target.to(device, non_blocking=True) @@ -63,76 +77,123 @@ def evaluate(model, criterion, data_loader, device, print_freq=100): # could have been padded in distributed setup batch_size = image.shape[0] metric_logger.update(loss=loss.item()) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + metric_logger.meters["acc1"].update(acc1.item(), n=batch_size) 
+ metric_logger.meters["acc5"].update(acc5.item(), n=batch_size) + num_processed_samples += batch_size # gather the stats from all processes + + num_processed_samples = utils.reduce_across_processes(num_processed_samples) + if ( + hasattr(data_loader.dataset, "__len__") + and len(data_loader.dataset) != num_processed_samples + and torch.distributed.get_rank() == 0 + ): + # See FIXME above + warnings.warn( + f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} " + "samples were used for the validation, which might bias the results. " + "Try adjusting the batch size and / or the world size. " + "Setting the world size to 1 is always a safe bet." + ) + metric_logger.synchronize_between_processes() - print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}' - .format(top1=metric_logger.acc1, top5=metric_logger.acc5)) + print(f"{header} Acc@1 {metric_logger.acc1.global_avg:.3f} Acc@5 {metric_logger.acc5.global_avg:.3f}") return metric_logger.acc1.global_avg def _get_cache_path(filepath): import hashlib + h = hashlib.sha1(filepath.encode()).hexdigest() cache_path = os.path.join("~", ".torch", "vision", "datasets", "imagefolder", h[:10] + ".pt") cache_path = os.path.expanduser(cache_path) return cache_path -def load_data(traindir, valdir, cache_dataset, distributed): +def load_data(traindir, valdir, args): # Data loading code print("Loading data") - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) + val_resize_size, val_crop_size, train_crop_size = ( + args.val_resize_size, + args.val_crop_size, + args.train_crop_size, + ) + interpolation = InterpolationMode(args.interpolation) print("Loading training data") st = time.time() cache_path = _get_cache_path(traindir) - if cache_dataset and os.path.exists(cache_path): + if args.cache_dataset and os.path.exists(cache_path): # Attention, as the transforms are also cached! 
- print("Loading dataset_train from {}".format(cache_path)) - dataset, _ = torch.load(cache_path) + print(f"Loading dataset_train from {cache_path}") + # TODO: this could probably be weights_only=True + dataset, _ = torch.load(cache_path, weights_only=False) else: + # We need a default value for the variables below because args may come + # from train_quantization.py which doesn't define them. + auto_augment_policy = getattr(args, "auto_augment", None) + random_erase_prob = getattr(args, "random_erase", 0.0) + ra_magnitude = getattr(args, "ra_magnitude", None) + augmix_severity = getattr(args, "augmix_severity", None) dataset = torchvision.datasets.ImageFolder( traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - if cache_dataset: - print("Saving dataset_train to {}".format(cache_path)) + presets.ClassificationPresetTrain( + crop_size=train_crop_size, + interpolation=interpolation, + auto_augment_policy=auto_augment_policy, + random_erase_prob=random_erase_prob, + ra_magnitude=ra_magnitude, + augmix_severity=augmix_severity, + backend=args.backend, + use_v2=args.use_v2, + ), + ) + if args.cache_dataset: + print(f"Saving dataset_train to {cache_path}") utils.mkdir(os.path.dirname(cache_path)) utils.save_on_master((dataset, traindir), cache_path) print("Took", time.time() - st) print("Loading validation data") cache_path = _get_cache_path(valdir) - if cache_dataset and os.path.exists(cache_path): + if args.cache_dataset and os.path.exists(cache_path): # Attention, as the transforms are also cached! 
- print("Loading dataset_test from {}".format(cache_path)) - dataset_test, _ = torch.load(cache_path) + print(f"Loading dataset_test from {cache_path}") + # TODO: this could probably be weights_only=True + dataset_test, _ = torch.load(cache_path, weights_only=False) else: + if args.weights and args.test_only: + weights = torchvision.models.get_weight(args.weights) + preprocessing = weights.transforms(antialias=True) + if args.backend == "tensor": + preprocessing = torchvision.transforms.Compose([torchvision.transforms.PILToTensor(), preprocessing]) + + else: + preprocessing = presets.ClassificationPresetEval( + crop_size=val_crop_size, + resize_size=val_resize_size, + interpolation=interpolation, + backend=args.backend, + use_v2=args.use_v2, + ) + dataset_test = torchvision.datasets.ImageFolder( valdir, - transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])) - if cache_dataset: - print("Saving dataset_test to {}".format(cache_path)) + preprocessing, + ) + if args.cache_dataset: + print(f"Saving dataset_test to {cache_path}") utils.mkdir(os.path.dirname(cache_path)) utils.save_on_master((dataset_test, valdir), cache_path) print("Creating data loaders") - if distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + if args.distributed: + if hasattr(args, "ra_sampler") and args.ra_sampler: + train_sampler = RASampler(dataset, shuffle=True, repetitions=args.ra_reps) + else: + train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False) else: train_sampler = torch.utils.data.RandomSampler(dataset) test_sampler = torch.utils.data.SequentialSampler(dataset_test) @@ -141,13 +202,6 @@ def load_data(traindir, valdir, cache_dataset, distributed): def main(args): - if args.apex: - if 
sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") - if args.output_dir: utils.mkdir(args.output_dir) @@ -156,52 +210,154 @@ def main(args): device = torch.device(args.device) - torch.backends.cudnn.benchmark = True + if args.use_deterministic_algorithms: + torch.backends.cudnn.benchmark = False + torch.use_deterministic_algorithms(True) + else: + torch.backends.cudnn.benchmark = True - train_dir = os.path.join(args.data_path, 'train') - val_dir = os.path.join(args.data_path, 'val') - dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, - args.cache_dataset, args.distributed) - data_loader = torch.utils.data.DataLoader( - dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.workers, pin_memory=True) + train_dir = os.path.join(args.data_path, "train") + val_dir = os.path.join(args.data_path, "val") + dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, args) + num_classes = len(dataset.classes) + mixup_cutmix = get_mixup_cutmix( + mixup_alpha=args.mixup_alpha, cutmix_alpha=args.cutmix_alpha, num_classes=num_classes, use_v2=args.use_v2 + ) + if mixup_cutmix is not None: + + def collate_fn(batch): + return mixup_cutmix(*default_collate(batch)) + + else: + collate_fn = default_collate + + data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=args.batch_size, + sampler=train_sampler, + num_workers=args.workers, + pin_memory=True, + collate_fn=collate_fn, + ) data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.workers, pin_memory=True) + dataset_test, batch_size=args.batch_size, sampler=test_sampler, num_workers=args.workers, pin_memory=True + ) print("Creating model") - model = 
torchvision.models.__dict__[args.model](pretrained=args.pretrained) + model = torchvision.models.get_model(args.model, weights=args.weights, num_classes=num_classes) model.to(device) + if args.distributed and args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - criterion = nn.CrossEntropyLoss() - - optimizer = torch.optim.SGD( - model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) - - if args.apex: - model, optimizer = amp.initialize(model, optimizer, - opt_level=args.apex_opt_level - ) + criterion = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing) + + custom_keys_weight_decay = [] + if args.bias_weight_decay is not None: + custom_keys_weight_decay.append(("bias", args.bias_weight_decay)) + if args.transformer_embedding_decay is not None: + for key in ["class_token", "position_embedding", "relative_position_bias_table"]: + custom_keys_weight_decay.append((key, args.transformer_embedding_decay)) + parameters = utils.set_weight_decay( + model, + args.weight_decay, + norm_weight_decay=args.norm_weight_decay, + custom_keys_weight_decay=custom_keys_weight_decay if len(custom_keys_weight_decay) > 0 else None, + ) - lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) + opt_name = args.opt.lower() + if opt_name.startswith("sgd"): + optimizer = torch.optim.SGD( + parameters, + lr=args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay, + nesterov="nesterov" in opt_name, + ) + elif opt_name == "rmsprop": + optimizer = torch.optim.RMSprop( + parameters, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, eps=0.0316, alpha=0.9 + ) + elif opt_name == "adamw": + optimizer = torch.optim.AdamW(parameters, lr=args.lr, weight_decay=args.weight_decay) + else: + raise RuntimeError(f"Invalid optimizer {args.opt}. 
Only SGD, RMSprop and AdamW are supported.") + + scaler = torch.cuda.amp.GradScaler() if args.amp else None + + args.lr_scheduler = args.lr_scheduler.lower() + if args.lr_scheduler == "steplr": + main_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) + elif args.lr_scheduler == "cosineannealinglr": + main_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=args.epochs - args.lr_warmup_epochs, eta_min=args.lr_min + ) + elif args.lr_scheduler == "exponentiallr": + main_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma) + else: + raise RuntimeError( + f"Invalid lr scheduler '{args.lr_scheduler}'. Only StepLR, CosineAnnealingLR and ExponentialLR " + "are supported." + ) + + if args.lr_warmup_epochs > 0: + if args.lr_warmup_method == "linear": + warmup_lr_scheduler = torch.optim.lr_scheduler.LinearLR( + optimizer, start_factor=args.lr_warmup_decay, total_iters=args.lr_warmup_epochs + ) + elif args.lr_warmup_method == "constant": + warmup_lr_scheduler = torch.optim.lr_scheduler.ConstantLR( + optimizer, factor=args.lr_warmup_decay, total_iters=args.lr_warmup_epochs + ) + else: + raise RuntimeError( + f"Invalid warmup lr method '{args.lr_warmup_method}'. Only linear and constant are supported." 
+ ) + lr_scheduler = torch.optim.lr_scheduler.SequentialLR( + optimizer, schedulers=[warmup_lr_scheduler, main_lr_scheduler], milestones=[args.lr_warmup_epochs] + ) + else: + lr_scheduler = main_lr_scheduler model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module + model_ema = None + if args.model_ema: + # Decay adjustment that aims to keep the decay independent of other hyper-parameters originally proposed at: + # https://github.com/facebookresearch/pycls/blob/f8cd9627/pycls/core/net.py#L123 + # + # total_ema_updates = (Dataset_size / n_GPUs) * epochs / (batch_size_per_gpu * EMA_steps) + # We consider constant = Dataset_size for a given dataset/setup and omit it. Thus: + # adjust = 1 / total_ema_updates ~= n_GPUs * batch_size_per_gpu * EMA_steps / epochs + adjust = args.world_size * args.batch_size * args.model_ema_steps / args.epochs + alpha = 1.0 - args.model_ema_decay + alpha = min(1.0, alpha * adjust) + model_ema = utils.ExponentialMovingAverage(model_without_ddp, device=device, decay=1.0 - alpha) + if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) - args.start_epoch = checkpoint['epoch'] + 1 + checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True) + model_without_ddp.load_state_dict(checkpoint["model"]) + if not args.test_only: + optimizer.load_state_dict(checkpoint["optimizer"]) + lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) + args.start_epoch = checkpoint["epoch"] + 1 + if model_ema: + model_ema.load_state_dict(checkpoint["model_ema"]) + if scaler: + scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: - evaluate(model, criterion, data_loader_test, device=device) + # We disable the cudnn benchmarking because it can 
noticeably affect the accuracy + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + if model_ema: + evaluate(model_ema, criterion, data_loader_test, device=device, log_suffix="EMA") + else: + evaluate(model, criterion, data_loader_test, device=device) return print("Start training") @@ -209,53 +365,94 @@ def main(args): for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) - train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args.print_freq, args.apex) + train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args, model_ema, scaler) lr_scheduler.step() evaluate(model, criterion, data_loader_test, device=device) + if model_ema: + evaluate(model_ema, criterion, data_loader_test, device=device, log_suffix="EMA") if args.output_dir: checkpoint = { - 'model': model_without_ddp.state_dict(), - 'optimizer': optimizer.state_dict(), - 'lr_scheduler': lr_scheduler.state_dict(), - 'epoch': epoch, - 'args': args} - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'checkpoint.pth')) + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": lr_scheduler.state_dict(), + "epoch": epoch, + "args": args, + } + if model_ema: + checkpoint["model_ema"] = model_ema.state_dict() + if scaler: + checkpoint["scaler"] = scaler.state_dict() + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) + print(f"Training time {total_time_str}") -def parse_args(): +def get_args_parser(add_help=True): import argparse - parser = 
argparse.ArgumentParser(description='PyTorch Classification Training') - - parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', help='dataset') - parser.add_argument('--model', default='resnet18', help='model') - parser.add_argument('--device', default='cuda', help='device') - parser.add_argument('-b', '--batch-size', default=32, type=int) - parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--lr-step-size', default=30, type=int, help='decrease lr every step-size epochs') - parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') - parser.add_argument('--print-freq', default=10, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='start epoch') + + parser = argparse.ArgumentParser(description="PyTorch Classification Training", add_help=add_help) + + parser.add_argument("--data-path", default="/datasets01/imagenet_full_size/061417/", type=str, help="dataset path") + parser.add_argument("--model", default="resnet18", type=str, help="model name") + parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)") + parser.add_argument( + "-b", "--batch-size", default=32, type=int, help="images per gpu, the total 
batch size is $NGPU x batch_size" + ) + parser.add_argument("--epochs", default=90, type=int, metavar="N", help="number of total epochs to run") + parser.add_argument( + "-j", "--workers", default=16, type=int, metavar="N", help="number of data loading workers (default: 16)" + ) + parser.add_argument("--opt", default="sgd", type=str, help="optimizer") + parser.add_argument("--lr", default=0.1, type=float, help="initial learning rate") + parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") + parser.add_argument( + "--wd", + "--weight-decay", + default=1e-4, + type=float, + metavar="W", + help="weight decay (default: 1e-4)", + dest="weight_decay", + ) + parser.add_argument( + "--norm-weight-decay", + default=None, + type=float, + help="weight decay for Normalization layers (default: None, same value as --wd)", + ) + parser.add_argument( + "--bias-weight-decay", + default=None, + type=float, + help="weight decay for bias parameters of all layers (default: None, same value as --wd)", + ) + parser.add_argument( + "--transformer-embedding-decay", + default=None, + type=float, + help="weight decay for embedding parameters for vision transformer models (default: None, same value as --wd)", + ) + parser.add_argument( + "--label-smoothing", default=0.0, type=float, help="label smoothing (default: 0.0)", dest="label_smoothing" + ) + parser.add_argument("--mixup-alpha", default=0.0, type=float, help="mixup alpha (default: 0.0)") + parser.add_argument("--cutmix-alpha", default=0.0, type=float, help="cutmix alpha (default: 0.0)") + parser.add_argument("--lr-scheduler", default="steplr", type=str, help="the lr scheduler (default: steplr)") + parser.add_argument("--lr-warmup-epochs", default=0, type=int, help="the number of epochs to warmup (default: 0)") + parser.add_argument( + "--lr-warmup-method", default="constant", type=str, help="the warmup method (default: constant)" + ) + parser.add_argument("--lr-warmup-decay", default=0.01, 
type=float, help="the decay for lr") + parser.add_argument("--lr-step-size", default=30, type=int, help="decrease lr every step-size epochs") + parser.add_argument("--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma") + parser.add_argument("--lr-min", default=0.0, type=float, help="minimum lr of lr schedule (default: 0.0)") + parser.add_argument("--print-freq", default=10, type=int, help="print frequency") + parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs") + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + parser.add_argument("--start-epoch", default=0, type=int, metavar="N", help="start epoch") parser.add_argument( "--cache-dataset", dest="cache_dataset", @@ -274,32 +471,58 @@ def parse_args(): help="Only test the model", action="store_true", ) - parser.add_argument( - "--pretrained", - dest="pretrained", - help="Use pre-trained models from the modelzoo", - action="store_true", - ) + parser.add_argument("--auto-augment", default=None, type=str, help="auto augment policy (default: None)") + parser.add_argument("--ra-magnitude", default=9, type=int, help="magnitude of auto augment policy") + parser.add_argument("--augmix-severity", default=3, type=int, help="severity of augmix policy") + parser.add_argument("--random-erase", default=0.0, type=float, help="random erasing probability (default: 0.0)") # Mixed precision training parameters - parser.add_argument('--apex', action='store_true', - help='Use apex for mixed precision training') - parser.add_argument('--apex-opt-level', default='O1', type=str, - help='For apex mixed precision training' - 'O0 for FP32 training, O1 for mixed precision training.' 
- 'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet' - ) + parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") # distributed training parameters - parser.add_argument('--world-size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') - - args = parser.parse_args() - - return args + parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") + parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") + parser.add_argument( + "--model-ema", action="store_true", help="enable tracking Exponential Moving Average of model parameters" + ) + parser.add_argument( + "--model-ema-steps", + type=int, + default=32, + help="the number of iterations that controls how often to update the EMA model (default: 32)", + ) + parser.add_argument( + "--model-ema-decay", + type=float, + default=0.99998, + help="decay factor for Exponential Moving Average of model parameters (default: 0.99998)", + ) + parser.add_argument( + "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." 
+ ) + parser.add_argument( + "--interpolation", default="bilinear", type=str, help="the interpolation method (default: bilinear)" + ) + parser.add_argument( + "--val-resize-size", default=256, type=int, help="the resize size used for validation (default: 256)" + ) + parser.add_argument( + "--val-crop-size", default=224, type=int, help="the central crop size used for validation (default: 224)" + ) + parser.add_argument( + "--train-crop-size", default=224, type=int, help="the random crop size used for training (default: 224)" + ) + parser.add_argument("--clip-grad-norm", default=None, type=float, help="the maximum gradient norm (default None)") + parser.add_argument("--ra-sampler", action="store_true", help="whether to use Repeated Augmentation in training") + parser.add_argument( + "--ra-reps", default=3, type=int, help="number of repetitions for Repeated Augmentation (default: 3)" + ) + parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load") + parser.add_argument("--backend", default="PIL", type=str.lower, help="PIL or tensor - case insensitive") + parser.add_argument("--use-v2", action="store_true", help="Use V2 transforms") + return parser if __name__ == "__main__": - args = parse_args() + args = get_args_parser().parse_args() main(args) diff --git a/references/classification/train_quantization.py b/references/classification/train_quantization.py index 22621fe2404..bd324c6eef7 100644 --- a/references/classification/train_quantization.py +++ b/references/classification/train_quantization.py @@ -1,21 +1,18 @@ -from __future__ import print_function +import copy import datetime import os import time -import sys -import copy import torch +import torch.ao.quantization import torch.utils.data -from torch import nn import torchvision -import torch.quantization import utils -from train import train_one_epoch, evaluate, load_data +from torch import nn +from train import evaluate, load_data, train_one_epoch def main(args): - if 
args.output_dir: utils.mkdir(args.output_dir) @@ -23,49 +20,52 @@ def main(args): print(args) if args.post_training_quantize and args.distributed: - raise RuntimeError("Post training quantization example should not be performed " - "on distributed mode") + raise RuntimeError("Post training quantization example should not be performed on distributed mode") # Set backend engine to ensure that quantized model runs on the correct kernels - if args.backend not in torch.backends.quantized.supported_engines: - raise RuntimeError("Quantized backend not supported: " + str(args.backend)) - torch.backends.quantized.engine = args.backend + if args.qbackend not in torch.backends.quantized.supported_engines: + raise RuntimeError("Quantized backend not supported: " + str(args.qbackend)) + torch.backends.quantized.engine = args.qbackend device = torch.device(args.device) torch.backends.cudnn.benchmark = True # Data loading code print("Loading data") - train_dir = os.path.join(args.data_path, 'train') - val_dir = os.path.join(args.data_path, 'val') + train_dir = os.path.join(args.data_path, "train") + val_dir = os.path.join(args.data_path, "val") - dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, - args.cache_dataset, args.distributed) + dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, args) data_loader = torch.utils.data.DataLoader( - dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.workers, pin_memory=True) + dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.workers, pin_memory=True + ) data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=args.eval_batch_size, - sampler=test_sampler, num_workers=args.workers, pin_memory=True) + dataset_test, batch_size=args.eval_batch_size, sampler=test_sampler, num_workers=args.workers, pin_memory=True + ) print("Creating model", args.model) # when training quantized models, we always start from a 
pre-trained fp32 reference model - model = torchvision.models.quantization.__dict__[args.model](pretrained=True, quantize=args.test_only) + prefix = "quantized_" + model_name = args.model + if not model_name.startswith(prefix): + model_name = prefix + model_name + model = torchvision.models.get_model(model_name, weights=args.weights, quantize=args.test_only) model.to(device) if not (args.test_only or args.post_training_quantize): - model.fuse_model() - model.qconfig = torch.quantization.get_default_qat_qconfig(args.backend) - torch.quantization.prepare_qat(model, inplace=True) + model.fuse_model(is_qat=True) + model.qconfig = torch.ao.quantization.get_default_qat_qconfig(args.qbackend) + torch.ao.quantization.prepare_qat(model, inplace=True) + + if args.distributed and args.sync_bn: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) optimizer = torch.optim.SGD( - model.parameters(), lr=args.lr, momentum=args.momentum, - weight_decay=args.weight_decay) + model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay + ) - lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, - step_size=args.lr_step_size, - gamma=args.lr_gamma) + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) criterion = nn.CrossEntropyLoss() model_without_ddp = model @@ -74,34 +74,31 @@ def main(args): model_without_ddp = model.module if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) - args.start_epoch = checkpoint['epoch'] + 1 + checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True) + model_without_ddp.load_state_dict(checkpoint["model"]) + optimizer.load_state_dict(checkpoint["optimizer"]) + lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) + args.start_epoch = 
checkpoint["epoch"] + 1 if args.post_training_quantize: # perform calibration on a subset of the training dataset # for that, create a subset of the training dataset - ds = torch.utils.data.Subset( - dataset, - indices=list(range(args.batch_size * args.num_calibration_batches))) + ds = torch.utils.data.Subset(dataset, indices=list(range(args.batch_size * args.num_calibration_batches))) data_loader_calibration = torch.utils.data.DataLoader( - ds, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, - pin_memory=True) + ds, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True + ) model.eval() - model.fuse_model() - model.qconfig = torch.quantization.get_default_qconfig(args.backend) - torch.quantization.prepare(model, inplace=True) + model.fuse_model(is_qat=False) + model.qconfig = torch.ao.quantization.get_default_qconfig(args.qbackend) + torch.ao.quantization.prepare(model, inplace=True) # Calibrate first print("Calibrating") evaluate(model, criterion, data_loader_calibration, device=device, print_freq=1) - torch.quantization.convert(model, inplace=True) + torch.ao.quantization.convert(model, inplace=True) if args.output_dir: - print('Saving quantized model') + print("Saving quantized model") if utils.is_main_process(): - torch.save(model.state_dict(), os.path.join(args.output_dir, - 'quantized_post_train_model.pth')) + torch.save(model.state_dict(), os.path.join(args.output_dir, "quantized_post_train_model.pth")) print("Evaluating post-training quantized model") evaluate(model, criterion, data_loader_test, device=device) return @@ -110,113 +107,111 @@ def main(args): evaluate(model, criterion, data_loader_test, device=device) return - model.apply(torch.quantization.enable_observer) - model.apply(torch.quantization.enable_fake_quant) + model.apply(torch.ao.quantization.enable_observer) + model.apply(torch.ao.quantization.enable_fake_quant) start_time = time.time() for epoch in range(args.start_epoch, args.epochs): if 
args.distributed: train_sampler.set_epoch(epoch) - print('Starting training for epoch', epoch) - train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, - args.print_freq) + print("Starting training for epoch", epoch) + train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args) lr_scheduler.step() - with torch.no_grad(): + with torch.inference_mode(): if epoch >= args.num_observer_update_epochs: - print('Disabling observer for subseq epochs, epoch = ', epoch) - model.apply(torch.quantization.disable_observer) + print("Disabling observer for subseq epochs, epoch = ", epoch) + model.apply(torch.ao.quantization.disable_observer) if epoch >= args.num_batch_norm_update_epochs: - print('Freezing BN for subseq epochs, epoch = ', epoch) + print("Freezing BN for subseq epochs, epoch = ", epoch) model.apply(torch.nn.intrinsic.qat.freeze_bn_stats) - print('Evaluate QAT model') + print("Evaluate QAT model") - evaluate(model, criterion, data_loader_test, device=device) - quantized_eval_model = copy.deepcopy(model) + evaluate(model, criterion, data_loader_test, device=device, log_suffix="QAT") + quantized_eval_model = copy.deepcopy(model_without_ddp) quantized_eval_model.eval() - quantized_eval_model.to(torch.device('cpu')) - torch.quantization.convert(quantized_eval_model, inplace=True) + quantized_eval_model.to(torch.device("cpu")) + torch.ao.quantization.convert(quantized_eval_model, inplace=True) - print('Evaluate Quantized model') - evaluate(quantized_eval_model, criterion, data_loader_test, - device=torch.device('cpu')) + print("Evaluate Quantized model") + evaluate(quantized_eval_model, criterion, data_loader_test, device=torch.device("cpu")) model.train() if args.output_dir: checkpoint = { - 'model': model_without_ddp.state_dict(), - 'eval_model': quantized_eval_model.state_dict(), - 'optimizer': optimizer.state_dict(), - 'lr_scheduler': lr_scheduler.state_dict(), - 'epoch': epoch, - 'args': args} - utils.save_on_master( - checkpoint, 
- os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'checkpoint.pth')) - print('Saving models after epoch ', epoch) + "model": model_without_ddp.state_dict(), + "eval_model": quantized_eval_model.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": lr_scheduler.state_dict(), + "epoch": epoch, + "args": args, + } + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) + print("Saving models after epoch ", epoch) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) + print(f"Training time {total_time_str}") -def parse_args(): +def get_args_parser(add_help=True): import argparse - parser = argparse.ArgumentParser(description='PyTorch Classification Training') - - parser.add_argument('--data-path', - default='/datasets01/imagenet_full_size/061417/', - help='dataset') - parser.add_argument('--model', - default='mobilenet_v2', - help='model') - parser.add_argument('--backend', - default='qnnpack', - help='fbgemm or qnnpack') - parser.add_argument('--device', - default='cuda', - help='device') - - parser.add_argument('-b', '--batch-size', default=32, type=int, - help='batch size for calibration/training') - parser.add_argument('--eval-batch-size', default=128, type=int, - help='batch size for evaluation') - parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('--num-observer-update-epochs', - default=4, type=int, metavar='N', - help='number of total epochs to update observers') - parser.add_argument('--num-batch-norm-update-epochs', default=3, - type=int, metavar='N', - help='number of total epochs to update batch norm stats') - parser.add_argument('--num-calibration-batches', - 
default=32, type=int, metavar='N', - help='number of batches of training set for \ - observer calibration ') - - parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--lr', - default=0.0001, type=float, - help='initial learning rate') - parser.add_argument('--momentum', - default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--lr-step-size', default=30, type=int, - help='decrease lr every step-size epochs') - parser.add_argument('--lr-gamma', default=0.1, type=float, - help='decrease lr by a factor of lr-gamma') - parser.add_argument('--print-freq', default=10, type=int, - help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='start epoch') + + parser = argparse.ArgumentParser(description="PyTorch Quantized Classification Training", add_help=add_help) + + parser.add_argument("--data-path", default="/datasets01/imagenet_full_size/061417/", type=str, help="dataset path") + parser.add_argument("--model", default="mobilenet_v2", type=str, help="model name") + parser.add_argument("--qbackend", default="qnnpack", type=str, help="Quantized backend: fbgemm or qnnpack") + parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)") + + parser.add_argument( + "-b", "--batch-size", default=32, type=int, help="images per gpu, the total batch size is $NGPU x batch_size" + ) + parser.add_argument("--eval-batch-size", default=128, type=int, help="batch size for evaluation") + parser.add_argument("--epochs", default=90, type=int, metavar="N", help="number of total 
epochs to run") + parser.add_argument( + "--num-observer-update-epochs", + default=4, + type=int, + metavar="N", + help="number of total epochs to update observers", + ) + parser.add_argument( + "--num-batch-norm-update-epochs", + default=3, + type=int, + metavar="N", + help="number of total epochs to update batch norm stats", + ) + parser.add_argument( + "--num-calibration-batches", + default=32, + type=int, + metavar="N", + help="number of batches of training set for \ + observer calibration ", + ) + + parser.add_argument( + "-j", "--workers", default=16, type=int, metavar="N", help="number of data loading workers (default: 16)" + ) + parser.add_argument("--lr", default=0.0001, type=float, help="initial learning rate") + parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") + parser.add_argument( + "--wd", + "--weight-decay", + default=1e-4, + type=float, + metavar="W", + help="weight decay (default: 1e-4)", + dest="weight_decay", + ) + parser.add_argument("--lr-step-size", default=30, type=int, help="decrease lr every step-size epochs") + parser.add_argument("--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma") + parser.add_argument("--print-freq", default=10, type=int, help="print frequency") + parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs") + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + parser.add_argument("--start-epoch", default=0, type=int, metavar="N", help="start epoch") parser.add_argument( "--cache-dataset", dest="cache_dataset", @@ -224,6 +219,12 @@ def parse_args(): It also serializes the transforms", action="store_true", ) + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) parser.add_argument( "--test-only", dest="test_only", @@ -238,17 +239,35 @@ def parse_args(): ) # distributed training parameters - parser.add_argument('--world-size', default=1, 
type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', - default='env://', - help='url used to set up distributed training') + parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") + parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") + + parser.add_argument( + "--interpolation", default="bilinear", type=str, help="the interpolation method (default: bilinear)" + ) + parser.add_argument( + "--val-resize-size", default=256, type=int, help="the resize size used for validation (default: 256)" + ) + parser.add_argument( + "--val-crop-size", default=224, type=int, help="the central crop size used for validation (default: 224)" + ) + parser.add_argument( + "--train-crop-size", default=224, type=int, help="the random crop size used for training (default: 224)" + ) + parser.add_argument("--clip-grad-norm", default=None, type=float, help="the maximum gradient norm (default None)") + parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load") - args = parser.parse_args() + parser.add_argument("--backend", default="PIL", type=str.lower, help="PIL or tensor - case insensitive") + parser.add_argument("--use-v2", action="store_true", help="Use V2 transforms") - return args + return parser if __name__ == "__main__": - args = parse_args() + args = get_args_parser().parse_args() + if args.backend in ("fbgemm", "qnnpack"): + raise ValueError( + "The --backend parameter has been re-purposed to specify the backend of the transforms (PIL or Tensor) " + "instead of the quantized backend. Please use the --qbackend parameter to specify the quantized backend." 
+ ) main(args) diff --git a/references/classification/transforms.py b/references/classification/transforms.py new file mode 100644 index 00000000000..96236608eec --- /dev/null +++ b/references/classification/transforms.py @@ -0,0 +1,206 @@ +import math +from typing import Tuple + +import torch +from presets import get_module +from torch import Tensor +from torchvision.transforms import functional as F + + +def get_mixup_cutmix(*, mixup_alpha, cutmix_alpha, num_classes, use_v2): + transforms_module = get_module(use_v2) + + mixup_cutmix = [] + if mixup_alpha > 0: + mixup_cutmix.append( + transforms_module.MixUp(alpha=mixup_alpha, num_classes=num_classes) + if use_v2 + else RandomMixUp(num_classes=num_classes, p=1.0, alpha=mixup_alpha) + ) + if cutmix_alpha > 0: + mixup_cutmix.append( + transforms_module.CutMix(alpha=cutmix_alpha, num_classes=num_classes) + if use_v2 + else RandomCutMix(num_classes=num_classes, p=1.0, alpha=cutmix_alpha) + ) + if not mixup_cutmix: + return None + + return transforms_module.RandomChoice(mixup_cutmix) + + +class RandomMixUp(torch.nn.Module): + """Randomly apply MixUp to the provided batch and targets. + The class implements the data augmentations as described in the paper + `"mixup: Beyond Empirical Risk Minimization" `_. + + Args: + num_classes (int): number of classes used for one-hot encoding. + p (float): probability of the batch being transformed. Default value is 0.5. + alpha (float): hyperparameter of the Beta distribution used for mixup. + Default value is 1.0. + inplace (bool): boolean to make this transform inplace. Default set to False. + """ + + def __init__(self, num_classes: int, p: float = 0.5, alpha: float = 1.0, inplace: bool = False) -> None: + super().__init__() + + if num_classes < 1: + raise ValueError( + f"Please provide a valid positive value for the num_classes. 
Got num_classes={num_classes}" + ) + + if alpha <= 0: + raise ValueError("Alpha param can't be zero.") + + self.num_classes = num_classes + self.p = p + self.alpha = alpha + self.inplace = inplace + + def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: + """ + Args: + batch (Tensor): Float tensor of size (B, C, H, W) + target (Tensor): Integer tensor of size (B, ) + + Returns: + Tensor: Randomly transformed batch. + """ + if batch.ndim != 4: + raise ValueError(f"Batch ndim should be 4. Got {batch.ndim}") + if target.ndim != 1: + raise ValueError(f"Target ndim should be 1. Got {target.ndim}") + if not batch.is_floating_point(): + raise TypeError(f"Batch dtype should be a float tensor. Got {batch.dtype}.") + if target.dtype != torch.int64: + raise TypeError(f"Target dtype should be torch.int64. Got {target.dtype}") + + if not self.inplace: + batch = batch.clone() + target = target.clone() + + if target.ndim == 1: + target = torch.nn.functional.one_hot(target, num_classes=self.num_classes).to(dtype=batch.dtype) + + if torch.rand(1).item() >= self.p: + return batch, target + + # It's faster to roll the batch by one instead of shuffling it to create image pairs + batch_rolled = batch.roll(1, 0) + target_rolled = target.roll(1, 0) + + # Implemented as on mixup paper, page 3. + lambda_param = float(torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0]) + batch_rolled.mul_(1.0 - lambda_param) + batch.mul_(lambda_param).add_(batch_rolled) + + target_rolled.mul_(1.0 - lambda_param) + target.mul_(lambda_param).add_(target_rolled) + + return batch, target + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"num_classes={self.num_classes}" + f", p={self.p}" + f", alpha={self.alpha}" + f", inplace={self.inplace}" + f")" + ) + return s + + +class RandomCutMix(torch.nn.Module): + """Randomly apply CutMix to the provided batch and targets. 
+ The class implements the data augmentations as described in the paper + `"CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features" + `_. + + Args: + num_classes (int): number of classes used for one-hot encoding. + p (float): probability of the batch being transformed. Default value is 0.5. + alpha (float): hyperparameter of the Beta distribution used for cutmix. + Default value is 1.0. + inplace (bool): boolean to make this transform inplace. Default set to False. + """ + + def __init__(self, num_classes: int, p: float = 0.5, alpha: float = 1.0, inplace: bool = False) -> None: + super().__init__() + if num_classes < 1: + raise ValueError("Please provide a valid positive value for the num_classes.") + if alpha <= 0: + raise ValueError("Alpha param can't be zero.") + + self.num_classes = num_classes + self.p = p + self.alpha = alpha + self.inplace = inplace + + def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: + """ + Args: + batch (Tensor): Float tensor of size (B, C, H, W) + target (Tensor): Integer tensor of size (B, ) + + Returns: + Tensor: Randomly transformed batch. + """ + if batch.ndim != 4: + raise ValueError(f"Batch ndim should be 4. Got {batch.ndim}") + if target.ndim != 1: + raise ValueError(f"Target ndim should be 1. Got {target.ndim}") + if not batch.is_floating_point(): + raise TypeError(f"Batch dtype should be a float tensor. Got {batch.dtype}.") + if target.dtype != torch.int64: + raise TypeError(f"Target dtype should be torch.int64. 
Got {target.dtype}") + + if not self.inplace: + batch = batch.clone() + target = target.clone() + + if target.ndim == 1: + target = torch.nn.functional.one_hot(target, num_classes=self.num_classes).to(dtype=batch.dtype) + + if torch.rand(1).item() >= self.p: + return batch, target + + # It's faster to roll the batch by one instead of shuffling it to create image pairs + batch_rolled = batch.roll(1, 0) + target_rolled = target.roll(1, 0) + + # Implemented as on cutmix paper, page 12 (with minor corrections on typos). + lambda_param = float(torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0]) + _, H, W = F.get_dimensions(batch) + + r_x = torch.randint(W, (1,)) + r_y = torch.randint(H, (1,)) + + r = 0.5 * math.sqrt(1.0 - lambda_param) + r_w_half = int(r * W) + r_h_half = int(r * H) + + x1 = int(torch.clamp(r_x - r_w_half, min=0)) + y1 = int(torch.clamp(r_y - r_h_half, min=0)) + x2 = int(torch.clamp(r_x + r_w_half, max=W)) + y2 = int(torch.clamp(r_y + r_h_half, max=H)) + + batch[:, :, y1:y2, x1:x2] = batch_rolled[:, :, y1:y2, x1:x2] + lambda_param = float(1.0 - (x2 - x1) * (y2 - y1) / (W * H)) + + target_rolled.mul_(1.0 - lambda_param) + target.mul_(lambda_param).add_(target_rolled) + + return batch, target + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"num_classes={self.num_classes}" + f", p={self.p}" + f", alpha={self.alpha}" + f", inplace={self.inplace}" + f")" + ) + return s diff --git a/references/classification/utils.py b/references/classification/utils.py index 5ea6dfef341..7d9f0136ae8 100644 --- a/references/classification/utils.py +++ b/references/classification/utils.py @@ -1,15 +1,17 @@ -from __future__ import print_function -from collections import defaultdict, deque +import copy import datetime +import errno +import hashlib +import os import time +from collections import defaultdict, deque, OrderedDict +from typing import List, Optional, Tuple + import torch import torch.distributed as dist -import errno -import 
os - -class SmoothedValue(object): +class SmoothedValue: """Track a series of values and provide access to smoothed values over a window or the global series average. """ @@ -31,11 +33,7 @@ def synchronize_between_processes(self): """ Warning: does not synchronize the deque! """ - if not is_dist_avail_and_initialized(): - return - t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') - dist.barrier() - dist.all_reduce(t) + t = reduce_across_processes([self.count, self.total]) t = t.tolist() self.count = int(t[0]) self.total = t[1] @@ -64,14 +62,11 @@ def value(self): def __str__(self): return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) -class MetricLogger(object): +class MetricLogger: def __init__(self, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter @@ -88,15 +83,12 @@ def __getattr__(self, attr): return self.meters[attr] if attr in self.__dict__: return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") def __str__(self): loss_str = [] for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) + loss_str.append(f"{name}: {str(meter)}") return self.delimiter.join(loss_str) def synchronize_between_processes(self): @@ -109,31 +101,28 @@ def add_meter(self, name, meter): def log_every(self, iterable, print_freq, header=None): i = 0 if not header: - header = '' + header = "" start_time = time.time() end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") 
+ space_fmt = ":" + str(len(str(len(iterable)))) + "d" if torch.cuda.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}' - ]) + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) MB = 1024.0 * 1024.0 for obj in iterable: data_time.update(time.time() - end) @@ -143,28 +132,51 @@ def log_every(self, iterable, print_freq, header=None): eta_seconds = iter_time.global_avg * (len(iterable) - i) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if torch.cuda.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=torch.cuda.max_memory_allocated() / MB)) + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) i += 1 end = time.time() total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {}'.format(header, total_time_str)) + print(f"{header} Total time: {total_time_str}") + + +class ExponentialMovingAverage(torch.optim.swa_utils.AveragedModel): + """Maintains moving 
averages of model parameters using an exponential decay. + ``ema_avg = decay * avg_model_param + (1 - decay) * model_param`` + `torch.optim.swa_utils.AveragedModel `_ + is used to compute the EMA. + """ + + def __init__(self, model, decay, device="cpu"): + def ema_avg(avg_model_param, model_param, num_averaged): + return decay * avg_model_param + (1 - decay) * model_param + + super().__init__(model, device, ema_avg, use_buffers=True) def accuracy(output, target, topk=(1,)): """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): + with torch.inference_mode(): maxk = max(topk) batch_size = target.size(0) + if target.ndim == 2: + target = target.max(dim=1)[1] _, pred = output.topk(maxk, 1, True, True) pred = pred.t() @@ -190,10 +202,11 @@ def setup_for_distributed(is_master): This function disables printing when not in master process """ import builtins as __builtin__ + builtin_print = __builtin__.print def print(*args, **kwargs): - force = kwargs.pop('force', False) + force = kwargs.pop("force", False) if is_master or force: builtin_print(*args, **kwargs) @@ -230,26 +243,222 @@ def save_on_master(*args, **kwargs): def init_distributed_mode(args): - if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: args.rank = int(os.environ["RANK"]) - args.world_size = int(os.environ['WORLD_SIZE']) - args.gpu = int(os.environ['LOCAL_RANK']) - elif 'SLURM_PROCID' in os.environ: - args.rank = int(os.environ['SLURM_PROCID']) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + elif "SLURM_PROCID" in os.environ: + args.rank = int(os.environ["SLURM_PROCID"]) args.gpu = args.rank % torch.cuda.device_count() elif hasattr(args, "rank"): pass else: - print('Not using distributed mode') + print("Not using distributed mode") args.distributed = False return args.distributed = True torch.cuda.set_device(args.gpu) - args.dist_backend = 
'nccl' - print('| distributed init (rank {}): {}'.format( - args.rank, args.dist_url), flush=True) - torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) + args.dist_backend = "nccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank + ) + torch.distributed.barrier() setup_for_distributed(args.rank == 0) + + +def average_checkpoints(inputs): + """Loads checkpoints from inputs and returns a model with averaged weights. Original implementation taken from: + https://github.com/pytorch/fairseq/blob/a48f235636557b8d3bc4922a6fa90f3a0fa57955/scripts/average_checkpoints.py#L16 + + Args: + inputs (List[str]): An iterable of string paths of checkpoints to load from. + Returns: + A dict of string keys mapping to various values. The 'model' key + from the returned dict should correspond to an OrderedDict mapping + string parameter names to torch Tensors. 
+ """ + params_dict = OrderedDict() + params_keys = None + new_state = None + num_models = len(inputs) + for fpath in inputs: + with open(fpath, "rb") as f: + state = torch.load( + f, map_location=(lambda s, _: torch.serialization.default_restore_location(s, "cpu")), weights_only=True + ) + # Copies over the settings from the first checkpoint + if new_state is None: + new_state = state + model_params = state["model"] + model_params_keys = list(model_params.keys()) + if params_keys is None: + params_keys = model_params_keys + elif params_keys != model_params_keys: + raise KeyError( + f"For checkpoint {f}, expected list of params: {params_keys}, but found: {model_params_keys}" + ) + for k in params_keys: + p = model_params[k] + if isinstance(p, torch.HalfTensor): + p = p.float() + if k not in params_dict: + params_dict[k] = p.clone() + # NOTE: clone() is needed in case of p is a shared parameter + else: + params_dict[k] += p + averaged_params = OrderedDict() + for k, v in params_dict.items(): + averaged_params[k] = v + if averaged_params[k].is_floating_point(): + averaged_params[k].div_(num_models) + else: + averaged_params[k] //= num_models + new_state["model"] = averaged_params + return new_state + + +def store_model_weights(model, checkpoint_path, checkpoint_key="model", strict=True): + """ + This method can be used to prepare weights files for new models. It receives as + input a model architecture and a checkpoint from the training script and produces + a file with the weights ready for release. 
+ + Examples: + from torchvision import models as M + + # Classification + model = M.mobilenet_v3_large(weights=None) + print(store_model_weights(model, './class.pth')) + + # Quantized Classification + model = M.quantization.mobilenet_v3_large(weights=None, quantize=False) + model.fuse_model(is_qat=True) + model.qconfig = torch.ao.quantization.get_default_qat_qconfig('qnnpack') + _ = torch.ao.quantization.prepare_qat(model, inplace=True) + print(store_model_weights(model, './qat.pth')) + + # Object Detection + model = M.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=None, weights_backbone=None) + print(store_model_weights(model, './obj.pth')) + + # Segmentation + model = M.segmentation.deeplabv3_mobilenet_v3_large(weights=None, weights_backbone=None, aux_loss=True) + print(store_model_weights(model, './segm.pth', strict=False)) + + Args: + model (pytorch.nn.Module): The model on which the weights will be loaded for validation purposes. + checkpoint_path (str): The path of the checkpoint we will load. + checkpoint_key (str, optional): The key of the checkpoint where the model weights are stored. + Default: "model". + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``True`` + + Returns: + output_path (str): The location where the weights are saved. + """ + # Store the new model next to the checkpoint_path + checkpoint_path = os.path.abspath(checkpoint_path) + output_dir = os.path.dirname(checkpoint_path) + + # Deep copy to avoid side effects on the model object. + model = copy.deepcopy(model) + checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True) + + # Load the weights to the model to validate that everything works + # and remove unnecessary weights (such as auxiliaries, etc.) 
+ if checkpoint_key == "model_ema": + del checkpoint[checkpoint_key]["n_averaged"] + torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(checkpoint[checkpoint_key], "module.") + model.load_state_dict(checkpoint[checkpoint_key], strict=strict) + + tmp_path = os.path.join(output_dir, str(model.__hash__())) + torch.save(model.state_dict(), tmp_path) + + sha256_hash = hashlib.sha256() + with open(tmp_path, "rb") as f: + # Read and update hash string value in blocks of 4K + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + hh = sha256_hash.hexdigest() + + output_path = os.path.join(output_dir, "weights-" + str(hh[:8]) + ".pth") + os.replace(tmp_path, output_path) + + return output_path + + +def reduce_across_processes(val): + if not is_dist_avail_and_initialized(): + # nothing to sync, but we still convert to tensor for consistency with the distributed case. + return torch.tensor(val) + + t = torch.tensor(val, device="cuda") + dist.barrier() + dist.all_reduce(t) + return t + + +def set_weight_decay( + model: torch.nn.Module, + weight_decay: float, + norm_weight_decay: Optional[float] = None, + norm_classes: Optional[List[type]] = None, + custom_keys_weight_decay: Optional[List[Tuple[str, float]]] = None, +): + if not norm_classes: + norm_classes = [ + torch.nn.modules.batchnorm._BatchNorm, + torch.nn.LayerNorm, + torch.nn.GroupNorm, + torch.nn.modules.instancenorm._InstanceNorm, + torch.nn.LocalResponseNorm, + ] + norm_classes = tuple(norm_classes) + + params = { + "other": [], + "norm": [], + } + params_weight_decay = { + "other": weight_decay, + "norm": norm_weight_decay, + } + custom_keys = [] + if custom_keys_weight_decay is not None: + for key, weight_decay in custom_keys_weight_decay: + params[key] = [] + params_weight_decay[key] = weight_decay + custom_keys.append(key) + + def _add_params(module, prefix=""): + for name, p in module.named_parameters(recurse=False): + if not p.requires_grad: + continue + is_custom_key 
= False + for key in custom_keys: + target_name = f"{prefix}.{name}" if prefix != "" and "." in key else name + if key == target_name: + params[key].append(p) + is_custom_key = True + break + if not is_custom_key: + if norm_weight_decay is not None and isinstance(module, norm_classes): + params["norm"].append(p) + else: + params["other"].append(p) + + for child_name, child_module in module.named_children(): + child_prefix = f"{prefix}.{child_name}" if prefix != "" else child_name + _add_params(child_module, prefix=child_prefix) + + _add_params(model) + + param_groups = [] + for key in params: + if len(params[key]) > 0: + param_groups.append({"params": params[key], "weight_decay": params_weight_decay[key]}) + return param_groups diff --git a/references/depth/stereo/README.md b/references/depth/stereo/README.md new file mode 100644 index 00000000000..22bcae27ab0 --- /dev/null +++ b/references/depth/stereo/README.md @@ -0,0 +1,180 @@ +# Stereo Matching reference training scripts + +This folder contains reference training scripts for Stereo Matching. +They serve as a log of how to train specific models, so as to provide baseline +training and evaluation scripts to quickly bootstrap research. + + +### CREStereo + +The CREStereo model was trained on a dataset mixture between **CREStereo**, **ETH3D** and the additional split from **Middlebury2014**. +A ratio of **88-6-6** was used in order to train a baseline weight set. We provide multi-set variant as well. +Both used 8 A100 GPUs and a batch size of 2 (so effective batch size is 16). The +rest of the hyper-parameters loosely follow the recipe from https://github.com/megvii-research/CREStereo. +The original recipe trains for **300000** updates (or steps) on the dataset mixture. We modify the learning rate +schedule to one that starts decaying the weight much sooner. 
Throughout the experiments we found that this reduces +overfitting during evaluation time and gradient clipping helps stabilize the loss during a premature learning rate change. + +``` +torchrun --nproc_per_node 8 --nnodes 1 train.py \ + --dataset-root $dataset_root \ + --name $name_cre \ + --model crestereo_base \ + --train-datasets crestereo eth3d-train middlebury2014-other \ + --dataset-steps 264000 18000 18000 + --batch-size 2 \ + --lr 0.0004 \ + --min-lr 0.00002 \ + --lr-decay-method cosine \ + --warmup-steps 6000 \ + --decay-after-steps 30000 \ + --clip-grad-norm 1.0 \ +``` + +We employ a multi-set fine-tuning stage where we uniformly sample from multiple datasets. Given that some of these datasets have extremely large images (``2048x2048`` or more) we opt for a very aggressive scale-range ``[0.2 - 0.8]`` such that as much of the original frame composition is captured inside the ``384x512`` crop. + +``` +torchrun --nproc_per_node 8 --nnodes 1 train.py \ + --dataset-root $dataset_root \ + --name $name_things \ + --model crestereo_base \ + --train-datasets crestereo eth3d-train middlebury2014-other instereo2k fallingthings carla-highres sintel sceneflow-monkaa sceneflow-driving \ + --dataset-steps 12000 12000 12000 12000 12000 12000 12000 12000 12000 + --batch-size 2 \ + --scale-range 0.2 0.8 \ + --lr 0.0004 \ + --lr-decay-method cosine \ + --decay-after-steps 0 \ + --warmup-steps 0 \ + --min-lr 0.00002 \ + --resume-path $checkpoint_dir/$name_cre.pth +``` + + +### Evaluation + +Evaluating the base weights + +``` +torchrun --nproc_per_node 1 --nnodes 1 cascade_evaluation.py --dataset middlebury2014-train --batch-size 1 --dataset-root $dataset_root --model crestereo_base --weights CREStereo_Base_Weights.CRESTEREO_ETH_MBL_V1 +``` + +This should give an **mae of about 1.416** on the train set of `Middlebury2014`. Results may vary slightly depending on the batch size and the number of GPUs. For the most accurate results use 1 GPU and `--batch-size 1`.
The created log file should look like this, where the first key is the number of cascades and the nested key is the number of recursive iterations: + +``` +Dataset: middlebury2014-train @size: [384, 512]: +{ + 1: { + 2: {'mae': 2.363, 'rmse': 4.352, '1px': 0.611, '3px': 0.828, '5px': 0.891, 'relepe': 0.176, 'fl-all': 64.511} + 5: {'mae': 1.618, 'rmse': 3.71, '1px': 0.761, '3px': 0.879, '5px': 0.918, 'relepe': 0.154, 'fl-all': 77.128} + 10: {'mae': 1.416, 'rmse': 3.53, '1px': 0.777, '3px': 0.896, '5px': 0.933, 'relepe': 0.148, 'fl-all': 78.388} + 20: {'mae': 1.448, 'rmse': 3.583, '1px': 0.771, '3px': 0.893, '5px': 0.931, 'relepe': 0.145, 'fl-all': 77.7} + }, +} +{ + 2: { + 2: {'mae': 1.972, 'rmse': 4.125, '1px': 0.73, '3px': 0.865, '5px': 0.908, 'relepe': 0.169, 'fl-all': 74.396} + 5: {'mae': 1.403, 'rmse': 3.448, '1px': 0.793, '3px': 0.905, '5px': 0.937, 'relepe': 0.151, 'fl-all': 80.186} + 10: {'mae': 1.312, 'rmse': 3.368, '1px': 0.799, '3px': 0.912, '5px': 0.943, 'relepe': 0.148, 'fl-all': 80.379} + 20: {'mae': 1.376, 'rmse': 3.542, '1px': 0.796, '3px': 0.91, '5px': 0.942, 'relepe': 0.149, 'fl-all': 80.054} + }, +} +``` + +You can also evaluate the Finetuned weights: + +``` +torchrun --nproc_per_node 1 --nnodes 1 cascade_evaluation.py --dataset middlebury2014-train --batch-size 1 --dataset-root $dataset_root --model crestereo_base --weights CREStereo_Base_Weights.CRESTEREO_FINETUNE_MULTI_V1 +``` + +``` +Dataset: middlebury2014-train @size: [384, 512]: +{ + 1: { + 2: {'mae': 1.85, 'rmse': 3.797, '1px': 0.673, '3px': 0.862, '5px': 0.917, 'relepe': 0.171, 'fl-all': 69.736} + 5: {'mae': 1.111, 'rmse': 3.166, '1px': 0.838, '3px': 0.93, '5px': 0.957, 'relepe': 0.134, 'fl-all': 84.596} + 10: {'mae': 1.02, 'rmse': 3.073, '1px': 0.854, '3px': 0.938, '5px': 0.96, 'relepe': 0.129, 'fl-all': 86.042} + 20: {'mae': 0.993, 'rmse': 3.059, '1px': 0.855, '3px': 0.942, '5px': 0.967, 'relepe': 0.126, 'fl-all': 85.784} + }, +} +{ + 2: { + 2: {'mae': 1.667, 'rmse': 3.867, '1px': 0.78, 
'3px': 0.891, '5px': 0.922, 'relepe': 0.165, 'fl-all': 78.89} + 5: {'mae': 1.158, 'rmse': 3.278, '1px': 0.843, '3px': 0.926, '5px': 0.955, 'relepe': 0.135, 'fl-all': 84.556} + 10: {'mae': 1.046, 'rmse': 3.13, '1px': 0.85, '3px': 0.934, '5px': 0.96, 'relepe': 0.13, 'fl-all': 85.464} + 20: {'mae': 1.021, 'rmse': 3.102, '1px': 0.85, '3px': 0.935, '5px': 0.963, 'relepe': 0.129, 'fl-all': 85.417} + }, +} +``` + +Evaluating the author provided weights: + +``` +torchrun --nproc_per_node 1 --nnodes 1 cascade_evaluation.py --dataset middlebury2014-train --batch-size 1 --dataset-root $dataset_root --model crestereo_base --weights CREStereo_Base_Weights.MEGVII_V1 +``` + +``` +Dataset: middlebury2014-train @size: [384, 512]: +{ + 1: { + 2: {'mae': 1.704, 'rmse': 3.738, '1px': 0.738, '3px': 0.896, '5px': 0.933, 'relepe': 0.157, 'fl-all': 76.464} + 5: {'mae': 0.956, 'rmse': 2.963, '1px': 0.88, '3px': 0.948, '5px': 0.965, 'relepe': 0.124, 'fl-all': 88.186} + 10: {'mae': 0.792, 'rmse': 2.765, '1px': 0.905, '3px': 0.958, '5px': 0.97, 'relepe': 0.114, 'fl-all': 90.429} + 20: {'mae': 0.749, 'rmse': 2.706, '1px': 0.907, '3px': 0.961, '5px': 0.972, 'relepe': 0.113, 'fl-all': 90.807} + }, +} +{ + 2: { + 2: {'mae': 1.702, 'rmse': 3.784, '1px': 0.784, '3px': 0.894, '5px': 0.924, 'relepe': 0.172, 'fl-all': 80.313} + 5: {'mae': 0.932, 'rmse': 2.907, '1px': 0.877, '3px': 0.944, '5px': 0.963, 'relepe': 0.125, 'fl-all': 87.979} + 10: {'mae': 0.773, 'rmse': 2.768, '1px': 0.901, '3px': 0.958, '5px': 0.972, 'relepe': 0.117, 'fl-all': 90.43} + 20: {'mae': 0.854, 'rmse': 2.971, '1px': 0.9, '3px': 0.957, '5px': 0.97, 'relepe': 0.122, 'fl-all': 90.269} + }, +} +``` + +# Concerns when training + +We encourage users to be aware of the **aspect-ratio** and **disparity scale** they are targeting when doing any sort of training or fine-tuning. The model is highly sensitive to these two factors, as a consequence of naive multi-set fine-tuning one can achieve `0.2 mae` relatively fast. 
We recommend that users pay close attention to how they **balance dataset sizing** when training such networks. + + Ideally, dataset scaling should be treated at an individual level and a thorough **EDA** of the disparity distribution in random crops at the desired training / inference size should be performed prior to any large compute investments. + +### Disparity scaling + +##### Sample A + The top row contains a sample from `Sintel` whereas the bottom row one from `Middlebury`. + +![Disparity1](assets/disparity-domain-drift.jpg) + +From left to right (`left_image`, `right_image`, `valid_mask`, `valid_mask & ground_truth`, `prediction`). **Darker is further away, lighter is closer**. In the case of `Sintel` which is more closely aligned to the original distribution of `CREStereo` we notice that the model accurately predicts the background scale whereas in the case of `Middlebury2014` it cannot correctly estimate the continuous disparity. Notice that the frame composition is similar for both examples. The blue skybox in the `Sintel` scene behaves similarly to the `Middlebury` black background. However, because the `Middlebury` sample comes from an extremely large scene the crop size of `384x512` does not correctly capture the general training distribution. + + + + +##### Sample B + +The top row contains a scene from `Sceneflow` using the `Monkaa` split whilst the bottom row is a scene from `Middlebury`. This sample exhibits the same issues when it comes to **background estimation**. Given the exaggerated size of the `Middlebury` samples the model **collapses the smooth background** of the sample to what it considers to be a mean background disparity value.
+ +![Disparity2](assets/disparity-background-mode-collapse.jpg) + + +For more detail on why this behaviour occurs based on the training distribution proportions you can read more about the network at: https://github.com/pytorch/vision/pull/6629#discussion_r978160493 + + +### Metric overfitting + +##### Learning is critical in the beginning + +We also advise users to make use of faster training schedules, as the performance gain over long periods of time is marginal. Here we exhibit a difference between a faster decay schedule and a later decay schedule. + +![Loss1](assets/Loss.jpg) + +In **grey** we set the lr decay to begin after `30000` steps whilst in **orange** we opt for a very late learning rate decay at around `180000` steps. Although exhibiting stronger variance, we can notice that unfreezing the learning rate earlier whilst employing `gradient-norm` out-performs the default configuration. + +##### Gradient norm saves time + +![Loss2](assets/gradient-norm-removal.jpg) + +In **grey** we keep ``gradient norm`` enabled whilst in **orange** we do not. We can notice that removing the gradient norm exacerbates the performance decrease in the early stages whilst also showcasing an almost complete collapse around the `60000` steps mark where we started decaying the lr for **orange**. + +Although both runs achieve an improvement of about ``0.1`` mae after the lr decay start, the benefits of it are observable much faster when ``gradient norm`` is employed as the recovery period is no longer accounted for.
diff --git a/references/depth/stereo/__init__.py b/references/depth/stereo/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/references/depth/stereo/assets/Loss.jpg b/references/depth/stereo/assets/Loss.jpg new file mode 100644 index 00000000000..b6db8e204af Binary files /dev/null and b/references/depth/stereo/assets/Loss.jpg differ diff --git a/references/depth/stereo/assets/disparity-background-mode-collapse.jpg b/references/depth/stereo/assets/disparity-background-mode-collapse.jpg new file mode 100644 index 00000000000..b6542e8814f Binary files /dev/null and b/references/depth/stereo/assets/disparity-background-mode-collapse.jpg differ diff --git a/references/depth/stereo/assets/disparity-domain-drift.jpg b/references/depth/stereo/assets/disparity-domain-drift.jpg new file mode 100644 index 00000000000..8a98de03675 Binary files /dev/null and b/references/depth/stereo/assets/disparity-domain-drift.jpg differ diff --git a/references/depth/stereo/assets/gradient-norm-removal.jpg b/references/depth/stereo/assets/gradient-norm-removal.jpg new file mode 100644 index 00000000000..2c3c8459d5e Binary files /dev/null and b/references/depth/stereo/assets/gradient-norm-removal.jpg differ diff --git a/references/depth/stereo/cascade_evaluation.py b/references/depth/stereo/cascade_evaluation.py new file mode 100644 index 00000000000..7cb6413f1a5 --- /dev/null +++ b/references/depth/stereo/cascade_evaluation.py @@ -0,0 +1,299 @@ +import os +import warnings + +import torch +import torchvision +import torchvision.prototype.models.depth.stereo +import utils +from torch.nn import functional as F +from train import make_eval_loader + +from utils.metrics import AVAILABLE_METRICS +from visualization import make_prediction_image_side_to_side + + +def get_args_parser(add_help=True): + import argparse + + parser = argparse.ArgumentParser(description="PyTorch Stereo Matching Evaluation", add_help=add_help) + parser.add_argument("--dataset", type=str, 
default="middlebury2014-train", help="dataset to use") + parser.add_argument("--dataset-root", type=str, default="", help="root of the dataset") + + parser.add_argument("--checkpoint", type=str, default="", help="path to weights") + parser.add_argument("--weights", type=str, default=None, help="torchvision API weight") + parser.add_argument( + "--model", + type=str, + default="crestereo_base", + help="which model to use if not speciffying a training checkpoint", + ) + parser.add_argument("--img-folder", type=str, default="images") + + parser.add_argument("--batch-size", type=int, default=1, help="batch size") + parser.add_argument("--workers", type=int, default=0, help="number of workers") + + parser.add_argument("--eval-size", type=int, nargs="+", default=[384, 512], help="resize size") + parser.add_argument( + "--norm-mean", type=float, nargs="+", default=[0.5, 0.5, 0.5], help="mean for image normalization" + ) + parser.add_argument( + "--norm-std", type=float, nargs="+", default=[0.5, 0.5, 0.5], help="std for image normalization" + ) + parser.add_argument( + "--use-grayscale", action="store_true", help="use grayscale images instead of RGB", default=False + ) + parser.add_argument("--max-disparity", type=float, default=None, help="maximum disparity") + parser.add_argument( + "--interpolation-strategy", + type=str, + default="bilinear", + help="interpolation strategy", + choices=["bilinear", "bicubic", "mixed"], + ) + + parser.add_argument("--n_iterations", nargs="+", type=int, default=[10], help="number of recurent iterations") + parser.add_argument("--n_cascades", nargs="+", type=int, default=[1], help="number of cascades") + parser.add_argument( + "--metrics", + type=str, + nargs="+", + default=["mae", "rmse", "1px", "3px", "5px", "relepe"], + help="metrics to log", + choices=AVAILABLE_METRICS, + ) + parser.add_argument("--mixed-precision", action="store_true", help="use mixed precision training") + + parser.add_argument("--world-size", type=int, default=1, 
help="number of distributed processes") + parser.add_argument("--dist-url", type=str, default="env://", help="url used to set up distributed training") + parser.add_argument("--device", type=str, default="cuda", help="device to use for training") + + parser.add_argument("--save-images", action="store_true", help="save images of the predictions") + parser.add_argument("--padder-type", type=str, default="kitti", help="padder type", choices=["kitti", "sintel"]) + + return parser + + +def cascade_inference(model, image_left, image_right, iterations, cascades): + # check that image size is divisible by 16 * (2 ** (cascades - 1)) + for image in [image_left, image_right]: + if image.shape[-2] % ((2 ** (cascades - 1))) != 0: + raise ValueError( + f"image height is not divisible by {16 * (2 ** (cascades - 1))}. Image shape: {image.shape[-2]}" + ) + + if image.shape[-1] % ((2 ** (cascades - 1))) != 0: + raise ValueError( + f"image width is not divisible by {16 * (2 ** (cascades - 1))}. Image shape: {image.shape[-1]}" + ) + + left_image_pyramid = [image_left] + right_image_pyramid = [image_right] + for idx in range(0, cascades - 1): + ds_factor = int(2 ** (idx + 1)) + ds_shape = (image_left.shape[-2] // ds_factor, image_left.shape[-1] // ds_factor) + left_image_pyramid += F.interpolate(image_left, size=ds_shape, mode="bilinear", align_corners=True).unsqueeze(0) + right_image_pyramid += F.interpolate(image_right, size=ds_shape, mode="bilinear", align_corners=True).unsqueeze( + 0 + ) + + flow_init = None + for left_image, right_image in zip(reversed(left_image_pyramid), reversed(right_image_pyramid)): + flow_pred = model(left_image, right_image, flow_init, num_iters=iterations) + # flow pred is a list + flow_init = flow_pred[-1] + + return flow_init + + +@torch.inference_mode() +def _evaluate( + model, + args, + val_loader, + *, + padder_mode, + print_freq=10, + writer=None, + step=None, + iterations=10, + cascades=1, + batch_size=None, + header=None, + save_images=False, +
save_path="", +): + """Helper function to compute various metrics (epe, etc.) for a model on a given dataset. + We process as many samples as possible with ddp. + """ + model.eval() + header = header or "Test:" + device = torch.device(args.device) + metric_logger = utils.MetricLogger(delimiter=" ") + + iterations = iterations or args.recurrent_updates + + logger = utils.MetricLogger() + for meter_name in args.metrics: + logger.add_meter(meter_name, fmt="{global_avg:.4f}") + if "fl-all" not in args.metrics: + logger.add_meter("fl-all", fmt="{global_avg:.4f}") + + num_processed_samples = 0 + with torch.cuda.amp.autocast(enabled=args.mixed_precision, dtype=torch.float16): + batch_idx = 0 + for blob in metric_logger.log_every(val_loader, print_freq, header): + image_left, image_right, disp_gt, valid_disp_mask = (x.to(device) for x in blob) + padder = utils.InputPadder(image_left.shape, mode=padder_mode) + image_left, image_right = padder.pad(image_left, image_right) + + disp_pred = cascade_inference(model, image_left, image_right, iterations, cascades) + disp_pred = disp_pred[:, :1, :, :] + disp_pred = padder.unpad(disp_pred) + + if save_images: + if args.distributed: + rank_prefix = args.rank + else: + rank_prefix = 0 + make_prediction_image_side_to_side( + disp_pred, disp_gt, valid_disp_mask, save_path, prefix=f"batch_{rank_prefix}_{batch_idx}" + ) + + metrics, _ = utils.compute_metrics(disp_pred, disp_gt, valid_disp_mask, metrics=logger.meters.keys()) + num_processed_samples += image_left.shape[0] + for name in metrics: + logger.meters[name].update(metrics[name], n=1) + + batch_idx += 1 + + num_processed_samples = utils.reduce_across_processes(num_processed_samples) / args.world_size + + print("Num_processed_samples: ", num_processed_samples) + if ( + hasattr(val_loader.dataset, "__len__") + and len(val_loader.dataset) != num_processed_samples + and torch.distributed.get_rank() == 0 + ): + warnings.warn( + f"Number of processed samples {num_processed_samples} is 
different" + f"from the dataset size {len(val_loader.dataset)}. This may happen if" + "the dataset is not divisible by the batch size. Try lowering the batch size for more accurate results." + ) + + if writer is not None and args.rank == 0: + for meter_name, meter_value in logger.meters.items(): + scalar_name = f"{meter_name} {header}" + writer.add_scalar(scalar_name, meter_value.avg, step) + + logger.synchronize_between_processes() + print(header, logger) + + logger_metrics = {k: v.global_avg for k, v in logger.meters.items()} + return logger_metrics + + +def evaluate(model, loader, args, writer=None, step=None): + os.makedirs(args.img_folder, exist_ok=True) + checkpoint_name = os.path.basename(args.checkpoint) or args.weights + image_checkpoint_folder = os.path.join(args.img_folder, checkpoint_name) + + metrics = {} + base_image_folder = os.path.join(image_checkpoint_folder, args.dataset) + os.makedirs(base_image_folder, exist_ok=True) + + for n_cascades in args.n_cascades: + for n_iters in args.n_iterations: + + config = f"{n_cascades}c_{n_iters}i" + config_image_folder = os.path.join(base_image_folder, config) + os.makedirs(config_image_folder, exist_ok=True) + + metrics[config] = _evaluate( + model, + args, + loader, + padder_mode=args.padder_type, + header=f"{args.dataset} evaluation@ size:{args.eval_size} n_cascades:{n_cascades} n_iters:{n_iters}", + batch_size=args.batch_size, + writer=writer, + step=step, + iterations=n_iters, + cascades=n_cascades, + save_path=config_image_folder, + save_images=args.save_images, + ) + + metric_log = [] + metric_log_dict = {} + # print the final results + for config in metrics: + config_tokens = config.split("_") + config_iters = config_tokens[1][:-1] + config_cascades = config_tokens[0][:-1] + + metric_log_dict[config_cascades] = metric_log_dict.get(config_cascades, {}) + metric_log_dict[config_cascades][config_iters] = metrics[config] + + evaluation_str = f"{args.dataset} evaluation@ size:{args.eval_size} 
n_cascades:{config_cascades} recurrent_updates:{config_iters}" + metrics_str = f"Metrics: {metrics[config]}" + metric_log.extend([evaluation_str, metrics_str]) + + print(evaluation_str) + print(metrics_str) + + eval_log_name = f"{checkpoint_name.replace('.pth', '')}_eval.log" + print("Saving eval log to: ", eval_log_name) + with open(eval_log_name, "w") as f: + f.write(f"Dataset: {args.dataset} @size: {args.eval_size}:\n") + # write the dict line by line for each key, and each value in the keys + for config_cascades in metric_log_dict: + f.write("{\n") + f.write(f"\t{config_cascades}: {{\n") + for config_iters in metric_log_dict[config_cascades]: + # convert every metric to 3 decimal places + metrics = metric_log_dict[config_cascades][config_iters] + metrics = {k: float(f"{v:.3f}") for k, v in metrics.items()} + f.write(f"\t\t{config_iters}: {metrics}\n") + f.write("\t},\n") + f.write("}\n") + + +def load_checkpoint(args): + utils.setup_ddp(args) + + if not args.weights: + checkpoint = torch.load(args.checkpoint, map_location=torch.device("cpu"), weights_only=True) + if "model" in checkpoint: + experiment_args = checkpoint["args"] + model = torchvision.prototype.models.depth.stereo.__dict__[experiment_args.model](weights=None) + model.load_state_dict(checkpoint["model"]) + else: + model = torchvision.prototype.models.depth.stereo.__dict__[args.model](weights=None) + model.load_state_dict(checkpoint) + + # set the appropriate devices + if args.distributed and args.device == "cpu": + raise ValueError("The device must be cuda if we want to run in distributed mode using torchrun") + device = torch.device(args.device) + else: + model = torchvision.prototype.models.depth.stereo.__dict__[args.model](weights=args.weights) + + # convert to DDP if need be + if args.distributed: + model = model.to(args.device) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + else: + model.to(args.device) + + return model + + +def main(args): + model =
load_checkpoint(args) + loader = make_eval_loader(args.dataset, args) + evaluate(model, loader, args) + + +if __name__ == "__main__": + args = get_args_parser().parse_args() + main(args) diff --git a/references/depth/stereo/parsing.py b/references/depth/stereo/parsing.py new file mode 100644 index 00000000000..71a3ba9904e --- /dev/null +++ b/references/depth/stereo/parsing.py @@ -0,0 +1,89 @@ +import argparse +from functools import partial + +import torch + +from presets import StereoMatchingEvalPreset, StereoMatchingTrainPreset +from torchvision.datasets import ( + CarlaStereo, + CREStereo, + ETH3DStereo, + FallingThingsStereo, + InStereo2k, + Kitti2012Stereo, + Kitti2015Stereo, + Middlebury2014Stereo, + SceneFlowStereo, + SintelStereo, +) + +VALID_DATASETS = { + "crestereo": partial(CREStereo), + "carla-highres": partial(CarlaStereo), + "instereo2k": partial(InStereo2k), + "sintel": partial(SintelStereo), + "sceneflow-monkaa": partial(SceneFlowStereo, variant="Monkaa", pass_name="both"), + "sceneflow-flyingthings": partial(SceneFlowStereo, variant="FlyingThings3D", pass_name="both"), + "sceneflow-driving": partial(SceneFlowStereo, variant="Driving", pass_name="both"), + "fallingthings": partial(FallingThingsStereo, variant="both"), + "eth3d-train": partial(ETH3DStereo, split="train"), + "eth3d-test": partial(ETH3DStereo, split="test"), + "kitti2015-train": partial(Kitti2015Stereo, split="train"), + "kitti2015-test": partial(Kitti2015Stereo, split="test"), + "kitti2012-train": partial(Kitti2012Stereo, split="train"), + "kitti2012-test": partial(Kitti2012Stereo, split="test"), + "middlebury2014-other": partial( + Middlebury2014Stereo, split="additional", use_ambient_views=True, calibration="both" + ), + "middlebury2014-train": partial(Middlebury2014Stereo, split="train", calibration="perfect"), + "middlebury2014-test": partial(Middlebury2014Stereo, split="test", calibration=None), + "middlebury2014-train-ambient": partial( + Middlebury2014Stereo, split="train",
use_ambient_views=True, calibration="perfect" + ), +} + + +def make_train_transform(args: argparse.Namespace) -> torch.nn.Module: + return StereoMatchingTrainPreset( + resize_size=args.resize_size, + crop_size=args.crop_size, + rescale_prob=args.rescale_prob, + scaling_type=args.scaling_type, + scale_range=args.scale_range, + scale_interpolation_type=args.interpolation_strategy, + use_grayscale=args.use_grayscale, + mean=args.norm_mean, + std=args.norm_std, + horizontal_flip_prob=args.flip_prob, + gpu_transforms=args.gpu_transforms, + max_disparity=args.max_disparity, + spatial_shift_prob=args.spatial_shift_prob, + spatial_shift_max_angle=args.spatial_shift_max_angle, + spatial_shift_max_displacement=args.spatial_shift_max_displacement, + spatial_shift_interpolation_type=args.interpolation_strategy, + gamma_range=args.gamma_range, + brightness=args.brightness_range, + contrast=args.contrast_range, + saturation=args.saturation_range, + hue=args.hue_range, + asymmetric_jitter_prob=args.asymmetric_jitter_prob, + ) + + +def make_eval_transform(args: argparse.Namespace) -> torch.nn.Module: + if args.eval_size is None: + resize_size = args.crop_size + else: + resize_size = args.eval_size + + return StereoMatchingEvalPreset( + mean=args.norm_mean, + std=args.norm_std, + use_grayscale=args.use_grayscale, + resize_size=resize_size, + interpolation_type=args.interpolation_strategy, + ) + + +def make_dataset(dataset_name: str, dataset_root: str, transforms: torch.nn.Module) -> torch.utils.data.Dataset: + return VALID_DATASETS[dataset_name](root=dataset_root, transforms=transforms) diff --git a/references/depth/stereo/presets.py b/references/depth/stereo/presets.py new file mode 100644 index 00000000000..cadd2405178 --- /dev/null +++ b/references/depth/stereo/presets.py @@ -0,0 +1,144 @@ +from typing import Optional, Tuple, Union + +import torch +import transforms as T + + +class StereoMatchingEvalPreset(torch.nn.Module): + def __init__( + self, + mean: float = 0.5, + std:
float = 0.5, + resize_size: Optional[Tuple[int, ...]] = None, + max_disparity: Optional[float] = None, + interpolation_type: str = "bilinear", + use_grayscale: bool = False, + ) -> None: + super().__init__() + + transforms = [ + T.ToTensor(), + T.ConvertImageDtype(torch.float32), + ] + + if use_grayscale: + transforms.append(T.ConvertToGrayscale()) + + if resize_size is not None: + transforms.append(T.Resize(resize_size, interpolation_type=interpolation_type)) + + transforms.extend( + [ + T.Normalize(mean=mean, std=std), + T.MakeValidDisparityMask(max_disparity=max_disparity), + T.ValidateModelInput(), + ] + ) + + self.transforms = T.Compose(transforms) + + def forward(self, images, disparities, masks): + return self.transforms(images, disparities, masks) + + +class StereoMatchingTrainPreset(torch.nn.Module): + def __init__( + self, + *, + resize_size: Optional[Tuple[int, ...]], + resize_interpolation_type: str = "bilinear", + # RandomResizeAndCrop params + crop_size: Tuple[int, int], + rescale_prob: float = 1.0, + scaling_type: str = "exponential", + scale_range: Tuple[float, float] = (-0.2, 0.5), + scale_interpolation_type: str = "bilinear", + # convert to grayscale + use_grayscale: bool = False, + # normalization params + mean: float = 0.5, + std: float = 0.5, + # processing device + gpu_transforms: bool = False, + # masking + max_disparity: Optional[int] = 256, + # SpatialShift params + spatial_shift_prob: float = 0.5, + spatial_shift_max_angle: float = 0.5, + spatial_shift_max_displacement: float = 0.5, + spatial_shift_interpolation_type: str = "bilinear", + # AssymetricColorJitter + gamma_range: Tuple[float, float] = (0.8, 1.2), + brightness: Union[int, Tuple[int, int]] = (0.8, 1.2), + contrast: Union[int, Tuple[int, int]] = (0.8, 1.2), + saturation: Union[int, Tuple[int, int]] = 0.0, + hue: Union[int, Tuple[int, int]] = 0.0, + asymmetric_jitter_prob: float = 1.0, + # RandomHorizontalFlip + horizontal_flip_prob: float = 0.5, + # RandomOcclusion + 
occlusion_prob: float = 0.0, + occlusion_px_range: Tuple[int, int] = (50, 100), + # RandomErase + erase_prob: float = 0.0, + erase_px_range: Tuple[int, int] = (50, 100), + erase_num_repeats: int = 1, + ) -> None: + + if scaling_type not in ["linear", "exponential"]: + raise ValueError(f"Unknown scaling type: {scaling_type}. Available types: linear, exponential") + + super().__init__() + transforms = [T.ToTensor()] + + # when fixing size across multiple datasets, we ensure + # that the same size is used for all datasets when cropping + if resize_size is not None: + transforms.append(T.Resize(resize_size, interpolation_type=resize_interpolation_type)) + + if gpu_transforms: + transforms.append(T.ToGPU()) + + # color handling + color_transforms = [ + T.AsymmetricColorJitter( + brightness=brightness, contrast=contrast, saturation=saturation, hue=hue, p=asymmetric_jitter_prob + ), + T.AsymetricGammaAdjust(p=asymmetric_jitter_prob, gamma_range=gamma_range), + ] + + if use_grayscale: + color_transforms.append(T.ConvertToGrayscale()) + + transforms.extend(color_transforms) + + transforms.extend( + [ + T.RandomSpatialShift( + p=spatial_shift_prob, + max_angle=spatial_shift_max_angle, + max_px_shift=spatial_shift_max_displacement, + interpolation_type=spatial_shift_interpolation_type, + ), + T.ConvertImageDtype(torch.float32), + T.RandomRescaleAndCrop( + crop_size=crop_size, + scale_range=scale_range, + rescale_prob=rescale_prob, + scaling_type=scaling_type, + interpolation_type=scale_interpolation_type, + ), + T.RandomHorizontalFlip(horizontal_flip_prob), + # occlusion after flip, otherwise we're occluding the reference image + T.RandomOcclusion(p=occlusion_prob, occlusion_px_range=occlusion_px_range), + T.RandomErase(p=erase_prob, erase_px_range=erase_px_range, max_erase=erase_num_repeats), + T.Normalize(mean=mean, std=std), + T.MakeValidDisparityMask(max_disparity), + T.ValidateModelInput(), + ] + ) + + self.transforms = T.Compose(transforms) + + def forward(self, images, 
disparties, mask): + return self.transforms(images, disparties, mask) diff --git a/references/depth/stereo/train.py b/references/depth/stereo/train.py new file mode 100644 index 00000000000..34332b46129 --- /dev/null +++ b/references/depth/stereo/train.py @@ -0,0 +1,789 @@ +import argparse +import os +import warnings +from pathlib import Path +from typing import List, Union + +import numpy as np +import numpy.typing as npt +import torch +import torch.distributed as dist +import torchvision.models.optical_flow +import torchvision.prototype.models.depth.stereo +import utils +import visualization + +from parsing import make_dataset, make_eval_transform, make_train_transform, VALID_DATASETS +from torch import nn +from torchvision.transforms.functional import get_dimensions, InterpolationMode, resize +from utils.metrics import AVAILABLE_METRICS +from utils.norm import freeze_batch_norm + + +def make_stereo_flow(flow: Union[torch.Tensor, List[torch.Tensor]], model_out_channels: int) -> torch.Tensor: + """Helper function to make stereo flow from a given model output""" + if isinstance(flow, list): + return [make_stereo_flow(flow_i, model_out_channels) for flow_i in flow] + + B, C, H, W = flow.shape + # we need to add zero flow if the model outputs 2 channels + if C == 1 and model_out_channels == 2: + zero_flow = torch.zeros_like(flow) + # by convention the flow is X-Y axis, so we need the Y flow last + flow = torch.cat([flow, zero_flow], dim=1) + return flow + + +def make_lr_schedule(args: argparse.Namespace, optimizer: torch.optim.Optimizer) -> npt.NDArray: + """Helper function to return a learning rate scheduler for CRE-stereo""" + if args.decay_after_steps < args.warmup_steps: + raise ValueError(f"decay_after_steps: {args.decay_after_steps} must be greater than warmup_steps: {args.warmup_steps}") + + warmup_steps = args.warmup_steps if args.warmup_steps else 0 + flat_lr_steps = args.decay_after_steps - warmup_steps if args.decay_after_steps else 0 + decay_lr_steps =
args.total_iterations - flat_lr_steps + + max_lr = args.lr + min_lr = args.min_lr + + schedulers = [] + milestones = [] + + if warmup_steps > 0: + if args.lr_warmup_method == "linear": + warmup_lr_scheduler = torch.optim.lr_scheduler.LinearLR( + optimizer, start_factor=args.lr_warmup_factor, total_iters=warmup_steps + ) + elif args.lr_warmup_method == "constant": + warmup_lr_scheduler = torch.optim.lr_scheduler.ConstantLR( + optimizer, factor=args.lr_warmup_factor, total_iters=warmup_steps + ) + else: + raise ValueError(f"Unknown lr warmup method {args.lr_warmup_method}") + schedulers.append(warmup_lr_scheduler) + milestones.append(warmup_steps) + + if flat_lr_steps > 0: + flat_lr_scheduler = torch.optim.lr_scheduler.ConstantLR(optimizer, factor=max_lr, total_iters=flat_lr_steps) + schedulers.append(flat_lr_scheduler) + milestones.append(flat_lr_steps + warmup_steps) + + if decay_lr_steps > 0: + if args.lr_decay_method == "cosine": + decay_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=decay_lr_steps, eta_min=min_lr + ) + elif args.lr_decay_method == "linear": + decay_lr_scheduler = torch.optim.lr_scheduler.LinearLR( + optimizer, start_factor=max_lr, end_factor=min_lr, total_iters=decay_lr_steps + ) + elif args.lr_decay_method == "exponential": + decay_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR( + optimizer, gamma=args.lr_decay_gamma, last_epoch=-1 + ) + else: + raise ValueError(f"Unknown lr decay method {args.lr_decay_method}") + schedulers.append(decay_lr_scheduler) + + scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, schedulers, milestones=milestones) + return scheduler + + +def shuffle_dataset(dataset): + """Shuffle the dataset""" + perm = torch.randperm(len(dataset)) + return torch.utils.data.Subset(dataset, perm) + + +def resize_dataset_to_n_steps( + dataset: torch.utils.data.Dataset, dataset_steps: int, samples_per_step: int, args: argparse.Namespace +) -> torch.utils.data.Dataset: + original_size = 
len(dataset) + if args.steps_is_epochs: + samples_per_step = original_size + target_size = dataset_steps * samples_per_step + + dataset_copies = [] + n_expands, remainder = divmod(target_size, original_size) + for idx in range(n_expands): + dataset_copies.append(dataset) + + if remainder > 0: + dataset_copies.append(torch.utils.data.Subset(dataset, list(range(remainder)))) + + if args.dataset_shuffle: + dataset_copies = [shuffle_dataset(dataset_copy) for dataset_copy in dataset_copies] + + dataset = torch.utils.data.ConcatDataset(dataset_copies) + return dataset + + +def get_train_dataset(dataset_root: str, args: argparse.Namespace) -> torch.utils.data.Dataset: + datasets = [] + for dataset_name in args.train_datasets: + transform = make_train_transform(args) + dataset = make_dataset(dataset_name, dataset_root, transform) + datasets.append(dataset) + + if len(datasets) == 0: + raise ValueError("No datasets specified for training") + + samples_per_step = args.world_size * args.batch_size + + for idx, (dataset, steps_per_dataset) in enumerate(zip(datasets, args.dataset_steps)): + datasets[idx] = resize_dataset_to_n_steps(dataset, steps_per_dataset, samples_per_step, args) + + dataset = torch.utils.data.ConcatDataset(datasets) + if args.dataset_order_shuffle: + dataset = shuffle_dataset(dataset) + + print(f"Training dataset: {len(dataset)} samples") + return dataset + + +@torch.inference_mode() +def _evaluate( + model, + args, + val_loader, + *, + padder_mode, + print_freq=10, + writer=None, + step=None, + iterations=None, + batch_size=None, + header=None, +): + """Helper function to compute various metrics (epe, etc.) 
for a model on a given dataset.""" + model.eval() + header = header or "Test:" + device = torch.device(args.device) + metric_logger = utils.MetricLogger(delimiter=" ") + + iterations = iterations or args.recurrent_updates + + logger = utils.MetricLogger() + for meter_name in args.metrics: + logger.add_meter(meter_name, fmt="{global_avg:.4f}") + if "fl-all" not in args.metrics: + logger.add_meter("fl-all", fmt="{global_avg:.4f}") + + num_processed_samples = 0 + with torch.cuda.amp.autocast(enabled=args.mixed_precision, dtype=torch.float16): + for blob in metric_logger.log_every(val_loader, print_freq, header): + image_left, image_right, disp_gt, valid_disp_mask = (x.to(device) for x in blob) + padder = utils.InputPadder(image_left.shape, mode=padder_mode) + image_left, image_right = padder.pad(image_left, image_right) + + disp_predictions = model(image_left, image_right, flow_init=None, num_iters=iterations) + disp_pred = disp_predictions[-1][:, :1, :, :] + disp_pred = padder.unpad(disp_pred) + + metrics, _ = utils.compute_metrics(disp_pred, disp_gt, valid_disp_mask, metrics=logger.meters.keys()) + num_processed_samples += image_left.shape[0] + for name in metrics: + logger.meters[name].update(metrics[name], n=1) + + num_processed_samples = utils.reduce_across_processes(num_processed_samples) + + print("Num_processed_samples: ", num_processed_samples) + if ( + hasattr(val_loader.dataset, "__len__") + and len(val_loader.dataset) != num_processed_samples + and torch.distributed.get_rank() == 0 + ): + warnings.warn( + f"Number of processed samples {num_processed_samples} is different" + f"from the dataset size {len(val_loader.dataset)}. This may happen if" + "the dataset is not divisible by the batch size. Try lowering the batch size or GPU number for more accurate results." 
+ ) + + if writer is not None and args.rank == 0: + for meter_name, meter_value in logger.meters.items(): + scalar_name = f"{meter_name} {header}" + writer.add_scalar(scalar_name, meter_value.avg, step) + + logger.synchronize_between_processes() + print(header, logger) + + +def make_eval_loader(dataset_name: str, args: argparse.Namespace) -> torch.utils.data.DataLoader: + if args.weights: + weights = torchvision.models.get_weight(args.weights) + trans = weights.transforms() + + def preprocessing(image_left, image_right, disp, valid_disp_mask): + C_o, H_o, W_o = get_dimensions(image_left) + image_left, image_right = trans(image_left, image_right) + + C_t, H_t, W_t = get_dimensions(image_left) + scale_factor = W_t / W_o + + if disp is not None and not isinstance(disp, torch.Tensor): + disp = torch.from_numpy(disp) + if W_t != W_o: + disp = resize(disp, (H_t, W_t), interpolation=InterpolationMode.BILINEAR) * scale_factor + if valid_disp_mask is not None and not isinstance(valid_disp_mask, torch.Tensor): + valid_disp_mask = torch.from_numpy(valid_disp_mask) + if W_t != W_o: + valid_disp_mask = resize(valid_disp_mask, (H_t, W_t), interpolation=InterpolationMode.NEAREST) + return image_left, image_right, disp, valid_disp_mask + + else: + preprocessing = make_eval_transform(args) + + val_dataset = make_dataset(dataset_name, args.dataset_root, transforms=preprocessing) + if args.distributed: + sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, drop_last=False) + else: + sampler = torch.utils.data.SequentialSampler(val_dataset) + + val_loader = torch.utils.data.DataLoader( + val_dataset, + sampler=sampler, + batch_size=args.batch_size, + pin_memory=True, + num_workers=args.workers, + ) + + return val_loader + + +def evaluate(model, loaders, args, writer=None, step=None): + for loader_name, loader in loaders.items(): + _evaluate( + model, + args, + loader, + iterations=args.recurrent_updates, + padder_mode=args.padder_type, + header=f"{loader_name}
evaluation", + batch_size=args.batch_size, + writer=writer, + step=step, + ) + + +def run(model, optimizer, scheduler, train_loader, val_loaders, logger, writer, scaler, args): + device = torch.device(args.device) + # wrap the loader in a logger + loader = iter(logger.log_every(train_loader)) + # output channels + model_out_channels = model.module.output_channels if args.distributed else model.output_channels + + torch.set_num_threads(args.threads) + + sequence_criterion = utils.SequenceLoss( + gamma=args.gamma, + max_flow=args.max_disparity, + exclude_large_flows=args.flow_loss_exclude_large, + ).to(device) + + if args.consistency_weight: + consistency_criterion = utils.FlowSequenceConsistencyLoss( + args.gamma, + resize_factor=0.25, + rescale_factor=0.25, + rescale_mode="bilinear", + ).to(device) + else: + consistency_criterion = None + + if args.psnr_weight: + psnr_criterion = utils.PSNRLoss().to(device) + else: + psnr_criterion = None + + if args.smoothness_weight: + smoothness_criterion = utils.SmoothnessLoss().to(device) + else: + smoothness_criterion = None + + if args.photometric_weight: + photometric_criterion = utils.FlowPhotoMetricLoss( + ssim_weight=args.photometric_ssim_weight, + max_displacement_ratio=args.photometric_max_displacement_ratio, + ssim_use_padding=False, + ).to(device) + else: + photometric_criterion = None + + for step in range(args.start_step + 1, args.total_iterations + 1): + data_blob = next(loader) + optimizer.zero_grad() + + # unpack the data blob + image_left, image_right, disp_mask, valid_disp_mask = (x.to(device) for x in data_blob) + with torch.cuda.amp.autocast(enabled=args.mixed_precision, dtype=torch.float16): + disp_predictions = model(image_left, image_right, flow_init=None, num_iters=args.recurrent_updates) + # different models have different outputs, make sure we get the right ones for this task + disp_predictions = make_stereo_flow(disp_predictions, model_out_channels) + # should the architecture or training loop require 
it, we have to adjust the disparity mask + # target to possibly look like an optical flow mask + disp_mask = make_stereo_flow(disp_mask, model_out_channels) + # sequence loss on top of the model outputs + + loss = sequence_criterion(disp_predictions, disp_mask, valid_disp_mask) * args.flow_loss_weight + + if args.consistency_weight > 0: + loss_consistency = consistency_criterion(disp_predictions) + loss += loss_consistency * args.consistency_weight + + if args.psnr_weight > 0: + loss_psnr = 0.0 + for pred in disp_predictions: + # predictions might have 2 channels + loss_psnr += psnr_criterion( + pred * valid_disp_mask.unsqueeze(1), + disp_mask * valid_disp_mask.unsqueeze(1), + ).mean() # mean the psnr loss over the batch + loss += loss_psnr / len(disp_predictions) * args.psnr_weight + + if args.photometric_weight > 0: + loss_photometric = 0.0 + for pred in disp_predictions: + # predictions might have 1 channel, therefore we need to inpute 0s for the second channel + if model_out_channels == 1: + pred = torch.cat([pred, torch.zeros_like(pred)], dim=1) + + loss_photometric += photometric_criterion( + image_left, image_right, pred, valid_disp_mask + ) # photometric loss already comes out meaned over the batch + loss += loss_photometric / len(disp_predictions) * args.photometric_weight + + if args.smoothness_weight > 0: + loss_smoothness = 0.0 + for pred in disp_predictions: + # predictions might have 2 channels + loss_smoothness += smoothness_criterion( + image_left, pred[:, :1, :, :] + ).mean() # mean the smoothness loss over the batch + loss += loss_smoothness / len(disp_predictions) * args.smoothness_weight + + with torch.no_grad(): + metrics, _ = utils.compute_metrics( + disp_predictions[-1][:, :1, :, :], # predictions might have 2 channels + disp_mask[:, :1, :, :], # so does the ground truth + valid_disp_mask, + args.metrics, + ) + + metrics.pop("fl-all", None) + logger.update(loss=loss, **metrics) + + if scaler is not None: + scaler.scale(loss).backward() + 
scaler.unscale_(optimizer) + if args.clip_grad_norm: + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.clip_grad_norm) + scaler.step(optimizer) + scaler.update() + else: + loss.backward() + if args.clip_grad_norm: + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.clip_grad_norm) + optimizer.step() + + scheduler.step() + + if not dist.is_initialized() or dist.get_rank() == 0: + if writer is not None and step % args.tensorboard_log_frequency == 0: + # log the loss and metrics to tensorboard + + writer.add_scalar("loss", loss, step) + for name, value in logger.meters.items(): + writer.add_scalar(name, value.avg, step) + # log the images to tensorboard + pred_grid = visualization.make_training_sample_grid( + image_left, image_right, disp_mask, valid_disp_mask, disp_predictions + ) + writer.add_image("predictions", pred_grid, step, dataformats="HWC") + + # second thing we want to see is how relevant the iterative refinement is + pred_sequence_grid = visualization.make_disparity_sequence_grid(disp_predictions, disp_mask) + writer.add_image("sequence", pred_sequence_grid, step, dataformats="HWC") + + if step % args.save_frequency == 0: + if not args.distributed or args.rank == 0: + model_without_ddp = ( + model.module if isinstance(model, torch.nn.parallel.DistributedDataParallel) else model + ) + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "scheduler": scheduler.state_dict(), + "step": step, + "args": args, + } + os.makedirs(args.checkpoint_dir, exist_ok=True) + torch.save(checkpoint, Path(args.checkpoint_dir) / f"{args.name}_{step}.pth") + torch.save(checkpoint, Path(args.checkpoint_dir) / f"{args.name}.pth") + + if step % args.valid_frequency == 0: + evaluate(model, val_loaders, args, writer, step) + model.train() + if args.freeze_batch_norm: + if isinstance(model, nn.parallel.DistributedDataParallel): + freeze_batch_norm(model.module) + else: + freeze_batch_norm(model) + + # one 
final save at the end + if not args.distributed or args.rank == 0: + model_without_ddp = model.module if isinstance(model, torch.nn.parallel.DistributedDataParallel) else model + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "scheduler": scheduler.state_dict(), + "step": step, + "args": args, + } + os.makedirs(args.checkpoint_dir, exist_ok=True) + torch.save(checkpoint, Path(args.checkpoint_dir) / f"{args.name}_{step}.pth") + torch.save(checkpoint, Path(args.checkpoint_dir) / f"{args.name}.pth") + + +def main(args): + args.total_iterations = sum(args.dataset_steps) + + # initialize DDP setting + utils.setup_ddp(args) + print(args) + + args.test_only = args.train_datasets is None + + # set the appropriate devices + if args.distributed and args.device == "cpu": + raise ValueError("The device must be cuda if we want to run in distributed mode using torchrun") + device = torch.device(args.device) + + # select model architecture + model = torchvision.prototype.models.depth.stereo.__dict__[args.model](weights=args.weights) + + # convert to DDP if need be + if args.distributed: + model = model.to(args.gpu) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + model_without_ddp = model.module + else: + model.to(device) + model_without_ddp = model + + os.makedirs(args.checkpoint_dir, exist_ok=True) + + val_loaders = {name: make_eval_loader(name, args) for name in args.test_datasets} + + # EVAL ONLY configurations + if args.test_only: + evaluate(model, val_loaders, args) + return + + # Sanity check for the parameter count + print(f"Parameter Count: {sum(p.numel() for p in model.parameters() if p.requires_grad)}") + + # Compose the training dataset + train_dataset = get_train_dataset(args.dataset_root, args) + + # initialize the optimizer + if args.optimizer == "adam": + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == "sgd": 
+ optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, momentum=0.9) + else: + raise ValueError(f"Unknown optimizer {args.optimizer}. Please choose between adam and sgd") + + # initialize the learning rate schedule + scheduler = make_lr_schedule(args, optimizer) + + # load them from checkpoint if needed + args.start_step = 0 + if args.resume_path is not None: + checkpoint = torch.load(args.resume_path, map_location="cpu", weights_only=True) + if "model" in checkpoint: + # this means the user requested to resume from a training checkpoint + model_without_ddp.load_state_dict(checkpoint["model"]) + # this means the user wants to continue training from where it was left off + if args.resume_schedule: + optimizer.load_state_dict(checkpoint["optimizer"]) + scheduler.load_state_dict(checkpoint["scheduler"]) + args.start_step = checkpoint["step"] + 1 + # modify starting point of the dat + sample_start_step = args.start_step * args.batch_size * args.world_size + train_dataset = train_dataset[sample_start_step:] + + else: + # this means the user wants to finetune on top of a model state dict + # and that no other changes are required + model_without_ddp.load_state_dict(checkpoint) + + torch.backends.cudnn.benchmark = True + + # enable training mode + model.train() + if args.freeze_batch_norm: + freeze_batch_norm(model_without_ddp) + + # put dataloader on top of the dataset + # make sure to disable shuffling since the dataset is already shuffled + # in order to guarantee quasi randomness whilst retaining a deterministic + # dataset consumption order + if args.distributed: + # the train dataset is preshuffled in order to respect the iteration order + sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, shuffle=False, drop_last=True) + else: + # the train dataset is already shuffled, so we can use a simple SequentialSampler + sampler = torch.utils.data.SequentialSampler(train_dataset) + + train_loader = 
torch.utils.data.DataLoader( + train_dataset, + sampler=sampler, + batch_size=args.batch_size, + pin_memory=True, + num_workers=args.workers, + ) + + # initialize the logger + if args.tensorboard_summaries: + from torch.utils.tensorboard import SummaryWriter + + tensorboard_path = Path(args.checkpoint_dir) / "tensorboard" + os.makedirs(tensorboard_path, exist_ok=True) + + tensorboard_run = tensorboard_path / f"{args.name}" + writer = SummaryWriter(tensorboard_run) + else: + writer = None + + logger = utils.MetricLogger(delimiter=" ") + + scaler = torch.cuda.amp.GradScaler() if args.mixed_precision else None + # run the training loop + # this will perform optimization, respectively logging and saving checkpoints + # when need be + run( + model=model, + optimizer=optimizer, + scheduler=scheduler, + train_loader=train_loader, + val_loaders=val_loaders, + logger=logger, + writer=writer, + scaler=scaler, + args=args, + ) + + +def get_args_parser(add_help=True): + import argparse + + parser = argparse.ArgumentParser(description="PyTorch Stereo Matching Training", add_help=add_help) + # checkpointing + parser.add_argument("--name", default="crestereo", help="name of the experiment") + parser.add_argument("--resume", type=str, default=None, help="from which checkpoint to resume") + parser.add_argument("--checkpoint-dir", type=str, default="checkpoints", help="path to the checkpoint directory") + + # dataset + parser.add_argument("--dataset-root", type=str, default="", help="path to the dataset root directory") + parser.add_argument( + "--train-datasets", + type=str, + nargs="+", + default=["crestereo"], + help="dataset(s) to train on", + choices=list(VALID_DATASETS.keys()), + ) + parser.add_argument( + "--dataset-steps", type=int, nargs="+", default=[300_000], help="number of steps for each dataset" + ) + parser.add_argument( + "--steps-is-epochs", action="store_true", help="if set, dataset-steps are interpreted as epochs" + ) + parser.add_argument( + "--test-datasets", + 
type=str, + nargs="+", + default=["middlebury2014-train"], + help="dataset(s) to test on", + choices=["middlebury2014-train"], + ) + # argparse's type=bool calls bool() on the raw text, so every non-empty string (even "False") becomes True; + # parse the text explicitly instead, keeping every value other than the usual "false" spellings truthy + def parse_bool(value): + return str(value).strip().lower() not in ("", "0", "false", "no", "off") + + parser.add_argument("--dataset-shuffle", type=parse_bool, help="shuffle the dataset", default=True) + parser.add_argument("--dataset-order-shuffle", type=parse_bool, help="shuffle the dataset order", default=True) + parser.add_argument("--batch-size", type=int, default=2, help="batch size per GPU") + parser.add_argument("--workers", type=int, default=4, help="number of workers per GPU") + parser.add_argument( + "--threads", + type=int, + default=16, + help="number of CPU threads per GPU. This can be changed around to speed-up transforms if needed. This can lead to worker thread contention so use with care.", + ) + + # model architecture + parser.add_argument( + "--model", + type=str, + default="crestereo_base", + help="model architecture", + choices=["crestereo_base", "raft_stereo"], + ) + parser.add_argument("--recurrent-updates", type=int, default=10, help="number of recurrent updates") + parser.add_argument("--freeze-batch-norm", action="store_true", help="freeze batch norm parameters") + + # loss parameters + parser.add_argument("--gamma", type=float, default=0.8, help="gamma parameter for the flow sequence loss") + parser.add_argument("--flow-loss-weight", type=float, default=1.0, help="weight for the flow loss") + parser.add_argument( + "--flow-loss-exclude-large", + action="store_true", + help="exclude large flow values from the loss. 
A large value is defined as a value greater than the ground truth flow norm", + default=False, + ) + parser.add_argument("--consistency-weight", type=float, default=0.0, help="consistency loss weight") + parser.add_argument( + "--consistency-resize-factor", + type=float, + default=0.25, + help="consistency loss resize factor to account for the fact that the flow is computed on a downsampled image", + ) + parser.add_argument("--psnr-weight", type=float, default=0.0, help="psnr loss weight") + parser.add_argument("--smoothness-weight", type=float, default=0.0, help="smoothness loss weight") + parser.add_argument("--photometric-weight", type=float, default=0.0, help="photometric loss weight") + parser.add_argument( + "--photometric-max-displacement-ratio", + type=float, + default=0.15, + help="Only pixels with a displacement smaller than this ratio of the image width will be considered for the photometric loss", + ) + parser.add_argument("--photometric-ssim-weight", type=float, default=0.85, help="photometric ssim loss weight") + + # transforms parameters + parser.add_argument("--gpu-transforms", action="store_true", help="use GPU transforms") + parser.add_argument( + "--eval-size", type=int, nargs="+", default=[384, 512], help="size of the images for evaluation" + ) + parser.add_argument("--resize-size", type=int, nargs=2, default=None, help="resize size") + parser.add_argument("--crop-size", type=int, nargs=2, default=[384, 512], help="crop size") + parser.add_argument("--scale-range", type=float, nargs=2, default=[0.6, 1.0], help="random scale range") + parser.add_argument("--rescale-prob", type=float, default=1.0, help="probability of resizing the image") + parser.add_argument( + "--scaling-type", type=str, default="linear", help="scaling type", choices=["exponential", "linear"] + ) + parser.add_argument("--flip-prob", type=float, default=0.5, help="probability of flipping the image") + parser.add_argument( + "--norm-mean", type=float, nargs="+", default=[0.5, 
0.5, 0.5], help="mean for image normalization" + ) + parser.add_argument( + "--norm-std", type=float, nargs="+", default=[0.5, 0.5, 0.5], help="std for image normalization" + ) + parser.add_argument( + "--use-grayscale", action="store_true", help="use grayscale images instead of RGB", default=False + ) + parser.add_argument("--max-disparity", type=float, default=None, help="maximum disparity") + parser.add_argument( + "--interpolation-strategy", + type=str, + default="bilinear", + help="interpolation strategy", + choices=["bilinear", "bicubic", "mixed"], + ) + parser.add_argument("--spatial-shift-prob", type=float, default=1.0, help="probability of shifting the image") + parser.add_argument( + "--spatial-shift-max-angle", type=float, default=0.1, help="maximum angle for the spatial shift" + ) + parser.add_argument( + "--spatial-shift-max-displacement", type=float, default=2.0, help="maximum displacement for the spatial shift" + ) + parser.add_argument("--gamma-range", type=float, nargs="+", default=[0.8, 1.2], help="range for gamma correction") + parser.add_argument( + "--brightness-range", type=float, nargs="+", default=[0.8, 1.2], help="range for brightness correction" + ) + parser.add_argument( + "--contrast-range", type=float, nargs="+", default=[0.8, 1.2], help="range for contrast correction" + ) + parser.add_argument( + "--saturation-range", type=float, nargs="+", default=0.0, help="range for saturation correction" + ) + parser.add_argument("--hue-range", type=float, nargs="+", default=0.0, help="range for hue correction") + parser.add_argument( + "--asymmetric-jitter-prob", + type=float, + default=1.0, + help="probability of using asymmetric jitter instead of symmetric jitter", + ) + parser.add_argument("--occlusion-prob", type=float, default=0.5, help="probability of occluding the rightimage") + parser.add_argument( + "--occlusion-px-range", type=int, nargs="+", default=[50, 100], help="range for the number of occluded pixels" + ) + 
parser.add_argument("--erase-prob", type=float, default=0.0, help="probability of erasing in both images") + parser.add_argument( + "--erase-px-range", type=int, nargs="+", default=[50, 100], help="range for the number of erased pixels" + ) + parser.add_argument( + "--erase-num-repeats", type=int, default=1, help="number of times to repeat the erase operation" + ) + + # optimizer parameters + parser.add_argument("--optimizer", type=str, default="adam", help="optimizer", choices=["adam", "sgd"]) + parser.add_argument("--lr", type=float, default=4e-4, help="learning rate") + parser.add_argument("--weight-decay", type=float, default=0.0, help="weight decay") + parser.add_argument("--clip-grad-norm", type=float, default=0.0, help="clip grad norm") + + # lr_scheduler parameters + parser.add_argument("--min-lr", type=float, default=2e-5, help="minimum learning rate") + parser.add_argument("--warmup-steps", type=int, default=6_000, help="number of warmup steps") + parser.add_argument( + "--decay-after-steps", type=int, default=180_000, help="number of steps after which to start decay the lr" + ) + parser.add_argument( + "--lr-warmup-method", type=str, default="linear", help="warmup method", choices=["linear", "cosine"] + ) + parser.add_argument("--lr-warmup-factor", type=float, default=0.02, help="warmup factor for the learning rate") + parser.add_argument( + "--lr-decay-method", + type=str, + default="linear", + help="decay method", + choices=["linear", "cosine", "exponential"], + ) + parser.add_argument("--lr-decay-gamma", type=float, default=0.8, help="decay factor for the learning rate") + + # deterministic behaviour + parser.add_argument("--seed", type=int, default=42, help="seed for random number generators") + + # mixed precision training + parser.add_argument("--mixed-precision", action="store_true", help="use mixed precision training") + + # logging + parser.add_argument("--tensorboard-summaries", action="store_true", help="log to tensorboard") + 
parser.add_argument("--tensorboard-log-frequency", type=int, default=100, help="log frequency") + parser.add_argument("--save-frequency", type=int, default=1_000, help="save frequency") + parser.add_argument("--valid-frequency", type=int, default=1_000, help="validation frequency") + parser.add_argument( + "--metrics", + type=str, + nargs="+", + default=["mae", "rmse", "1px", "3px", "5px", "relepe"], + help="metrics to log", + choices=AVAILABLE_METRICS, + ) + + # distributed parameters + parser.add_argument("--world-size", type=int, default=8, help="number of distributed processes") + parser.add_argument("--dist-url", type=str, default="env://", help="url used to set up distributed training") + parser.add_argument("--device", type=str, default="cuda", help="device to use for training") + + # weights API + parser.add_argument("--weights", type=str, default=None, help="weights API url") + parser.add_argument( + "--resume-path", type=str, default=None, help="a path from which to resume or start fine-tuning" + ) + parser.add_argument("--resume-schedule", action="store_true", help="resume optimizer state") + + # padder parameters + parser.add_argument("--padder-type", type=str, default="kitti", help="padder type", choices=["kitti", "sintel"]) + return parser + + +if __name__ == "__main__": + args = get_args_parser().parse_args() + main(args) diff --git a/references/depth/stereo/transforms.py b/references/depth/stereo/transforms.py new file mode 100644 index 00000000000..9c4a6bab6d3 --- /dev/null +++ b/references/depth/stereo/transforms.py @@ -0,0 +1,650 @@ +import random +from typing import Callable, List, Optional, Sequence, Tuple, Union + +import numpy as np +import PIL.Image +import torch +import torchvision.transforms as T +import torchvision.transforms.functional as F +from torch import Tensor + +T_FLOW = Union[Tensor, np.ndarray, None] +T_MASK = Union[Tensor, np.ndarray, None] +T_STEREO_TENSOR = Tuple[Tensor, Tensor] +T_COLOR_AUG_PARAM = Union[float, Tuple[float, 
float]] + + +def rand_float_range(size: Sequence[int], low: float, high: float) -> Tensor: + # rescales torch.rand's [0, 1) samples: a draw of 0 maps to `high`, draws approaching 1 approach `low` + return (low - high) * torch.rand(size) + high + + +class InterpolationStrategy: + # Callable returning an interpolation mode picked at random from the modes enabled by `mode` + + _valid_modes: List[str] = ["mixed", "bicubic", "bilinear"] + + def __init__(self, mode: str = "mixed") -> None: + if mode not in self._valid_modes: + raise ValueError(f"Invalid interpolation mode: {mode}. Valid modes are: {self._valid_modes}") + + if mode == "mixed": + self.strategies = [F.InterpolationMode.BILINEAR, F.InterpolationMode.BICUBIC] + elif mode == "bicubic": + self.strategies = [F.InterpolationMode.BICUBIC] + elif mode == "bilinear": + self.strategies = [F.InterpolationMode.BILINEAR] + + def __call__(self) -> F.InterpolationMode: + return random.choice(self.strategies) + + # class-level membership check, callable without building an instance; + # a classmethod always receives the class as its first argument, hence the `cls` parameter + @classmethod + def is_valid(cls, mode: str) -> bool: + return mode in cls._valid_modes + + # a property getter is invoked with the instance, hence the `self` parameter + @property + def valid_modes(self) -> List[str]: + return self._valid_modes + + +class ValidateModelInput(torch.nn.Module): + # Pass-through transform that checks the shape and dtypes to make sure the model gets what it expects + def forward(self, images: T_STEREO_TENSOR, disparities: T_FLOW, masks: T_MASK): + if images[0].shape != images[1].shape: + raise ValueError("img1 and img2 should have the same shape.") + h, w = images[0].shape[-2:] + if disparities[0] is not None and disparities[0].shape != (1, h, w): + raise ValueError(f"disparities[0].shape should be (1, {h}, {w}) instead of {disparities[0].shape}") + if masks[0] is not None: + if masks[0].shape != (h, w): + raise ValueError(f"masks[0].shape should be ({h}, {w}) instead of {masks[0].shape}") + if masks[0].dtype != torch.bool: + raise TypeError(f"masks[0] should be of dtype torch.bool instead of {masks[0].dtype}") + + return images, disparities, masks + + +class ConvertToGrayscale(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward( + self, + images: Tuple[PIL.Image.Image, PIL.Image.Image], + 
disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + img_left = F.rgb_to_grayscale(images[0], num_output_channels=3) + img_right = F.rgb_to_grayscale(images[1], num_output_channels=3) + + return (img_left, img_right), disparities, masks + + +class MakeValidDisparityMask(torch.nn.Module): + def __init__(self, max_disparity: Optional[int] = 256) -> None: + super().__init__() + self.max_disparity = max_disparity + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + valid_masks = tuple( + torch.ones(images[idx].shape[-2:], dtype=torch.bool, device=images[idx].device) if mask is None else mask + for idx, mask in enumerate(masks) + ) + + valid_masks = tuple( + torch.logical_and(mask, disparity > 0).squeeze(0) if disparity is not None else mask + for mask, disparity in zip(valid_masks, disparities) + ) + + if self.max_disparity is not None: + valid_masks = tuple( + torch.logical_and(mask, disparity < self.max_disparity).squeeze(0) if disparity is not None else mask + for mask, disparity in zip(valid_masks, disparities) + ) + + return images, disparities, valid_masks + + +class ToGPU(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + dev_images = tuple(image.cuda() for image in images) + dev_disparities = tuple(map(lambda x: x.cuda() if x is not None else None, disparities)) + dev_masks = tuple(map(lambda x: x.cuda() if x is not None else None, masks)) + return dev_images, dev_disparities, dev_masks + + +class ConvertImageDtype(torch.nn.Module): + def __init__(self, dtype: torch.dtype): + super().__init__() + self.dtype = 
dtype + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + img_left = F.convert_image_dtype(images[0], dtype=self.dtype) + img_right = F.convert_image_dtype(images[1], dtype=self.dtype) + + img_left = img_left.contiguous() + img_right = img_right.contiguous() + + return (img_left, img_right), disparities, masks + + +class Normalize(torch.nn.Module): + def __init__(self, mean: List[float], std: List[float]) -> None: + super().__init__() + self.mean = mean + self.std = std + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + + img_left = F.normalize(images[0], mean=self.mean, std=self.std) + img_right = F.normalize(images[1], mean=self.mean, std=self.std) + + img_left = img_left.contiguous() + img_right = img_right.contiguous() + + return (img_left, img_right), disparities, masks + + +class ToTensor(torch.nn.Module): + def forward( + self, + images: Tuple[PIL.Image.Image, PIL.Image.Image], + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + if images[0] is None: + raise ValueError("img_left is None") + if images[1] is None: + raise ValueError("img_right is None") + + img_left = F.pil_to_tensor(images[0]) + img_right = F.pil_to_tensor(images[1]) + disparity_tensors = () + mask_tensors = () + + for idx in range(2): + disparity_tensors += (torch.from_numpy(disparities[idx]),) if disparities[idx] is not None else (None,) + mask_tensors += (torch.from_numpy(masks[idx]),) if masks[idx] is not None else (None,) + + return (img_left, img_right), disparity_tensors, mask_tensors + + +class AsymmetricColorJitter(T.ColorJitter): + # p determines the probability of doing asymmetric vs symmetric 
color jittering + def __init__( + self, + brightness: T_COLOR_AUG_PARAM = 0, + contrast: T_COLOR_AUG_PARAM = 0, + saturation: T_COLOR_AUG_PARAM = 0, + hue: T_COLOR_AUG_PARAM = 0, + p: float = 0.2, + ): + super().__init__(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue) + self.p = p + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + + if torch.rand(1) < self.p: + # asymmetric: different transform for img1 and img2 + img_left = super().forward(images[0]) + img_right = super().forward(images[1]) + else: + # symmetric: same transform for img1 and img2 + batch = torch.stack(images) + batch = super().forward(batch) + img_left, img_right = batch[0], batch[1] + + return (img_left, img_right), disparities, masks + + +class AsymetricGammaAdjust(torch.nn.Module): + # Gamma correction of a stereo pair; with probability p each image gets its own gamma, + # otherwise both images share a single gamma. Disparities and masks pass through untouched. + def __init__(self, p: float, gamma_range: Tuple[float, float], gain: float = 1) -> None: + super().__init__() + self.gamma_range = gamma_range + self.gain = gain + self.p = p + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + + gamma = rand_float_range((1,), low=self.gamma_range[0], high=self.gamma_range[1]).item() + + if torch.rand(1) < self.p: + # asymmetric: different transform for img1 and img2 + # the right image needs its own draw, otherwise this branch is identical to the symmetric one + gamma_right = rand_float_range((1,), low=self.gamma_range[0], high=self.gamma_range[1]).item() + img_left = F.adjust_gamma(images[0], gamma, gain=self.gain) + img_right = F.adjust_gamma(images[1], gamma_right, gain=self.gain) + else: + # symmetric: same transform for img1 and img2 + batch = torch.stack(images) + batch = F.adjust_gamma(batch, gamma, gain=self.gain) + img_left, img_right = batch[0], batch[1] + + return (img_left, img_right), disparities, masks + + +class RandomErase(torch.nn.Module): + # Produces multiple symmetric random erasures + # these can be viewed as occlusions present in both 
camera views. + # Similarly to Optical Flow occlusion prediction tasks, we mask these pixels in the disparity map + def __init__( + self, + p: float = 0.5, + erase_px_range: Tuple[int, int] = (50, 100), + value: Union[Tensor, float] = 0, + inplace: bool = False, + max_erase: int = 2, + ): + super().__init__() + self.min_px_erase = erase_px_range[0] + self.max_px_erase = erase_px_range[1] + if self.max_px_erase < 0: + raise ValueError("erase_px_range[1] should be equal or greater than 0") + if self.min_px_erase < 0: + raise ValueError("erase_px_range[0] should be equal or greater than 0") + if self.min_px_erase > self.max_px_erase: + raise ValueError("erase_px_range[0] should be equal or lower than erase_px_range[1]") + + self.p = p + self.value = value + self.inplace = inplace + self.max_erase = max_erase + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: T_STEREO_TENSOR, + masks: T_STEREO_TENSOR, + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + + # p is the probability of applying the erasures, so leave the sample untouched in the remaining draws + if torch.rand(1) >= self.p: + return images, disparities, masks + + image_left, image_right = images + mask_left, mask_right = masks + # torch.randint's upper bound is exclusive: between 0 and max_erase - 1 erasures are applied + for _ in range(torch.randint(self.max_erase, size=(1,)).item()): + y, x, h, w, v = self._get_params(image_left) + image_right = F.erase(image_right, y, x, h, w, v, self.inplace) + image_left = F.erase(image_left, y, x, h, w, v, self.inplace) + # similarly to optical flow occlusion prediction, we consider + # any erasure pixels that are in both images to be occluded therefore + # we mark them as invalid + if mask_left is not None: + mask_left = F.erase(mask_left, y, x, h, w, False, self.inplace) + if mask_right is not None: + mask_right = F.erase(mask_right, y, x, h, w, False, self.inplace) + + return (image_left, image_right), disparities, (mask_left, mask_right) + + def _get_params(self, img: torch.Tensor) -> Tuple[int, int, int, int, float]: + img_h, img_w = img.shape[-2:] + crop_h, crop_w = ( + random.randint(self.min_px_erase, 
self.max_px_erase), + random.randint(self.min_px_erase, self.max_px_erase), + ) + crop_x, crop_y = (random.randint(0, img_w - crop_w), random.randint(0, img_h - crop_h)) + + return crop_y, crop_x, crop_h, crop_w, self.value + + +class RandomOcclusion(torch.nn.Module): + # This adds an occlusion in the right image + # the occluded patch works as a patch erase where the erase value is the mean + # of the pixels from the selected zone + def __init__(self, p: float = 0.5, occlusion_px_range: Tuple[int, int] = (50, 100), inplace: bool = False): + super().__init__() + + self.min_px_occlusion = occlusion_px_range[0] + self.max_px_occlusion = occlusion_px_range[1] + + if self.max_px_occlusion < 0: + raise ValueError("occlusion_px_range[1] should be greater or equal than 0") + if self.min_px_occlusion < 0: + raise ValueError("occlusion_px_range[0] should be greater or equal than 0") + if self.min_px_occlusion > self.max_px_occlusion: + raise ValueError("occlusion_px_range[0] should be lower than occlusion_px_range[1]") + + self.p = p + self.inplace = inplace + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: T_STEREO_TENSOR, + masks: T_STEREO_TENSOR, + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + + left_image, right_image = images + + # p is the probability of occluding the right image, so leave the sample untouched in the remaining draws + if torch.rand(1) >= self.p: + return images, disparities, masks + + y, x, h, w, v = self._get_params(right_image) + right_image = F.erase(right_image, y, x, h, w, v, self.inplace) + + return ((left_image, right_image), disparities, masks) + + def _get_params(self, img: torch.Tensor) -> Tuple[int, int, int, int, float]: + img_h, img_w = img.shape[-2:] + crop_h, crop_w = ( + random.randint(self.min_px_occlusion, self.max_px_occlusion), + random.randint(self.min_px_occlusion, self.max_px_occlusion), + ) + + crop_x, crop_y = (random.randint(0, img_w - crop_w), random.randint(0, img_h - crop_h)) + occlusion_value = img[..., crop_y : crop_y + crop_h, crop_x : crop_x + crop_w].mean(dim=(-2, -1), 
keepdim=True) + + return (crop_y, crop_x, crop_h, crop_w, occlusion_value) + + +class RandomSpatialShift(torch.nn.Module): + # This transform applies a vertical shift and a slight angle rotation and the same time + def __init__( + self, p: float = 0.5, max_angle: float = 0.1, max_px_shift: int = 2, interpolation_type: str = "bilinear" + ) -> None: + super().__init__() + self.p = p + self.max_angle = max_angle + self.max_px_shift = max_px_shift + self._interpolation_mode_strategy = InterpolationStrategy(interpolation_type) + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: T_STEREO_TENSOR, + masks: T_STEREO_TENSOR, + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + # the transform is applied only on the right image + # in order to mimic slight calibration issues + img_left, img_right = images + + INTERP_MODE = self._interpolation_mode_strategy() + + if torch.rand(1) < self.p: + # [0, 1] -> [-a, a] + shift = rand_float_range((1,), low=-self.max_px_shift, high=self.max_px_shift).item() + angle = rand_float_range((1,), low=-self.max_angle, high=self.max_angle).item() + # sample center point for the rotation matrix + y = torch.randint(size=(1,), low=0, high=img_right.shape[-2]).item() + x = torch.randint(size=(1,), low=0, high=img_right.shape[-1]).item() + # apply affine transformations + img_right = F.affine( + img_right, + angle=angle, + translate=[0, shift], # translation only on the y-axis + center=[x, y], + scale=1.0, + shear=0.0, + interpolation=INTERP_MODE, + ) + + return ((img_left, img_right), disparities, masks) + + +class RandomHorizontalFlip(torch.nn.Module): + def __init__(self, p: float = 0.5) -> None: + super().__init__() + self.p = p + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + + img_left, img_right = images + dsp_left, dsp_right = disparities + mask_left, 
mask_right = masks + + if dsp_right is not None and torch.rand(1) < self.p: + img_left, img_right = F.hflip(img_left), F.hflip(img_right) + dsp_left, dsp_right = F.hflip(dsp_left), F.hflip(dsp_right) + if mask_left is not None and mask_right is not None: + mask_left, mask_right = F.hflip(mask_left), F.hflip(mask_right) + return ((img_right, img_left), (dsp_right, dsp_left), (mask_right, mask_left)) + + return images, disparities, masks + + +class Resize(torch.nn.Module): + def __init__(self, resize_size: Tuple[int, ...], interpolation_type: str = "bilinear") -> None: + super().__init__() + self.resize_size = list(resize_size) # doing this to keep mypy happy + self._interpolation_mode_strategy = InterpolationStrategy(interpolation_type) + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + resized_images = () + resized_disparities = () + resized_masks = () + + INTERP_MODE = self._interpolation_mode_strategy() + + for img in images: + # We hard-code antialias=False to preserve results after we changed + # its default from None to True (see + # https://github.com/pytorch/vision/pull/7160) + # TODO: we could re-train the stereo models with antialias=True? 
+ resized_images += (F.resize(img, self.resize_size, interpolation=INTERP_MODE, antialias=False),) + + for dsp in disparities: + if dsp is not None: + # rescale disparity to match the new image size + scale_x = self.resize_size[1] / dsp.shape[-1] + resized_disparities += (F.resize(dsp, self.resize_size, interpolation=INTERP_MODE) * scale_x,) + else: + resized_disparities += (None,) + + for mask in masks: + if mask is not None: + resized_masks += ( + # we squeeze and unsqueeze because the API requires > 3D tensors + F.resize( + mask.unsqueeze(0), + self.resize_size, + interpolation=F.InterpolationMode.NEAREST, + ).squeeze(0), + ) + else: + resized_masks += (None,) + + return resized_images, resized_disparities, resized_masks + + +class RandomRescaleAndCrop(torch.nn.Module): + # This transform will resize the input with a given proba, and then crop it. + # These are the reversed operations of the built-in RandomResizedCrop, + # although the order of the operations doesn't matter too much: resizing a + # crop would give the same result as cropping a resized image, up to + # interpolation artifact at the borders of the output. + # + # The reason we don't rely on RandomResizedCrop is because of a significant + # difference in the parametrization of both transforms, in particular, + # because of the way the random parameters are sampled in both transforms, + # which leads to fairly different results (and different epe). 
For more details see + # https://github.com/pytorch/vision/pull/5026/files#r762932579 + def __init__( + self, + crop_size: Tuple[int, int], + scale_range: Tuple[float, float] = (-0.2, 0.5), + rescale_prob: float = 0.8, + scaling_type: str = "exponential", + interpolation_type: str = "bilinear", + ) -> None: + super().__init__() + self.crop_size = crop_size + self.min_scale = scale_range[0] + self.max_scale = scale_range[1] + self.rescale_prob = rescale_prob + self.scaling_type = scaling_type + self._interpolation_mode_strategy = InterpolationStrategy(interpolation_type) + + if self.scaling_type == "linear" and self.min_scale < 0: + raise ValueError("min_scale must be >= 0 for linear scaling") + + def forward( + self, + images: T_STEREO_TENSOR, + disparities: Tuple[T_FLOW, T_FLOW], + masks: Tuple[T_MASK, T_MASK], + ) -> Tuple[T_STEREO_TENSOR, Tuple[T_FLOW, T_FLOW], Tuple[T_MASK, T_MASK]]: + + img_left, img_right = images + dsp_left, dsp_right = disparities + mask_left, mask_right = masks + INTERP_MODE = self._interpolation_mode_strategy() + + # randomly sample scale + h, w = img_left.shape[-2:] + # Note: in original code, they use + 1 instead of + 8 for sparse datasets (e.g. Kitti) + # It shouldn't matter much + min_scale = max((self.crop_size[0] + 8) / h, (self.crop_size[1] + 8) / w) + + # exponential scaling will draw a random scale in (min_scale, max_scale) and then raise + # 2 to the power of that random value. This final scale distribution will have a different + # mean and variance than a uniform distribution. Note that a scale of 1 will result in + # a rescaling of 2X the original size, whereas a scale of -1 will result in a rescaling + # of 0.5X the original size. 
+ if self.scaling_type == "exponential": + scale = 2 ** torch.empty(1, dtype=torch.float32).uniform_(self.min_scale, self.max_scale).item() + # linear scaling will draw a random scale in (min_scale, max_scale) + elif self.scaling_type == "linear": + scale = torch.empty(1, dtype=torch.float32).uniform_(self.min_scale, self.max_scale).item() + + scale = max(scale, min_scale) + + new_h, new_w = round(h * scale), round(w * scale) + + if torch.rand(1).item() < self.rescale_prob: + # rescale the images + img_left = F.resize(img_left, size=(new_h, new_w), interpolation=INTERP_MODE) + img_right = F.resize(img_right, size=(new_h, new_w), interpolation=INTERP_MODE) + + resized_masks, resized_disparities = (), () + + for disparity, mask in zip(disparities, masks): + if disparity is not None: + if mask is None: + resized_disparity = F.resize(disparity, size=(new_h, new_w), interpolation=INTERP_MODE) + # rescale the disparity + resized_disparity = ( + resized_disparity * torch.tensor([scale], device=resized_disparity.device)[:, None, None] + ) + resized_mask = None + else: + resized_disparity, resized_mask = _resize_sparse_flow( + disparity, mask, scale_x=scale, scale_y=scale + ) + resized_masks += (resized_mask,) + resized_disparities += (resized_disparity,) + + else: + resized_disparities = disparities + resized_masks = masks + + disparities = resized_disparities + masks = resized_masks + + # Note: For sparse datasets (Kitti), the original code uses a "margin" + # See e.g. 
https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py#L220:L220 + # We don't, not sure if it matters much + y0 = torch.randint(0, img_left.shape[1] - self.crop_size[0], size=(1,)).item() + x0 = torch.randint(0, img_right.shape[2] - self.crop_size[1], size=(1,)).item() + + img_left = F.crop(img_left, y0, x0, self.crop_size[0], self.crop_size[1]) + img_right = F.crop(img_right, y0, x0, self.crop_size[0], self.crop_size[1]) + if dsp_left is not None: + dsp_left = F.crop(disparities[0], y0, x0, self.crop_size[0], self.crop_size[1]) + if dsp_right is not None: + dsp_right = F.crop(disparities[1], y0, x0, self.crop_size[0], self.crop_size[1]) + + cropped_masks = () + for mask in masks: + if mask is not None: + mask = F.crop(mask, y0, x0, self.crop_size[0], self.crop_size[1]) + cropped_masks += (mask,) + + return ((img_left, img_right), (dsp_left, dsp_right), cropped_masks) + + +def _resize_sparse_flow( + flow: Tensor, valid_flow_mask: Tensor, scale_x: float = 1.0, scale_y: float = 0.0 +) -> Tuple[Tensor, Tensor]: + # This resizes both the flow and the valid_flow_mask mask (which is assumed to be reasonably sparse) + # There are as-many non-zero values in the original flow as in the resized flow (up to OOB) + # So for example if scale_x = scale_y = 2, the sparsity of the output flow is multiplied by 4 + + h, w = flow.shape[-2:] + + h_new = int(round(h * scale_y)) + w_new = int(round(w * scale_x)) + flow_new = torch.zeros(size=[1, h_new, w_new], dtype=flow.dtype) + valid_new = torch.zeros(size=[h_new, w_new], dtype=valid_flow_mask.dtype) + + jj, ii = torch.meshgrid(torch.arange(w), torch.arange(h), indexing="xy") + + ii_valid, jj_valid = ii[valid_flow_mask], jj[valid_flow_mask] + + ii_valid_new = torch.round(ii_valid.to(float) * scale_y).to(torch.long) + jj_valid_new = torch.round(jj_valid.to(float) * scale_x).to(torch.long) + + within_bounds_mask = (0 <= ii_valid_new) & (ii_valid_new < h_new) & (0 <= jj_valid_new) & (jj_valid_new < w_new) + + ii_valid = 
ii_valid[within_bounds_mask] + jj_valid = jj_valid[within_bounds_mask] + ii_valid_new = ii_valid_new[within_bounds_mask] + jj_valid_new = jj_valid_new[within_bounds_mask] + + valid_flow_new = flow[:, ii_valid, jj_valid] + valid_flow_new *= scale_x + + flow_new[:, ii_valid_new, jj_valid_new] = valid_flow_new + valid_new[ii_valid_new, jj_valid_new] = valid_flow_mask[ii_valid, jj_valid] + + return flow_new, valid_new.bool() + + +class Compose(torch.nn.Module): + def __init__(self, transforms: List[Callable]): + super().__init__() + self.transforms = transforms + + @torch.inference_mode() + def forward(self, images, disparities, masks): + for t in self.transforms: + images, disparities, masks = t(images, disparities, masks) + return images, disparities, masks diff --git a/references/depth/stereo/utils/__init__.py b/references/depth/stereo/utils/__init__.py new file mode 100644 index 00000000000..4dacbe61ba0 --- /dev/null +++ b/references/depth/stereo/utils/__init__.py @@ -0,0 +1,6 @@ +from .losses import * +from .metrics import * +from .distributed import * +from .logger import * +from .padder import * +from .norm import * diff --git a/references/depth/stereo/utils/distributed.py b/references/depth/stereo/utils/distributed.py new file mode 100644 index 00000000000..228aa2a0f9a --- /dev/null +++ b/references/depth/stereo/utils/distributed.py @@ -0,0 +1,60 @@ +import os + +import torch +import torch.distributed as dist + + +def _redefine_print(is_main): + """disables printing when not in main process""" + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if is_main or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def setup_ddp(args): + # Set the local_rank, rank, and world_size values as args fields + # This is done differently depending on how we're running the script. 
We + # currently support either torchrun or the custom run_with_submitit.py + # If you're confused (like I was), this might help a bit + # https://discuss.pytorch.org/t/what-is-the-difference-between-rank-and-local-rank/61940/2 + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + elif "SLURM_PROCID" in os.environ: + args.rank = int(os.environ["SLURM_PROCID"]) + args.gpu = args.rank % torch.cuda.device_count() + elif hasattr(args, "rank"): + pass + else: + print("Not using distributed mode") + args.distributed = False + args.world_size = 1 + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + dist.init_process_group( + backend="nccl", + rank=args.rank, + world_size=args.world_size, + init_method=args.dist_url, + ) + torch.distributed.barrier() + _redefine_print(is_main=(args.rank == 0)) + + +def reduce_across_processes(val): + t = torch.tensor(val, device="cuda") + dist.barrier() + dist.all_reduce(t) + return t diff --git a/references/depth/stereo/utils/logger.py b/references/depth/stereo/utils/logger.py new file mode 100644 index 00000000000..803e9aebd7b --- /dev/null +++ b/references/depth/stereo/utils/logger.py @@ -0,0 +1,153 @@ +import datetime +import time +from collections import defaultdict, deque + +import torch + +from .distributed import reduce_across_processes + + +class SmoothedValue: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt="{median:.4f} ({global_avg:.4f})"): + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + t = reduce_across_processes([self.count, self.total]) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) + + +class MetricLogger: + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + if not isinstance(v, (float, int)): + raise TypeError( + f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + ) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append(f"{name}: {str(meter)}") + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, **kwargs): + self.meters[name] = SmoothedValue(**kwargs) + + def log_every(self, iterable, print_freq=5, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + 
"d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if print_freq is not None and i % print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"{header} Total time: {total_time_str}") diff --git a/references/depth/stereo/utils/losses.py b/references/depth/stereo/utils/losses.py new file mode 100644 index 00000000000..1c21353a056 --- /dev/null +++ b/references/depth/stereo/utils/losses.py @@ -0,0 +1,503 @@ +from typing import List, Optional + +import torch +from torch import nn, Tensor +from torch.nn import functional as F +from torchvision.prototype.models.depth.stereo.raft_stereo import grid_sample, make_coords_grid + + +def make_gaussian_kernel(kernel_size: int, sigma: float) -> torch.Tensor: + """Function to create a 2D Gaussian kernel.""" + + x = torch.arange(kernel_size, dtype=torch.float32) + y = torch.arange(kernel_size, dtype=torch.float32) + x = x - (kernel_size - 1) / 2 + y = y - (kernel_size - 1) / 2 + x, y = torch.meshgrid(x, y, 
indexing="ij") + grid = (x**2 + y**2) / (2 * sigma**2) + kernel = torch.exp(-grid) + kernel = kernel / kernel.sum() + return kernel + + +def _sequence_loss_fn( + flow_preds: List[Tensor], + flow_gt: Tensor, + valid_flow_mask: Optional[Tensor], + gamma: Tensor, + max_flow: int = 256, + exclude_large: bool = False, + weights: Optional[Tensor] = None, +): + """Loss function defined over sequence of flow predictions""" + torch._assert( + gamma < 1, + "sequence_loss: `gamma` must be lower than 1, but got {}".format(gamma), + ) + + if exclude_large: + # exclude invalid pixels and extremely large diplacements + flow_norm = torch.sum(flow_gt**2, dim=1).sqrt() + if valid_flow_mask is not None: + valid_flow_mask = valid_flow_mask & (flow_norm < max_flow) + else: + valid_flow_mask = flow_norm < max_flow + + if valid_flow_mask is not None: + valid_flow_mask = valid_flow_mask.unsqueeze(1) + flow_preds = torch.stack(flow_preds) # shape = (num_flow_updates, batch_size, 2, H, W) + + abs_diff = (flow_preds - flow_gt).abs() + if valid_flow_mask is not None: + abs_diff = abs_diff * valid_flow_mask.unsqueeze(0) + + abs_diff = abs_diff.mean(axis=(1, 2, 3, 4)) + num_predictions = flow_preds.shape[0] + + # allocating on CPU and moving to device during run-time can force + # an unwanted GPU synchronization that produces a large overhead + if weights is None or len(weights) != num_predictions: + weights = gamma ** torch.arange(num_predictions - 1, -1, -1, device=flow_preds.device, dtype=flow_preds.dtype) + + flow_loss = (abs_diff * weights).sum() + return flow_loss, weights + + +class SequenceLoss(nn.Module): + def __init__(self, gamma: float = 0.8, max_flow: int = 256, exclude_large_flows: bool = False) -> None: + """ + Args: + gamma: value for the exponential weighting of the loss across frames + max_flow: maximum flow value to exclude + exclude_large_flows: whether to exclude large flows + """ + + super().__init__() + self.max_flow = max_flow + self.excluding_large = exclude_large_flows 
+ self.register_buffer("gamma", torch.tensor([gamma])) + # cache the scale factor for the loss + self._weights = None + + def forward(self, flow_preds: List[Tensor], flow_gt: Tensor, valid_flow_mask: Optional[Tensor]) -> Tensor: + """ + Args: + flow_preds: list of flow predictions of shape (batch_size, C, H, W) + flow_gt: ground truth flow of shape (batch_size, C, H, W) + valid_flow_mask: mask of valid flow pixels of shape (batch_size, H, W) + """ + loss, weights = _sequence_loss_fn( + flow_preds, flow_gt, valid_flow_mask, self.gamma, self.max_flow, self.excluding_large, self._weights + ) + self._weights = weights + return loss + + def set_gamma(self, gamma: float) -> None: + self.gamma.fill_(gamma) + # reset the cached scale factor + self._weights = None + + +def _ssim_loss_fn( + source: Tensor, + reference: Tensor, + kernel: Tensor, + eps: float = 1e-8, + c1: float = 0.01**2, + c2: float = 0.03**2, + use_padding: bool = False, +) -> Tensor: + # ref: Algorithm section: https://en.wikipedia.org/wiki/Structural_similarity + # ref: Alternative implementation: https://kornia.readthedocs.io/en/latest/_modules/kornia/metrics/ssim.html#ssim + + torch._assert( + source.ndim == reference.ndim == 4, + "SSIM: `source` and `reference` must be 4-dimensional tensors", + ) + + torch._assert( + source.shape == reference.shape, + "SSIM: `source` and `reference` must have the same shape, but got {} and {}".format( + source.shape, reference.shape + ), + ) + + B, C, H, W = source.shape + kernel = kernel.unsqueeze(0).unsqueeze(0).repeat(C, 1, 1, 1) + if use_padding: + pad_size = kernel.shape[2] // 2 + source = F.pad(source, (pad_size, pad_size, pad_size, pad_size), "reflect") + reference = F.pad(reference, (pad_size, pad_size, pad_size, pad_size), "reflect") + + mu1 = F.conv2d(source, kernel, groups=C) + mu2 = F.conv2d(reference, kernel, groups=C) + + mu1_sq = mu1.pow(2) + mu2_sq = mu2.pow(2) + + mu1_mu2 = mu1 * mu2 + mu_img1_sq = F.conv2d(source.pow(2), kernel, groups=C) + mu_img2_sq 
= F.conv2d(reference.pow(2), kernel, groups=C) + mu_img1_mu2 = F.conv2d(source * reference, kernel, groups=C) + + sigma1_sq = mu_img1_sq - mu1_sq + sigma2_sq = mu_img2_sq - mu2_sq + sigma12 = mu_img1_mu2 - mu1_mu2 + + numerator = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2) + denominator = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2) + ssim = numerator / (denominator + eps) + + # doing 1 - ssim because we want to maximize the ssim + return 1 - ssim.mean(dim=(1, 2, 3)) + + +class SSIM(nn.Module): + def __init__( + self, + kernel_size: int = 11, + max_val: float = 1.0, + sigma: float = 1.5, + eps: float = 1e-12, + use_padding: bool = True, + ) -> None: + """SSIM loss function. + + Args: + kernel_size: size of the Gaussian kernel + max_val: constant scaling factor + sigma: sigma of the Gaussian kernel + eps: constant for division by zero + use_padding: whether to pad the input tensor such that we have a score for each pixel + """ + super().__init__() + + self.kernel_size = kernel_size + self.max_val = max_val + self.sigma = sigma + + gaussian_kernel = make_gaussian_kernel(kernel_size, sigma) + self.register_buffer("gaussian_kernel", gaussian_kernel) + + self.c1 = (0.01 * self.max_val) ** 2 + self.c2 = (0.03 * self.max_val) ** 2 + + self.use_padding = use_padding + self.eps = eps + + def forward(self, source: torch.Tensor, reference: torch.Tensor) -> torch.Tensor: + """ + Args: + source: source image of shape (batch_size, C, H, W) + reference: reference image of shape (batch_size, C, H, W) + + Returns: + SSIM loss of shape (batch_size,) + """ + return _ssim_loss_fn( + source, + reference, + kernel=self.gaussian_kernel, + c1=self.c1, + c2=self.c2, + use_padding=self.use_padding, + eps=self.eps, + ) + + +def _smoothness_loss_fn(img_gx: Tensor, img_gy: Tensor, val_gx: Tensor, val_gy: Tensor): + # ref: https://github.com/nianticlabs/monodepth2/blob/b676244e5a1ca55564eb5d16ab521a48f823af31/layers.py#L202 + + torch._assert( + img_gx.ndim >= 3, + "smoothness_loss: 
`img_gx` must be at least 3-dimensional tensor of shape (..., C, H, W)", + ) + + torch._assert( + img_gx.ndim == val_gx.ndim, + "smoothness_loss: `img_gx` and `depth_gx` must have the same dimensionality, but got {} and {}".format( + img_gx.ndim, val_gx.ndim + ), + ) + + for idx in range(img_gx.ndim): + torch._assert( + (img_gx.shape[idx] == val_gx.shape[idx] or (img_gx.shape[idx] == 1 or val_gx.shape[idx] == 1)), + "smoothness_loss: `img_gx` and `depth_gx` must have either the same shape or broadcastable shape, but got {} and {}".format( + img_gx.shape, val_gx.shape + ), + ) + + # -3 is channel dimension + weights_x = torch.exp(-torch.mean(torch.abs(val_gx), axis=-3, keepdim=True)) + weights_y = torch.exp(-torch.mean(torch.abs(val_gy), axis=-3, keepdim=True)) + + smoothness_x = img_gx * weights_x + smoothness_y = img_gy * weights_y + + smoothness = (torch.abs(smoothness_x) + torch.abs(smoothness_y)).mean(axis=(-3, -2, -1)) + return smoothness + + +class SmoothnessLoss(nn.Module): + def __init__(self) -> None: + super().__init__() + + def _x_gradient(self, img: Tensor) -> Tensor: + if img.ndim > 4: + original_shape = img.shape + is_reshaped = True + img = img.reshape(-1, *original_shape[-3:]) + else: + is_reshaped = False + + padded = F.pad(img, (0, 1, 0, 0), mode="replicate") + grad = padded[..., :, :-1] - padded[..., :, 1:] + if is_reshaped: + grad = grad.reshape(original_shape) + return grad + + def _y_gradient(self, x: torch.Tensor) -> torch.Tensor: + if x.ndim > 4: + original_shape = x.shape + is_reshaped = True + x = x.reshape(-1, *original_shape[-3:]) + else: + is_reshaped = False + + padded = F.pad(x, (0, 0, 0, 1), mode="replicate") + grad = padded[..., :-1, :] - padded[..., 1:, :] + if is_reshaped: + grad = grad.reshape(original_shape) + return grad + + def forward(self, images: Tensor, vals: Tensor) -> Tensor: + """ + Args: + images: tensor of shape (D1, D2, ..., DN, C, H, W) + vals: tensor of shape (D1, D2, ..., DN, 1, H, W) + + Returns: + smoothness 
loss of shape (D1, D2, ..., DN) + """ + img_gx = self._x_gradient(images) + img_gy = self._y_gradient(images) + + val_gx = self._x_gradient(vals) + val_gy = self._y_gradient(vals) + + return _smoothness_loss_fn(img_gx, img_gy, val_gx, val_gy) + + +def _flow_sequence_consistency_loss_fn( + flow_preds: List[Tensor], + gamma: float = 0.8, + resize_factor: float = 0.25, + rescale_factor: float = 0.25, + rescale_mode: str = "bilinear", + weights: Optional[Tensor] = None, +): + """Loss function defined over sequence of flow predictions""" + + # Simplified version of ref: https://arxiv.org/pdf/2006.11242.pdf + # In the original paper, an additional refinement network is used to refine a flow prediction. + # Each step performed by the recurrent module in Raft or CREStereo is a refinement step using a delta_flow update. + # which should be consistent with the previous step. In this implementation, we simplify the overall loss + # term and ignore left-right consistency loss or photometric loss which can be treated separately. 
+ + torch._assert( + rescale_factor <= 1.0, + "sequence_consistency_loss: `rescale_factor` must be less than or equal to 1, but got {}".format( + rescale_factor + ), + ) + + flow_preds = torch.stack(flow_preds) # shape = (num_flow_updates, batch_size, 2, H, W) + N, B, C, H, W = flow_preds.shape + + # rescale flow predictions to account for bilinear upsampling artifacts + if rescale_factor: + flow_preds = ( + F.interpolate( + flow_preds.view(N * B, C, H, W), scale_factor=resize_factor, mode=rescale_mode, align_corners=True + ) + ) * rescale_factor + flow_preds = torch.stack(torch.chunk(flow_preds, N, dim=0), dim=0) + + # force the next prediction to be similar to the previous prediction + abs_diff = (flow_preds[1:] - flow_preds[:-1]).square() + abs_diff = abs_diff.mean(axis=(1, 2, 3, 4)) + + num_predictions = flow_preds.shape[0] - 1 # because we are comparing differences + if weights is None or len(weights) != num_predictions: + weights = gamma ** torch.arange(num_predictions - 1, -1, -1, device=flow_preds.device, dtype=flow_preds.dtype) + + flow_loss = (abs_diff * weights).sum() + return flow_loss, weights + + +class FlowSequenceConsistencyLoss(nn.Module): + def __init__( + self, + gamma: float = 0.8, + resize_factor: float = 0.25, + rescale_factor: float = 0.25, + rescale_mode: str = "bilinear", + ) -> None: + super().__init__() + self.gamma = gamma + self.resize_factor = resize_factor + self.rescale_factor = rescale_factor + self.rescale_mode = rescale_mode + self._weights = None + + def forward(self, flow_preds: List[Tensor]) -> Tensor: + """ + Args: + flow_preds: list of tensors of shape (batch_size, C, H, W) + + Returns: + sequence consistency loss of shape (batch_size,) + """ + loss, weights = _flow_sequence_consistency_loss_fn( + flow_preds, + gamma=self.gamma, + resize_factor=self.resize_factor, + rescale_factor=self.rescale_factor, + rescale_mode=self.rescale_mode, + weights=self._weights, + ) + self._weights = weights + return loss + + def set_gamma(self, 
gamma: float) -> None: + self.gamma.fill_(gamma) + # reset the cached scale factor + self._weights = None + + +def _psnr_loss_fn(source: torch.Tensor, target: torch.Tensor, max_val: float) -> torch.Tensor: + torch._assert( + source.shape == target.shape, + "psnr_loss: source and target must have the same shape, but got {} and {}".format(source.shape, target.shape), + ) + + # ref https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio + return 10 * torch.log10(max_val**2 / ((source - target).pow(2).mean(axis=(-3, -2, -1)))) + + +class PSNRLoss(nn.Module): + def __init__(self, max_val: float = 256) -> None: + """ + Args: + max_val: maximum value of the input tensor. This refers to the maximum domain value of the input tensor. + + """ + super().__init__() + self.max_val = max_val + + def forward(self, source: Tensor, target: Tensor) -> Tensor: + """ + Args: + source: tensor of shape (D1, D2, ..., DN, C, H, W) + target: tensor of shape (D1, D2, ..., DN, C, H, W) + + Returns: + psnr loss of shape (D1, D2, ..., DN) + """ + + # multiply by -1 as we want to maximize the psnr + return -1 * _psnr_loss_fn(source, target, self.max_val) + + +class FlowPhotoMetricLoss(nn.Module): + def __init__( + self, + ssim_weight: float = 0.85, + ssim_window_size: int = 11, + ssim_max_val: float = 1.0, + ssim_sigma: float = 1.5, + ssim_eps: float = 1e-12, + ssim_use_padding: bool = True, + max_displacement_ratio: float = 0.15, + ) -> None: + super().__init__() + + self._ssim_loss = SSIM( + kernel_size=ssim_window_size, + max_val=ssim_max_val, + sigma=ssim_sigma, + eps=ssim_eps, + use_padding=ssim_use_padding, + ) + + self._L1_weight = 1 - ssim_weight + self._SSIM_weight = ssim_weight + self._max_displacement_ratio = max_displacement_ratio + + def forward( + self, + source: Tensor, + reference: Tensor, + flow_pred: Tensor, + valid_mask: Optional[Tensor] = None, + ): + """ + Args: + source: tensor of shape (B, C, H, W) + reference: tensor of shape (B, C, H, W) + flow_pred: tensor of shape (B, 
2, H, W) + valid_mask: tensor of shape (B, H, W) or None + + Returns: + photometric loss of shape + + """ + torch._assert( + source.ndim == 4, + "FlowPhotoMetricLoss: source must have 4 dimensions, but got {}".format(source.ndim), + ) + torch._assert( + reference.ndim == source.ndim, + "FlowPhotoMetricLoss: source and other must have the same number of dimensions, but got {} and {}".format( + source.ndim, reference.ndim + ), + ) + torch._assert( + flow_pred.shape[1] == 2, + "FlowPhotoMetricLoss: flow_pred must have 2 channels, but got {}".format(flow_pred.shape[1]), + ) + torch._assert( + flow_pred.ndim == 4, + "FlowPhotoMetricLoss: flow_pred must have 4 dimensions, but got {}".format(flow_pred.ndim), + ) + + B, C, H, W = source.shape + flow_channels = flow_pred.shape[1] + + max_displacements = [] + for dim in range(flow_channels): + shape_index = -1 - dim + max_displacements.append(int(self._max_displacement_ratio * source.shape[shape_index])) + + # mask out all pixels that have larger flow than the max flow allowed + max_flow_mask = torch.logical_and( + *[flow_pred[:, dim, :, :] < max_displacements[dim] for dim in range(flow_channels)] + ) + + if valid_mask is not None: + valid_mask = torch.logical_and(valid_mask, max_flow_mask).unsqueeze(1) + else: + valid_mask = max_flow_mask.unsqueeze(1) + + grid = make_coords_grid(B, H, W, device=str(source.device)) + resampled_grids = grid - flow_pred + resampled_grids = resampled_grids.permute(0, 2, 3, 1) + resampled_source = grid_sample(reference, resampled_grids, mode="bilinear") + + # compute SSIM loss + ssim_loss = self._ssim_loss(resampled_source * valid_mask, source * valid_mask) + l1_loss = (resampled_source * valid_mask - source * valid_mask).abs().mean(axis=(-3, -2, -1)) + loss = self._L1_weight * l1_loss + self._SSIM_weight * ssim_loss + + return loss.mean() diff --git a/references/depth/stereo/utils/metrics.py b/references/depth/stereo/utils/metrics.py new file mode 100644 index 00000000000..05b149fb048 --- 
/dev/null +++ b/references/depth/stereo/utils/metrics.py @@ -0,0 +1,49 @@ +from typing import Dict, List, Optional, Tuple + +from torch import Tensor + +AVAILABLE_METRICS = ["mae", "rmse", "epe", "bad1", "bad2", "epe", "1px", "3px", "5px", "fl-all", "relepe"] + + +def compute_metrics( + flow_pred: Tensor, flow_gt: Tensor, valid_flow_mask: Optional[Tensor], metrics: List[str] +) -> Tuple[Dict[str, float], int]: + for m in metrics: + if m not in AVAILABLE_METRICS: + raise ValueError(f"Invalid metric: {m}. Valid metrics are: {AVAILABLE_METRICS}") + + metrics_dict = {} + + pixels_diffs = (flow_pred - flow_gt).abs() + # there is no Y flow in Stereo Matching, therefore flow.abs() = flow.pow(2).sum(dim=1).sqrt() + flow_norm = flow_gt.abs() + + if valid_flow_mask is not None: + valid_flow_mask = valid_flow_mask.unsqueeze(1) + pixels_diffs = pixels_diffs[valid_flow_mask] + flow_norm = flow_norm[valid_flow_mask] + + num_pixels = pixels_diffs.numel() + if "bad1" in metrics: + metrics_dict["bad1"] = (pixels_diffs > 1).float().mean().item() + if "bad2" in metrics: + metrics_dict["bad2"] = (pixels_diffs > 2).float().mean().item() + + if "mae" in metrics: + metrics_dict["mae"] = pixels_diffs.mean().item() + if "rmse" in metrics: + metrics_dict["rmse"] = pixels_diffs.pow(2).mean().sqrt().item() + if "epe" in metrics: + metrics_dict["epe"] = pixels_diffs.mean().item() + if "1px" in metrics: + metrics_dict["1px"] = (pixels_diffs < 1).float().mean().item() + if "3px" in metrics: + metrics_dict["3px"] = (pixels_diffs < 3).float().mean().item() + if "5px" in metrics: + metrics_dict["5px"] = (pixels_diffs < 5).float().mean().item() + if "fl-all" in metrics: + metrics_dict["fl-all"] = ((pixels_diffs < 3) & ((pixels_diffs / flow_norm) < 0.05)).float().mean().item() * 100 + if "relepe" in metrics: + metrics_dict["relepe"] = (pixels_diffs / flow_norm).mean().item() + + return metrics_dict, num_pixels diff --git a/references/depth/stereo/utils/norm.py b/references/depth/stereo/utils/norm.py 
new file mode 100644 index 00000000000..7f6e0011160 --- /dev/null +++ b/references/depth/stereo/utils/norm.py @@ -0,0 +1,13 @@ +import torch + + +def freeze_batch_norm(model): + for m in model.modules(): + if isinstance(m, torch.nn.BatchNorm2d): + m.eval() + + +def unfreeze_batch_norm(model): + for m in model.modules(): + if isinstance(m, torch.nn.BatchNorm2d): + m.train() diff --git a/references/depth/stereo/utils/padder.py b/references/depth/stereo/utils/padder.py new file mode 100644 index 00000000000..7d2c63afba6 --- /dev/null +++ b/references/depth/stereo/utils/padder.py @@ -0,0 +1,28 @@ +import torch.nn.functional as F + + +class InputPadder: + """Pads images such that dimensions are divisible by 8""" + + # TODO: Ideally, this should be part of the eval transforms preset, instead + # of being part of the validation code. It's not obvious what a good + # solution would be, because we need to unpad the predicted flows according + # to the input images' size, and in some datasets (Kitti) images can have + # variable sizes. 
+ + def __init__(self, dims, mode="sintel"): + self.ht, self.wd = dims[-2:] + pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8 + pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8 + if mode == "sintel": + self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, pad_ht // 2, pad_ht - pad_ht // 2] + else: + self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, 0, pad_ht] + + def pad(self, *inputs): + return [F.pad(x, self._pad, mode="replicate") for x in inputs] + + def unpad(self, x): + ht, wd = x.shape[-2:] + c = [self._pad[2], ht - self._pad[3], self._pad[0], wd - self._pad[1]] + return x[..., c[0] : c[1], c[2] : c[3]] diff --git a/references/depth/stereo/visualization.py b/references/depth/stereo/visualization.py new file mode 100644 index 00000000000..07a7e7167d3 --- /dev/null +++ b/references/depth/stereo/visualization.py @@ -0,0 +1,127 @@ +import os +from typing import List + +import numpy as np +import numpy.typing as npt +import torch +from torch import Tensor +from torchvision.utils import make_grid + + +@torch.no_grad() +def make_disparity_image(disparity: Tensor): + # normalize image to [0, 1] + disparity = disparity.detach().cpu() + disparity = (disparity - disparity.min()) / (disparity.max() - disparity.min()) + return disparity + + +@torch.no_grad() +def make_disparity_image_pairs(disparity: Tensor, image: Tensor): + disparity = make_disparity_image(disparity) + # image is in [-1, 1], bring it to [0, 1] + image = image.detach().cpu() + image = image * 0.5 + 0.5 + return disparity, image + + +@torch.no_grad() +def make_disparity_sequence(disparities: List[Tensor]): + # convert each disparity to [0, 1] + for idx, disparity_batch in enumerate(disparities): + disparities[idx] = torch.stack(list(map(make_disparity_image, disparity_batch))) + # make the list into a batch + disparity_sequences = torch.stack(disparities) + return disparity_sequences + + +@torch.no_grad() +def make_pair_grid(*inputs, orientation="horizontal"): + # make a grid of images with the outputs and 
references side by side + if orientation == "horizontal": + # interleave the outputs and references + canvas = torch.zeros_like(inputs[0]) + canvas = torch.cat([canvas] * len(inputs), dim=0) + size = len(inputs) + for idx, inp in enumerate(inputs): + canvas[idx::size, ...] = inp + grid = make_grid(canvas, nrow=len(inputs), padding=16, normalize=True, scale_each=True) + elif orientation == "vertical": + # interleave the outputs and references + canvas = torch.cat(inputs, dim=0) + size = len(inputs) + for idx, inp in enumerate(inputs): + canvas[idx::size, ...] = inp + grid = make_grid(canvas, nrow=len(inputs[0]), padding=16, normalize=True, scale_each=True) + else: + raise ValueError("Unknown orientation: {}".format(orientation)) + return grid + + +@torch.no_grad() +def make_training_sample_grid( + left_images: Tensor, + right_images: Tensor, + disparities: Tensor, + masks: Tensor, + predictions: List[Tensor], +) -> npt.NDArray: + # detach images and renormalize to [0, 1] + images_left = left_images.detach().cpu() * 0.5 + 0.5 + images_right = right_images.detach().cpu() * 0.5 + 0.5 + # detach the disparties and predictions + disparities = disparities.detach().cpu() + predictions = predictions[-1].detach().cpu() + # keep only the first channel of pixels, and repeat it 3 times + disparities = disparities[:, :1, ...].repeat(1, 3, 1, 1) + predictions = predictions[:, :1, ...].repeat(1, 3, 1, 1) + # unsqueeze and repeat the masks + masks = masks.detach().cpu().unsqueeze(1).repeat(1, 3, 1, 1) + # make a grid that will self normalize across the batch + pred_grid = make_pair_grid(images_left, images_right, masks, disparities, predictions, orientation="horizontal") + pred_grid = pred_grid.permute(1, 2, 0).numpy() + pred_grid = (pred_grid * 255).astype(np.uint8) + return pred_grid + + +@torch.no_grad() +def make_disparity_sequence_grid(predictions: List[Tensor], disparities: Tensor) -> npt.NDArray: + # right most we will be adding the ground truth + seq_len = len(predictions) 
+ 1 + predictions = list(map(lambda x: x[:, :1, :, :].detach().cpu(), predictions + [disparities])) + sequence = make_disparity_sequence(predictions) + # swap axes to have the in the correct order for each batch sample + sequence = torch.swapaxes(sequence, 0, 1).contiguous().reshape(-1, 1, disparities.shape[-2], disparities.shape[-1]) + sequence = make_grid(sequence, nrow=seq_len, padding=16, normalize=True, scale_each=True) + sequence = sequence.permute(1, 2, 0).numpy() + sequence = (sequence * 255).astype(np.uint8) + return sequence + + +@torch.no_grad() +def make_prediction_image_side_to_side( + predictions: Tensor, disparities: Tensor, valid_mask: Tensor, save_path: str, prefix: str +) -> None: + import matplotlib.pyplot as plt + + # normalize the predictions and disparities in [0, 1] + predictions = (predictions - predictions.min()) / (predictions.max() - predictions.min()) + disparities = (disparities - disparities.min()) / (disparities.max() - disparities.min()) + predictions = predictions * valid_mask + disparities = disparities * valid_mask + + predictions = predictions.detach().cpu() + disparities = disparities.detach().cpu() + + for idx, (pred, gt) in enumerate(zip(predictions, disparities)): + pred = pred.permute(1, 2, 0).numpy() + gt = gt.permute(1, 2, 0).numpy() + # plot pred and gt side by side + fig, ax = plt.subplots(1, 2, figsize=(10, 5)) + ax[0].imshow(pred) + ax[0].set_title("Prediction") + ax[1].imshow(gt) + ax[1].set_title("Ground Truth") + save_name = os.path.join(save_path, "{}_{}.png".format(prefix, idx)) + plt.savefig(save_name) + plt.close() diff --git a/references/detection/README.md b/references/detection/README.md new file mode 100644 index 00000000000..d9af26523a5 --- /dev/null +++ b/references/detection/README.md @@ -0,0 +1,88 @@ +# Object detection reference training scripts + +This folder contains reference training scripts for object detection. 
+They serve as a log of how to train specific models, to provide baseline +training and evaluation scripts to quickly bootstrap research. + +To execute the example commands below you must install the following: + +``` +cython +pycocotools +matplotlib +``` + +You must modify the following flags: + +`--data-path=/path/to/coco/dataset` + +`--nproc_per_node=` + +Except otherwise noted, all models have been trained on 8x V100 GPUs. + +### Faster R-CNN ResNet-50 FPN +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 +``` + +### Faster R-CNN MobileNetV3-Large FPN +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 +``` + +### Faster R-CNN MobileNetV3-Large 320 FPN +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 +``` + +### FCOS ResNet-50 FPN +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model fcos_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --amp --weights-backbone ResNet50_Weights.IMAGENET1K_V1 +``` + +### RetinaNet +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model retinanet_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 +``` + +### SSD300 VGG16 +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model ssd300_vgg16 --epochs 120\ + --lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4\ + --weight-decay 0.0005 --data-augmentation ssd --weights-backbone 
VGG16_Weights.IMAGENET1K_FEATURES +``` + +### SSDlite320 MobileNetV3-Large +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\ + --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\ + --weight-decay 0.00004 --data-augmentation ssdlite +``` + + +### Mask R-CNN +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco --model maskrcnn_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 +``` + + +### Keypoint R-CNN +``` +torchrun --nproc_per_node=8 train.py\ + --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\ + --lr-steps 36 43 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 +``` diff --git a/references/detection/coco_eval.py b/references/detection/coco_eval.py index d758a64a909..ba1359f8c65 100644 --- a/references/detection/coco_eval.py +++ b/references/detection/coco_eval.py @@ -1,24 +1,19 @@ -import json -import tempfile - -import numpy as np import copy -import time -import torch -import torch._six +import io +from contextlib import redirect_stdout -from pycocotools.cocoeval import COCOeval -from pycocotools.coco import COCO +import numpy as np import pycocotools.mask as mask_util - -from collections import defaultdict - +import torch import utils +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval -class CocoEvaluator(object): +class CocoEvaluator: def __init__(self, coco_gt, iou_types): - assert isinstance(iou_types, (list, tuple)) + if not isinstance(iou_types, (list, tuple)): + raise TypeError(f"This constructor expects iou_types of type list or tuple, instead got {type(iou_types)}") coco_gt = copy.deepcopy(coco_gt) self.coco_gt = coco_gt @@ -36,7 +31,8 @@ def update(self, predictions): for iou_type in self.iou_types: results = self.prepare(predictions, iou_type) - coco_dt = loadRes(self.coco_gt, results) if results 
else COCO() + with redirect_stdout(io.StringIO()): + coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() coco_eval = self.coco_eval[iou_type] coco_eval.cocoDt = coco_dt @@ -56,18 +52,17 @@ def accumulate(self): def summarize(self): for iou_type, coco_eval in self.coco_eval.items(): - print("IoU metric: {}".format(iou_type)) + print(f"IoU metric: {iou_type}") coco_eval.summarize() def prepare(self, predictions, iou_type): if iou_type == "bbox": return self.prepare_for_coco_detection(predictions) - elif iou_type == "segm": + if iou_type == "segm": return self.prepare_for_coco_segmentation(predictions) - elif iou_type == "keypoints": + if iou_type == "keypoints": return self.prepare_for_coco_keypoint(predictions) - else: - raise ValueError("Unknown iou type {}".format(iou_type)) + raise ValueError(f"Unknown iou type {iou_type}") def prepare_for_coco_detection(self, predictions): coco_results = [] @@ -109,8 +104,7 @@ def prepare_for_coco_segmentation(self, predictions): labels = prediction["labels"].tolist() rles = [ - mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] - for mask in masks + mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] for mask in masks ] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") @@ -146,7 +140,7 @@ def prepare_for_coco_keypoint(self, predictions): { "image_id": original_id, "category_id": labels[k], - 'keypoints': keypoint, + "keypoints": keypoint, "score": scores[k], } for k, keypoint in enumerate(keypoints) @@ -192,158 +186,7 @@ def create_common_coco_eval(coco_eval, img_ids, eval_imgs): coco_eval._paramsEval = copy.deepcopy(coco_eval.params) -################################################################# -# From pycocotools, just removed the prints and fixed -# a Python3 bug about unicode not defined -################################################################# - -# Ideally, pycocotools wouldn't have hard-coded prints -# so that we 
could avoid copy-pasting those two functions - -def createIndex(self): - # create index - # print('creating index...') - anns, cats, imgs = {}, {}, {} - imgToAnns, catToImgs = defaultdict(list), defaultdict(list) - if 'annotations' in self.dataset: - for ann in self.dataset['annotations']: - imgToAnns[ann['image_id']].append(ann) - anns[ann['id']] = ann - - if 'images' in self.dataset: - for img in self.dataset['images']: - imgs[img['id']] = img - - if 'categories' in self.dataset: - for cat in self.dataset['categories']: - cats[cat['id']] = cat - - if 'annotations' in self.dataset and 'categories' in self.dataset: - for ann in self.dataset['annotations']: - catToImgs[ann['category_id']].append(ann['image_id']) - - # print('index created!') - - # create class members - self.anns = anns - self.imgToAnns = imgToAnns - self.catToImgs = catToImgs - self.imgs = imgs - self.cats = cats - - -maskUtils = mask_util - - -def loadRes(self, resFile): - """ - Load result file and return a result api object. 
- :param resFile (str) : file name of result file - :return: res (obj) : result api object - """ - res = COCO() - res.dataset['images'] = [img for img in self.dataset['images']] - - # print('Loading and preparing results...') - # tic = time.time() - if isinstance(resFile, torch._six.string_classes): - anns = json.load(open(resFile)) - elif type(resFile) == np.ndarray: - anns = self.loadNumpyAnnotations(resFile) - else: - anns = resFile - assert type(anns) == list, 'results in not an array of objects' - annsImgIds = [ann['image_id'] for ann in anns] - assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ - 'Results do not correspond to current coco set' - if 'caption' in anns[0]: - imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) - res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] - for id, ann in enumerate(anns): - ann['id'] = id + 1 - elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id, ann in enumerate(anns): - bb = ann['bbox'] - x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] - if 'segmentation' not in ann: - ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] - ann['area'] = bb[2] * bb[3] - ann['id'] = id + 1 - ann['iscrowd'] = 0 - elif 'segmentation' in anns[0]: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id, ann in enumerate(anns): - # now only support compressed RLE format as segmentation results - ann['area'] = maskUtils.area(ann['segmentation']) - if 'bbox' not in ann: - ann['bbox'] = maskUtils.toBbox(ann['segmentation']) - ann['id'] = id + 1 - ann['iscrowd'] = 0 - elif 'keypoints' in anns[0]: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id, ann in enumerate(anns): - s = ann['keypoints'] - x = s[0::3] - y = s[1::3] - x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) - 
ann['area'] = (x2 - x1) * (y2 - y1) - ann['id'] = id + 1 - ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] - # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) - - res.dataset['annotations'] = anns - createIndex(res) - return res - - -def evaluate(self): - ''' - Run per image evaluation on given images and store results (a list of dict) in self.evalImgs - :return: None - ''' - # tic = time.time() - # print('Running per image evaluation...') - p = self.params - # add backward compatibility if useSegm is specified in params - if p.useSegm is not None: - p.iouType = 'segm' if p.useSegm == 1 else 'bbox' - print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) - # print('Evaluate annotation type *{}*'.format(p.iouType)) - p.imgIds = list(np.unique(p.imgIds)) - if p.useCats: - p.catIds = list(np.unique(p.catIds)) - p.maxDets = sorted(p.maxDets) - self.params = p - - self._prepare() - # loop through images, area range, max detection number - catIds = p.catIds if p.useCats else [-1] - - if p.iouType == 'segm' or p.iouType == 'bbox': - computeIoU = self.computeIoU - elif p.iouType == 'keypoints': - computeIoU = self.computeOks - self.ious = { - (imgId, catId): computeIoU(imgId, catId) - for imgId in p.imgIds - for catId in catIds} - - evaluateImg = self.evaluateImg - maxDet = p.maxDets[-1] - evalImgs = [ - evaluateImg(imgId, catId, areaRng, maxDet) - for catId in catIds - for areaRng in p.areaRng - for imgId in p.imgIds - ] - # this is NOT in the pycocotools code, but could be done outside - evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) - self._paramsEval = copy.deepcopy(self.params) - # toc = time.time() - # print('DONE (t={:0.2f}s).'.format(toc-tic)) - return p.imgIds, evalImgs - -################################################################# -# end of straight copy from pycocotools, just removing the prints -################################################################# +def evaluate(imgs): + with 
redirect_stdout(io.StringIO()): + imgs.evaluate() + return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds)) diff --git a/references/detection/coco_utils.py b/references/detection/coco_utils.py index 26701a2cbee..f40dcdff783 100644 --- a/references/detection/coco_utils.py +++ b/references/detection/coco_utils.py @@ -1,34 +1,12 @@ -import copy import os -from PIL import Image import torch import torch.utils.data import torchvision - +import transforms as T from pycocotools import mask as coco_mask from pycocotools.coco import COCO -import transforms as T - - -class FilterAndRemapCocoCategories(object): - def __init__(self, categories, remap=True): - self.categories = categories - self.remap = remap - - def __call__(self, image, target): - anno = target["annotations"] - anno = [obj for obj in anno if obj["category_id"] in self.categories] - if not self.remap: - target["annotations"] = anno - return image, target - anno = copy.deepcopy(anno) - for obj in anno: - obj["category_id"] = self.categories.index(obj["category_id"]) - target["annotations"] = anno - return image, target - def convert_coco_poly_to_mask(segmentations, height, width): masks = [] @@ -47,16 +25,15 @@ def convert_coco_poly_to_mask(segmentations, height, width): return masks -class ConvertCocoPolysToMask(object): +class ConvertCocoPolysToMask: def __call__(self, image, target): w, h = image.size image_id = target["image_id"] - image_id = torch.tensor([image_id]) anno = target["annotations"] - anno = [obj for obj in anno if obj['iscrowd'] == 0] + anno = [obj for obj in anno if obj["iscrowd"] == 0] boxes = [obj["bbox"] for obj in anno] # guard against no boxes via resizing @@ -119,7 +96,7 @@ def _has_valid_annotation(anno): # if all boxes have close to zero area, there is no annotation if _has_only_empty_bbox(anno): return False - # keypoints task have a slight different critera for considering + # keypoints task have a slight different criteria 
for considering # if an annotation is valid if "keypoints" not in anno[0]: return True @@ -129,7 +106,6 @@ def _has_valid_annotation(anno): return True return False - assert isinstance(dataset, torchvision.datasets.CocoDetection) ids = [] for ds_idx, img_id in enumerate(dataset.ids): ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) @@ -147,55 +123,56 @@ def convert_to_coco_api(ds): coco_ds = COCO() # annotation IDs need to start at 1, not 0, see torchvision issue #1530 ann_id = 1 - dataset = {'images': [], 'categories': [], 'annotations': []} + dataset = {"images": [], "categories": [], "annotations": []} categories = set() for img_idx in range(len(ds)): # find better way to get target # targets = ds.get_annotations(img_idx) img, targets = ds[img_idx] - image_id = targets["image_id"].item() + image_id = targets["image_id"] img_dict = {} - img_dict['id'] = image_id - img_dict['height'] = img.shape[-2] - img_dict['width'] = img.shape[-1] - dataset['images'].append(img_dict) - bboxes = targets["boxes"] + img_dict["id"] = image_id + img_dict["height"] = img.shape[-2] + img_dict["width"] = img.shape[-1] + dataset["images"].append(img_dict) + bboxes = targets["boxes"].clone() bboxes[:, 2:] -= bboxes[:, :2] bboxes = bboxes.tolist() - labels = targets['labels'].tolist() - areas = targets['area'].tolist() - iscrowd = targets['iscrowd'].tolist() - if 'masks' in targets: - masks = targets['masks'] + labels = targets["labels"].tolist() + areas = targets["area"].tolist() + iscrowd = targets["iscrowd"].tolist() + if "masks" in targets: + masks = targets["masks"] # make masks Fortran contiguous for coco_mask masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) - if 'keypoints' in targets: - keypoints = targets['keypoints'] + if "keypoints" in targets: + keypoints = targets["keypoints"] keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() num_objs = len(bboxes) for i in range(num_objs): ann = {} - ann['image_id'] = image_id - ann['bbox'] = bboxes[i] 
- ann['category_id'] = labels[i] + ann["image_id"] = image_id + ann["bbox"] = bboxes[i] + ann["category_id"] = labels[i] categories.add(labels[i]) - ann['area'] = areas[i] - ann['iscrowd'] = iscrowd[i] - ann['id'] = ann_id - if 'masks' in targets: + ann["area"] = areas[i] + ann["iscrowd"] = iscrowd[i] + ann["id"] = ann_id + if "masks" in targets: ann["segmentation"] = coco_mask.encode(masks[i].numpy()) - if 'keypoints' in targets: - ann['keypoints'] = keypoints[i] - ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) - dataset['annotations'].append(ann) + if "keypoints" in targets: + ann["keypoints"] = keypoints[i] + ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3]) + dataset["annotations"].append(ann) ann_id += 1 - dataset['categories'] = [{'id': i} for i in sorted(categories)] + dataset["categories"] = [{"id": i} for i in sorted(categories)] coco_ds.dataset = dataset coco_ds.createIndex() return coco_ds def get_coco_api_from_dataset(dataset): + # FIXME: This is... awful? 
for _ in range(10): if isinstance(dataset, torchvision.datasets.CocoDetection): break @@ -208,11 +185,11 @@ def get_coco_api_from_dataset(dataset): class CocoDetection(torchvision.datasets.CocoDetection): def __init__(self, img_folder, ann_file, transforms): - super(CocoDetection, self).__init__(img_folder, ann_file) + super().__init__(img_folder, ann_file) self._transforms = transforms def __getitem__(self, idx): - img, target = super(CocoDetection, self).__getitem__(idx) + img, target = super().__getitem__(idx) image_id = self.ids[idx] target = dict(image_id=image_id, annotations=target) if self._transforms is not None: @@ -220,7 +197,7 @@ def __getitem__(self, idx): return img, target -def get_coco(root, image_set, transforms, mode='instances'): +def get_coco(root, image_set, transforms, mode="instances", use_v2=False, with_masks=False): anno_file_template = "{}_{}2017.json" PATHS = { "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), @@ -228,17 +205,26 @@ def get_coco(root, image_set, transforms, mode='instances'): # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) } - t = [ConvertCocoPolysToMask()] - - if transforms is not None: - t.append(transforms) - transforms = T.Compose(t) - img_folder, ann_file = PATHS[image_set] img_folder = os.path.join(root, img_folder) ann_file = os.path.join(root, ann_file) - dataset = CocoDetection(img_folder, ann_file, transforms=transforms) + if use_v2: + from torchvision.datasets import wrap_dataset_for_transforms_v2 + + dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms) + target_keys = ["boxes", "labels", "image_id"] + if with_masks: + target_keys += ["masks"] + dataset = wrap_dataset_for_transforms_v2(dataset, target_keys=target_keys) + else: + # TODO: handle with_masks for V1? 
+ t = [ConvertCocoPolysToMask()] + if transforms is not None: + t.append(transforms) + transforms = T.Compose(t) + + dataset = CocoDetection(img_folder, ann_file, transforms=transforms) if image_set == "train": dataset = _coco_remove_images_without_annotations(dataset) @@ -246,7 +232,3 @@ def get_coco(root, image_set, transforms, mode='instances'): # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) return dataset - - -def get_coco_kp(root, image_set, transforms): - return get_coco(root, image_set, transforms, mode="person_keypoints") diff --git a/references/detection/engine.py b/references/detection/engine.py index 68c39a4fc1b..0e9bfffdf8a 100644 --- a/references/detection/engine.py +++ b/references/detection/engine.py @@ -1,35 +1,35 @@ import math import sys import time -import torch +import torch import torchvision.models.detection.mask_rcnn - -from coco_utils import get_coco_api_from_dataset -from coco_eval import CocoEvaluator import utils +from coco_eval import CocoEvaluator +from coco_utils import get_coco_api_from_dataset -def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None): model.train() metric_logger = utils.MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) - header = 'Epoch: [{}]'.format(epoch) + metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}")) + header = f"Epoch: [{epoch}]" lr_scheduler = None if epoch == 0: - warmup_factor = 1. 
/ 1000 + warmup_factor = 1.0 / 1000 warmup_iters = min(1000, len(data_loader) - 1) - lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) + lr_scheduler = torch.optim.lr_scheduler.LinearLR( + optimizer, start_factor=warmup_factor, total_iters=warmup_iters + ) for images, targets in metric_logger.log_every(data_loader, print_freq, header): images = list(image.to(device) for image in images) - targets = [{k: v.to(device) for k, v in t.items()} for t in targets] - - loss_dict = model(images, targets) - - losses = sum(loss for loss in loss_dict.values()) + targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets] + with torch.cuda.amp.autocast(enabled=scaler is not None): + loss_dict = model(images, targets) + losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = utils.reduce_dict(loss_dict) @@ -38,13 +38,18 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): loss_value = losses_reduced.item() if not math.isfinite(loss_value): - print("Loss is {}, stopping training".format(loss_value)) + print(f"Loss is {loss_value}, stopping training") print(loss_dict_reduced) sys.exit(1) optimizer.zero_grad() - losses.backward() - optimizer.step() + if scaler is not None: + scaler.scale(losses).backward() + scaler.step(optimizer) + scaler.update() + else: + losses.backward() + optimizer.step() if lr_scheduler is not None: lr_scheduler.step() @@ -52,6 +57,8 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): metric_logger.update(loss=losses_reduced, **loss_dict_reduced) metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + return metric_logger + def _get_iou_types(model): model_without_ddp = model @@ -65,7 +72,7 @@ def _get_iou_types(model): return iou_types -@torch.no_grad() +@torch.inference_mode() def evaluate(model, data_loader, device): n_threads = torch.get_num_threads() # 
FIXME remove this and make paste_masks_in_image run on the GPU @@ -73,24 +80,24 @@ def evaluate(model, data_loader, device): cpu_device = torch.device("cpu") model.eval() metric_logger = utils.MetricLogger(delimiter=" ") - header = 'Test:' + header = "Test:" coco = get_coco_api_from_dataset(data_loader.dataset) iou_types = _get_iou_types(model) coco_evaluator = CocoEvaluator(coco, iou_types) - for image, targets in metric_logger.log_every(data_loader, 100, header): - image = list(img.to(device) for img in image) - targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + for images, targets in metric_logger.log_every(data_loader, 100, header): + images = list(img.to(device) for img in images) - torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() model_time = time.time() - outputs = model(image) + outputs = model(images) outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] model_time = time.time() - model_time - res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} + res = {target["image_id"]: output for target, output in zip(targets, outputs)} evaluator_time = time.time() coco_evaluator.update(res) evaluator_time = time.time() - evaluator_time diff --git a/references/detection/group_by_aspect_ratio.py b/references/detection/group_by_aspect_ratio.py index 61694cd63a4..d12e14b540c 100644 --- a/references/detection/group_by_aspect_ratio.py +++ b/references/detection/group_by_aspect_ratio.py @@ -1,15 +1,22 @@ import bisect -from collections import defaultdict import copy -import numpy as np +import math +from collections import defaultdict +from itertools import chain, repeat +import numpy as np import torch import torch.utils.data +import torchvision +from PIL import Image from torch.utils.data.sampler import BatchSampler, Sampler from torch.utils.model_zoo import tqdm -import torchvision -from PIL import Image + +def _repeat_to_at_least(iterable, n): + repeat_times = 
math.ceil(n / len(iterable)) + repeated = chain.from_iterable(repeat(iterable, repeat_times)) + return list(repeated) class GroupedBatchSampler(BatchSampler): @@ -18,7 +25,7 @@ class GroupedBatchSampler(BatchSampler): It enforces that the batch only contain elements from the same group. It also tries to provide mini-batches which follows an ordering which is as close as possible to the ordering from the original sampler. - Arguments: + Args: sampler (Sampler): Base sampler. group_ids (list[int]): If the sampler produces indices in range [0, N), `group_ids` must be a list of `N` ints which contains the group id of each sample. @@ -26,12 +33,10 @@ class GroupedBatchSampler(BatchSampler): 0, i.e. they must be in the range [0, num_groups). batch_size (int): Size of mini-batch. """ + def __init__(self, sampler, group_ids, batch_size): if not isinstance(sampler, Sampler): - raise ValueError( - "sampler should be an instance of " - "torch.utils.data.Sampler, but got sampler={}".format(sampler) - ) + raise ValueError(f"sampler should be an instance of torch.utils.data.Sampler, but got sampler={sampler}") self.sampler = sampler self.group_ids = group_ids self.batch_size = batch_size @@ -58,13 +63,12 @@ def __iter__(self): expected_num_batches = len(self) num_remaining = expected_num_batches - num_batches if num_remaining > 0: - # for the remaining batches, take first the buffers with largest number + # for the remaining batches, take first the buffers with the largest number # of elements - for group_id, _ in sorted(buffer_per_group.items(), - key=lambda x: len(x[1]), reverse=True): + for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True): remaining = self.batch_size - len(buffer_per_group[group_id]) - buffer_per_group[group_id].extend( - samples_per_group[group_id][:remaining]) + samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) + buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) 
assert len(buffer_per_group[group_id]) == self.batch_size yield buffer_per_group[group_id] num_remaining -= 1 @@ -77,10 +81,12 @@ def __len__(self): def _compute_aspect_ratios_slow(dataset, indices=None): - print("Your dataset doesn't support the fast path for " - "computing the aspect ratios, so will iterate over " - "the full dataset and load every image instead. " - "This might take some time...") + print( + "Your dataset doesn't support the fast path for " + "computing the aspect ratios, so will iterate over " + "the full dataset and load every image instead. " + "This might take some time..." + ) if indices is None: indices = range(len(dataset)) @@ -96,9 +102,12 @@ def __len__(self): sampler = SubsetSampler(indices) data_loader = torch.utils.data.DataLoader( - dataset, batch_size=1, sampler=sampler, + dataset, + batch_size=1, + sampler=sampler, num_workers=14, # you might want to increase it for faster processing - collate_fn=lambda x: x[0]) + collate_fn=lambda x: x[0], + ) aspect_ratios = [] with tqdm(total=len(dataset)) as pbar: for _i, (img, _) in enumerate(data_loader): @@ -182,6 +191,6 @@ def create_aspect_ratio_groups(dataset, k=0): # count number of elements per group counts = np.unique(groups, return_counts=True)[1] fbins = [0] + bins + [np.inf] - print("Using {} as bins for aspect ratio quantization".format(fbins)) - print("Count of instances per bin: {}".format(counts)) + print(f"Using {fbins} as bins for aspect ratio quantization") + print(f"Count of instances per bin: {counts}") return groups diff --git a/references/detection/presets.py b/references/detection/presets.py new file mode 100644 index 00000000000..e9b6d56c886 --- /dev/null +++ b/references/detection/presets.py @@ -0,0 +1,114 @@ +from collections import defaultdict + +import torch +import transforms as reference_transforms + + +def get_modules(use_v2): + # We need a protected import to avoid the V2 warning in case just V1 is used + if use_v2: + import torchvision.transforms.v2 + import 
torchvision.tv_tensors + + return torchvision.transforms.v2, torchvision.tv_tensors + else: + return reference_transforms, None + + +class DetectionPresetTrain: + # Note: this transform assumes that the input to forward() are always PIL + # images, regardless of the backend parameter. + def __init__( + self, + *, + data_augmentation, + hflip_prob=0.5, + mean=(123.0, 117.0, 104.0), + backend="pil", + use_v2=False, + ): + + T, tv_tensors = get_modules(use_v2) + + transforms = [] + backend = backend.lower() + if backend == "tv_tensor": + transforms.append(T.ToImage()) + elif backend == "tensor": + transforms.append(T.PILToTensor()) + elif backend != "pil": + raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}") + + if data_augmentation == "hflip": + transforms += [T.RandomHorizontalFlip(p=hflip_prob)] + elif data_augmentation == "lsj": + transforms += [ + T.ScaleJitter(target_size=(1024, 1024), antialias=True), + # TODO: FixedSizeCrop below doesn't work on tensors! + reference_transforms.FixedSizeCrop(size=(1024, 1024), fill=mean), + T.RandomHorizontalFlip(p=hflip_prob), + ] + elif data_augmentation == "multiscale": + transforms += [ + T.RandomShortestSize(min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333), + T.RandomHorizontalFlip(p=hflip_prob), + ] + elif data_augmentation == "ssd": + fill = defaultdict(lambda: mean, {tv_tensors.Mask: 0}) if use_v2 else list(mean) + transforms += [ + T.RandomPhotometricDistort(), + T.RandomZoomOut(fill=fill), + T.RandomIoUCrop(), + T.RandomHorizontalFlip(p=hflip_prob), + ] + elif data_augmentation == "ssdlite": + transforms += [ + T.RandomIoUCrop(), + T.RandomHorizontalFlip(p=hflip_prob), + ] + else: + raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') + + if backend == "pil": + # Note: we could just convert to pure tensors even in v2. 
+ transforms += [T.ToImage() if use_v2 else T.PILToTensor()] + + transforms += [T.ToDtype(torch.float, scale=True)] + + if use_v2: + transforms += [ + T.ConvertBoundingBoxFormat(tv_tensors.BoundingBoxFormat.XYXY), + T.SanitizeBoundingBoxes(), + T.ToPureTensor(), + ] + + self.transforms = T.Compose(transforms) + + def __call__(self, img, target): + return self.transforms(img, target) + + +class DetectionPresetEval: + def __init__(self, backend="pil", use_v2=False): + T, _ = get_modules(use_v2) + transforms = [] + backend = backend.lower() + if backend == "pil": + # Note: we could just convert to pure tensors even in v2? + transforms += [T.ToImage() if use_v2 else T.PILToTensor()] + elif backend == "tensor": + transforms += [T.PILToTensor()] + elif backend == "tv_tensor": + transforms += [T.ToImage()] + else: + raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}") + + transforms += [T.ToDtype(torch.float, scale=True)] + + if use_v2: + transforms += [T.ToPureTensor()] + + self.transforms = T.Compose(transforms) + + def __call__(self, img, target): + return self.transforms(img, target) diff --git a/references/detection/train.py b/references/detection/train.py index 3b928611b4f..6a9ffb0af4d 100644 --- a/references/detection/train.py +++ b/references/detection/train.py @@ -8,62 +8,208 @@ The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. --lr 0.02 --batch-size 2 --world-size 8 If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. + +On top of that, for training Faster/Mask R-CNN, the default hyperparameters are + --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 + +Also, if you train Keypoint R-CNN, the default hyperparameters are + --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 +Because the number of images is smaller in the person keypoint subset of COCO, +the number of epochs should be adapted so that we have the same number of iterations. 
""" import datetime import os import time +import presets import torch import torch.utils.data -from torch import nn import torchvision import torchvision.models.detection import torchvision.models.detection.mask_rcnn +import utils +from coco_utils import get_coco +from engine import evaluate, train_one_epoch +from group_by_aspect_ratio import create_aspect_ratio_groups, GroupedBatchSampler +from torchvision.transforms import InterpolationMode +from transforms import SimpleCopyPaste + + +def copypaste_collate_fn(batch): + copypaste = SimpleCopyPaste(blending=True, resize_interpolation=InterpolationMode.BILINEAR) + return copypaste(*utils.collate_fn(batch)) + + +def get_dataset(is_train, args): + image_set = "train" if is_train else "val" + num_classes, mode = {"coco": (91, "instances"), "coco_kp": (2, "person_keypoints")}[args.dataset] + with_masks = "mask" in args.model + ds = get_coco( + root=args.data_path, + image_set=image_set, + transforms=get_transform(is_train, args), + mode=mode, + use_v2=args.use_v2, + with_masks=with_masks, + ) + return ds, num_classes -from coco_utils import get_coco, get_coco_kp -from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups -from engine import train_one_epoch, evaluate +def get_transform(is_train, args): + if is_train: + return presets.DetectionPresetTrain( + data_augmentation=args.data_augmentation, backend=args.backend, use_v2=args.use_v2 + ) + elif args.weights and args.test_only: + weights = torchvision.models.get_weight(args.weights) + trans = weights.transforms() + return lambda img, target: (trans(img), target) + else: + return presets.DetectionPresetEval(backend=args.backend, use_v2=args.use_v2) -import utils -import transforms as T +def get_args_parser(add_help=True): + import argparse -def get_dataset(name, image_set, transform, data_path): - paths = { - "coco": (data_path, get_coco, 91), - "coco_kp": (data_path, get_coco_kp, 2) - } - p, ds_fn, num_classes = paths[name] + parser = 
argparse.ArgumentParser(description="PyTorch Detection Training", add_help=add_help) - ds = ds_fn(p, image_set=image_set, transforms=transform) - return ds, num_classes + parser.add_argument("--data-path", default="/datasets01/COCO/022719/", type=str, help="dataset path") + parser.add_argument( + "--dataset", + default="coco", + type=str, + help="dataset name. Use coco for object detection and instance segmentation and coco_kp for Keypoint detection", + ) + parser.add_argument("--model", default="maskrcnn_resnet50_fpn", type=str, help="model name") + parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)") + parser.add_argument( + "-b", "--batch-size", default=2, type=int, help="images per gpu, the total batch size is $NGPU x batch_size" + ) + parser.add_argument("--epochs", default=26, type=int, metavar="N", help="number of total epochs to run") + parser.add_argument( + "-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers (default: 4)" + ) + parser.add_argument("--opt", default="sgd", type=str, help="optimizer") + parser.add_argument( + "--lr", + default=0.02, + type=float, + help="initial learning rate, 0.02 is the default value for training on 8 gpus and 2 images_per_gpu", + ) + parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") + parser.add_argument( + "--wd", + "--weight-decay", + default=1e-4, + type=float, + metavar="W", + help="weight decay (default: 1e-4)", + dest="weight_decay", + ) + parser.add_argument( + "--norm-weight-decay", + default=None, + type=float, + help="weight decay for Normalization layers (default: None, same value as --wd)", + ) + parser.add_argument( + "--lr-scheduler", default="multisteplr", type=str, help="name of lr scheduler (default: multisteplr)" + ) + parser.add_argument( + "--lr-step-size", default=8, type=int, help="decrease lr every step-size epochs (multisteplr scheduler only)" + ) + parser.add_argument( + 
"--lr-steps", + default=[16, 22], + nargs="+", + type=int, + help="decrease lr every step-size epochs (multisteplr scheduler only)", + ) + parser.add_argument( + "--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)" + ) + parser.add_argument("--print-freq", default=20, type=int, help="print frequency") + parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs") + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + parser.add_argument("--start_epoch", default=0, type=int, help="start epoch") + parser.add_argument("--aspect-ratio-group-factor", default=3, type=int) + parser.add_argument("--rpn-score-thresh", default=None, type=float, help="rpn score threshold for faster-rcnn") + parser.add_argument( + "--trainable-backbone-layers", default=None, type=int, help="number of trainable layers of backbone" + ) + parser.add_argument( + "--data-augmentation", default="hflip", type=str, help="data augmentation policy (default: hflip)" + ) + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) + parser.add_argument( + "--test-only", + dest="test_only", + help="Only test the model", + action="store_true", + ) + parser.add_argument( + "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." 
+ ) -def get_transform(train): - transforms = [] - transforms.append(T.ToTensor()) - if train: - transforms.append(T.RandomHorizontalFlip(0.5)) - return T.Compose(transforms) + # distributed training parameters + parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") + parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") + parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load") + parser.add_argument("--weights-backbone", default=None, type=str, help="the backbone weights enum name to load") + + # Mixed precision training parameters + parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") + + # Use CopyPaste augmentation training parameter + parser.add_argument( + "--use-copypaste", + action="store_true", + help="Use CopyPaste data augmentation. Works only with data-augmentation='lsj'.", + ) + + parser.add_argument("--backend", default="PIL", type=str.lower, help="PIL or tensor - case insensitive") + parser.add_argument("--use-v2", action="store_true", help="Use V2 transforms") + + return parser def main(args): + if args.backend.lower() == "tv_tensor" and not args.use_v2: + raise ValueError("Use --use-v2 if you want to use the tv_tensor backend.") + if args.dataset not in ("coco", "coco_kp"): + raise ValueError(f"Dataset should be coco or coco_kp, got {args.dataset}") + if "keypoint" in args.model and args.dataset != "coco_kp": + raise ValueError("Oops, if you want Keypoint detection, set --dataset coco_kp") + if args.dataset == "coco_kp" and args.use_v2: + raise ValueError("KeyPoint detection doesn't support V2 transforms yet") + + if args.output_dir: + utils.mkdir(args.output_dir) + utils.init_distributed_mode(args) print(args) device = torch.device(args.device) + if args.use_deterministic_algorithms: + torch.use_deterministic_algorithms(True) + # Data loading code 
print("Loading data") - dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path) - dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path) + dataset, num_classes = get_dataset(is_train=True, args=args) + dataset_test, _ = get_dataset(is_train=False, args=args) print("Creating data loaders") if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False) else: train_sampler = torch.utils.data.RandomSampler(dataset) test_sampler = torch.utils.data.SequentialSampler(dataset_test) @@ -72,119 +218,117 @@ def main(args): group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor) train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) else: - train_batch_sampler = torch.utils.data.BatchSampler( - train_sampler, args.batch_size, drop_last=True) + train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, args.batch_size, drop_last=True) + + train_collate_fn = utils.collate_fn + if args.use_copypaste: + if args.data_augmentation != "lsj": + raise RuntimeError("SimpleCopyPaste algorithm currently only supports the 'lsj' data augmentation policies") + + train_collate_fn = copypaste_collate_fn data_loader = torch.utils.data.DataLoader( - dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, - collate_fn=utils.collate_fn) + dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_collate_fn + ) data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=1, - sampler=test_sampler, num_workers=args.workers, - collate_fn=utils.collate_fn) + dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn + ) print("Creating model") - model = 
torchvision.models.detection.__dict__[args.model](num_classes=num_classes, - pretrained=args.pretrained) + kwargs = {"trainable_backbone_layers": args.trainable_backbone_layers} + if args.data_augmentation in ["multiscale", "lsj"]: + kwargs["_skip_resize"] = True + if "rcnn" in args.model: + if args.rpn_score_thresh is not None: + kwargs["rpn_score_thresh"] = args.rpn_score_thresh + model = torchvision.models.get_model( + args.model, weights=args.weights, weights_backbone=args.weights_backbone, num_classes=num_classes, **kwargs + ) model.to(device) + if args.distributed and args.sync_bn: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module - params = [p for p in model.parameters() if p.requires_grad] - optimizer = torch.optim.SGD( - params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + if args.norm_weight_decay is None: + parameters = [p for p in model.parameters() if p.requires_grad] + else: + param_groups = torchvision.ops._utils.split_normalization_params(model) + wd_groups = [args.norm_weight_decay, args.weight_decay] + parameters = [{"params": p, "weight_decay": w} for p, w in zip(param_groups, wd_groups) if p] + + opt_name = args.opt.lower() + if opt_name.startswith("sgd"): + optimizer = torch.optim.SGD( + parameters, + lr=args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay, + nesterov="nesterov" in opt_name, + ) + elif opt_name == "adamw": + optimizer = torch.optim.AdamW(parameters, lr=args.lr, weight_decay=args.weight_decay) + else: + raise RuntimeError(f"Invalid optimizer {args.opt}. 
Only SGD and AdamW are supported.") + + scaler = torch.cuda.amp.GradScaler() if args.amp else None - # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) - lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) + args.lr_scheduler = args.lr_scheduler.lower() + if args.lr_scheduler == "multisteplr": + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) + elif args.lr_scheduler == "cosineannealinglr": + lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) + else: + raise RuntimeError( + f"Invalid lr scheduler '{args.lr_scheduler}'. Only MultiStepLR and CosineAnnealingLR are supported." + ) if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True) + model_without_ddp.load_state_dict(checkpoint["model"]) + optimizer.load_state_dict(checkpoint["optimizer"]) + lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) + args.start_epoch = checkpoint["epoch"] + 1 + if args.amp: + scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: + torch.backends.cudnn.deterministic = True evaluate(model, data_loader_test, device=device) return print("Start training") start_time = time.time() - for epoch in range(args.epochs): + for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) - train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) + train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, scaler) lr_scheduler.step() if args.output_dir: - utils.save_on_master({ - 'model': model_without_ddp.state_dict(), - 'optimizer': 
optimizer.state_dict(), - 'lr_scheduler': lr_scheduler.state_dict(), - 'args': args}, - os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": lr_scheduler.state_dict(), + "args": args, + "epoch": epoch, + } + if args.amp: + checkpoint["scaler"] = scaler.state_dict() + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) # evaluate after every epoch evaluate(model, data_loader_test, device=device) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) + print(f"Training time {total_time_str}") if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser( - description=__doc__) - - parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset') - parser.add_argument('--dataset', default='coco', help='dataset') - parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model') - parser.add_argument('--device', default='cuda', help='device') - parser.add_argument('-b', '--batch-size', default=2, type=int, - help='images per gpu, the total batch size is $NGPU x batch_size') - parser.add_argument('--epochs', default=13, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') - parser.add_argument('--lr', default=0.02, type=float, - help='initial learning rate, 0.02 is the default value for training ' - 'on 8 gpus and 2 images_per_gpu') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 
1e-4)', - dest='weight_decay') - parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs') - parser.add_argument('--lr-steps', default=[8, 11], nargs='+', type=int, help='decrease lr every step-size epochs') - parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') - parser.add_argument('--print-freq', default=20, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--aspect-ratio-group-factor', default=0, type=int) - parser.add_argument( - "--test-only", - dest="test_only", - help="Only test the model", - action="store_true", - ) - parser.add_argument( - "--pretrained", - dest="pretrained", - help="Use pre-trained models from the modelzoo", - action="store_true", - ) - - # distributed training parameters - parser.add_argument('--world-size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') - - args = parser.parse_args() - - if args.output_dir: - utils.mkdir(args.output_dir) - + args = get_args_parser().parse_args() main(args) diff --git a/references/detection/transforms.py b/references/detection/transforms.py index 73efc92bdef..e07ccfc9921 100644 --- a/references/detection/transforms.py +++ b/references/detection/transforms.py @@ -1,7 +1,10 @@ -import random -import torch +from typing import Dict, List, Optional, Tuple, Union -from torchvision.transforms import functional as F +import torch +import torchvision +from torch import nn, Tensor +from torchvision import ops +from torchvision.transforms import functional as F, InterpolationMode, transforms as T def _flip_coco_person_keypoints(kps, width): @@ -14,7 +17,7 @@ def _flip_coco_person_keypoints(kps, width): return flipped_data -class Compose(object): 
+class Compose: def __init__(self, transforms): self.transforms = transforms @@ -24,27 +27,575 @@ def __call__(self, image, target): return image, target -class RandomHorizontalFlip(object): - def __init__(self, prob): - self.prob = prob +class RandomHorizontalFlip(T.RandomHorizontalFlip): + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if torch.rand(1) < self.p: + image = F.hflip(image) + if target is not None: + _, _, width = F.get_dimensions(image) + target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] + if "masks" in target: + target["masks"] = target["masks"].flip(-1) + if "keypoints" in target: + keypoints = target["keypoints"] + keypoints = _flip_coco_person_keypoints(keypoints, width) + target["keypoints"] = keypoints + return image, target - def __call__(self, image, target): - if random.random() < self.prob: - height, width = image.shape[-2:] - image = image.flip(-1) - bbox = target["boxes"] - bbox[:, [0, 2]] = width - bbox[:, [2, 0]] - target["boxes"] = bbox + +class PILToTensor(nn.Module): + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.pil_to_tensor(image) + return image, target + + +class ToDtype(nn.Module): + def __init__(self, dtype: torch.dtype, scale: bool = False) -> None: + super().__init__() + self.dtype = dtype + self.scale = scale + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if not self.scale: + return image.to(dtype=self.dtype), target + image = F.convert_image_dtype(image, self.dtype) + return image, target + + +class RandomIoUCrop(nn.Module): + def __init__( + self, + min_scale: float = 0.3, + max_scale: float = 1.0, + min_aspect_ratio: float = 0.5, + max_aspect_ratio: float = 2.0, + sampler_options: Optional[List[float]] = None, + trials: int = 40, + ): + 
super().__init__() + # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174 + self.min_scale = min_scale + self.max_scale = max_scale + self.min_aspect_ratio = min_aspect_ratio + self.max_aspect_ratio = max_aspect_ratio + if sampler_options is None: + sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0] + self.options = sampler_options + self.trials = trials + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if target is None: + raise ValueError("The targets can't be None for this transform.") + + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + _, orig_h, orig_w = F.get_dimensions(image) + + while True: + # sample an option + idx = int(torch.randint(low=0, high=len(self.options), size=(1,))) + min_jaccard_overlap = self.options[idx] + if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option + return image, target + + for _ in range(self.trials): + # check the aspect ratio limitations + r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) + new_w = int(orig_w * r[0]) + new_h = int(orig_h * r[1]) + aspect_ratio = new_w / new_h + if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): + continue + + # check for 0 area crops + r = torch.rand(2) + left = int((orig_w - new_w) * r[0]) + top = int((orig_h - new_h) * r[1]) + right = left + new_w + bottom = top + new_h + if left == right or top == bottom: + continue + + # check for any valid boxes with centers within the crop area + cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2]) + cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3]) + is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom) + if not 
is_within_crop_area.any(): + continue + + # check at least 1 box with jaccard limitations + boxes = target["boxes"][is_within_crop_area] + ious = torchvision.ops.boxes.box_iou( + boxes, torch.tensor([[left, top, right, bottom]], dtype=boxes.dtype, device=boxes.device) + ) + if ious.max() < min_jaccard_overlap: + continue + + # keep only valid boxes and perform cropping + target["boxes"] = boxes + target["labels"] = target["labels"][is_within_crop_area] + target["boxes"][:, 0::2] -= left + target["boxes"][:, 1::2] -= top + target["boxes"][:, 0::2].clamp_(min=0, max=new_w) + target["boxes"][:, 1::2].clamp_(min=0, max=new_h) + image = F.crop(image, top, left, new_h, new_w) + + return image, target + + +class RandomZoomOut(nn.Module): + def __init__( + self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + ): + super().__init__() + if fill is None: + fill = [0.0, 0.0, 0.0] + self.fill = fill + self.side_range = side_range + if side_range[0] < 1.0 or side_range[0] > side_range[1]: + raise ValueError(f"Invalid canvas side range provided {side_range}.") + self.p = p + + @torch.jit.unused + def _get_fill_value(self, is_pil): + # type: (bool) -> int + # We fake the type to make it work on JIT + return tuple(int(x) for x in self.fill) if is_pil else 0 + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError(f"image should be 2/3 dimensional. 
Got {image.ndimension()} dimensions.") + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + if torch.rand(1) >= self.p: + return image, target + + _, orig_h, orig_w = F.get_dimensions(image) + + r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) + canvas_width = int(orig_w * r) + canvas_height = int(orig_h * r) + + r = torch.rand(2) + left = int((canvas_width - orig_w) * r[0]) + top = int((canvas_height - orig_h) * r[1]) + right = canvas_width - (left + orig_w) + bottom = canvas_height - (top + orig_h) + + if torch.jit.is_scripting(): + fill = 0 + else: + fill = self._get_fill_value(F._is_pil_image(image)) + + image = F.pad(image, [left, top, right, bottom], fill=fill) + if isinstance(image, torch.Tensor): + # PyTorch's pad supports only integers on fill. So we need to overwrite the colour + v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1) + image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h) :, :] = image[ + ..., :, (left + orig_w) : + ] = v + + if target is not None: + target["boxes"][:, 0::2] += left + target["boxes"][:, 1::2] += top + + return image, target + + +class RandomPhotometricDistort(nn.Module): + def __init__( + self, + contrast: Tuple[float, float] = (0.5, 1.5), + saturation: Tuple[float, float] = (0.5, 1.5), + hue: Tuple[float, float] = (-0.05, 0.05), + brightness: Tuple[float, float] = (0.875, 1.125), + p: float = 0.5, + ): + super().__init__() + self._brightness = T.ColorJitter(brightness=brightness) + self._contrast = T.ColorJitter(contrast=contrast) + self._hue = T.ColorJitter(hue=hue) + self._saturation = T.ColorJitter(saturation=saturation) + self.p = p + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError(f"image should be 2/3 dimensional. 
Got {image.ndimension()} dimensions.") + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + r = torch.rand(7) + + if r[0] < self.p: + image = self._brightness(image) + + contrast_before = r[1] < 0.5 + if contrast_before: + if r[2] < self.p: + image = self._contrast(image) + + if r[3] < self.p: + image = self._saturation(image) + + if r[4] < self.p: + image = self._hue(image) + + if not contrast_before: + if r[5] < self.p: + image = self._contrast(image) + + if r[6] < self.p: + channels, _, _ = F.get_dimensions(image) + permutation = torch.randperm(channels) + + is_pil = F._is_pil_image(image) + if is_pil: + image = F.pil_to_tensor(image) + image = F.convert_image_dtype(image) + image = image[..., permutation, :, :] + if is_pil: + image = F.to_pil_image(image) + + return image, target + + +class ScaleJitter(nn.Module): + """Randomly resizes the image and its bounding boxes within the specified scale range. + The class implements the Scale Jitter augmentation as described in the paper + `"Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation" `_. + + Args: + target_size (tuple of ints): The target size for the transform provided in (height, weight) format. + scale_range (tuple of ints): scaling factor interval, e.g (a, b), then scale is randomly sampled from the + range a <= scale <= b. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. 
+ """ + + def __init__( + self, + target_size: Tuple[int, int], + scale_range: Tuple[float, float] = (0.1, 2.0), + interpolation: InterpolationMode = InterpolationMode.BILINEAR, + antialias=True, + ): + super().__init__() + self.target_size = target_size + self.scale_range = scale_range + self.interpolation = interpolation + self.antialias = antialias + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + _, orig_height, orig_width = F.get_dimensions(image) + + scale = self.scale_range[0] + torch.rand(1) * (self.scale_range[1] - self.scale_range[0]) + r = min(self.target_size[1] / orig_height, self.target_size[0] / orig_width) * scale + new_width = int(orig_width * r) + new_height = int(orig_height * r) + + image = F.resize(image, [new_height, new_width], interpolation=self.interpolation, antialias=self.antialias) + + if target is not None: + target["boxes"][:, 0::2] *= new_width / orig_width + target["boxes"][:, 1::2] *= new_height / orig_height if "masks" in target: - target["masks"] = target["masks"].flip(-1) - if "keypoints" in target: - keypoints = target["keypoints"] - keypoints = _flip_coco_person_keypoints(keypoints, width) - target["keypoints"] = keypoints + target["masks"] = F.resize( + target["masks"], + [new_height, new_width], + interpolation=InterpolationMode.NEAREST, + antialias=self.antialias, + ) + return image, target -class ToTensor(object): - def __call__(self, image, target): - image = F.to_tensor(image) +class FixedSizeCrop(nn.Module): + def __init__(self, size, fill=0, padding_mode="constant"): + super().__init__() + size = tuple(T._setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")) + self.crop_height = 
size[0] + self.crop_width = size[1] + self.fill = fill # TODO: Fill is currently respected only on PIL. Apply tensor patch. + self.padding_mode = padding_mode + + def _pad(self, img, target, padding): + # Taken from the functional_tensor.py pad + if isinstance(padding, int): + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + padding = [pad_left, pad_top, pad_right, pad_bottom] + img = F.pad(img, padding, self.fill, self.padding_mode) + if target is not None: + target["boxes"][:, 0::2] += pad_left + target["boxes"][:, 1::2] += pad_top + if "masks" in target: + target["masks"] = F.pad(target["masks"], padding, 0, "constant") + + return img, target + + def _crop(self, img, target, top, left, height, width): + img = F.crop(img, top, left, height, width) + if target is not None: + boxes = target["boxes"] + boxes[:, 0::2] -= left + boxes[:, 1::2] -= top + boxes[:, 0::2].clamp_(min=0, max=width) + boxes[:, 1::2].clamp_(min=0, max=height) + + is_valid = (boxes[:, 0] < boxes[:, 2]) & (boxes[:, 1] < boxes[:, 3]) + + target["boxes"] = boxes[is_valid] + target["labels"] = target["labels"][is_valid] + if "masks" in target: + target["masks"] = F.crop(target["masks"][is_valid], top, left, height, width) + + return img, target + + def forward(self, img, target=None): + _, height, width = F.get_dimensions(img) + new_height = min(height, self.crop_height) + new_width = min(width, self.crop_width) + + if new_height != height or new_width != width: + offset_height = max(height - self.crop_height, 0) + offset_width = max(width - self.crop_width, 0) + + r = torch.rand(1) + top = int(offset_height * r) + left = int(offset_width * r) + + img, target = self._crop(img, target, top, left, 
new_height, new_width) + + pad_bottom = max(self.crop_height - new_height, 0) + pad_right = max(self.crop_width - new_width, 0) + if pad_bottom != 0 or pad_right != 0: + img, target = self._pad(img, target, [0, 0, pad_right, pad_bottom]) + + return img, target + + +class RandomShortestSize(nn.Module): + def __init__( + self, + min_size: Union[List[int], Tuple[int], int], + max_size: int, + interpolation: InterpolationMode = InterpolationMode.BILINEAR, + ): + super().__init__() + self.min_size = [min_size] if isinstance(min_size, int) else list(min_size) + self.max_size = max_size + self.interpolation = interpolation + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + _, orig_height, orig_width = F.get_dimensions(image) + + min_size = self.min_size[torch.randint(len(self.min_size), (1,)).item()] + r = min(min_size / min(orig_height, orig_width), self.max_size / max(orig_height, orig_width)) + + new_width = int(orig_width * r) + new_height = int(orig_height * r) + + image = F.resize(image, [new_height, new_width], interpolation=self.interpolation) + + if target is not None: + target["boxes"][:, 0::2] *= new_width / orig_width + target["boxes"][:, 1::2] *= new_height / orig_height + if "masks" in target: + target["masks"] = F.resize( + target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST + ) + return image, target + + +def _copy_paste( + image: torch.Tensor, + target: Dict[str, Tensor], + paste_image: torch.Tensor, + paste_target: Dict[str, Tensor], + blending: bool = True, + resize_interpolation: F.InterpolationMode = F.InterpolationMode.BILINEAR, +) -> Tuple[torch.Tensor, Dict[str, Tensor]]: + + # Random paste targets selection: + num_masks = len(paste_target["masks"]) + + if num_masks < 1: + # Such degerante case with num_masks=0 can happen with LSJ + # Let's just return (image, target) + return image, target + + # We have to please torch script by 
explicitly specifying dtype as torch.long + random_selection = torch.randint(0, num_masks, (num_masks,), device=paste_image.device) + random_selection = torch.unique(random_selection).to(torch.long) + + paste_masks = paste_target["masks"][random_selection] + paste_boxes = paste_target["boxes"][random_selection] + paste_labels = paste_target["labels"][random_selection] + + masks = target["masks"] + + # We resize source and paste data if they have different sizes + # This is something we introduced here as originally the algorithm works + # on equal-sized data (for example, coming from LSJ data augmentations) + size1 = image.shape[-2:] + size2 = paste_image.shape[-2:] + if size1 != size2: + paste_image = F.resize(paste_image, size1, interpolation=resize_interpolation) + paste_masks = F.resize(paste_masks, size1, interpolation=F.InterpolationMode.NEAREST) + # resize bboxes: + ratios = torch.tensor((size1[1] / size2[1], size1[0] / size2[0]), device=paste_boxes.device) + paste_boxes = paste_boxes.view(-1, 2, 2).mul(ratios).view(paste_boxes.shape) + + paste_alpha_mask = paste_masks.sum(dim=0) > 0 + + if blending: + paste_alpha_mask = F.gaussian_blur( + paste_alpha_mask.unsqueeze(0), + kernel_size=(5, 5), + sigma=[ + 2.0, + ], + ) + + # Copy-paste images: + image = (image * (~paste_alpha_mask)) + (paste_image * paste_alpha_mask) + + # Copy-paste masks: + masks = masks * (~paste_alpha_mask) + non_all_zero_masks = masks.sum((-1, -2)) > 0 + masks = masks[non_all_zero_masks] + + # Do a shallow copy of the target dict + out_target = {k: v for k, v in target.items()} + + out_target["masks"] = torch.cat([masks, paste_masks]) + + # Copy-paste boxes and labels + boxes = ops.masks_to_boxes(masks) + out_target["boxes"] = torch.cat([boxes, paste_boxes]) + + labels = target["labels"][non_all_zero_masks] + out_target["labels"] = torch.cat([labels, paste_labels]) + + # Update additional optional keys: area and iscrowd if exist + if "area" in target: + out_target["area"] = 
out_target["masks"].sum((-1, -2)).to(torch.float32) + + if "iscrowd" in target and "iscrowd" in paste_target: + # target['iscrowd'] size can be differ from mask size (non_all_zero_masks) + # For example, if previous transforms geometrically modifies masks/boxes/labels but + # does not update "iscrowd" + if len(target["iscrowd"]) == len(non_all_zero_masks): + iscrowd = target["iscrowd"][non_all_zero_masks] + paste_iscrowd = paste_target["iscrowd"][random_selection] + out_target["iscrowd"] = torch.cat([iscrowd, paste_iscrowd]) + + # Check for degenerated boxes and remove them + boxes = out_target["boxes"] + degenerate_boxes = boxes[:, 2:] <= boxes[:, :2] + if degenerate_boxes.any(): + valid_targets = ~degenerate_boxes.any(dim=1) + + out_target["boxes"] = boxes[valid_targets] + out_target["masks"] = out_target["masks"][valid_targets] + out_target["labels"] = out_target["labels"][valid_targets] + + if "area" in out_target: + out_target["area"] = out_target["area"][valid_targets] + if "iscrowd" in out_target and len(out_target["iscrowd"]) == len(valid_targets): + out_target["iscrowd"] = out_target["iscrowd"][valid_targets] + + return image, out_target + + +class SimpleCopyPaste(torch.nn.Module): + def __init__(self, blending=True, resize_interpolation=F.InterpolationMode.BILINEAR): + super().__init__() + self.resize_interpolation = resize_interpolation + self.blending = blending + + def forward( + self, images: List[torch.Tensor], targets: List[Dict[str, Tensor]] + ) -> Tuple[List[torch.Tensor], List[Dict[str, Tensor]]]: + torch._assert( + isinstance(images, (list, tuple)) and all([isinstance(v, torch.Tensor) for v in images]), + "images should be a list of tensors", + ) + torch._assert( + isinstance(targets, (list, tuple)) and len(images) == len(targets), + "targets should be a list of the same size as images", + ) + for target in targets: + # Can not check for instance type dict with inside torch.jit.script + # torch._assert(isinstance(target, dict), "targets item 
should be a dict") + for k in ["masks", "boxes", "labels"]: + torch._assert(k in target, f"Key {k} should be present in targets") + torch._assert(isinstance(target[k], torch.Tensor), f"Value for the key {k} should be a tensor") + + # images = [t1, t2, ..., tN] + # Let's define paste_images as shifted list of input images + # paste_images = [t2, t3, ..., tN, t1] + # FYI: in TF they mix data on the dataset level + images_rolled = images[-1:] + images[:-1] + targets_rolled = targets[-1:] + targets[:-1] + + output_images: List[torch.Tensor] = [] + output_targets: List[Dict[str, Tensor]] = [] + + for image, target, paste_image, paste_target in zip(images, targets, images_rolled, targets_rolled): + output_image, output_data = _copy_paste( + image, + target, + paste_image, + paste_target, + blending=self.blending, + resize_interpolation=self.resize_interpolation, + ) + output_images.append(output_image) + output_targets.append(output_data) + + return output_images, output_targets + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(blending={self.blending}, resize_interpolation={self.resize_interpolation})" + return s diff --git a/references/detection/utils.py b/references/detection/utils.py index 0e8e8560118..f73915580f7 100644 --- a/references/detection/utils.py +++ b/references/detection/utils.py @@ -1,18 +1,14 @@ -from __future__ import print_function - -from collections import defaultdict, deque import datetime -import pickle +import errno +import os import time +from collections import defaultdict, deque import torch import torch.distributed as dist -import errno -import os - -class SmoothedValue(object): +class SmoothedValue: """Track a series of values and provide access to smoothed values over a window or the global series average. 
""" @@ -36,7 +32,7 @@ def synchronize_between_processes(self): """ if not is_dist_avail_and_initialized(): return - t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") dist.barrier() dist.all_reduce(t) t = t.tolist() @@ -67,11 +63,8 @@ def value(self): def __str__(self): return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) def all_gather(data): @@ -85,35 +78,8 @@ def all_gather(data): world_size = get_world_size() if world_size == 1: return [data] - - # serialized to a Tensor - buffer = pickle.dumps(data) - storage = torch.ByteStorage.from_buffer(buffer) - tensor = torch.ByteTensor(storage).to("cuda") - - # obtain Tensor size of each rank - local_size = torch.tensor([tensor.numel()], device="cuda") - size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] - dist.all_gather(size_list, local_size) - size_list = [int(size.item()) for size in size_list] - max_size = max(size_list) - - # receiving Tensor from all ranks - # we pad the tensor because torch all_gather does not support - # gathering tensors of different shapes - tensor_list = [] - for _ in size_list: - tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) - if local_size != max_size: - padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") - tensor = torch.cat((tensor, padding), dim=0) - dist.all_gather(tensor_list, tensor) - - data_list = [] - for size, tensor in zip(size_list, tensor_list): - buffer = tensor.cpu().numpy().tobytes()[:size] - data_list.append(pickle.loads(buffer)) - + data_list = [None] * world_size + dist.all_gather_object(data_list, data) return data_list @@ -129,7 +95,7 @@ def reduce_dict(input_dict, average=True): world_size = 
get_world_size() if world_size < 2: return input_dict - with torch.no_grad(): + with torch.inference_mode(): names = [] values = [] # sort the keys so that they are consistent across processes @@ -144,7 +110,7 @@ def reduce_dict(input_dict, average=True): return reduced_dict -class MetricLogger(object): +class MetricLogger: def __init__(self, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter @@ -161,15 +127,12 @@ def __getattr__(self, attr): return self.meters[attr] if attr in self.__dict__: return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") def __str__(self): loss_str = [] for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) + loss_str.append(f"{name}: {str(meter)}") return self.delimiter.join(loss_str) def synchronize_between_processes(self): @@ -182,31 +145,28 @@ def add_meter(self, name, meter): def log_every(self, iterable, print_freq, header=None): i = 0 if not header: - header = '' + header = "" start_time = time.time() end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" if torch.cuda.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', 
- 'data: {data}' - ]) + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) MB = 1024.0 * 1024.0 for obj in iterable: data_time.update(time.time() - end) @@ -216,39 +176,34 @@ def log_every(self, iterable, print_freq, header=None): eta_seconds = iter_time.global_avg * (len(iterable) - i) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if torch.cuda.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=torch.cuda.max_memory_allocated() / MB)) + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) i += 1 end = time.time() total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {} ({:.4f} s / it)'.format( - header, total_time_str, total_time / len(iterable))) + print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)") def collate_fn(batch): return tuple(zip(*batch)) -def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): - - def f(x): - if x >= warmup_iters: - return 1 - alpha = float(x) / warmup_iters - return warmup_factor * (1 - alpha) + alpha - - return torch.optim.lr_scheduler.LambdaLR(optimizer, f) - - def mkdir(path): try: os.makedirs(path) @@ -262,10 +217,11 @@ def setup_for_distributed(is_master): This function disables printing when not in master process """ import builtins as __builtin__ + builtin_print = __builtin__.print def print(*args, **kwargs): - force = 
kwargs.pop('force', False) + force = kwargs.pop("force", False) if is_master or force: builtin_print(*args, **kwargs) @@ -302,25 +258,25 @@ def save_on_master(*args, **kwargs): def init_distributed_mode(args): - if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: args.rank = int(os.environ["RANK"]) - args.world_size = int(os.environ['WORLD_SIZE']) - args.gpu = int(os.environ['LOCAL_RANK']) - elif 'SLURM_PROCID' in os.environ: - args.rank = int(os.environ['SLURM_PROCID']) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + elif "SLURM_PROCID" in os.environ: + args.rank = int(os.environ["SLURM_PROCID"]) args.gpu = args.rank % torch.cuda.device_count() else: - print('Not using distributed mode') + print("Not using distributed mode") args.distributed = False return args.distributed = True torch.cuda.set_device(args.gpu) - args.dist_backend = 'nccl' - print('| distributed init (rank {}): {}'.format( - args.rank, args.dist_url), flush=True) - torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) + args.dist_backend = "nccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank + ) torch.distributed.barrier() setup_for_distributed(args.rank == 0) diff --git a/references/optical_flow/README.md b/references/optical_flow/README.md new file mode 100644 index 00000000000..6ad1d4079f7 --- /dev/null +++ b/references/optical_flow/README.md @@ -0,0 +1,72 @@ +# Optical flow reference training scripts + +This folder contains reference training scripts for optical flow. +They serve as a log of how to train specific models, so as to provide baseline +training and evaluation scripts to quickly bootstrap research. 
+ + +### RAFT Large + +The RAFT large model was trained on Flying Chairs and then on Flying Things. +Both used 8 A100 GPUs and a batch size of 2 (so effective batch size is 16). The +rest of the hyper-parameters are exactly the same as the original RAFT training +recipe from https://github.com/princeton-vl/RAFT. The original recipe trains for +100000 updates (or steps) on each dataset - this corresponds to about 72 and 20 +epochs on Chairs and Things respectively: + +``` +num_epochs = ceil(num_steps / number_of_steps_per_epoch) + = ceil(num_steps / (num_samples / effective_batch_size)) +``` + +``` +torchrun --nproc_per_node 8 --nnodes 1 train.py \ + --dataset-root $dataset_root \ + --name $name_chairs \ + --model raft_large \ + --train-dataset chairs \ + --batch-size 2 \ + --lr 0.0004 \ + --weight-decay 0.0001 \ + --epochs 72 \ + --output-dir $chairs_dir +``` + +``` +torchrun --nproc_per_node 8 --nnodes 1 train.py \ + --dataset-root $dataset_root \ + --name $name_things \ + --model raft_large \ + --train-dataset things \ + --batch-size 2 \ + --lr 0.000125 \ + --weight-decay 0.0001 \ + --epochs 20 \ + --freeze-batch-norm \ + --output-dir $things_dir\ + --resume $chairs_dir/$name_chairs.pth +``` + + +### Evaluation + +``` +torchrun --nproc_per_node 1 --nnodes 1 train.py --val-dataset sintel --batch-size 1 --dataset-root $dataset_root --model raft_large --weights Raft_Large_Weights.C_T_SKHT_V2 +``` + +This should give an epe of about 1.3822 on the clean pass and 2.7161 on the +final pass of Sintel-train. Results may vary slightly depending on the batch +size and the number of GPUs. 
For the most accurate results use 1 GPU and +`--batch-size 1`: + +``` +Sintel val clean epe: 1.3822 1px: 0.9028 3px: 0.9573 5px: 0.9697 per_image_epe: 1.3822 f1: 4.0248 +Sintel val final epe: 2.7161 1px: 0.8528 3px: 0.9204 5px: 0.9392 per_image_epe: 2.7161 f1: 7.5964 +``` + +You can also evaluate on Kitti train: + +``` +torchrun --nproc_per_node 1 --nnodes 1 train.py --val-dataset kitti --batch-size 1 --dataset-root $dataset_root --model raft_large --weights Raft_Large_Weights.C_T_SKHT_V2 +Kitti val epe: 4.7968 1px: 0.6388 3px: 0.8197 5px: 0.8661 per_image_epe: 4.5118 f1: 16.0679 +``` diff --git a/references/optical_flow/presets.py b/references/optical_flow/presets.py new file mode 100644 index 00000000000..32d9542e692 --- /dev/null +++ b/references/optical_flow/presets.py @@ -0,0 +1,65 @@ +import torch +import transforms as T + + +class OpticalFlowPresetEval(torch.nn.Module): + def __init__(self): + super().__init__() + + self.transforms = T.Compose( + [ + T.PILToTensor(), + T.ConvertImageDtype(torch.float32), + T.Normalize(mean=0.5, std=0.5), # map [0, 1] into [-1, 1] + T.ValidateModelInput(), + ] + ) + + def forward(self, img1, img2, flow, valid): + return self.transforms(img1, img2, flow, valid) + + +class OpticalFlowPresetTrain(torch.nn.Module): + def __init__( + self, + *, + # RandomResizeAndCrop params + crop_size, + min_scale=-0.2, + max_scale=0.5, + stretch_prob=0.8, + # AsymmetricColorJitter params + brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.5 / 3.14, + # Random[H,V]Flip params + asymmetric_jitter_prob=0.2, + do_flip=True, + ): + super().__init__() + + transforms = [ + T.PILToTensor(), + T.AsymmetricColorJitter( + brightness=brightness, contrast=contrast, saturation=saturation, hue=hue, p=asymmetric_jitter_prob + ), + T.RandomResizeAndCrop( + crop_size=crop_size, min_scale=min_scale, max_scale=max_scale, stretch_prob=stretch_prob + ), + ] + + if do_flip: + transforms += [T.RandomHorizontalFlip(p=0.5), T.RandomVerticalFlip(p=0.1)] + + 
transforms += [ + T.ConvertImageDtype(torch.float32), + T.Normalize(mean=0.5, std=0.5), # map [0, 1] into [-1, 1] + T.RandomErasing(max_erase=2), + T.MakeValidFlowMask(), + T.ValidateModelInput(), + ] + self.transforms = T.Compose(transforms) + + def forward(self, img1, img2, flow, valid): + return self.transforms(img1, img2, flow, valid) diff --git a/references/optical_flow/train.py b/references/optical_flow/train.py new file mode 100644 index 00000000000..7012ea6f810 --- /dev/null +++ b/references/optical_flow/train.py @@ -0,0 +1,389 @@ +import argparse +import warnings +from math import ceil +from pathlib import Path + +import torch +import torchvision.models.optical_flow +import utils +from presets import OpticalFlowPresetEval, OpticalFlowPresetTrain +from torchvision.datasets import FlyingChairs, FlyingThings3D, HD1K, KittiFlow, Sintel + + +def get_train_dataset(stage, dataset_root): + if stage == "chairs": + transforms = OpticalFlowPresetTrain(crop_size=(368, 496), min_scale=0.1, max_scale=1.0, do_flip=True) + return FlyingChairs(root=dataset_root, split="train", transforms=transforms) + elif stage == "things": + transforms = OpticalFlowPresetTrain(crop_size=(400, 720), min_scale=-0.4, max_scale=0.8, do_flip=True) + return FlyingThings3D(root=dataset_root, split="train", pass_name="both", transforms=transforms) + elif stage == "sintel_SKH": # S + K + H as from paper + crop_size = (368, 768) + transforms = OpticalFlowPresetTrain(crop_size=crop_size, min_scale=-0.2, max_scale=0.6, do_flip=True) + + things_clean = FlyingThings3D(root=dataset_root, split="train", pass_name="clean", transforms=transforms) + sintel = Sintel(root=dataset_root, split="train", pass_name="both", transforms=transforms) + + kitti_transforms = OpticalFlowPresetTrain(crop_size=crop_size, min_scale=-0.3, max_scale=0.5, do_flip=True) + kitti = KittiFlow(root=dataset_root, split="train", transforms=kitti_transforms) + + hd1k_transforms = OpticalFlowPresetTrain(crop_size=crop_size, 
min_scale=-0.5, max_scale=0.2, do_flip=True) + hd1k = HD1K(root=dataset_root, split="train", transforms=hd1k_transforms) + + # As future improvement, we could probably be using a distributed sampler here + # The distribution is S(.71), T(.135), K(.135), H(.02) + return 100 * sintel + 200 * kitti + 5 * hd1k + things_clean + elif stage == "kitti": + transforms = OpticalFlowPresetTrain( + # resize and crop params + crop_size=(288, 960), + min_scale=-0.2, + max_scale=0.4, + stretch_prob=0, + # flip params + do_flip=False, + # jitter params + brightness=0.3, + contrast=0.3, + saturation=0.3, + hue=0.3 / 3.14, + asymmetric_jitter_prob=0, + ) + return KittiFlow(root=dataset_root, split="train", transforms=transforms) + else: + raise ValueError(f"Unknown stage {stage}") + + +@torch.no_grad() +def _evaluate(model, args, val_dataset, *, padder_mode, num_flow_updates=None, batch_size=None, header=None): + """Helper function to compute various metrics (epe, etc.) for a model on a given dataset. + + We process as many samples as possible with ddp, and process the rest on a single worker. 
+ """ + batch_size = batch_size or args.batch_size + device = torch.device(args.device) + + model.eval() + + if args.distributed: + sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, drop_last=True) + else: + sampler = torch.utils.data.SequentialSampler(val_dataset) + + val_loader = torch.utils.data.DataLoader( + val_dataset, + sampler=sampler, + batch_size=batch_size, + pin_memory=True, + num_workers=args.workers, + ) + + num_flow_updates = num_flow_updates or args.num_flow_updates + + def inner_loop(blob): + if blob[0].dim() == 3: + # input is not batched, so we add an extra dim for consistency + blob = [x[None, :, :, :] if x is not None else None for x in blob] + + image1, image2, flow_gt = blob[:3] + valid_flow_mask = None if len(blob) == 3 else blob[-1] + + image1, image2 = image1.to(device), image2.to(device) + + padder = utils.InputPadder(image1.shape, mode=padder_mode) + image1, image2 = padder.pad(image1, image2) + + flow_predictions = model(image1, image2, num_flow_updates=num_flow_updates) + flow_pred = flow_predictions[-1] + flow_pred = padder.unpad(flow_pred).cpu() + + metrics, num_pixels_tot = utils.compute_metrics(flow_pred, flow_gt, valid_flow_mask) + + # We compute per-pixel epe (epe) and per-image epe (called f1-epe in RAFT paper). 
+ # per-pixel epe: average epe of all pixels of all images + # per-image epe: average epe on each image independently, then average over images + for name in ("epe", "1px", "3px", "5px", "f1"): # f1 is called f1-all in paper + logger.meters[name].update(metrics[name], n=num_pixels_tot) + logger.meters["per_image_epe"].update(metrics["epe"], n=batch_size) + + logger = utils.MetricLogger() + for meter_name in ("epe", "1px", "3px", "5px", "per_image_epe", "f1"): + logger.add_meter(meter_name, fmt="{global_avg:.4f}") + + num_processed_samples = 0 + for blob in logger.log_every(val_loader, header=header, print_freq=None): + inner_loop(blob) + num_processed_samples += blob[0].shape[0] # batch size + + if args.distributed: + num_processed_samples = utils.reduce_across_processes(num_processed_samples) + print( + f"Batch-processed {num_processed_samples} / {len(val_dataset)} samples. " + "Going to process the remaining samples individually, if any." + ) + if args.rank == 0: # we only need to process the rest on a single worker + for i in range(num_processed_samples, len(val_dataset)): + inner_loop(val_dataset[i]) + + logger.synchronize_between_processes() + + print(header, logger) + + +def evaluate(model, args): + val_datasets = args.val_dataset or [] + + if args.weights and args.test_only: + weights = torchvision.models.get_weight(args.weights) + trans = weights.transforms() + + def preprocessing(img1, img2, flow, valid_flow_mask): + img1, img2 = trans(img1, img2) + if flow is not None and not isinstance(flow, torch.Tensor): + flow = torch.from_numpy(flow) + if valid_flow_mask is not None and not isinstance(valid_flow_mask, torch.Tensor): + valid_flow_mask = torch.from_numpy(valid_flow_mask) + return img1, img2, flow, valid_flow_mask + + else: + preprocessing = OpticalFlowPresetEval() + + for name in val_datasets: + if name == "kitti": + # Kitti has different image sizes, so we need to individually pad them, we can't batch. 
+ # see comment in InputPadder + if args.batch_size != 1 and (not args.distributed or args.rank == 0): + warnings.warn( + f"Batch-size={args.batch_size} was passed. For technical reasons, evaluating on Kitti can only be done with a batch-size of 1." + ) + + val_dataset = KittiFlow(root=args.dataset_root, split="train", transforms=preprocessing) + _evaluate( + model, args, val_dataset, num_flow_updates=24, padder_mode="kitti", header="Kitti val", batch_size=1 + ) + elif name == "sintel": + for pass_name in ("clean", "final"): + val_dataset = Sintel( + root=args.dataset_root, split="train", pass_name=pass_name, transforms=preprocessing + ) + _evaluate( + model, + args, + val_dataset, + num_flow_updates=32, + padder_mode="sintel", + header=f"Sintel val {pass_name}", + ) + else: + warnings.warn(f"Can't validate on {val_dataset}, skipping.") + + +def train_one_epoch(model, optimizer, scheduler, train_loader, logger, args): + device = torch.device(args.device) + for data_blob in logger.log_every(train_loader): + + optimizer.zero_grad() + + image1, image2, flow_gt, valid_flow_mask = (x.to(device) for x in data_blob) + flow_predictions = model(image1, image2, num_flow_updates=args.num_flow_updates) + + loss = utils.sequence_loss(flow_predictions, flow_gt, valid_flow_mask, args.gamma) + metrics, _ = utils.compute_metrics(flow_predictions[-1], flow_gt, valid_flow_mask) + + metrics.pop("f1") + logger.update(loss=loss, **metrics) + + loss.backward() + + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1) + + optimizer.step() + scheduler.step() + + +def main(args): + utils.setup_ddp(args) + args.test_only = args.train_dataset is None + + if args.distributed and args.device == "cpu": + raise ValueError("The device must be cuda if we want to run in distributed mode using torchrun") + device = torch.device(args.device) + + if args.use_deterministic_algorithms: + torch.backends.cudnn.benchmark = False + torch.use_deterministic_algorithms(True) + else: + 
torch.backends.cudnn.benchmark = True + + model = torchvision.models.get_model(args.model, weights=args.weights) + + if args.distributed: + model = model.to(args.local_rank) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank]) + model_without_ddp = model.module + else: + model.to(device) + model_without_ddp = model + + if args.resume is not None: + checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True) + model_without_ddp.load_state_dict(checkpoint["model"]) + + if args.test_only: + # Set deterministic CUDNN algorithms, since they can affect epe a fair bit. + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + evaluate(model, args) + return + + print(f"Parameter Count: {sum(p.numel() for p in model.parameters() if p.requires_grad)}") + + train_dataset = get_train_dataset(args.train_dataset, args.dataset_root) + + optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, eps=args.adamw_eps) + + scheduler = torch.optim.lr_scheduler.OneCycleLR( + optimizer=optimizer, + max_lr=args.lr, + epochs=args.epochs, + steps_per_epoch=ceil(len(train_dataset) / (args.world_size * args.batch_size)), + pct_start=0.05, + cycle_momentum=False, + anneal_strategy="linear", + ) + + if args.resume is not None: + optimizer.load_state_dict(checkpoint["optimizer"]) + scheduler.load_state_dict(checkpoint["scheduler"]) + args.start_epoch = checkpoint["epoch"] + 1 + else: + args.start_epoch = 0 + + torch.backends.cudnn.benchmark = True + + model.train() + if args.freeze_batch_norm: + utils.freeze_batch_norm(model.module) + + if args.distributed: + sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, shuffle=True, drop_last=True) + else: + sampler = torch.utils.data.RandomSampler(train_dataset) + + train_loader = torch.utils.data.DataLoader( + train_dataset, + sampler=sampler, + batch_size=args.batch_size, + pin_memory=True, + 
num_workers=args.workers, + ) + + logger = utils.MetricLogger() + + done = False + for epoch in range(args.start_epoch, args.epochs): + print(f"EPOCH {epoch}") + if args.distributed: + # needed on distributed mode, otherwise the data loading order would be the same for all epochs + sampler.set_epoch(epoch) + + train_one_epoch( + model=model, + optimizer=optimizer, + scheduler=scheduler, + train_loader=train_loader, + logger=logger, + args=args, + ) + + # Note: we don't sync the SmoothedValues across processes, so the printed metrics are just those of rank 0 + print(f"Epoch {epoch} done. ", logger) + + if not args.distributed or args.rank == 0: + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "scheduler": scheduler.state_dict(), + "epoch": epoch, + "args": args, + } + torch.save(checkpoint, Path(args.output_dir) / f"{args.name}_{epoch}.pth") + torch.save(checkpoint, Path(args.output_dir) / f"{args.name}.pth") + + if epoch % args.val_freq == 0 or done: + evaluate(model, args) + model.train() + if args.freeze_batch_norm: + utils.freeze_batch_norm(model.module) + + +def get_args_parser(add_help=True): + parser = argparse.ArgumentParser(add_help=add_help, description="Train or evaluate an optical-flow model.") + parser.add_argument( + "--name", + default="raft", + type=str, + help="The name of the experiment - determines the name of the files where weights are saved.", + ) + parser.add_argument("--output-dir", default=".", type=str, help="Output dir where checkpoints will be stored.") + parser.add_argument( + "--resume", + type=str, + help="A path to previously saved weights. Used to re-start training from, or evaluate a pre-saved model.", + ) + + parser.add_argument("--workers", type=int, default=12, help="Number of workers for the data loading part.") + + parser.add_argument( + "--train-dataset", + type=str, + help="The dataset to use for training. 
If not passed, only validation is performed (and you probably want to pass --resume).", + ) + parser.add_argument("--val-dataset", type=str, nargs="+", help="The dataset(s) to use for validation.") + parser.add_argument("--val-freq", type=int, default=2, help="Validate every X epochs") + parser.add_argument("--epochs", type=int, default=20, help="The total number of epochs to train.") + parser.add_argument("--batch-size", type=int, default=2) + + parser.add_argument("--lr", type=float, default=0.00002, help="Learning rate for AdamW optimizer") + parser.add_argument("--weight-decay", type=float, default=0.00005, help="Weight decay for AdamW optimizer") + parser.add_argument("--adamw-eps", type=float, default=1e-8, help="eps value for AdamW optimizer") + + parser.add_argument( + "--freeze-batch-norm", action="store_true", help="Set BatchNorm modules of the model in eval mode." + ) + + parser.add_argument( + "--model", type=str, default="raft_large", help="The name of the model to use - either raft_large or raft_small" + ) + # TODO: resume and weights should be in an exclusive arg group + + parser.add_argument( + "--num_flow_updates", + type=int, + default=12, + help="number of updates (or 'iters') in the update operator of the model.", + ) + + parser.add_argument("--gamma", type=float, default=0.8, help="exponential weighting for loss. Must be < 1.") + + parser.add_argument("--dist-url", default="env://", help="URL used to set up distributed training") + + parser.add_argument( + "--dataset-root", + help="Root folder where the datasets are stored. Will be passed as the 'root' parameter of the datasets.", + required=True, + ) + + parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load.") + parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu, Default: cuda)") + parser.add_argument( + "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." 
+ ) + + return parser + + +if __name__ == "__main__": + args = get_args_parser().parse_args() + Path(args.output_dir).mkdir(exist_ok=True) + main(args) diff --git a/references/optical_flow/transforms.py b/references/optical_flow/transforms.py new file mode 100644 index 00000000000..bc831a2ee52 --- /dev/null +++ b/references/optical_flow/transforms.py @@ -0,0 +1,271 @@ +import torch +import torchvision.transforms as T +import torchvision.transforms.functional as F + + +class ValidateModelInput(torch.nn.Module): + # Pass-through transform that checks the shape and dtypes to make sure the model gets what it expects + def forward(self, img1, img2, flow, valid_flow_mask): + + if not all(isinstance(arg, torch.Tensor) for arg in (img1, img2, flow, valid_flow_mask) if arg is not None): + raise TypeError("This method expects all input arguments to be of type torch.Tensor.") + if not all(arg.dtype == torch.float32 for arg in (img1, img2, flow) if arg is not None): + raise TypeError("This method expects the tensors img1, img2 and flow of be of dtype torch.float32.") + + if img1.shape != img2.shape: + raise ValueError("img1 and img2 should have the same shape.") + h, w = img1.shape[-2:] + if flow is not None and flow.shape != (2, h, w): + raise ValueError(f"flow.shape should be (2, {h}, {w}) instead of {flow.shape}") + if valid_flow_mask is not None: + if valid_flow_mask.shape != (h, w): + raise ValueError(f"valid_flow_mask.shape should be ({h}, {w}) instead of {valid_flow_mask.shape}") + if valid_flow_mask.dtype != torch.bool: + raise TypeError("valid_flow_mask should be of dtype torch.bool instead of {valid_flow_mask.dtype}") + + return img1, img2, flow, valid_flow_mask + + +class MakeValidFlowMask(torch.nn.Module): + # This transform generates a valid_flow_mask if it doesn't exist. + # The flow is considered valid if ||flow||_inf < threshold + # This is a noop for Kitti and HD1K which already come with a built-in flow mask. 
+ def __init__(self, threshold=1000): + super().__init__() + self.threshold = threshold + + def forward(self, img1, img2, flow, valid_flow_mask): + if flow is not None and valid_flow_mask is None: + valid_flow_mask = (flow.abs() < self.threshold).all(axis=0) + return img1, img2, flow, valid_flow_mask + + +class ConvertImageDtype(torch.nn.Module): + def __init__(self, dtype): + super().__init__() + self.dtype = dtype + + def forward(self, img1, img2, flow, valid_flow_mask): + img1 = F.convert_image_dtype(img1, dtype=self.dtype) + img2 = F.convert_image_dtype(img2, dtype=self.dtype) + + img1 = img1.contiguous() + img2 = img2.contiguous() + + return img1, img2, flow, valid_flow_mask + + +class Normalize(torch.nn.Module): + def __init__(self, mean, std): + super().__init__() + self.mean = mean + self.std = std + + def forward(self, img1, img2, flow, valid_flow_mask): + img1 = F.normalize(img1, mean=self.mean, std=self.std) + img2 = F.normalize(img2, mean=self.mean, std=self.std) + + return img1, img2, flow, valid_flow_mask + + +class PILToTensor(torch.nn.Module): + # Converts all inputs to tensors + # Technically the flow and the valid mask are numpy arrays, not PIL images, but we keep that naming + # for consistency with the rest, e.g. the segmentation reference. 
+ def forward(self, img1, img2, flow, valid_flow_mask): + img1 = F.pil_to_tensor(img1) + img2 = F.pil_to_tensor(img2) + if flow is not None: + flow = torch.from_numpy(flow) + if valid_flow_mask is not None: + valid_flow_mask = torch.from_numpy(valid_flow_mask) + + return img1, img2, flow, valid_flow_mask + + +class AsymmetricColorJitter(T.ColorJitter): + # p determines the proba of doing asymmertric vs symmetric color jittering + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, p=0.2): + super().__init__(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue) + self.p = p + + def forward(self, img1, img2, flow, valid_flow_mask): + + if torch.rand(1) < self.p: + # asymmetric: different transform for img1 and img2 + img1 = super().forward(img1) + img2 = super().forward(img2) + else: + # symmetric: same transform for img1 and img2 + batch = torch.stack([img1, img2]) + batch = super().forward(batch) + img1, img2 = batch[0], batch[1] + + return img1, img2, flow, valid_flow_mask + + +class RandomErasing(T.RandomErasing): + # This only erases img2, and with an extra max_erase param + # This max_erase is needed because in the RAFT training ref does: + # 0 erasing with .5 proba + # 1 erase with .25 proba + # 2 erase with .25 proba + # and there's no accurate way to achieve this otherwise. 
+ def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False, max_erase=1): + super().__init__(p=p, scale=scale, ratio=ratio, value=value, inplace=inplace) + self.max_erase = max_erase + if self.max_erase <= 0: + raise ValueError("max_raise should be greater than 0") + + def forward(self, img1, img2, flow, valid_flow_mask): + if torch.rand(1) > self.p: + return img1, img2, flow, valid_flow_mask + + for _ in range(torch.randint(self.max_erase, size=(1,)).item()): + x, y, h, w, v = self.get_params(img2, scale=self.scale, ratio=self.ratio, value=[self.value]) + img2 = F.erase(img2, x, y, h, w, v, self.inplace) + + return img1, img2, flow, valid_flow_mask + + +class RandomHorizontalFlip(T.RandomHorizontalFlip): + def forward(self, img1, img2, flow, valid_flow_mask): + if torch.rand(1) > self.p: + return img1, img2, flow, valid_flow_mask + + img1 = F.hflip(img1) + img2 = F.hflip(img2) + flow = F.hflip(flow) * torch.tensor([-1, 1])[:, None, None] + if valid_flow_mask is not None: + valid_flow_mask = F.hflip(valid_flow_mask) + return img1, img2, flow, valid_flow_mask + + +class RandomVerticalFlip(T.RandomVerticalFlip): + def forward(self, img1, img2, flow, valid_flow_mask): + if torch.rand(1) > self.p: + return img1, img2, flow, valid_flow_mask + + img1 = F.vflip(img1) + img2 = F.vflip(img2) + flow = F.vflip(flow) * torch.tensor([1, -1])[:, None, None] + if valid_flow_mask is not None: + valid_flow_mask = F.vflip(valid_flow_mask) + return img1, img2, flow, valid_flow_mask + + +class RandomResizeAndCrop(torch.nn.Module): + # This transform will resize the input with a given proba, and then crop it. + # These are the reversed operations of the built-in RandomResizedCrop, + # although the order of the operations doesn't matter too much: resizing a + # crop would give the same result as cropping a resized image, up to + # interpolation artifact at the borders of the output. 
+ # + # The reason we don't rely on RandomResizedCrop is because of a significant + # difference in the parametrization of both transforms, in particular, + # because of the way the random parameters are sampled in both transforms, + # which leads to fairly different results (and different epe). For more details see + # https://github.com/pytorch/vision/pull/5026/files#r762932579 + def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, stretch_prob=0.8): + super().__init__() + self.crop_size = crop_size + self.min_scale = min_scale + self.max_scale = max_scale + self.stretch_prob = stretch_prob + self.resize_prob = 0.8 + self.max_stretch = 0.2 + + def forward(self, img1, img2, flow, valid_flow_mask): + # randomly sample scale + h, w = img1.shape[-2:] + # Note: in original code, they use + 1 instead of + 8 for sparse datasets (e.g. Kitti) + # It shouldn't matter much + min_scale = max((self.crop_size[0] + 8) / h, (self.crop_size[1] + 8) / w) + + scale = 2 ** torch.empty(1, dtype=torch.float32).uniform_(self.min_scale, self.max_scale).item() + scale_x = scale + scale_y = scale + if torch.rand(1) < self.stretch_prob: + scale_x *= 2 ** torch.empty(1, dtype=torch.float32).uniform_(-self.max_stretch, self.max_stretch).item() + scale_y *= 2 ** torch.empty(1, dtype=torch.float32).uniform_(-self.max_stretch, self.max_stretch).item() + + scale_x = max(scale_x, min_scale) + scale_y = max(scale_y, min_scale) + + new_h, new_w = round(h * scale_y), round(w * scale_x) + + if torch.rand(1).item() < self.resize_prob: + # rescale the images + # We hard-code antialias=False to preserve results after we changed + # its default from None to True (see + # https://github.com/pytorch/vision/pull/7160) + # TODO: we could re-train the OF models with antialias=True? 
+ img1 = F.resize(img1, size=(new_h, new_w), antialias=False) + img2 = F.resize(img2, size=(new_h, new_w), antialias=False) + if valid_flow_mask is None: + flow = F.resize(flow, size=(new_h, new_w)) + flow = flow * torch.tensor([scale_x, scale_y])[:, None, None] + else: + flow, valid_flow_mask = self._resize_sparse_flow( + flow, valid_flow_mask, scale_x=scale_x, scale_y=scale_y + ) + + # Note: For sparse datasets (Kitti), the original code uses a "margin" + # See e.g. https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py#L220:L220 + # We don't, not sure if it matters much + y0 = torch.randint(0, img1.shape[1] - self.crop_size[0], size=(1,)).item() + x0 = torch.randint(0, img1.shape[2] - self.crop_size[1], size=(1,)).item() + + img1 = F.crop(img1, y0, x0, self.crop_size[0], self.crop_size[1]) + img2 = F.crop(img2, y0, x0, self.crop_size[0], self.crop_size[1]) + flow = F.crop(flow, y0, x0, self.crop_size[0], self.crop_size[1]) + if valid_flow_mask is not None: + valid_flow_mask = F.crop(valid_flow_mask, y0, x0, self.crop_size[0], self.crop_size[1]) + + return img1, img2, flow, valid_flow_mask + + def _resize_sparse_flow(self, flow, valid_flow_mask, scale_x=1.0, scale_y=1.0): + # This resizes both the flow and the valid_flow_mask mask (which is assumed to be reasonably sparse) + # There are as-many non-zero values in the original flow as in the resized flow (up to OOB) + # So for example if scale_x = scale_y = 2, the sparsity of the output flow is multiplied by 4 + + h, w = flow.shape[-2:] + + h_new = int(round(h * scale_y)) + w_new = int(round(w * scale_x)) + flow_new = torch.zeros(size=[2, h_new, w_new], dtype=flow.dtype) + valid_new = torch.zeros(size=[h_new, w_new], dtype=valid_flow_mask.dtype) + + jj, ii = torch.meshgrid(torch.arange(w), torch.arange(h), indexing="xy") + + ii_valid, jj_valid = ii[valid_flow_mask], jj[valid_flow_mask] + + ii_valid_new = torch.round(ii_valid.to(float) * scale_y).to(torch.long) + jj_valid_new = 
torch.round(jj_valid.to(float) * scale_x).to(torch.long) + + within_bounds_mask = (0 <= ii_valid_new) & (ii_valid_new < h_new) & (0 <= jj_valid_new) & (jj_valid_new < w_new) + + ii_valid = ii_valid[within_bounds_mask] + jj_valid = jj_valid[within_bounds_mask] + ii_valid_new = ii_valid_new[within_bounds_mask] + jj_valid_new = jj_valid_new[within_bounds_mask] + + valid_flow_new = flow[:, ii_valid, jj_valid] + valid_flow_new[0] *= scale_x + valid_flow_new[1] *= scale_y + + flow_new[:, ii_valid_new, jj_valid_new] = valid_flow_new + valid_new[ii_valid_new, jj_valid_new] = 1 + + return flow_new, valid_new + + +class Compose(torch.nn.Module): + def __init__(self, transforms): + super().__init__() + self.transforms = transforms + + def forward(self, img1, img2, flow, valid_flow_mask): + for t in self.transforms: + img1, img2, flow, valid_flow_mask = t(img1, img2, flow, valid_flow_mask) + return img1, img2, flow, valid_flow_mask diff --git a/references/optical_flow/utils.py b/references/optical_flow/utils.py new file mode 100644 index 00000000000..cd4b16eb0d8 --- /dev/null +++ b/references/optical_flow/utils.py @@ -0,0 +1,290 @@ +import datetime +import os +import time +from collections import defaultdict, deque + +import torch +import torch.distributed as dist +import torch.nn.functional as F + + +class SmoothedValue: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt="{median:.4f} ({global_avg:.4f})"): + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + t = reduce_across_processes([self.count, self.total]) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) + + +class MetricLogger: + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + if not isinstance(v, (float, int)): + raise TypeError( + f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + ) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append(f"{name}: {str(meter)}") + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, **kwargs): + self.meters[name] = SmoothedValue(**kwargs) + + def log_every(self, iterable, print_freq=5, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + 
"d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if print_freq is not None and i % print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"{header} Total time: {total_time_str}") + + +def compute_metrics(flow_pred, flow_gt, valid_flow_mask=None): + + epe = ((flow_pred - flow_gt) ** 2).sum(dim=1).sqrt() + flow_norm = (flow_gt**2).sum(dim=1).sqrt() + + if valid_flow_mask is not None: + epe = epe[valid_flow_mask] + flow_norm = flow_norm[valid_flow_mask] + + relative_epe = epe / flow_norm + + metrics = { + "epe": epe.mean().item(), + "1px": (epe < 1).float().mean().item(), + "3px": (epe < 3).float().mean().item(), + "5px": (epe < 5).float().mean().item(), + "f1": ((epe > 3) & (relative_epe > 0.05)).float().mean().item() * 100, + } + return metrics, epe.numel() + + +def sequence_loss(flow_preds, flow_gt, valid_flow_mask, gamma=0.8, max_flow=400): + """Loss function defined over sequence of flow predictions""" + + if gamma > 1: + raise 
ValueError(f"Gamma should be < 1, got {gamma}.") + + # exclude invalid pixels and extremely large diplacements + flow_norm = torch.sum(flow_gt**2, dim=1).sqrt() + valid_flow_mask = valid_flow_mask & (flow_norm < max_flow) + + valid_flow_mask = valid_flow_mask[:, None, :, :] + + flow_preds = torch.stack(flow_preds) # shape = (num_flow_updates, batch_size, 2, H, W) + + abs_diff = (flow_preds - flow_gt).abs() + abs_diff = (abs_diff * valid_flow_mask).mean(axis=(1, 2, 3, 4)) + + num_predictions = flow_preds.shape[0] + weights = gamma ** torch.arange(num_predictions - 1, -1, -1).to(flow_gt.device) + flow_loss = (abs_diff * weights).sum() + + return flow_loss + + +class InputPadder: + """Pads images such that dimensions are divisible by 8""" + + # TODO: Ideally, this should be part of the eval transforms preset, instead + # of being part of the validation code. It's not obvious what a good + # solution would be, because we need to unpad the predicted flows according + # to the input images' size, and in some datasets (Kitti) images can have + # variable sizes. 
+ + def __init__(self, dims, mode="sintel"): + self.ht, self.wd = dims[-2:] + pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8 + pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8 + if mode == "sintel": + self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, pad_ht // 2, pad_ht - pad_ht // 2] + else: + self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, 0, pad_ht] + + def pad(self, *inputs): + return [F.pad(x, self._pad, mode="replicate") for x in inputs] + + def unpad(self, x): + ht, wd = x.shape[-2:] + c = [self._pad[2], ht - self._pad[3], self._pad[0], wd - self._pad[1]] + return x[..., c[0] : c[1], c[2] : c[3]] + + +def _redefine_print(is_main): + """disables printing when not in main process""" + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if is_main or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def setup_ddp(args): + # Set the local_rank, rank, and world_size values as args fields + # This is done differently depending on how we're running the script. We + # currently support either torchrun or the custom run_with_submitit.py + # If you're confused (like I was), this might help a bit + # https://discuss.pytorch.org/t/what-is-the-difference-between-rank-and-local-rank/61940/2 + + if all(key in os.environ for key in ("LOCAL_RANK", "RANK", "WORLD_SIZE")): + # if we're here, the script was called with torchrun. 
Otherwise, + # these args will be set already by the run_with_submitit script + args.local_rank = int(os.environ["LOCAL_RANK"]) + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ["WORLD_SIZE"]) + + elif "gpu" in args: + # if we're here, the script was called by run_with_submitit.py + args.local_rank = args.gpu + else: + print("Not using distributed mode!") + args.distributed = False + args.world_size = 1 + return + + args.distributed = True + + _redefine_print(is_main=(args.rank == 0)) + + torch.cuda.set_device(args.local_rank) + dist.init_process_group( + backend="nccl", + rank=args.rank, + world_size=args.world_size, + init_method=args.dist_url, + ) + torch.distributed.barrier() + + +def reduce_across_processes(val): + t = torch.tensor(val, device="cuda") + dist.barrier() + dist.all_reduce(t) + return t + + +def freeze_batch_norm(model): + for m in model.modules(): + if isinstance(m, torch.nn.BatchNorm2d): + m.eval() diff --git a/references/segmentation/README.md b/references/segmentation/README.md new file mode 100644 index 00000000000..2c8e581dac1 --- /dev/null +++ b/references/segmentation/README.md @@ -0,0 +1,43 @@ +# Semantic segmentation reference training scripts + +This folder contains reference training scripts for semantic segmentation. +They serve as a log of how to train specific models and provide baseline +training and evaluation scripts to quickly bootstrap research. + +All models have been trained on 8x V100 GPUs. 
+ +You must modify the following flags: + +`--data-path=/path/to/dataset` + +`--nproc_per_node=` + +## fcn_resnet50 +``` +torchrun --nproc_per_node=8 train.py --lr 0.02 --dataset coco -b 4 --model fcn_resnet50 --aux-loss --weights-backbone ResNet50_Weights.IMAGENET1K_V1 +``` + +## fcn_resnet101 +``` +torchrun --nproc_per_node=8 train.py --lr 0.02 --dataset coco -b 4 --model fcn_resnet101 --aux-loss --weights-backbone ResNet101_Weights.IMAGENET1K_V1 +``` + +## deeplabv3_resnet50 +``` +torchrun --nproc_per_node=8 train.py --lr 0.02 --dataset coco -b 4 --model deeplabv3_resnet50 --aux-loss --weights-backbone ResNet50_Weights.IMAGENET1K_V1 +``` + +## deeplabv3_resnet101 +``` +torchrun --nproc_per_node=8 train.py --lr 0.02 --dataset coco -b 4 --model deeplabv3_resnet101 --aux-loss --weights-backbone ResNet101_Weights.IMAGENET1K_V1 +``` + +## deeplabv3_mobilenet_v3_large +``` +torchrun --nproc_per_node=8 train.py --dataset coco -b 4 --model deeplabv3_mobilenet_v3_large --aux-loss --wd 0.000001 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 +``` + +## lraspp_mobilenet_v3_large +``` +torchrun --nproc_per_node=8 train.py --dataset coco -b 4 --model lraspp_mobilenet_v3_large --wd 0.000001 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 +``` diff --git a/references/segmentation/coco_utils.py b/references/segmentation/coco_utils.py index c86d5495247..6a15dbefb52 100644 --- a/references/segmentation/coco_utils.py +++ b/references/segmentation/coco_utils.py @@ -1,17 +1,15 @@ import copy +import os + import torch import torch.utils.data import torchvision from PIL import Image - -import os - from pycocotools import mask as coco_mask - from transforms import Compose -class FilterAndRemapCocoCategories(object): +class FilterAndRemapCocoCategories: def __init__(self, categories, remap=True): self.categories = categories self.remap = remap @@ -43,7 +41,7 @@ def convert_coco_poly_to_mask(segmentations, height, width): return masks -class 
ConvertCocoPolysToMask(object): +class ConvertCocoPolysToMask: def __call__(self, image, anno): w, h = image.size segmentations = [obj["segmentation"] for obj in anno] @@ -70,7 +68,6 @@ def _has_valid_annotation(anno): # if more than 1k pixels occupied in the image return sum(obj["area"] for obj in anno) > 1000 - assert isinstance(dataset, torchvision.datasets.CocoDetection) ids = [] for ds_idx, img_id in enumerate(dataset.ids): ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) @@ -84,26 +81,32 @@ def _has_valid_annotation(anno): return dataset -def get_coco(root, image_set, transforms): +def get_coco(root, image_set, transforms, use_v2=False): PATHS = { "train": ("train2017", os.path.join("annotations", "instances_train2017.json")), "val": ("val2017", os.path.join("annotations", "instances_val2017.json")), # "train": ("val2017", os.path.join("annotations", "instances_val2017.json")) } - CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, - 1, 64, 20, 63, 7, 72] - - transforms = Compose([ - FilterAndRemapCocoCategories(CAT_LIST, remap=True), - ConvertCocoPolysToMask(), - transforms - ]) + CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72] img_folder, ann_file = PATHS[image_set] img_folder = os.path.join(root, img_folder) ann_file = os.path.join(root, ann_file) - dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms) + # The 2 "Compose" below achieve the same thing: converting coco detection + # samples into segmentation-compatible samples. They just do it with + # slightly different implementations. 
We could refactor and unify, but + # keeping them separate helps keeping the v2 version clean + if use_v2: + import v2_extras + from torchvision.datasets import wrap_dataset_for_transforms_v2 + + transforms = Compose([v2_extras.CocoDetectionToVOCSegmentation(), transforms]) + dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms) + dataset = wrap_dataset_for_transforms_v2(dataset, target_keys={"masks", "labels"}) + else: + transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms]) + dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms) if image_set == "train": dataset = _coco_remove_images_without_annotations(dataset, CAT_LIST) diff --git a/references/segmentation/presets.py b/references/segmentation/presets.py new file mode 100644 index 00000000000..803769fcafc --- /dev/null +++ b/references/segmentation/presets.py @@ -0,0 +1,109 @@ +import torch + + +def get_modules(use_v2): + # We need a protected import to avoid the V2 warning in case just V1 is used + if use_v2: + import torchvision.transforms.v2 + import torchvision.tv_tensors + import v2_extras + + return torchvision.transforms.v2, torchvision.tv_tensors, v2_extras + else: + import transforms + + return transforms, None, None + + +class SegmentationPresetTrain: + def __init__( + self, + *, + base_size, + crop_size, + hflip_prob=0.5, + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + backend="pil", + use_v2=False, + ): + T, tv_tensors, v2_extras = get_modules(use_v2) + + transforms = [] + backend = backend.lower() + if backend == "tv_tensor": + transforms.append(T.ToImage()) + elif backend == "tensor": + transforms.append(T.PILToTensor()) + elif backend != "pil": + raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}") + + transforms += [T.RandomResize(min_size=int(0.5 * base_size), max_size=int(2.0 * base_size))] + + if hflip_prob > 0: + 
transforms += [T.RandomHorizontalFlip(hflip_prob)] + + if use_v2: + # We need a custom pad transform here, since the padding we want to perform here is fundamentally + # different from the padding in `RandomCrop` if `pad_if_needed=True`. + transforms += [v2_extras.PadIfSmaller(crop_size, fill={tv_tensors.Mask: 255, "others": 0})] + + transforms += [T.RandomCrop(crop_size)] + + if backend == "pil": + transforms += [T.PILToTensor()] + + if use_v2: + img_type = tv_tensors.Image if backend == "tv_tensor" else torch.Tensor + transforms += [ + T.ToDtype(dtype={img_type: torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True) + ] + else: + # No need to explicitly convert masks as they're magically int64 already + transforms += [T.ToDtype(torch.float, scale=True)] + + transforms += [T.Normalize(mean=mean, std=std)] + if use_v2: + transforms += [T.ToPureTensor()] + + self.transforms = T.Compose(transforms) + + def __call__(self, img, target): + return self.transforms(img, target) + + +class SegmentationPresetEval: + def __init__( + self, *, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), backend="pil", use_v2=False + ): + T, _, _ = get_modules(use_v2) + + transforms = [] + backend = backend.lower() + if backend == "tensor": + transforms += [T.PILToTensor()] + elif backend == "tv_tensor": + transforms += [T.ToImage()] + elif backend != "pil": + raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}") + + if use_v2: + transforms += [T.Resize(size=(base_size, base_size))] + else: + transforms += [T.RandomResize(min_size=base_size, max_size=base_size)] + + if backend == "pil": + # Note: we could just convert to pure tensors even in v2? 
+ transforms += [T.ToImage() if use_v2 else T.PILToTensor()] + + transforms += [ + T.ToDtype(torch.float, scale=True), + T.Normalize(mean=mean, std=std), + ] + if use_v2: + transforms += [T.ToPureTensor()] + + self.transforms = T.Compose(transforms) + + def __call__(self, img, target): + return self.transforms(img, target) diff --git a/references/segmentation/train.py b/references/segmentation/train.py index b1173d5323a..abdc3c6aacb 100644 --- a/references/segmentation/train.py +++ b/references/segmentation/train.py @@ -1,47 +1,56 @@ import datetime import os import time +import warnings +import presets import torch import torch.utils.data -from torch import nn import torchvision - -from coco_utils import get_coco -import transforms as T import utils +from coco_utils import get_coco +from torch import nn +from torch.optim.lr_scheduler import PolynomialLR +from torchvision.transforms import functional as F, InterpolationMode -def get_dataset(name, image_set, transform): +def get_dataset(args, is_train): def sbd(*args, **kwargs): - return torchvision.datasets.SBDataset(*args, mode='segmentation', **kwargs) + kwargs.pop("use_v2") + return torchvision.datasets.SBDataset(*args, mode="segmentation", **kwargs) + + def voc(*args, **kwargs): + kwargs.pop("use_v2") + return torchvision.datasets.VOCSegmentation(*args, **kwargs) + paths = { - "voc": ('/datasets01/VOC/060817/', torchvision.datasets.VOCSegmentation, 21), - "voc_aug": ('/datasets01/SBDD/072318/', sbd, 21), - "coco": ('/datasets01/COCO/022719/', get_coco, 21) + "voc": (args.data_path, voc, 21), + "voc_aug": (args.data_path, sbd, 21), + "coco": (args.data_path, get_coco, 21), } - p, ds_fn, num_classes = paths[name] + p, ds_fn, num_classes = paths[args.dataset] - ds = ds_fn(p, image_set=image_set, transforms=transform) + image_set = "train" if is_train else "val" + ds = ds_fn(p, image_set=image_set, transforms=get_transform(is_train, args), use_v2=args.use_v2) return ds, num_classes -def get_transform(train): - 
base_size = 520 - crop_size = 480 +def get_transform(is_train, args): + if is_train: + return presets.SegmentationPresetTrain(base_size=520, crop_size=480, backend=args.backend, use_v2=args.use_v2) + elif args.weights and args.test_only: + weights = torchvision.models.get_weight(args.weights) + trans = weights.transforms() - min_size = int((0.5 if train else 1.0) * base_size) - max_size = int((2.0 if train else 1.0) * base_size) - transforms = [] - transforms.append(T.RandomResize(min_size, max_size)) - if train: - transforms.append(T.RandomHorizontalFlip(0.5)) - transforms.append(T.RandomCrop(crop_size)) - transforms.append(T.ToTensor()) - transforms.append(T.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225])) + def preprocessing(img, target): + img = trans(img) + size = F.get_dimensions(img)[1:] + target = F.resize(target, size, interpolation=InterpolationMode.NEAREST) + return img, F.pil_to_tensor(target) - return T.Compose(transforms) + return preprocessing + else: + return presets.SegmentationPresetEval(base_size=520, backend=args.backend, use_v2=args.use_v2) def criterion(inputs, target): @@ -50,42 +59,66 @@ def criterion(inputs, target): losses[name] = nn.functional.cross_entropy(x, target, ignore_index=255) if len(losses) == 1: - return losses['out'] + return losses["out"] - return losses['out'] + 0.5 * losses['aux'] + return losses["out"] + 0.5 * losses["aux"] def evaluate(model, data_loader, device, num_classes): model.eval() confmat = utils.ConfusionMatrix(num_classes) metric_logger = utils.MetricLogger(delimiter=" ") - header = 'Test:' - with torch.no_grad(): + header = "Test:" + num_processed_samples = 0 + with torch.inference_mode(): for image, target in metric_logger.log_every(data_loader, 100, header): image, target = image.to(device), target.to(device) output = model(image) - output = output['out'] + output = output["out"] confmat.update(target.flatten(), output.argmax(1).flatten()) + # FIXME need to take into account that the 
datasets + # could have been padded in distributed setup + num_processed_samples += image.shape[0] confmat.reduce_from_all_processes() + num_processed_samples = utils.reduce_across_processes(num_processed_samples) + if ( + hasattr(data_loader.dataset, "__len__") + and len(data_loader.dataset) != num_processed_samples + and torch.distributed.get_rank() == 0 + ): + # See FIXME above + warnings.warn( + f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} " + "samples were used for the validation, which might bias the results. " + "Try adjusting the batch size and / or the world size. " + "Setting the world size to 1 is always a safe bet." + ) + return confmat -def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, print_freq): +def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, print_freq, scaler=None): model.train() metric_logger = utils.MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) - header = 'Epoch: [{}]'.format(epoch) + metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value}")) + header = f"Epoch: [{epoch}]" for image, target in metric_logger.log_every(data_loader, print_freq, header): image, target = image.to(device), target.to(device) - output = model(image) - loss = criterion(output, target) + with torch.cuda.amp.autocast(enabled=scaler is not None): + output = model(image) + loss = criterion(output, target) optimizer.zero_grad() - loss.backward() - optimizer.step() + if scaler is not None: + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: + loss.backward() + optimizer.step() lr_scheduler.step() @@ -93,6 +126,12 @@ def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, devi def main(args): + if args.backend.lower() != "pil" and not args.use_v2: + # TODO: Support tensor backend in V1? 
+ raise ValueError("Use --use-v2 if you want to use the tv_tensor or tensor backend.") + if args.use_v2 and args.dataset != "coco": + raise ValueError("v2 is only support supported for coco dataset for now.") + if args.output_dir: utils.mkdir(args.output_dir) @@ -101,47 +140,51 @@ def main(args): device = torch.device(args.device) - dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True)) - dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False)) + if args.use_deterministic_algorithms: + torch.backends.cudnn.benchmark = False + torch.use_deterministic_algorithms(True) + else: + torch.backends.cudnn.benchmark = True + + dataset, num_classes = get_dataset(args, is_train=True) + dataset_test, _ = get_dataset(args, is_train=False) if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False) else: train_sampler = torch.utils.data.RandomSampler(dataset) test_sampler = torch.utils.data.SequentialSampler(dataset_test) data_loader = torch.utils.data.DataLoader( - dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.workers, - collate_fn=utils.collate_fn, drop_last=True) + dataset, + batch_size=args.batch_size, + sampler=train_sampler, + num_workers=args.workers, + collate_fn=utils.collate_fn, + drop_last=True, + ) data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=1, - sampler=test_sampler, num_workers=args.workers, - collate_fn=utils.collate_fn) + dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn + ) - model = torchvision.models.segmentation.__dict__[args.model](num_classes=num_classes, - aux_loss=args.aux_loss, - pretrained=args.pretrained) + model = torchvision.models.get_model( + args.model, + weights=args.weights, + 
weights_backbone=args.weights_backbone, + num_classes=num_classes, + aux_loss=args.aux_loss, + ) model.to(device) if args.distributed: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model.load_state_dict(checkpoint['model']) - model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module - if args.test_only: - confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes) - print(confmat) - return - params_to_optimize = [ {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]}, @@ -149,58 +192,114 @@ def main(args): if args.aux_loss: params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad] params_to_optimize.append({"params": params, "lr": args.lr * 10}) - optimizer = torch.optim.SGD( - params_to_optimize, - lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + optimizer = torch.optim.SGD(params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) - lr_scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, - lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9) + scaler = torch.cuda.amp.GradScaler() if args.amp else None + + iters_per_epoch = len(data_loader) + main_lr_scheduler = PolynomialLR( + optimizer, total_iters=iters_per_epoch * (args.epochs - args.lr_warmup_epochs), power=0.9 + ) + + if args.lr_warmup_epochs > 0: + warmup_iters = iters_per_epoch * args.lr_warmup_epochs + args.lr_warmup_method = args.lr_warmup_method.lower() + if args.lr_warmup_method == "linear": + warmup_lr_scheduler = torch.optim.lr_scheduler.LinearLR( + optimizer, start_factor=args.lr_warmup_decay, total_iters=warmup_iters + ) + elif args.lr_warmup_method == "constant": + 
warmup_lr_scheduler = torch.optim.lr_scheduler.ConstantLR( + optimizer, factor=args.lr_warmup_decay, total_iters=warmup_iters + ) + else: + raise RuntimeError( + f"Invalid warmup lr method '{args.lr_warmup_method}'. Only linear and constant are supported." + ) + lr_scheduler = torch.optim.lr_scheduler.SequentialLR( + optimizer, schedulers=[warmup_lr_scheduler, main_lr_scheduler], milestones=[warmup_iters] + ) + else: + lr_scheduler = main_lr_scheduler + + if args.resume: + checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True) + model_without_ddp.load_state_dict(checkpoint["model"], strict=not args.test_only) + if not args.test_only: + optimizer.load_state_dict(checkpoint["optimizer"]) + lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) + args.start_epoch = checkpoint["epoch"] + 1 + if args.amp: + scaler.load_state_dict(checkpoint["scaler"]) + + if args.test_only: + # We disable the cudnn benchmarking because it can noticeably affect the accuracy + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes) + print(confmat) + return start_time = time.time() - for epoch in range(args.epochs): + for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) - train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, args.print_freq) + train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, args.print_freq, scaler) confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes) print(confmat) - utils.save_on_master( - { - 'model': model_without_ddp.state_dict(), - 'optimizer': optimizer.state_dict(), - 'epoch': epoch, - 'args': args - }, - os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": 
lr_scheduler.state_dict(), + "epoch": epoch, + "args": args, + } + if args.amp: + checkpoint["scaler"] = scaler.state_dict() + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) + print(f"Training time {total_time_str}") -def parse_args(): +def get_args_parser(add_help=True): import argparse - parser = argparse.ArgumentParser(description='PyTorch Segmentation Training') - - parser.add_argument('--dataset', default='voc', help='dataset') - parser.add_argument('--model', default='fcn_resnet101', help='model') - parser.add_argument('--aux-loss', action='store_true', help='auxiliar loss') - parser.add_argument('--device', default='cuda', help='device') - parser.add_argument('-b', '--batch-size', default=8, type=int) - parser.add_argument('--epochs', default=30, type=int, metavar='N', - help='number of total epochs to run') - - parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--lr', default=0.01, type=float, help='initial learning rate') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--print-freq', default=10, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') + + parser = argparse.ArgumentParser(description="PyTorch Segmentation Training", add_help=add_help) + + parser.add_argument("--data-path", default="/datasets01/COCO/022719/", type=str, 
help="dataset path") + parser.add_argument("--dataset", default="coco", type=str, help="dataset name") + parser.add_argument("--model", default="fcn_resnet101", type=str, help="model name") + parser.add_argument("--aux-loss", action="store_true", help="auxiliary loss") + parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)") + parser.add_argument( + "-b", "--batch-size", default=8, type=int, help="images per gpu, the total batch size is $NGPU x batch_size" + ) + parser.add_argument("--epochs", default=30, type=int, metavar="N", help="number of total epochs to run") + + parser.add_argument( + "-j", "--workers", default=16, type=int, metavar="N", help="number of data loading workers (default: 16)" + ) + parser.add_argument("--lr", default=0.01, type=float, help="initial learning rate") + parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") + parser.add_argument( + "--wd", + "--weight-decay", + default=1e-4, + type=float, + metavar="W", + help="weight decay (default: 1e-4)", + dest="weight_decay", + ) + parser.add_argument("--lr-warmup-epochs", default=0, type=int, help="the number of epochs to warmup (default: 0)") + parser.add_argument("--lr-warmup-method", default="linear", type=str, help="the warmup method (default: linear)") + parser.add_argument("--lr-warmup-decay", default=0.01, type=float, help="the decay for lr") + parser.add_argument("--print-freq", default=10, type=int, help="print frequency") + parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs") + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + parser.add_argument("--start-epoch", default=0, type=int, metavar="N", help="start epoch") parser.add_argument( "--test-only", dest="test_only", @@ -208,20 +307,23 @@ def parse_args(): action="store_true", ) parser.add_argument( - "--pretrained", - dest="pretrained", - help="Use pre-trained models from the 
modelzoo", - action="store_true", + "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." ) # distributed training parameters - parser.add_argument('--world-size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') + parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") + parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") + + parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load") + parser.add_argument("--weights-backbone", default=None, type=str, help="the backbone weights enum name to load") + + # Mixed precision training parameters + parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") - args = parser.parse_args() - return args + parser.add_argument("--backend", default="PIL", type=str.lower, help="PIL or tensor - case insensitive") + parser.add_argument("--use-v2", action="store_true", help="Use V2 transforms") + return parser if __name__ == "__main__": - args = parse_args() + args = get_args_parser().parse_args() main(args) diff --git a/references/segmentation/transforms.py b/references/segmentation/transforms.py index bce4bfbe639..6934b9f862e 100644 --- a/references/segmentation/transforms.py +++ b/references/segmentation/transforms.py @@ -1,7 +1,6 @@ -import numpy as np -from PIL import Image import random +import numpy as np import torch from torchvision import transforms as T from torchvision.transforms import functional as F @@ -17,7 +16,7 @@ def pad_if_smaller(img, size, fill=0): return img -class Compose(object): +class Compose: def __init__(self, transforms): self.transforms = transforms @@ -27,7 +26,7 @@ def __call__(self, image, target): return image, target -class RandomResize(object): 
+class RandomResize: def __init__(self, min_size, max_size=None): self.min_size = min_size if max_size is None: @@ -36,12 +35,12 @@ def __init__(self, min_size, max_size=None): def __call__(self, image, target): size = random.randint(self.min_size, self.max_size) - image = F.resize(image, size) - target = F.resize(target, size, interpolation=Image.NEAREST) + image = F.resize(image, size, antialias=True) + target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST) return image, target -class RandomHorizontalFlip(object): +class RandomHorizontalFlip: def __init__(self, flip_prob): self.flip_prob = flip_prob @@ -52,7 +51,7 @@ def __call__(self, image, target): return image, target -class RandomCrop(object): +class RandomCrop: def __init__(self, size): self.size = size @@ -65,7 +64,7 @@ def __call__(self, image, target): return image, target -class CenterCrop(object): +class CenterCrop: def __init__(self, size): self.size = size @@ -75,14 +74,26 @@ def __call__(self, image, target): return image, target -class ToTensor(object): +class PILToTensor: + def __call__(self, image, target): + image = F.pil_to_tensor(image) + target = torch.as_tensor(np.array(target), dtype=torch.int64) + return image, target + + +class ToDtype: + def __init__(self, dtype, scale=False): + self.dtype = dtype + self.scale = scale + def __call__(self, image, target): - image = F.to_tensor(image) - target = torch.as_tensor(np.asarray(target), dtype=torch.int64) + if not self.scale: + return image.to(dtype=self.dtype), target + image = F.convert_image_dtype(image, self.dtype) return image, target -class Normalize(object): +class Normalize: def __init__(self, mean, std): self.mean = mean self.std = std diff --git a/references/segmentation/utils.py b/references/segmentation/utils.py index 2719996c808..92db1899851 100644 --- a/references/segmentation/utils.py +++ b/references/segmentation/utils.py @@ -1,16 +1,14 @@ -from __future__ import print_function -from collections import 
defaultdict, deque import datetime -import math +import errno +import os import time +from collections import defaultdict, deque + import torch import torch.distributed as dist -import errno -import os - -class SmoothedValue(object): +class SmoothedValue: """Track a series of values and provide access to smoothed values over a window or the global series average. """ @@ -32,11 +30,7 @@ def synchronize_between_processes(self): """ Warning: does not synchronize the deque! """ - if not is_dist_avail_and_initialized(): - return - t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') - dist.barrier() - dist.all_reduce(t) + t = reduce_across_processes([self.count, self.total]) t = t.tolist() self.count = int(t[0]) self.total = t[1] @@ -65,14 +59,11 @@ def value(self): def __str__(self): return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) -class ConfusionMatrix(object): +class ConfusionMatrix: def __init__(self, num_classes): self.num_classes = num_classes self.mat = None @@ -81,7 +72,7 @@ def update(self, a, b): n = self.num_classes if self.mat is None: self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device) - with torch.no_grad(): + with torch.inference_mode(): k = (a >= 0) & (a < n) inds = n * a[k].to(torch.int64) + b[k] self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) @@ -97,27 +88,19 @@ def compute(self): return acc_global, acc, iu def reduce_from_all_processes(self): - if not torch.distributed.is_available(): - return - if not torch.distributed.is_initialized(): - return - torch.distributed.barrier() - torch.distributed.all_reduce(self.mat) + self.mat = reduce_across_processes(self.mat).to(torch.int64) def __str__(self): acc_global, acc, iu = self.compute() - return ( - 'global correct: {:.1f}\n' - 'average row correct: {}\n' - 'IoU: {}\n' - 
'mean IoU: {:.1f}').format( - acc_global.item() * 100, - ['{:.1f}'.format(i) for i in (acc * 100).tolist()], - ['{:.1f}'.format(i) for i in (iu * 100).tolist()], - iu.mean().item() * 100) - - -class MetricLogger(object): + return ("global correct: {:.1f}\naverage row correct: {}\nIoU: {}\nmean IoU: {:.1f}").format( + acc_global.item() * 100, + [f"{i:.1f}" for i in (acc * 100).tolist()], + [f"{i:.1f}" for i in (iu * 100).tolist()], + iu.mean().item() * 100, + ) + + +class MetricLogger: def __init__(self, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter @@ -126,7 +109,10 @@ def update(self, **kwargs): for k, v in kwargs.items(): if isinstance(v, torch.Tensor): v = v.item() - assert isinstance(v, (float, int)) + if not isinstance(v, (float, int)): + raise TypeError( + f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + ) self.meters[k].update(v) def __getattr__(self, attr): @@ -134,15 +120,12 @@ def __getattr__(self, attr): return self.meters[attr] if attr in self.__dict__: return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") def __str__(self): loss_str = [] for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) + loss_str.append(f"{name}: {str(meter)}") return self.delimiter.join(loss_str) def synchronize_between_processes(self): @@ -155,31 +138,28 @@ def add_meter(self, name, meter): def log_every(self, iterable, print_freq, header=None): i = 0 if not header: - header = '' + header = "" start_time = time.time() end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + 
str(len(str(len(iterable)))) + "d" if torch.cuda.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}' - ]) + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) MB = 1024.0 * 1024.0 for obj in iterable: data_time.update(time.time() - end) @@ -189,21 +169,28 @@ def log_every(self, iterable, print_freq, header=None): eta_seconds = iter_time.global_avg * (len(iterable) - i) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if torch.cuda.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=torch.cuda.max_memory_allocated() / MB)) + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) i += 1 end = time.time() total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {}'.format(header, total_time_str)) + print(f"{header} Total time: {total_time_str}") def cat_list(images, fill_value=0): @@ -211,7 +198,7 @@ def cat_list(images, fill_value=0): batch_shape = (len(images),) + 
max_size batched_imgs = images[0].new(*batch_shape).fill_(fill_value) for img, pad_img in zip(images, batched_imgs): - pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) + pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) return batched_imgs @@ -235,10 +222,11 @@ def setup_for_distributed(is_master): This function disables printing when not in master process """ import builtins as __builtin__ + builtin_print = __builtin__.print def print(*args, **kwargs): - force = kwargs.pop('force', False) + force = kwargs.pop("force", False) if is_master or force: builtin_print(*args, **kwargs) @@ -275,26 +263,38 @@ def save_on_master(*args, **kwargs): def init_distributed_mode(args): - if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: args.rank = int(os.environ["RANK"]) - args.world_size = int(os.environ['WORLD_SIZE']) - args.gpu = int(os.environ['LOCAL_RANK']) - elif 'SLURM_PROCID' in os.environ: - args.rank = int(os.environ['SLURM_PROCID']) - args.gpu = args.rank % torch.cuda.device_count() + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + # elif "SLURM_PROCID" in os.environ: + # args.rank = int(os.environ["SLURM_PROCID"]) + # args.gpu = args.rank % torch.cuda.device_count() elif hasattr(args, "rank"): pass else: - print('Not using distributed mode') + print("Not using distributed mode") args.distributed = False return args.distributed = True torch.cuda.set_device(args.gpu) - args.dist_backend = 'nccl' - print('| distributed init (rank {}): {}'.format( - args.rank, args.dist_url), flush=True) - torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) + args.dist_backend = "nccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, 
rank=args.rank + ) + torch.distributed.barrier() setup_for_distributed(args.rank == 0) + + +def reduce_across_processes(val): + if not is_dist_avail_and_initialized(): + # nothing to sync, but we still convert to tensor for consistency with the distributed case. + return torch.tensor(val) + + t = torch.tensor(val, device="cuda") + dist.barrier() + dist.all_reduce(t) + return t diff --git a/references/segmentation/v2_extras.py b/references/segmentation/v2_extras.py new file mode 100644 index 00000000000..2d9eb3e661a --- /dev/null +++ b/references/segmentation/v2_extras.py @@ -0,0 +1,83 @@ +"""This file only exists to be lazy-imported and avoid V2-related import warnings when just using V1.""" +import torch +from torchvision import tv_tensors +from torchvision.transforms import v2 + + +class PadIfSmaller(v2.Transform): + def __init__(self, size, fill=0): + super().__init__() + self.size = size + self.fill = v2._utils._setup_fill_arg(fill) + + def make_params(self, sample): + _, height, width = v2._utils.query_chw(sample) + padding = [0, 0, max(self.size - width, 0), max(self.size - height, 0)] + needs_padding = any(padding) + return dict(padding=padding, needs_padding=needs_padding) + + def transform(self, inpt, params): + if not params["needs_padding"]: + return inpt + + fill = v2._utils._get_fill(self.fill, type(inpt)) + fill = v2._utils._convert_fill_arg(fill) + + return v2.functional.pad(inpt, padding=params["padding"], fill=fill) + + +class CocoDetectionToVOCSegmentation(v2.Transform): + """Turn samples from datasets.CocoDetection into the same format as VOCSegmentation. + + This is achieved in two steps: + + 1. COCO differentiates between 91 categories while VOC only supports 21, including background for both. Fortunately, + the COCO categories are a superset of the VOC ones and thus can be mapped. Instances of the 70 categories not + present in VOC are dropped and replaced by background. + 2. COCO only offers detection masks, i.e. 
a (N, H, W) bool-ish tensor, where the truthy values in each individual + mask denote the instance. However, a segmentation mask is a (H, W) integer tensor (typically torch.uint8), where + the value of each pixel denotes the category it belongs to. The detection masks are merged into one segmentation + mask while pixels that belong to multiple detection masks are marked as invalid. + """ + + COCO_TO_VOC_LABEL_MAP = dict( + zip( + [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72], + range(21), + ) + ) + INVALID_VALUE = 255 + + def _coco_detection_masks_to_voc_segmentation_mask(self, target): + if "masks" not in target: + return None + + instance_masks, instance_labels_coco = target["masks"], target["labels"] + + valid_labels_voc = [ + (idx, label_voc) + for idx, label_coco in enumerate(instance_labels_coco.tolist()) + if (label_voc := self.COCO_TO_VOC_LABEL_MAP.get(label_coco)) is not None + ] + + if not valid_labels_voc: + return None + + valid_voc_category_idcs, instance_labels_voc = zip(*valid_labels_voc) + + instance_masks = instance_masks[list(valid_voc_category_idcs)].to(torch.uint8) + instance_labels_voc = torch.tensor(instance_labels_voc, dtype=torch.uint8) + + # Calling `.max()` on the stacked detection masks works fine to separate background from foreground as long as + # there is at most a single instance per pixel. Overlapping instances will be filtered out in the next step. 
+ segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0) + segmentation_mask[instance_masks.sum(dim=0) > 1] = self.INVALID_VALUE + + return segmentation_mask + + def forward(self, image, target): + segmentation_mask = self._coco_detection_masks_to_voc_segmentation_mask(target) + if segmentation_mask is None: + segmentation_mask = torch.zeros(v2.functional.get_size(image), dtype=torch.uint8) + + return image, tv_tensors.Mask(segmentation_mask) diff --git a/references/similarity/loss.py b/references/similarity/loss.py index 3e467b74c52..971810a0663 100644 --- a/references/similarity/loss.py +++ b/references/similarity/loss.py @@ -1,21 +1,21 @@ -''' +""" Pytorch adaptation of https://omoindrot.github.io/triplet-loss https://github.com/omoindrot/tensorflow-triplet-loss -''' +""" import torch import torch.nn as nn class TripletMarginLoss(nn.Module): - def __init__(self, margin=1.0, p=2., mining='batch_all'): - super(TripletMarginLoss, self).__init__() + def __init__(self, margin=1.0, p=2.0, mining="batch_all"): + super().__init__() self.margin = margin self.p = p self.mining = mining - if mining == 'batch_all': + if mining == "batch_all": self.loss_fn = batch_all_triplet_loss - if mining == 'batch_hard': + if mining == "batch_hard": self.loss_fn = batch_hard_triplet_loss def forward(self, embeddings, labels): @@ -77,7 +77,7 @@ def batch_all_triplet_loss(labels, embeddings, margin, p): def _get_triplet_mask(labels): # Check that i, j and k are distinct - indices_equal = torch.eye(labels.size(0), dtype=torch.uint8, device=labels.device) + indices_equal = torch.eye(labels.size(0), dtype=torch.bool, device=labels.device) indices_not_equal = ~indices_equal i_not_equal_j = indices_not_equal.unsqueeze(2) i_not_equal_k = indices_not_equal.unsqueeze(1) @@ -96,7 +96,7 @@ def _get_triplet_mask(labels): def _get_anchor_positive_triplet_mask(labels): # Check that i and j are distinct - indices_equal = torch.eye(labels.size(0), dtype=torch.uint8, 
device=labels.device) + indices_equal = torch.eye(labels.size(0), dtype=torch.bool, device=labels.device) indices_not_equal = ~indices_equal # Check if labels[i] == labels[j] diff --git a/references/similarity/model.py b/references/similarity/model.py index 797ad41a48b..f235ae11116 100644 --- a/references/similarity/model.py +++ b/references/similarity/model.py @@ -1,11 +1,10 @@ -import torch import torch.nn as nn import torchvision.models as models class EmbeddingNet(nn.Module): def __init__(self, backbone=None): - super(EmbeddingNet, self).__init__() + super().__init__() if backbone is None: backbone = models.resnet50(num_classes=128) diff --git a/references/similarity/sampler.py b/references/similarity/sampler.py index 0ae6d07a77c..fe6517418ab 100644 --- a/references/similarity/sampler.py +++ b/references/similarity/sampler.py @@ -1,7 +1,8 @@ +import random +from collections import defaultdict + import torch from torch.utils.data.sampler import Sampler -from collections import defaultdict -import random def create_groups(groups, k): @@ -46,7 +47,8 @@ def __init__(self, groups, p, k): self.groups = create_groups(groups, self.k) # Ensures there are enough classes to sample from - assert len(self.groups) >= p + if len(self.groups) < p: + raise ValueError("There are not enough classes to sample from") def __iter__(self): # Shuffle samples within groups diff --git a/references/similarity/test.py b/references/similarity/test.py index a1e646111c8..3b9848594b6 100644 --- a/references/similarity/test.py +++ b/references/similarity/test.py @@ -1,15 +1,14 @@ import unittest from collections import defaultdict -from torch.utils.data import DataLoader -from torchvision.datasets import FakeData +import torch import torchvision.transforms as transforms - from sampler import PKSampler +from torch.utils.data import DataLoader +from torchvision.datasets import FakeData class Tester(unittest.TestCase): - def test_pksampler(self): p, k = 16, 4 @@ -19,24 +18,29 @@ def 
test_pksampler(self): self.assertRaises(AssertionError, PKSampler, targets, p, k) # Ensure p, k constraints on batch - dataset = FakeData(size=1000, num_classes=100, image_size=(3, 1, 1), - transform=transforms.ToTensor()) + trans = transforms.Compose( + [ + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + ] + ) + dataset = FakeData(size=1000, num_classes=100, image_size=(3, 1, 1), transform=trans) targets = [target.item() for _, target in dataset] sampler = PKSampler(targets, p, k) loader = DataLoader(dataset, batch_size=p * k, sampler=sampler) for _, labels in loader: bins = defaultdict(int) - for l in labels.tolist(): - bins[l] += 1 + for label in labels.tolist(): + bins[label] += 1 # Ensure that each batch has samples from exactly p classes self.assertEqual(len(bins), p) # Ensure that there are k samples from each class - for l in bins: - self.assertEqual(bins[l], k) + for b in bins: + self.assertEqual(bins[b], k) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/references/similarity/train.py b/references/similarity/train.py index 9a166a14b38..7686729927e 100644 --- a/references/similarity/train.py +++ b/references/similarity/train.py @@ -1,15 +1,13 @@ import os import torch -from torch.optim import Adam -from torch.utils.data import DataLoader - import torchvision.transforms as transforms -from torchvision.datasets import FashionMNIST - from loss import TripletMarginLoss -from sampler import PKSampler from model import EmbeddingNet +from sampler import PKSampler +from torch.optim import Adam +from torch.utils.data import DataLoader +from torchvision.datasets import FashionMNIST def train_epoch(model, optimizer, criterion, data_loader, device, epoch, print_freq): @@ -33,7 +31,7 @@ def train_epoch(model, optimizer, criterion, data_loader, device, epoch, print_f i += 1 avg_loss = running_loss / print_freq avg_trip = 100.0 * running_frac_pos_triplets / print_freq - print('[{:d}, {:d}] | loss: {:.4f} | % 
avg hard triplets: {:.2f}%'.format(epoch, i, avg_loss, avg_trip)) + print(f"[{epoch:d}, {i:d}] | loss: {avg_loss:.4f} | % avg hard triplets: {avg_trip:.2f}%") running_loss = 0 running_frac_pos_triplets = 0 @@ -53,7 +51,7 @@ def find_best_threshold(dists, targets, device): return best_thresh, accuracy -@torch.no_grad() +@torch.inference_mode() def evaluate(model, loader, device): model.eval() embeds, labels = [], [] @@ -79,33 +77,45 @@ def evaluate(model, loader, device): threshold, accuracy = find_best_threshold(dists, targets, device) - print('accuracy: {:.3f}%, threshold: {:.2f}'.format(accuracy, threshold)) + print(f"accuracy: {accuracy:.3f}%, threshold: {threshold:.2f}") def save(model, epoch, save_dir, file_name): - file_name = 'epoch_' + str(epoch) + '__' + file_name + file_name = "epoch_" + str(epoch) + "__" + file_name save_path = os.path.join(save_dir, file_name) torch.save(model.state_dict(), save_path) def main(args): - device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + if args.use_deterministic_algorithms: + torch.backends.cudnn.benchmark = False + torch.use_deterministic_algorithms(True) + else: + torch.backends.cudnn.benchmark = True + p = args.labels_per_batch k = args.samples_per_label batch_size = p * k model = EmbeddingNet() if args.resume: - model.load_state_dict(torch.load(args.resume)) + model.load_state_dict(torch.load(args.resume, weights_only=True)) model.to(device) criterion = TripletMarginLoss(margin=args.margin) optimizer = Adam(model.parameters(), lr=args.lr) - transform = transforms.Compose([transforms.Lambda(lambda image: image.convert('RGB')), - transforms.Resize((224, 224)), - transforms.ToTensor()]) + transform = transforms.Compose( + [ + transforms.Lambda(lambda image: image.convert("RGB")), + transforms.Resize((224, 224)), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + ] + ) # Using FMNIST to demonstrate 
embedding learning using triplet loss. This dataset can # be replaced with any classification dataset. @@ -118,48 +128,60 @@ def main(args): # targets attribute with the same format. targets = train_dataset.targets.tolist() - train_loader = DataLoader(train_dataset, batch_size=batch_size, - sampler=PKSampler(targets, p, k), - num_workers=args.workers) - test_loader = DataLoader(test_dataset, batch_size=args.eval_batch_size, - shuffle=False, - num_workers=args.workers) + train_loader = DataLoader( + train_dataset, batch_size=batch_size, sampler=PKSampler(targets, p, k), num_workers=args.workers + ) + test_loader = DataLoader(test_dataset, batch_size=args.eval_batch_size, shuffle=False, num_workers=args.workers) + + if args.test_only: + # We disable the cudnn benchmarking because it can noticeably affect the accuracy + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + evaluate(model, test_loader, device) + return for epoch in range(1, args.epochs + 1): - print('Training...') + print("Training...") train_epoch(model, optimizer, criterion, train_loader, device, epoch, args.print_freq) - print('Evaluating...') + print("Evaluating...") evaluate(model, test_loader, device) - print('Saving...') - save(model, epoch, args.save_dir, 'ckpt.pth') + print("Saving...") + save(model, epoch, args.save_dir, "ckpt.pth") def parse_args(): import argparse - parser = argparse.ArgumentParser(description='PyTorch Embedding Learning') - - parser.add_argument('--dataset-dir', default='/tmp/fmnist/', - help='FashionMNIST dataset directory path') - parser.add_argument('-p', '--labels-per-batch', default=8, type=int, - help='Number of unique labels/classes per batch') - parser.add_argument('-k', '--samples-per-label', default=8, type=int, - help='Number of samples per label in a batch') - parser.add_argument('--eval-batch-size', default=512, type=int) - parser.add_argument('--epochs', default=10, type=int, metavar='N', - help='Number of training epochs to 
run') - parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='Number of data loading workers') - parser.add_argument('--lr', default=0.0001, type=float, help='Learning rate') - parser.add_argument('--margin', default=0.2, type=float, help='Triplet loss margin') - parser.add_argument('--print-freq', default=20, type=int, help='Print frequency') - parser.add_argument('--save-dir', default='.', help='Model save directory') - parser.add_argument('--resume', default='', help='Resume from checkpoint') + + parser = argparse.ArgumentParser(description="PyTorch Embedding Learning") + + parser.add_argument("--dataset-dir", default="/tmp/fmnist/", type=str, help="FashionMNIST dataset directory path") + parser.add_argument( + "-p", "--labels-per-batch", default=8, type=int, help="Number of unique labels/classes per batch" + ) + parser.add_argument("-k", "--samples-per-label", default=8, type=int, help="Number of samples per label in a batch") + parser.add_argument("--eval-batch-size", default=512, type=int, help="batch size for evaluation") + parser.add_argument("--epochs", default=10, type=int, metavar="N", help="number of total epochs to run") + parser.add_argument("-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers") + parser.add_argument("--lr", default=0.0001, type=float, help="initial learning rate") + parser.add_argument("--margin", default=0.2, type=float, help="Triplet loss margin") + parser.add_argument("--print-freq", default=20, type=int, help="print frequency") + parser.add_argument("--save-dir", default=".", type=str, help="Model save directory") + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + parser.add_argument( + "--test-only", + dest="test_only", + help="Only test the model", + action="store_true", + ) + parser.add_argument( + "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." 
+ ) return parser.parse_args() -if __name__ == '__main__': +if __name__ == "__main__": args = parse_args() main(args) diff --git a/references/video_classification/README.md b/references/video_classification/README.md new file mode 100644 index 00000000000..39c5d8f1bba --- /dev/null +++ b/references/video_classification/README.md @@ -0,0 +1,125 @@ +# Video Classification + +We present a simple training script that can be used for replicating the result of [resenet-based video models](https://research.fb.com/wp-content/uploads/2018/04/a-closer-look-at-spatiotemporal-convolutions-for-action-recognition.pdf). All models are trained on [Kinetics400 dataset](https://deepmind.com/research/open-source/kinetics), a benchmark dataset for human-action recognition. The accuracy is reported on the traditional validation split. + +## Data preparation + +If you already have downloaded [Kinetics400 dataset](https://deepmind.com/research/open-source/kinetics), +please proceed directly to the next section. + +To download videos, one can use https://github.com/Showmax/kinetics-downloader. Please note that the dataset can take up upwards of 400GB, depending on the quality setting during download. + +## Training + +We assume the training and validation AVI videos are stored at `/data/kinectics400/train` and +`/data/kinectics400/val`. For training we suggest starting with the hyperparameters reported in the [paper](https://research.fb.com/wp-content/uploads/2018/04/a-closer-look-at-spatiotemporal-convolutions-for-action-recognition.pdf), in order to match the performance of said models. Clip sampling strategy is a particularly important parameter during training, and we suggest using random temporal jittering during training - in other words sampling multiple training clips from each video with random start times during at every epoch. This functionality is built into our training script, and optimal hyperparameters are set by default. 
+ +### Multiple GPUs + +Run the training on a single node with 8 GPUs: +```bash +torchrun --nproc_per_node=8 train.py --data-path=/data/kinectics400 --kinetics-version="400" --lr 0.08 --cache-dataset --sync-bn --amp +``` + +**Note:** all our models were trained on 8 nodes with 8 V100 GPUs each for a total of 64 GPUs. Expected training time for 64 GPUs is 24 hours, depending on the storage solution. +**Note 2:** hyperparameters for exact replication of our training can be found on the section below. Some hyperparameters such as learning rate must be scaled linearly in proportion to the number of GPUs. The default values assume 64 GPUs. + +### Single GPU + +**Note:** training on a single gpu can be extremely slow. + + +```bash +python train.py --data-path=/data/kinectics400 --kinetics-version="400" --batch-size=8 --cache-dataset +``` + + +### Additional Kinetics versions + +Since the original release, additional versions of Kinetics dataset became available (Kinetics 600). +Our training scripts support these versions of dataset as well by setting the `--kinetics-version` parameter to `"600"`. + +**Note:** training on Kinetics 600 requires a different set of hyperparameters for optimal performance. We do not provide Kinetics 600 pretrained models. + + +## Video classification models + +Starting with version `0.4.0` we have introduced support for basic video tasks and video classification modelling. +For more information about the available models check [here](https://pytorch.org/docs/stable/torchvision/models.html#video-classification). + +### Video ResNet models + +See reference training script [here](https://github.com/pytorch/vision/blob/main/references/video_classification/train.py): + +- input space: RGB +- resize size: [128, 171] +- crop size: [112, 112] +- mean: [0.43216, 0.394666, 0.37645] +- std: [0.22803, 0.22145, 0.216989] +- number of classes: 400 + +Input data augmentations at training time (with optional parameters): + +1. ConvertImageDtype +2. 
Resize (resize size value above) +3. Random horizontal flip (0.5) +4. Normalization (mean, std, see values above) +5. Random Crop (crop size value above) +6. Convert BCHW to CBHW + +Input data augmentations at validation time (with optional parameters): + +1. ConvertImageDtype +2. Resize (resize size value above) +3. Normalization (mean, std, see values above) +4. Center Crop (crop size value above) +5. Convert BCHW to CBHW + +This translates in the following set of command-line arguments. Please note that `--batch-size` parameter controls the +batch size per GPU. Moreover, note that our default `--lr` is configured for 64 GPUs which is how many we used for the +Video resnet models: +``` +# number of frames per clip +--clip_len 16 \ +--frame-rate 15 \ +# allow for temporal jittering +--clips_per_video 5 \ +--batch-size 24 \ +--epochs 45 \ +--lr 0.64 \ +# we use 10 epochs for linear warmup +--lr-warmup-epochs 10 \ +# learning rate is decayed at 20, 30, and 40 epoch by a factor of 10 +--lr-milestones 20, 30, 40 \ +--lr-gamma 0.1 \ +--train-resize-size 128 171 \ +--train-crop-size 112 112 \ +--val-resize-size 128 171 \ +--val-crop-size 112 112 +``` + +### S3D + +The S3D model was trained similarly to the above but with the following changes on the default configuration: +``` +--batch-size=12 --lr 0.2 --clip-len 64 --clips-per-video 5 --sync-bn \ +--train-resize-size 256 256 --train-crop-size 224 224 --val-resize-size 256 256 --val-crop-size 224 224 +``` + +We used 64 GPUs to train the architecture. + +To estimate the validation statistics of the model, we run the reference script with the following configuration: +``` +--batch-size=16 --test-only --clip-len 128 --clips-per-video 1 +``` + +### Additional video modelling resources + +- [Video Model Zoo](https://github.com/facebookresearch/VMZ) +- [PySlowFast](https://github.com/facebookresearch/SlowFast) + +### References + +[0] _D. Tran, H. Wang, L. Torresani, J. Ray, Y. LeCun and M. 
Paluri_: A Closer Look at Spatiotemporal Convolutions for Action Recognition. _CVPR 2018_ ([paper](https://research.fb.com/wp-content/uploads/2018/04/a-closer-look-at-spatiotemporal-convolutions-for-action-recognition.pdf)) + +[1] _W. Kay, J. Carreira, K. Simonyan, B. Zhang, C. Hillier, S. Vijayanarasimhan, F. Viola, T. Green, T. Back, P. Natsev, M. Suleyman, A. Zisserman_: The Kinetics Human Action Video Dataset ([paper](https://arxiv.org/abs/1705.06950)) diff --git a/references/video_classification/datasets.py b/references/video_classification/datasets.py new file mode 100644 index 00000000000..dec1e16b856 --- /dev/null +++ b/references/video_classification/datasets.py @@ -0,0 +1,15 @@ +from typing import Tuple + +import torchvision +from torch import Tensor + + +class KineticsWithVideoId(torchvision.datasets.Kinetics): + def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int]: + video, audio, info, video_idx = self.video_clips.get_clip(idx) + label = self.samples[video_idx][1] + + if self.transform is not None: + video = self.transform(video) + + return video, audio, label, video_idx diff --git a/references/video_classification/presets.py b/references/video_classification/presets.py new file mode 100644 index 00000000000..f73802c9666 --- /dev/null +++ b/references/video_classification/presets.py @@ -0,0 +1,50 @@ +import torch +from torchvision.transforms import transforms +from transforms import ConvertBCHWtoCBHW + + +class VideoClassificationPresetTrain: + def __init__( + self, + *, + crop_size, + resize_size, + mean=(0.43216, 0.394666, 0.37645), + std=(0.22803, 0.22145, 0.216989), + hflip_prob=0.5, + ): + trans = [ + transforms.ConvertImageDtype(torch.float32), + # We hard-code antialias=False to preserve results after we changed + # its default from None to True (see + # https://github.com/pytorch/vision/pull/7160) + # TODO: we could re-train the video models with antialias=True? 
+ transforms.Resize(resize_size, antialias=False), + ] + if hflip_prob > 0: + trans.append(transforms.RandomHorizontalFlip(hflip_prob)) + trans.extend([transforms.Normalize(mean=mean, std=std), transforms.RandomCrop(crop_size), ConvertBCHWtoCBHW()]) + self.transforms = transforms.Compose(trans) + + def __call__(self, x): + return self.transforms(x) + + +class VideoClassificationPresetEval: + def __init__(self, *, crop_size, resize_size, mean=(0.43216, 0.394666, 0.37645), std=(0.22803, 0.22145, 0.216989)): + self.transforms = transforms.Compose( + [ + transforms.ConvertImageDtype(torch.float32), + # We hard-code antialias=False to preserve results after we changed + # its default from None to True (see + # https://github.com/pytorch/vision/pull/7160) + # TODO: we could re-train the video models with antialias=True? + transforms.Resize(resize_size, antialias=False), + transforms.Normalize(mean=mean, std=std), + transforms.CenterCrop(crop_size), + ConvertBCHWtoCBHW(), + ] + ) + + def __call__(self, x): + return self.transforms(x) diff --git a/references/video_classification/scheduler.py b/references/video_classification/scheduler.py deleted file mode 100644 index f0f862d41ad..00000000000 --- a/references/video_classification/scheduler.py +++ /dev/null @@ -1,47 +0,0 @@ -import torch -from bisect import bisect_right - - -class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): - def __init__( - self, - optimizer, - milestones, - gamma=0.1, - warmup_factor=1.0 / 3, - warmup_iters=5, - warmup_method="linear", - last_epoch=-1, - ): - if not milestones == sorted(milestones): - raise ValueError( - "Milestones should be a list of" " increasing integers. 
Got {}", - milestones, - ) - - if warmup_method not in ("constant", "linear"): - raise ValueError( - "Only 'constant' or 'linear' warmup_method accepted" - "got {}".format(warmup_method) - ) - self.milestones = milestones - self.gamma = gamma - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) - - def get_lr(self): - warmup_factor = 1 - if self.last_epoch < self.warmup_iters: - if self.warmup_method == "constant": - warmup_factor = self.warmup_factor - elif self.warmup_method == "linear": - alpha = float(self.last_epoch) / self.warmup_iters - warmup_factor = self.warmup_factor * (1 - alpha) + alpha - return [ - base_lr * - warmup_factor * - self.gamma ** bisect_right(self.milestones, self.last_epoch) - for base_lr in self.base_lrs - ] diff --git a/references/video_classification/train.py b/references/video_classification/train.py index 8e41f9ec474..a03a9722003 100644 --- a/references/video_classification/train.py +++ b/references/video_classification/train.py @@ -1,88 +1,126 @@ -from __future__ import print_function import datetime import os import time -import sys +import warnings +import datasets +import presets import torch import torch.utils.data -from torch.utils.data.dataloader import default_collate -from torch import nn import torchvision import torchvision.datasets.video_utils -from torchvision import transforms -from torchvision.datasets.samplers import DistributedSampler, UniformClipSampler, RandomClipSampler - import utils - -from scheduler import WarmupMultiStepLR -import transforms as T - -try: - from apex import amp -except ImportError: - amp = None +from torch import nn +from torch.utils.data.dataloader import default_collate +from torchvision.datasets.samplers import DistributedSampler, RandomClipSampler, UniformClipSampler -def train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader, device, epoch, print_freq, 
apex=False): +def train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader, device, epoch, print_freq, scaler=None): model.train() metric_logger = utils.MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) - metric_logger.add_meter('clips/s', utils.SmoothedValue(window_size=10, fmt='{value:.3f}')) + metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value}")) + metric_logger.add_meter("clips/s", utils.SmoothedValue(window_size=10, fmt="{value:.3f}")) - header = 'Epoch: [{}]'.format(epoch) - for video, target in metric_logger.log_every(data_loader, print_freq, header): + header = f"Epoch: [{epoch}]" + for video, target, _ in metric_logger.log_every(data_loader, print_freq, header): start_time = time.time() video, target = video.to(device), target.to(device) - output = model(video) - loss = criterion(output, target) + with torch.cuda.amp.autocast(enabled=scaler is not None): + output = model(video) + loss = criterion(output, target) optimizer.zero_grad() - if apex: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() + + if scaler is not None: + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() else: loss.backward() - optimizer.step() + optimizer.step() acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) batch_size = video.shape[0] metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) - metric_logger.meters['clips/s'].update(batch_size / (time.time() - start_time)) + metric_logger.meters["acc1"].update(acc1.item(), n=batch_size) + metric_logger.meters["acc5"].update(acc5.item(), n=batch_size) + metric_logger.meters["clips/s"].update(batch_size / (time.time() - start_time)) lr_scheduler.step() def evaluate(model, criterion, data_loader, device): model.eval() metric_logger = 
utils.MetricLogger(delimiter=" ") - header = 'Test:' - with torch.no_grad(): - for video, target in metric_logger.log_every(data_loader, 100, header): + header = "Test:" + num_processed_samples = 0 + # Group and aggregate output of a video + num_videos = len(data_loader.dataset.samples) + num_classes = len(data_loader.dataset.classes) + agg_preds = torch.zeros((num_videos, num_classes), dtype=torch.float32, device=device) + agg_targets = torch.zeros((num_videos), dtype=torch.int32, device=device) + with torch.inference_mode(): + for video, target, video_idx in metric_logger.log_every(data_loader, 100, header): video = video.to(device, non_blocking=True) target = target.to(device, non_blocking=True) output = model(video) loss = criterion(output, target) + # Use softmax to convert output into prediction probability + preds = torch.softmax(output, dim=1) + for b in range(video.size(0)): + idx = video_idx[b].item() + agg_preds[idx] += preds[b].detach() + agg_targets[idx] = target[b].detach().item() + acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) # FIXME need to take into account that the datasets # could have been padded in distributed setup batch_size = video.shape[0] metric_logger.update(loss=loss.item()) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + metric_logger.meters["acc1"].update(acc1.item(), n=batch_size) + metric_logger.meters["acc5"].update(acc5.item(), n=batch_size) + num_processed_samples += batch_size # gather the stats from all processes + num_processed_samples = utils.reduce_across_processes(num_processed_samples) + if isinstance(data_loader.sampler, DistributedSampler): + # Get the len of UniformClipSampler inside DistributedSampler + num_data_from_sampler = len(data_loader.sampler.dataset) + else: + num_data_from_sampler = len(data_loader.sampler) + + if ( + hasattr(data_loader.dataset, "__len__") + and num_data_from_sampler != num_processed_samples + and 
torch.distributed.get_rank() == 0 + ): + # See FIXME above + warnings.warn( + f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} " + "samples were used for the validation, which might bias the results. " + "Try adjusting the batch size and / or the world size. " + "Setting the world size to 1 is always a safe bet." + ) + metric_logger.synchronize_between_processes() - print(' * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}' - .format(top1=metric_logger.acc1, top5=metric_logger.acc5)) + print( + " * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".format( + top1=metric_logger.acc1, top5=metric_logger.acc5 + ) + ) + # Reduce the agg_preds and agg_targets from all gpu and show result + agg_preds = utils.reduce_across_processes(agg_preds) + agg_targets = utils.reduce_across_processes(agg_targets, op=torch.distributed.ReduceOp.MAX) + agg_acc1, agg_acc5 = utils.accuracy(agg_preds, agg_targets, topk=(1, 5)) + print(" * Video Acc@1 {acc1:.3f} Video Acc@5 {acc5:.3f}".format(acc1=agg_acc1, acc5=agg_acc5)) return metric_logger.acc1.global_avg -def _get_cache_path(filepath): +def _get_cache_path(filepath, args): import hashlib - h = hashlib.sha1(filepath.encode()).hexdigest() + + value = f"{filepath}-{args.clip_len}-{args.kinetics_version}-{args.frame_rate}" + h = hashlib.sha1(value.encode()).hexdigest() cache_path = os.path.join("~", ".torch", "vision", "datasets", "kinetics", h[:10] + ".pt") cache_path = os.path.expanduser(cache_path) return cache_path @@ -90,97 +128,100 @@ def _get_cache_path(filepath): def collate_fn(batch): # remove audio from the batch - batch = [(d[0], d[2]) for d in batch] + batch = [(d[0], d[2], d[3]) for d in batch] return default_collate(batch) def main(args): - if args.apex: - if sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. 
Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") - if args.output_dir: utils.mkdir(args.output_dir) utils.init_distributed_mode(args) print(args) - print("torch version: ", torch.__version__) - print("torchvision version: ", torchvision.__version__) device = torch.device(args.device) - torch.backends.cudnn.benchmark = True + if args.use_deterministic_algorithms: + torch.backends.cudnn.benchmark = False + torch.use_deterministic_algorithms(True) + else: + torch.backends.cudnn.benchmark = True # Data loading code print("Loading data") - traindir = os.path.join(args.data_path, 'train_avi-480p') - valdir = os.path.join(args.data_path, 'val_avi-480p') - normalize = T.Normalize(mean=[0.43216, 0.394666, 0.37645], - std=[0.22803, 0.22145, 0.216989]) + val_resize_size = tuple(args.val_resize_size) + val_crop_size = tuple(args.val_crop_size) + train_resize_size = tuple(args.train_resize_size) + train_crop_size = tuple(args.train_crop_size) + + traindir = os.path.join(args.data_path, "train") + valdir = os.path.join(args.data_path, "val") print("Loading training data") st = time.time() - cache_path = _get_cache_path(traindir) - transform_train = torchvision.transforms.Compose([ - T.ToFloatTensorInZeroOne(), - T.Resize((128, 171)), - T.RandomHorizontalFlip(), - normalize, - T.RandomCrop((112, 112)) - ]) + cache_path = _get_cache_path(traindir, args) + transform_train = presets.VideoClassificationPresetTrain(crop_size=train_crop_size, resize_size=train_resize_size) if args.cache_dataset and os.path.exists(cache_path): - print("Loading dataset_train from {}".format(cache_path)) - dataset, _ = torch.load(cache_path) + print(f"Loading dataset_train from {cache_path}") + dataset, _ = torch.load(cache_path, weights_only=False) dataset.transform = transform_train else: if args.distributed: - print("It is recommended to pre-compute the dataset cache " - "on a single-gpu first, as it will be faster") - dataset = 
torchvision.datasets.Kinetics400( - traindir, + print("It is recommended to pre-compute the dataset cache on a single-gpu first, as it will be faster") + dataset = datasets.KineticsWithVideoId( + args.data_path, frames_per_clip=args.clip_len, + num_classes=args.kinetics_version, + split="train", step_between_clips=1, transform=transform_train, - frame_rate=15 + frame_rate=args.frame_rate, + extensions=( + "avi", + "mp4", + ), + output_format="TCHW", ) if args.cache_dataset: - print("Saving dataset_train to {}".format(cache_path)) + print(f"Saving dataset_train to {cache_path}") utils.mkdir(os.path.dirname(cache_path)) utils.save_on_master((dataset, traindir), cache_path) print("Took", time.time() - st) print("Loading validation data") - cache_path = _get_cache_path(valdir) + cache_path = _get_cache_path(valdir, args) - transform_test = torchvision.transforms.Compose([ - T.ToFloatTensorInZeroOne(), - T.Resize((128, 171)), - normalize, - T.CenterCrop((112, 112)) - ]) + if args.weights and args.test_only: + weights = torchvision.models.get_weight(args.weights) + transform_test = weights.transforms() + else: + transform_test = presets.VideoClassificationPresetEval(crop_size=val_crop_size, resize_size=val_resize_size) if args.cache_dataset and os.path.exists(cache_path): - print("Loading dataset_test from {}".format(cache_path)) - dataset_test, _ = torch.load(cache_path) + print(f"Loading dataset_test from {cache_path}") + dataset_test, _ = torch.load(cache_path, weights_only=False) dataset_test.transform = transform_test else: if args.distributed: - print("It is recommended to pre-compute the dataset cache " - "on a single-gpu first, as it will be faster") - dataset_test = torchvision.datasets.Kinetics400( - valdir, + print("It is recommended to pre-compute the dataset cache on a single-gpu first, as it will be faster") + dataset_test = datasets.KineticsWithVideoId( + args.data_path, frames_per_clip=args.clip_len, + num_classes=args.kinetics_version, + split="val", 
step_between_clips=1, transform=transform_test, - frame_rate=15 + frame_rate=args.frame_rate, + extensions=( + "avi", + "mp4", + ), + output_format="TCHW", ) if args.cache_dataset: - print("Saving dataset_test to {}".format(cache_path)) + print(f"Saving dataset_test to {cache_path}") utils.mkdir(os.path.dirname(cache_path)) utils.save_on_master((dataset_test, valdir), cache_path) @@ -189,42 +230,64 @@ def main(args): test_sampler = UniformClipSampler(dataset_test.video_clips, args.clips_per_video) if args.distributed: train_sampler = DistributedSampler(train_sampler) - test_sampler = DistributedSampler(test_sampler) + test_sampler = DistributedSampler(test_sampler, shuffle=False) data_loader = torch.utils.data.DataLoader( - dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.workers, - pin_memory=True, collate_fn=collate_fn) + dataset, + batch_size=args.batch_size, + sampler=train_sampler, + num_workers=args.workers, + pin_memory=True, + collate_fn=collate_fn, + ) data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.workers, - pin_memory=True, collate_fn=collate_fn) + dataset_test, + batch_size=args.batch_size, + sampler=test_sampler, + num_workers=args.workers, + pin_memory=True, + collate_fn=collate_fn, + ) print("Creating model") - model = torchvision.models.video.__dict__[args.model](pretrained=args.pretrained) + model = torchvision.models.get_model(args.model, weights=args.weights) model.to(device) if args.distributed and args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) criterion = nn.CrossEntropyLoss() - lr = args.lr * args.world_size - optimizer = torch.optim.SGD( - model.parameters(), lr=lr, momentum=args.momentum, weight_decay=args.weight_decay) - - if args.apex: - model, optimizer = amp.initialize(model, optimizer, - opt_level=args.apex_opt_level - ) + optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, 
momentum=args.momentum, weight_decay=args.weight_decay) + scaler = torch.cuda.amp.GradScaler() if args.amp else None # convert scheduler to be per iteration, not per epoch, for warmup that lasts # between different epochs - warmup_iters = args.lr_warmup_epochs * len(data_loader) - lr_milestones = [len(data_loader) * m for m in args.lr_milestones] - lr_scheduler = WarmupMultiStepLR( - optimizer, milestones=lr_milestones, gamma=args.lr_gamma, - warmup_iters=warmup_iters, warmup_factor=1e-5) + iters_per_epoch = len(data_loader) + lr_milestones = [iters_per_epoch * (m - args.lr_warmup_epochs) for m in args.lr_milestones] + main_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=lr_milestones, gamma=args.lr_gamma) + + if args.lr_warmup_epochs > 0: + warmup_iters = iters_per_epoch * args.lr_warmup_epochs + args.lr_warmup_method = args.lr_warmup_method.lower() + if args.lr_warmup_method == "linear": + warmup_lr_scheduler = torch.optim.lr_scheduler.LinearLR( + optimizer, start_factor=args.lr_warmup_decay, total_iters=warmup_iters + ) + elif args.lr_warmup_method == "constant": + warmup_lr_scheduler = torch.optim.lr_scheduler.ConstantLR( + optimizer, factor=args.lr_warmup_decay, total_iters=warmup_iters + ) + else: + raise RuntimeError( + f"Invalid warmup lr method '{args.lr_warmup_method}'. Only linear and constant are supported." 
+ ) + + lr_scheduler = torch.optim.lr_scheduler.SequentialLR( + optimizer, schedulers=[warmup_lr_scheduler, main_lr_scheduler], milestones=[warmup_iters] + ) + else: + lr_scheduler = main_lr_scheduler model_without_ddp = model if args.distributed: @@ -232,13 +295,18 @@ def main(args): model_without_ddp = model.module if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) - args.start_epoch = checkpoint['epoch'] + 1 + checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True) + model_without_ddp.load_state_dict(checkpoint["model"]) + optimizer.load_state_dict(checkpoint["optimizer"]) + lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) + args.start_epoch = checkpoint["epoch"] + 1 + if args.amp: + scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: + # We disable the cudnn benchmarking because it can noticeably affect the accuracy + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True evaluate(model, criterion, data_loader_test, device=device) return @@ -247,58 +315,69 @@ def main(args): for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) - train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader, - device, epoch, args.print_freq, args.apex) + train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader, device, epoch, args.print_freq, scaler) evaluate(model, criterion, data_loader_test, device=device) if args.output_dir: checkpoint = { - 'model': model_without_ddp.state_dict(), - 'optimizer': optimizer.state_dict(), - 'lr_scheduler': lr_scheduler.state_dict(), - 'epoch': epoch, - 'args': args} - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) - utils.save_on_master( - checkpoint, - 
os.path.join(args.output_dir, 'checkpoint.pth')) + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": lr_scheduler.state_dict(), + "epoch": epoch, + "args": args, + } + if args.amp: + checkpoint["scaler"] = scaler.state_dict() + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) + print(f"Training time {total_time_str}") -def parse_args(): +def get_args_parser(add_help=True): import argparse - parser = argparse.ArgumentParser(description='PyTorch Classification Training') - - parser.add_argument('--data-path', default='/datasets01_101/kinetics/070618/', help='dataset') - parser.add_argument('--model', default='r2plus1d_18', help='model') - parser.add_argument('--device', default='cuda', help='device') - parser.add_argument('--clip-len', default=16, type=int, metavar='N', - help='number of frames per clip') - parser.add_argument('--clips-per-video', default=5, type=int, metavar='N', - help='maximum number of clips per video to consider') - parser.add_argument('-b', '--batch-size', default=24, type=int) - parser.add_argument('--epochs', default=45, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('-j', '--workers', default=10, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--lr', default=0.01, type=float, help='initial learning rate') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--lr-milestones', nargs='+', default=[20, 30, 40], type=int, 
help='decrease lr on milestones') - parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') - parser.add_argument('--lr-warmup-epochs', default=10, type=int, help='number of warmup epochs') - parser.add_argument('--print-freq', default=10, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='start epoch') + + parser = argparse.ArgumentParser(description="PyTorch Video Classification Training", add_help=add_help) + + parser.add_argument("--data-path", default="/datasets01_101/kinetics/070618/", type=str, help="dataset path") + parser.add_argument( + "--kinetics-version", default="400", type=str, choices=["400", "600"], help="Select kinetics version" + ) + parser.add_argument("--model", default="r2plus1d_18", type=str, help="model name") + parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)") + parser.add_argument("--clip-len", default=16, type=int, metavar="N", help="number of frames per clip") + parser.add_argument("--frame-rate", default=15, type=int, metavar="N", help="the frame rate") + parser.add_argument( + "--clips-per-video", default=5, type=int, metavar="N", help="maximum number of clips per video to consider" + ) + parser.add_argument( + "-b", "--batch-size", default=24, type=int, help="images per gpu, the total batch size is $NGPU x batch_size" + ) + parser.add_argument("--epochs", default=45, type=int, metavar="N", help="number of total epochs to run") + parser.add_argument( + "-j", "--workers", default=10, type=int, metavar="N", help="number of data loading workers (default: 10)" + ) + parser.add_argument("--lr", default=0.64, type=float, help="initial learning rate") + parser.add_argument("--momentum", default=0.9, type=float, metavar="M", 
help="momentum") + parser.add_argument( + "--wd", + "--weight-decay", + default=1e-4, + type=float, + metavar="W", + help="weight decay (default: 1e-4)", + dest="weight_decay", + ) + parser.add_argument("--lr-milestones", nargs="+", default=[20, 30, 40], type=int, help="decrease lr on milestones") + parser.add_argument("--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma") + parser.add_argument("--lr-warmup-epochs", default=10, type=int, help="the number of epochs to warmup (default: 10)") + parser.add_argument("--lr-warmup-method", default="linear", type=str, help="the warmup method (default: linear)") + parser.add_argument("--lr-warmup-decay", default=0.001, type=float, help="the decay for lr") + parser.add_argument("--print-freq", default=10, type=int, help="print frequency") + parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs") + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + parser.add_argument("--start-epoch", default=0, type=int, metavar="N", help="start epoch") parser.add_argument( "--cache-dataset", dest="cache_dataset", @@ -318,31 +397,50 @@ def parse_args(): action="store_true", ) parser.add_argument( - "--pretrained", - dest="pretrained", - help="Use pre-trained models from the modelzoo", - action="store_true", + "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." ) - # Mixed precision training parameters - parser.add_argument('--apex', action='store_true', - help='Use apex for mixed precision training') - parser.add_argument('--apex-opt-level', default='O1', type=str, - help='For apex mixed precision training' - 'O0 for FP32 training, O1 for mixed precision training.' 
- 'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet' - ) - # distributed training parameters - parser.add_argument('--world-size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') + parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") + parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") - args = parser.parse_args() + parser.add_argument( + "--val-resize-size", + default=(128, 171), + nargs="+", + type=int, + help="the resize size used for validation (default: (128, 171))", + ) + parser.add_argument( + "--val-crop-size", + default=(112, 112), + nargs="+", + type=int, + help="the central crop size used for validation (default: (112, 112))", + ) + parser.add_argument( + "--train-resize-size", + default=(128, 171), + nargs="+", + type=int, + help="the resize size used for training (default: (128, 171))", + ) + parser.add_argument( + "--train-crop-size", + default=(112, 112), + nargs="+", + type=int, + help="the random crop size used for training (default: (112, 112))", + ) + + parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load") + + # Mixed precision training parameters + parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") - return args + return parser if __name__ == "__main__": - args = parse_args() + args = get_args_parser().parse_args() main(args) diff --git a/references/video_classification/transforms.py b/references/video_classification/transforms.py index 9435450c4b3..2a7cc2a4a66 100644 --- a/references/video_classification/transforms.py +++ b/references/video_classification/transforms.py @@ -1,122 +1,9 @@ import torch -import random +import torch.nn as nn -def crop(vid, i, j, h, w): - return vid[..., i:(i + h), j:(j + 
w)] +class ConvertBCHWtoCBHW(nn.Module): + """Convert tensor from (B, C, H, W) to (C, B, H, W)""" - -def center_crop(vid, output_size): - h, w = vid.shape[-2:] - th, tw = output_size - - i = int(round((h - th) / 2.)) - j = int(round((w - tw) / 2.)) - return crop(vid, i, j, th, tw) - - -def hflip(vid): - return vid.flip(dims=(-1,)) - - -# NOTE: for those functions, which generally expect mini-batches, we keep them -# as non-minibatch so that they are applied as if they were 4d (thus image). -# this way, we only apply the transformation in the spatial domain -def resize(vid, size, interpolation='bilinear'): - # NOTE: using bilinear interpolation because we don't work on minibatches - # at this level - scale = None - if isinstance(size, int): - scale = float(size) / min(vid.shape[-2:]) - size = None - return torch.nn.functional.interpolate( - vid, size=size, scale_factor=scale, mode=interpolation, align_corners=False) - - -def pad(vid, padding, fill=0, padding_mode="constant"): - # NOTE: don't want to pad on temporal dimension, so let as non-batch - # (4d) before padding. This works as expected - return torch.nn.functional.pad(vid, padding, value=fill, mode=padding_mode) - - -def to_normalized_float_tensor(vid): - return vid.permute(3, 0, 1, 2).to(torch.float32) / 255 - - -def normalize(vid, mean, std): - shape = (-1,) + (1,) * (vid.dim() - 1) - mean = torch.as_tensor(mean).reshape(shape) - std = torch.as_tensor(std).reshape(shape) - return (vid - mean) / std - - -# Class interface - -class RandomCrop(object): - def __init__(self, size): - self.size = size - - @staticmethod - def get_params(vid, output_size): - """Get parameters for ``crop`` for a random crop. 
- """ - h, w = vid.shape[-2:] - th, tw = output_size - if w == tw and h == th: - return 0, 0, h, w - i = random.randint(0, h - th) - j = random.randint(0, w - tw) - return i, j, th, tw - - def __call__(self, vid): - i, j, h, w = self.get_params(vid, self.size) - return crop(vid, i, j, h, w) - - -class CenterCrop(object): - def __init__(self, size): - self.size = size - - def __call__(self, vid): - return center_crop(vid, self.size) - - -class Resize(object): - def __init__(self, size): - self.size = size - - def __call__(self, vid): - return resize(vid, self.size) - - -class ToFloatTensorInZeroOne(object): - def __call__(self, vid): - return to_normalized_float_tensor(vid) - - -class Normalize(object): - def __init__(self, mean, std): - self.mean = mean - self.std = std - - def __call__(self, vid): - return normalize(vid, self.mean, self.std) - - -class RandomHorizontalFlip(object): - def __init__(self, p=0.5): - self.p = p - - def __call__(self, vid): - if random.random() < self.p: - return hflip(vid) - return vid - - -class Pad(object): - def __init__(self, padding, fill=0): - self.padding = padding - self.fill = fill - - def __call__(self, vid): - return pad(vid, self.padding, self.fill) + def forward(self, vid: torch.Tensor) -> torch.Tensor: + return vid.permute(1, 0, 2, 3) diff --git a/references/video_classification/utils.py b/references/video_classification/utils.py index 5ea6dfef341..934f62f66ae 100644 --- a/references/video_classification/utils.py +++ b/references/video_classification/utils.py @@ -1,15 +1,14 @@ -from __future__ import print_function -from collections import defaultdict, deque import datetime +import errno +import os import time +from collections import defaultdict, deque + import torch import torch.distributed as dist -import errno -import os - -class SmoothedValue(object): +class SmoothedValue: """Track a series of values and provide access to smoothed values over a window or the global series average. 
""" @@ -31,11 +30,7 @@ def synchronize_between_processes(self): """ Warning: does not synchronize the deque! """ - if not is_dist_avail_and_initialized(): - return - t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') - dist.barrier() - dist.all_reduce(t) + t = reduce_across_processes([self.count, self.total]) t = t.tolist() self.count = int(t[0]) self.total = t[1] @@ -64,14 +59,11 @@ def value(self): def __str__(self): return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) -class MetricLogger(object): +class MetricLogger: def __init__(self, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter @@ -80,7 +72,10 @@ def update(self, **kwargs): for k, v in kwargs.items(): if isinstance(v, torch.Tensor): v = v.item() - assert isinstance(v, (float, int)) + if not isinstance(v, (float, int)): + raise TypeError( + f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + ) self.meters[k].update(v) def __getattr__(self, attr): @@ -88,15 +83,12 @@ def __getattr__(self, attr): return self.meters[attr] if attr in self.__dict__: return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") def __str__(self): loss_str = [] for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) + loss_str.append(f"{name}: {str(meter)}") return self.delimiter.join(loss_str) def synchronize_between_processes(self): @@ -109,31 +101,28 @@ def add_meter(self, name, meter): def log_every(self, iterable, print_freq, header=None): i = 0 if not header: - header = '' + header = "" start_time = time.time() end = time.time() - iter_time = 
SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" if torch.cuda.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}' - ]) + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) MB = 1024.0 * 1024.0 for obj in iterable: data_time.update(time.time() - end) @@ -143,26 +132,33 @@ def log_every(self, iterable, print_freq, header=None): eta_seconds = iter_time.global_avg * (len(iterable) - i) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if torch.cuda.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=torch.cuda.max_memory_allocated() / MB)) + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) i += 1 end = time.time() total_time = time.time() - start_time total_time_str = 
str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {}'.format(header, total_time_str)) + print(f"{header} Total time: {total_time_str}") def accuracy(output, target, topk=(1,)): """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): + with torch.inference_mode(): maxk = max(topk) batch_size = target.size(0) @@ -190,10 +186,11 @@ def setup_for_distributed(is_master): This function disables printing when not in master process """ import builtins as __builtin__ + builtin_print = __builtin__.print def print(*args, **kwargs): - force = kwargs.pop('force', False) + force = kwargs.pop("force", False) if is_master or force: builtin_print(*args, **kwargs) @@ -230,26 +227,38 @@ def save_on_master(*args, **kwargs): def init_distributed_mode(args): - if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: args.rank = int(os.environ["RANK"]) - args.world_size = int(os.environ['WORLD_SIZE']) - args.gpu = int(os.environ['LOCAL_RANK']) - elif 'SLURM_PROCID' in os.environ: - args.rank = int(os.environ['SLURM_PROCID']) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + elif "SLURM_PROCID" in os.environ: + args.rank = int(os.environ["SLURM_PROCID"]) args.gpu = args.rank % torch.cuda.device_count() elif hasattr(args, "rank"): pass else: - print('Not using distributed mode') + print("Not using distributed mode") args.distributed = False return args.distributed = True torch.cuda.set_device(args.gpu) - args.dist_backend = 'nccl' - print('| distributed init (rank {}): {}'.format( - args.rank, args.dist_url), flush=True) - torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) + args.dist_backend = "nccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + 
backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank + ) + torch.distributed.barrier() setup_for_distributed(args.rank == 0) + + +def reduce_across_processes(val, op=dist.ReduceOp.SUM): + if not is_dist_avail_and_initialized(): + # nothing to sync, but we still convert to tensor for consistency with the distributed case. + return torch.tensor(val) + + t = torch.tensor(val, device="cuda") + dist.barrier() + dist.all_reduce(t, op=op) + return t diff --git a/scripts/README.rst b/scripts/README.rst new file mode 100644 index 00000000000..23247e34178 --- /dev/null +++ b/scripts/README.rst @@ -0,0 +1,23 @@ +Utility scripts +=============== + +* `fbcode_to_main_sync.sh` + +This shell script is used to synchronise internal changes with the main repository. + +To run this script: + +.. code:: bash + + chmod +x fbcode_to_main_sync.sh + ./fbcode_to_main_sync.sh + +where + +``commit_hash`` represents the commit hash in fbsync branch from where we should start the sync. + +``fork_name`` is the name of the remote corresponding to your fork, you can check it by doing `"git remote -v"`. + +``fork_main_branch`` (optional) is the name of the main branch on your fork(default="main"). + +This script will create PRs corresponding to the commits in fbsync. Please review these, add the [FBcode->GH] prefix on the title and publish them. Most importantly, add the [FBcode->GH] prefix at the beginning of the merge message as well. 
diff --git a/scripts/collect_model_urls.py b/scripts/collect_model_urls.py new file mode 100644 index 00000000000..2acba6cbbda --- /dev/null +++ b/scripts/collect_model_urls.py @@ -0,0 +1,20 @@ +import pathlib +import re +import sys + +MODEL_URL_PATTERN = re.compile(r"https://download[.]pytorch[.]org/models/.+?[.]pth") + + +def main(*roots): + model_urls = set() + for root in roots: + for path in pathlib.Path(root).rglob("*.py"): + with open(path, "r") as file: + for line in file: + model_urls.update(MODEL_URL_PATTERN.findall(line)) + + print("\n".join(sorted(model_urls))) + + +if __name__ == "__main__": + main(*sys.argv[1:]) diff --git a/scripts/download_model_urls.py b/scripts/download_model_urls.py new file mode 100644 index 00000000000..f5f53d71e98 --- /dev/null +++ b/scripts/download_model_urls.py @@ -0,0 +1,41 @@ +import asyncio +import sys +from pathlib import Path +from time import perf_counter +from urllib.parse import urlsplit + +import aiofiles +import aiohttp +from torchvision import models +from tqdm.asyncio import tqdm + + +async def main(download_root): + download_root.mkdir(parents=True, exist_ok=True) + urls = {weight.url for name in models.list_models() for weight in iter(models.get_model_weights(name))} + + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=None)) as session: + await tqdm.gather(*[download(download_root, session, url) for url in urls]) + + +async def download(download_root, session, url): + response = await session.get(url, params=dict(source="ci")) + + assert response.ok + + file_name = Path(urlsplit(url).path).name + async with aiofiles.open(download_root / file_name, "wb") as f: + async for data in response.content.iter_any(): + await f.write(data) + + +if __name__ == "__main__": + download_root = ( + (Path(sys.argv[1]) if len(sys.argv) > 1 else Path("~/.cache/torch/hub/checkpoints")).expanduser().resolve() + ) + print(f"Downloading model weights to {download_root}") + start = perf_counter() + 
asyncio.get_event_loop().run_until_complete(main(download_root)) + stop = perf_counter() + minutes, seconds = divmod(stop - start, 60) + print(f"Download took {minutes:2.0f}m {seconds:2.0f}s") diff --git a/scripts/fbcode_to_main_sync.sh b/scripts/fbcode_to_main_sync.sh new file mode 100755 index 00000000000..c08d61690da --- /dev/null +++ b/scripts/fbcode_to_main_sync.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +if [ -z $1 ] +then + echo "Commit hash is required to be passed when running this script." + echo "./fbcode_to_main_sync.sh " + exit 1 +fi +commit_hash=$1 + +if [ -z $2 ] +then + echo "Fork name is required to be passed when running this script." + echo "./fbcode_to_main_sync.sh " + exit 1 +fi +fork_name=$2 + +if [ -z $3 ] +then + fork_main_branch="main" +else + fork_main_branch=$3 +fi + +from_branch="fbsync" +git stash +git checkout $from_branch +git pull +# Add random prefix in the new branch name to keep it unique per run +prefix=$RANDOM +IFS=' +' +for line in $(git log --pretty=oneline "$commit_hash"..HEAD) +do + if [[ $line != *\[fbsync\]* ]] + then + echo "Parsing $line" + hash=$(echo $line | cut -f1 -d' ') + git checkout $fork_main_branch + git checkout -B cherrypick_${prefix}_${hash} + git cherry-pick -x "$hash" + git push $fork_name cherrypick_${prefix}_${hash} + git checkout $from_branch + fi +done +echo "Please review the PRs, add [FBCode->GH] prefix in the title and publish them." 
diff --git a/scripts/release_notes/classify_prs.py b/scripts/release_notes/classify_prs.py new file mode 100644 index 00000000000..5847c9f03f5 --- /dev/null +++ b/scripts/release_notes/classify_prs.py @@ -0,0 +1,120 @@ +# In[1]: +import pandas as pd + +# In[2]: +data_filename = "data.json" +df = pd.read_json(data_filename).T +df.tail() + +# In[3]: +all_labels = {lbl for labels in df["labels"] for lbl in labels} +all_labels + +# In[4]: +# Add one column per label +for label in all_labels: + df[label] = df["labels"].apply(lambda labels_list: label in labels_list) +df.head() + +# In[5]: +# Add a clean "module" column. It contains tuples since PRs can have more than one module. +# Maybe we should include "topics" in that column as well? + +all_modules = { # mapping: full name -> clean name + label: "".join(label.split(" ")[1:]) for label in all_labels if label.startswith("module") +} + +# We use an ugly loop, but whatever ¯\_(ツ)_/¯ +df["module"] = [[] for _ in range(len(df))] +for i, row in df.iterrows(): + for full_name, clean_name in all_modules.items(): + if full_name in row["labels"]: + row["module"].append(clean_name) +df["module"] = df.module.apply(tuple) +df.head() + +# In[6]: +mod_df = df.set_index("module").sort_index() +mod_df.tail() + +# In[7]: +# All improvement PRs +mod_df[mod_df["enhancement"]].head() + +# In[8]: +# improvement f module +# note: don't filter module name on the index as the index contain tuples with non-exclusive values +# Use the boolean column instead +mod_df[mod_df["enhancement"] & mod_df["module: transforms"]] + + +# In[9]: +def format_prs(mod_df, exclude_prototype=True): + out = [] + for idx, row in mod_df.iterrows(): + if exclude_prototype and "prototype" in row and row["prototype"]: + continue + modules = idx + # Put "documentation" and "tests" first for sorting to be dece + for last_module in ("documentation", "tests"): + if last_module in modules: + modules = [m for m in modules if m != last_module] + [last_module] + + module = 
f"[{', '.join(modules)}]" + module = module.replace("referencescripts", "reference scripts") + module = module.replace("code", "reference scripts") + out.append(f"{module} {row['title']}") + + return "\n".join(out) + + +# In[10]: +included_prs = pd.DataFrame() + +# If labels are accurate, this shouhld generate most of the release notes already +# We keep track of the included PRs to figure out which ones are missing +for section_title, module_idx in ( + ("Backward-incompatible changes", "bc-breaking"), + ("Deprecations", "deprecation"), + ("New Features", "new feature"), + ("Improvements", "enhancement"), + ("Bug Fixes", "bug"), + ("Code Quality", "code quality"), +): + if module_idx in mod_df: + print(f"## {section_title}") + print() + tmp_df = mod_df[mod_df[module_idx]] + included_prs = pd.concat([included_prs, tmp_df]) + print(format_prs(tmp_df)) + print() + + +# In[11]: +# Missing PRs are these ones... classify them manually +missing_prs = pd.concat([mod_df, included_prs]).drop_duplicates(subset="pr_number", keep=False) +print(format_prs(missing_prs)) + +# In[12]: +# Generate list of contributors +print() +print("## Contributors") + +previous_release = "c35d3855ccbfa6a36e6ae6337a1f2c721c1f1e78" +current_release = "5181a854d8b127cf465cd22a67c1b5aaf6ccae05" +print( + f"{{ git shortlog -s {previous_release}..{current_release} | cut -f2- & git log -s {previous_release}..{current_release} | grep Co-authored | cut -f2- -d: | cut -f1 -d\\< | sed 's/^ *//;s/ *//' ; }} | sort --ignore-case | uniq | tr '\\n' ';' | sed 's/;/, /g;s/,//' | fold -s" +) + +# In[13]: +# Utility to extract PR numbers only from multiple lines, useful to bundle all +# the docs changes for example: +import re + +s = """ + +[] Remove unnecessary dependency from macOS/Conda binaries (#8077) +[rocm] [ROCm] remove HCC references (#8070) +""" + +print(", ".join(re.findall("(#\\d+)", s))) diff --git a/scripts/release_notes/retrieve_prs_data.py b/scripts/release_notes/retrieve_prs_data.py new file mode 
100644 index 00000000000..fb64902a6af --- /dev/null +++ b/scripts/release_notes/retrieve_prs_data.py @@ -0,0 +1,212 @@ +import json +import locale +import os +import re +import subprocess +from collections import namedtuple +from os.path import expanduser + +import requests + + +Features = namedtuple( + "Features", + [ + "title", + "body", + "pr_number", + "files_changed", + "labels", + ], +) + + +def dict_to_features(dct): + return Features( + title=dct["title"], + body=dct["body"], + pr_number=dct["pr_number"], + files_changed=dct["files_changed"], + labels=dct["labels"], + ) + + +def features_to_dict(features): + return dict(features._asdict()) + + +def run(command): + """Returns (return-code, stdout, stderr)""" + p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + output, err = p.communicate() + rc = p.returncode + enc = locale.getpreferredencoding() + output = output.decode(enc) + err = err.decode(enc) + return rc, output.strip(), err.strip() + + +def commit_body(commit_hash): + cmd = f"git log -n 1 --pretty=format:%b {commit_hash}" + ret, out, err = run(cmd) + return out if ret == 0 else None + + +def commit_title(commit_hash): + cmd = f"git log -n 1 --pretty=format:%s {commit_hash}" + ret, out, err = run(cmd) + return out if ret == 0 else None + + +def commit_files_changed(commit_hash): + cmd = f"git diff-tree --no-commit-id --name-only -r {commit_hash}" + ret, out, err = run(cmd) + return out.split("\n") if ret == 0 else None + + +def parse_pr_number(body, commit_hash, title): + regex = r"(#[0-9]+)" + matches = re.findall(regex, title) + if len(matches) == 0: + if "revert" not in title.lower() and "updating submodules" not in title.lower(): + print(f"[{commit_hash}: {title}] Could not parse PR number, ignoring PR") + return None + if len(matches) > 1: + print(f"[{commit_hash}: {title}] Got two PR numbers, using the last one") + return matches[-1][1:] + return matches[0][1:] + + +def get_ghstack_token(): + pattern = 
"github_oauth = (.*)" + with open(expanduser("~/.ghstackrc"), "r+") as f: + config = f.read() + matches = re.findall(pattern, config) + if len(matches) == 0: + raise RuntimeError("Can't find a github oauth token") + return matches[0] + + +token = get_ghstack_token() +headers = {"Authorization": f"token {token}"} + + +def run_query(query): + request = requests.post("https://api.github.com/graphql", json={"query": query}, headers=headers) + if request.status_code == 200: + return request.json() + else: + raise Exception(f"Query failed to run by returning code of {request.status_code}. {query}") + + +def gh_labels(pr_number): + query = f""" + {{ + repository(owner: "pytorch", name: "vision") {{ + pullRequest(number: {pr_number}) {{ + labels(first: 10) {{ + edges {{ + node {{ + name + }} + }} + }} + }} + }} + }} + """ + query = run_query(query) + edges = query["data"]["repository"]["pullRequest"]["labels"]["edges"] + return [edge["node"]["name"] for edge in edges] + + +def get_features(commit_hash, return_dict=False): + title, body, files_changed = ( + commit_title(commit_hash), + commit_body(commit_hash), + commit_files_changed(commit_hash), + ) + pr_number = parse_pr_number(body, commit_hash, title) + labels = [] + if pr_number is not None: + labels = gh_labels(pr_number) + result = Features(title, body, pr_number, files_changed, labels) + if return_dict: + return features_to_dict(result) + return result + + +class CommitDataCache: + def __init__(self, path="results/data.json"): + self.path = path + self.data = {} + if os.path.exists(path): + self.data = self.read_from_disk() + + def get(self, commit): + if commit not in self.data.keys(): + # Fetch and cache the data + self.data[commit] = get_features(commit) + self.write_to_disk() + return self.data[commit] + + def read_from_disk(self): + with open(self.path) as f: + data = json.load(f) + data = {commit: dict_to_features(dct) for commit, dct in data.items()} + return data + + def write_to_disk(self): + data = 
{commit: features._asdict() for commit, features in self.data.items()} + with open(self.path, "w") as f: + json.dump(data, f) + + +def get_commits_between(base_version, new_version): + cmd = f"git merge-base {base_version} {new_version}" + rc, merge_base, _ = run(cmd) + assert rc == 0 + + # Returns a list of something like + # b33e38ec47 Allow a higher-precision step type for Vec256::arange (#34555) + cmd = f"git log --reverse --oneline {merge_base}..{new_version}" + rc, commits, _ = run(cmd) + assert rc == 0 + + log_lines = commits.split("\n") + hashes, titles = zip(*[log_line.split(" ", 1) for log_line in log_lines]) + return hashes, titles + + +def convert_to_dataframes(feature_list): + import pandas as pd + + df = pd.DataFrame.from_records(feature_list, columns=Features._fields) + return df + + +def main(base_version, new_version): + hashes, titles = get_commits_between(base_version, new_version) + + cdc = CommitDataCache("data.json") + for idx, commit in enumerate(hashes): + if idx % 10 == 0: + print(f"{idx} / {len(hashes)}") + cdc.get(commit) + + return cdc + + +if __name__ == "__main__": + # d = get_features('2ab93592529243862ce8ad5b6acf2628ef8d0dc8') + # print(d) + # hashes, titles = get_commits_between("tags/v0.9.0", "fc852f3b39fe25dd8bf1dedee8f19ea04aa84c15") + + # Usage: change the tags below accordingly to the current release, then save the json with + # cdc.write_to_disk(). + # Then you can use classify_prs.py (as a notebook) + # to open the json and generate the release notes semi-automatically. 
+ cdc = main("tags/v0.9.0", "fc852f3b39fe25dd8bf1dedee8f19ea04aa84c15") + from IPython import embed + + embed() diff --git a/setup.cfg b/setup.cfg index 5b77b5fbce3..0f4ddbfab10 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,12 +2,22 @@ universal=1 [metadata] -license_file = LICENSE +license_files = LICENSE [pep8] max-line-length = 120 [flake8] +# note: we ignore all 501s (line too long) anyway as they're taken care of by black max-line-length = 120 -ignore = F401,E402,F403,W503,W504 +ignore = E203, E402, W503, W504, F821, E501, B, C4, EXE +per-file-ignores = + __init__.py: F401, F403, F405 + ./hubconf.py: F401 + torchvision/models/mobilenet.py: F401, F403 + torchvision/models/quantization/mobilenet.py: F401, F403 + test/smoke_test.py: F401 exclude = venv + +[pydocstyle] +select = D417 # Missing argument descriptions in the docstring diff --git a/setup.py b/setup.py index 8ece63ce739..956682e7ead 100644 --- a/setup.py +++ b/setup.py @@ -1,192 +1,518 @@ -from __future__ import print_function -import os -import io -import re -import sys -from setuptools import setup, find_packages -from pkg_resources import get_distribution, DistributionNotFound -import subprocess import distutils.command.clean import distutils.spawn import glob +import os import shutil +import subprocess +import sys +import warnings +from pathlib import Path import torch -from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME +from pkg_resources import DistributionNotFound, get_distribution, parse_version +from setuptools import find_packages, setup +from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDA_HOME, CUDAExtension, ROCM_HOME + +FORCE_CUDA = os.getenv("FORCE_CUDA", "0") == "1" +FORCE_MPS = os.getenv("FORCE_MPS", "0") == "1" +DEBUG = os.getenv("DEBUG", "0") == "1" +USE_PNG = os.getenv("TORCHVISION_USE_PNG", "1") == "1" +USE_JPEG = os.getenv("TORCHVISION_USE_JPEG", "1") == "1" +USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1" 
+USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1" +NVCC_FLAGS = os.getenv("NVCC_FLAGS", None) +# Note: the GPU video decoding stuff used to be called "video codec", which +# isn't an accurate or descriptive name considering there are at least 2 other +# video deocding backends in torchvision. I'm renaming this to "gpu video +# decoder" where possible, keeping user facing names (like the env var below) to +# the old scheme for BC. +USE_GPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1" +# Same here: "use ffmpeg" was used to denote "use cpu video decoder". +USE_CPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1" + +TORCHVISION_INCLUDE = os.environ.get("TORCHVISION_INCLUDE", "") +TORCHVISION_LIBRARY = os.environ.get("TORCHVISION_LIBRARY", "") +TORCHVISION_INCLUDE = TORCHVISION_INCLUDE.split(os.pathsep) if TORCHVISION_INCLUDE else [] +TORCHVISION_LIBRARY = TORCHVISION_LIBRARY.split(os.pathsep) if TORCHVISION_LIBRARY else [] + +ROOT_DIR = Path(__file__).absolute().parent +CSRS_DIR = ROOT_DIR / "torchvision/csrc" +IS_ROCM = (torch.version.hip is not None) and (ROCM_HOME is not None) +BUILD_CUDA_SOURCES = (torch.cuda.is_available() and ((CUDA_HOME is not None) or IS_ROCM)) or FORCE_CUDA + +package_name = os.getenv("TORCHVISION_PACKAGE_NAME", "torchvision") + +print("Torchvision build configuration:") +print(f"{FORCE_CUDA = }") +print(f"{FORCE_MPS = }") +print(f"{DEBUG = }") +print(f"{USE_PNG = }") +print(f"{USE_JPEG = }") +print(f"{USE_WEBP = }") +print(f"{USE_NVJPEG = }") +print(f"{NVCC_FLAGS = }") +print(f"{USE_CPU_VIDEO_DECODER = }") +print(f"{USE_GPU_VIDEO_DECODER = }") +print(f"{TORCHVISION_INCLUDE = }") +print(f"{TORCHVISION_LIBRARY = }") +print(f"{IS_ROCM = }") +print(f"{BUILD_CUDA_SOURCES = }") + + +def get_version(): + with open(ROOT_DIR / "version.txt") as f: + version = f.readline().strip() + sha = "Unknown" - -def read(*names, **kwargs): - with io.open( - os.path.join(os.path.dirname(__file__), *names), - 
encoding=kwargs.get("encoding", "utf8") - ) as fp: - return fp.read() - - -def get_dist(pkgname): try: - return get_distribution(pkgname) - except DistributionNotFound: - return None - - -version = '0.5.0a0' -sha = 'Unknown' -package_name = 'torchvision' - -cwd = os.path.dirname(os.path.abspath(__file__)) + sha = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=str(ROOT_DIR)).decode("ascii").strip() + except Exception: + pass -try: - sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip() -except Exception: - pass + if os.getenv("BUILD_VERSION"): + version = os.getenv("BUILD_VERSION") + elif sha != "Unknown": + version += "+" + sha[:7] -if os.getenv('BUILD_VERSION'): - version = os.getenv('BUILD_VERSION') -elif sha != 'Unknown': - version += '+' + sha[:7] -print("Building wheel {}-{}".format(package_name, version)) + return version, sha -def write_version_file(): - version_path = os.path.join(cwd, 'torchvision', 'version.py') - with open(version_path, 'w') as f: - f.write("__version__ = '{}'\n".format(version)) - f.write("git_version = {}\n".format(repr(sha))) +def write_version_file(version, sha): + # Exists for BC, probably completely useless. 
+ with open(ROOT_DIR / "torchvision/version.py", "w") as f: + f.write(f"__version__ = '{version}'\n") + f.write(f"git_version = {repr(sha)}\n") f.write("from torchvision.extension import _check_cuda_version\n") f.write("if _check_cuda_version() > 0:\n") f.write(" cuda = _check_cuda_version()\n") -write_version_file() +def get_requirements(): + def get_dist(pkgname): + try: + return get_distribution(pkgname) + except DistributionNotFound: + return None -readme = open('README.rst').read() + pytorch_dep = os.getenv("TORCH_PACKAGE_NAME", "torch") + if os.getenv("PYTORCH_VERSION"): + pytorch_dep += "==" + os.getenv("PYTORCH_VERSION") -pytorch_dep = 'torch' -if os.getenv('PYTORCH_VERSION'): - pytorch_dep += "==" + os.getenv('PYTORCH_VERSION') - -requirements = [ - 'numpy', - 'six', - pytorch_dep, -] + requirements = [ + "numpy", + pytorch_dep, + ] -pillow_ver = ' >= 4.1.1' -pillow_req = 'pillow-simd' if get_dist('pillow-simd') is not None else 'pillow' -requirements.append(pillow_req + pillow_ver) + # Excluding 8.3.* because of https://github.com/pytorch/vision/issues/4934 + pillow_ver = " >= 5.3.0, !=8.3.*" + pillow_req = "pillow-simd" if get_dist("pillow-simd") is not None else "pillow" + requirements.append(pillow_req + pillow_ver) + return requirements -def get_extensions(): - this_dir = os.path.dirname(os.path.abspath(__file__)) - extensions_dir = os.path.join(this_dir, 'torchvision', 'csrc') - main_file = glob.glob(os.path.join(extensions_dir, '*.cpp')) - source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp')) - source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu')) +def get_macros_and_flags(): + define_macros = [] + extra_compile_args = {"cxx": []} + if BUILD_CUDA_SOURCES: + if IS_ROCM: + define_macros += [("WITH_HIP", None)] + nvcc_flags = [] + else: + define_macros += [("WITH_CUDA", None)] + if NVCC_FLAGS is None: + nvcc_flags = [] + else: + nvcc_flags = NVCC_FLAGS.split(" ") + extra_compile_args["nvcc"] = nvcc_flags + + if 
sys.platform == "win32": + define_macros += [("torchvision_EXPORTS", None)] + extra_compile_args["cxx"].append("/MP") + + if DEBUG: + extra_compile_args["cxx"].append("-g") + extra_compile_args["cxx"].append("-O0") + if "nvcc" in extra_compile_args: + # we have to remove "-OX" and "-g" flag if exists and append + nvcc_flags = extra_compile_args["nvcc"] + extra_compile_args["nvcc"] = [f for f in nvcc_flags if not ("-O" in f or "-g" in f)] + extra_compile_args["nvcc"].append("-O0") + extra_compile_args["nvcc"].append("-g") + else: + extra_compile_args["cxx"].append("-g0") + + return define_macros, extra_compile_args + + +def make_C_extension(): + print("Building _C extension") + + sources = ( + list(CSRS_DIR.glob("*.cpp")) + + list(CSRS_DIR.glob("ops/*.cpp")) + + list(CSRS_DIR.glob("ops/autocast/*.cpp")) + + list(CSRS_DIR.glob("ops/autograd/*.cpp")) + + list(CSRS_DIR.glob("ops/cpu/*.cpp")) + + list(CSRS_DIR.glob("ops/quantized/cpu/*.cpp")) + ) + mps_sources = list(CSRS_DIR.glob("ops/mps/*.mm")) + + if IS_ROCM: + from torch.utils.hipify import hipify_python + + hipify_python.hipify( + project_directory=str(ROOT_DIR), + output_directory=str(ROOT_DIR), + includes="torchvision/csrc/ops/cuda/*", + show_detailed=True, + is_pytorch_extension=True, + ) + cuda_sources = list(CSRS_DIR.glob("ops/hip/*.hip")) + for header in CSRS_DIR.glob("ops/cuda/*.h"): + shutil.copy(str(header), str(CSRS_DIR / "ops/hip")) + else: + cuda_sources = list(CSRS_DIR.glob("ops/cuda/*.cu")) + + if BUILD_CUDA_SOURCES: + Extension = CUDAExtension + sources += cuda_sources + else: + Extension = CppExtension + if torch.backends.mps.is_available() or FORCE_MPS: + sources += mps_sources + + define_macros, extra_compile_args = get_macros_and_flags() + return Extension( + name="torchvision._C", + sources=sorted(str(s) for s in sources), + include_dirs=[CSRS_DIR], + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + + +def find_libpng(): + # Returns (found, include dir, library dir, 
library name) + if sys.platform in ("linux", "darwin"): + libpng_config = shutil.which("libpng-config") + if libpng_config is None: + warnings.warn("libpng-config not found") + return False, None, None, None + min_version = parse_version("1.6.0") + png_version = parse_version( + subprocess.run([libpng_config, "--version"], stdout=subprocess.PIPE).stdout.strip().decode("utf-8") + ) + if png_version < min_version: + warnings.warn("libpng version {png_version} is less than minimum required version {min_version}") + return False, None, None, None + + include_dir = ( + subprocess.run([libpng_config, "--I_opts"], stdout=subprocess.PIPE) + .stdout.strip() + .decode("utf-8") + .split("-I")[1] + ) + library_dir = subprocess.run([libpng_config, "--libdir"], stdout=subprocess.PIPE).stdout.strip().decode("utf-8") + library = "png" + else: # Windows + pngfix = shutil.which("pngfix") + if pngfix is None: + warnings.warn("pngfix not found") + return False, None, None, None + pngfix_dir = Path(pngfix).absolute().parent.parent + + library_dir = str(pngfix_dir / "lib") + include_dir = str(pngfix_dir / "include/libpng16") + library = "libpng" + + return True, include_dir, library_dir, library + + +def find_library(header): + # returns (found, include dir, library dir) + # if include dir or library dir is None, it means that the library is in + # standard paths and don't need to be added to compiler / linker search + # paths + + searching_for = f"Searching for {header}" + + for folder in TORCHVISION_INCLUDE: + if (Path(folder) / header).exists(): + print(f"{searching_for} in {Path(folder) / header}. Found in TORCHVISION_INCLUDE.") + return True, None, None + print(f"{searching_for}. Didn't find in TORCHVISION_INCLUDE.") + + # Try conda-related prefixes. If BUILD_PREFIX is set it means conda-build is + # being run. If CONDA_PREFIX is set then we're in a conda environment. 
+ for prefix_env_var in ("BUILD_PREFIX", "CONDA_PREFIX"): + if (prefix := os.environ.get(prefix_env_var)) is not None: + prefix = Path(prefix) + if sys.platform == "win32": + prefix = prefix / "Library" + include_dir = prefix / "include" + library_dir = prefix / "lib" + if (include_dir / header).exists(): + print(f"{searching_for}. Found in {prefix_env_var}.") + return True, str(include_dir), str(library_dir) + print(f"{searching_for}. Didn't find in {prefix_env_var}.") + + if sys.platform == "linux": + for prefix in (Path("/usr/include"), Path("/usr/local/include")): + if (prefix / header).exists(): + print(f"{searching_for}. Found in {prefix}.") + return True, None, None + print(f"{searching_for}. Didn't find in {prefix}") + + return False, None, None + + +def make_image_extension(): + print("Building image extension") + + include_dirs = TORCHVISION_INCLUDE.copy() + library_dirs = TORCHVISION_LIBRARY.copy() + + libraries = [] + define_macros, extra_compile_args = get_macros_and_flags() + + image_dir = CSRS_DIR / "io/image" + sources = list(image_dir.glob("*.cpp")) + list(image_dir.glob("cpu/*.cpp")) + list(image_dir.glob("cpu/giflib/*.c")) + + if IS_ROCM: + sources += list(image_dir.glob("hip/*.cpp")) + # we need to exclude this in favor of the hipified source + sources.remove(image_dir / "image.cpp") + else: + sources += list(image_dir.glob("cuda/*.cpp")) + + Extension = CppExtension + + if USE_PNG: + png_found, png_include_dir, png_library_dir, png_library = find_libpng() + if png_found: + print("Building torchvision with PNG support") + print(f"{png_include_dir = }") + print(f"{png_library_dir = }") + include_dirs.append(png_include_dir) + library_dirs.append(png_library_dir) + libraries.append(png_library) + define_macros += [("PNG_FOUND", 1)] + else: + warnings.warn("Building torchvision without PNG support") + + if USE_JPEG: + jpeg_found, jpeg_include_dir, jpeg_library_dir = find_library(header="jpeglib.h") + if jpeg_found: + print("Building torchvision 
with JPEG support") + print(f"{jpeg_include_dir = }") + print(f"{jpeg_library_dir = }") + if jpeg_include_dir is not None and jpeg_library_dir is not None: + # if those are None it means they come from standard paths that are already in the search paths, which we don't need to re-add. + include_dirs.append(jpeg_include_dir) + library_dirs.append(jpeg_library_dir) + libraries.append("jpeg") + define_macros += [("JPEG_FOUND", 1)] + else: + warnings.warn("Building torchvision without JPEG support") + + if USE_WEBP: + webp_found, webp_include_dir, webp_library_dir = find_library(header="webp/decode.h") + if webp_found: + print("Building torchvision with WEBP support") + print(f"{webp_include_dir = }") + print(f"{webp_library_dir = }") + if webp_include_dir is not None and webp_library_dir is not None: + # if those are None it means they come from standard paths that are already in the search paths, which we don't need to re-add. + include_dirs.append(webp_include_dir) + library_dirs.append(webp_library_dir) + webp_library = "libwebp" if sys.platform == "win32" else "webp" + libraries.append(webp_library) + define_macros += [("WEBP_FOUND", 1)] + else: + warnings.warn("Building torchvision without WEBP support") - sources = main_file + source_cpu - extension = CppExtension + if USE_NVJPEG and (torch.cuda.is_available() or FORCE_CUDA): + nvjpeg_found = CUDA_HOME is not None and (Path(CUDA_HOME) / "include/nvjpeg.h").exists() - compile_cpp_tests = os.getenv('WITH_CPP_MODELS_TEST', '0') == '1' - if compile_cpp_tests: - test_dir = os.path.join(this_dir, 'test') - models_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'models') - test_file = glob.glob(os.path.join(test_dir, '*.cpp')) - source_models = glob.glob(os.path.join(models_dir, '*.cpp')) + if nvjpeg_found: + print("Building torchvision with NVJPEG image support") + libraries.append("nvjpeg") + define_macros += [("NVJPEG_FOUND", 1)] + Extension = CUDAExtension + else: + warnings.warn("Building torchvision without 
NVJPEG support") + elif USE_NVJPEG: + warnings.warn("Building torchvision without NVJPEG support") + + return Extension( + name="torchvision.image", + sources=sorted(str(s) for s in sources), + include_dirs=include_dirs, + library_dirs=library_dirs, + define_macros=define_macros, + libraries=libraries, + extra_compile_args=extra_compile_args, + ) + + +def make_video_decoders_extensions(): + print("Building video decoder extensions") + + build_without_extensions_msg = "Building without video decoders extensions." + if sys.platform != "linux" or (sys.version_info.major == 3 and sys.version_info.minor == 9): + # FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9 + # FIXME: causes crash. See the following GitHub issues for more details. + # FIXME: https://github.com/pytorch/pytorch/issues/65000 + # FIXME: https://github.com/pytorch/vision/issues/3367 + print("Can only build video decoder extensions on linux and Python != 3.9") + return [] + + ffmpeg_exe = shutil.which("ffmpeg") + if ffmpeg_exe is None: + print(f"{build_without_extensions_msg} Couldn't find ffmpeg binary.") + return [] + + def find_ffmpeg_libraries(): + ffmpeg_libraries = {"libavcodec", "libavformat", "libavutil", "libswresample", "libswscale"} - test_file = [os.path.join(test_dir, s) for s in test_file] - source_models = [os.path.join(models_dir, s) for s in source_models] - tests = test_file + source_models - tests_include_dirs = [test_dir, models_dir] + ffmpeg_bin = os.path.dirname(ffmpeg_exe) + ffmpeg_root = os.path.dirname(ffmpeg_bin) + ffmpeg_include_dir = os.path.join(ffmpeg_root, "include") + ffmpeg_library_dir = os.path.join(ffmpeg_root, "lib") - define_macros = [] + gcc = os.environ.get("CC", shutil.which("gcc")) + platform_tag = subprocess.run([gcc, "-print-multiarch"], stdout=subprocess.PIPE) + platform_tag = platform_tag.stdout.strip().decode("utf-8") - extra_compile_args = {} - if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1': 
- extension = CUDAExtension - sources += source_cuda - define_macros += [('WITH_CUDA', None)] - nvcc_flags = os.getenv('NVCC_FLAGS', '') - if nvcc_flags == '': - nvcc_flags = [] + if platform_tag: + # Most probably a Debian-based distribution + ffmpeg_include_dir = [ffmpeg_include_dir, os.path.join(ffmpeg_include_dir, platform_tag)] + ffmpeg_library_dir = [ffmpeg_library_dir, os.path.join(ffmpeg_library_dir, platform_tag)] else: - nvcc_flags = nvcc_flags.split(' ') - extra_compile_args = { - 'cxx': ['-O0'], - 'nvcc': nvcc_flags, - } + ffmpeg_include_dir = [ffmpeg_include_dir] + ffmpeg_library_dir = [ffmpeg_library_dir] - if sys.platform == 'win32': - define_macros += [('torchvision_EXPORTS', None)] + for library in ffmpeg_libraries: + library_found = False + for search_path in ffmpeg_include_dir + TORCHVISION_INCLUDE: + full_path = os.path.join(search_path, library, "*.h") + library_found |= len(glob.glob(full_path)) > 0 - extra_compile_args.setdefault('cxx', []) - extra_compile_args['cxx'].append('/MP') + if not library_found: + print(f"{build_without_extensions_msg}") + print(f"{library} header files were not found.") + return None, None - sources = [os.path.join(extensions_dir, s) for s in sources] + return ffmpeg_include_dir, ffmpeg_library_dir - include_dirs = [extensions_dir] + ffmpeg_include_dir, ffmpeg_library_dir = find_ffmpeg_libraries() + if ffmpeg_include_dir is None or ffmpeg_library_dir is None: + return [] - ffmpeg_exe = distutils.spawn.find_executable('ffmpeg') - has_ffmpeg = ffmpeg_exe is not None - if has_ffmpeg: - ffmpeg_bin = os.path.dirname(ffmpeg_exe) - ffmpeg_root = os.path.dirname(ffmpeg_bin) - ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include') + print("Found ffmpeg:") + print(f" ffmpeg include path: {ffmpeg_include_dir}") + print(f" ffmpeg library_dir: {ffmpeg_library_dir}") - # TorchVision video reader - video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'video_reader') + extensions = [] + if 
USE_CPU_VIDEO_DECODER: + print("Building with CPU video decoder support") + + # TorchVision base decoder + video reader + video_reader_src_dir = os.path.join(ROOT_DIR, "torchvision", "csrc", "io", "video_reader") video_reader_src = glob.glob(os.path.join(video_reader_src_dir, "*.cpp")) + base_decoder_src_dir = os.path.join(ROOT_DIR, "torchvision", "csrc", "io", "decoder") + base_decoder_src = glob.glob(os.path.join(base_decoder_src_dir, "*.cpp")) + # Torchvision video API + videoapi_src_dir = os.path.join(ROOT_DIR, "torchvision", "csrc", "io", "video") + videoapi_src = glob.glob(os.path.join(videoapi_src_dir, "*.cpp")) + # exclude tests + base_decoder_src = [x for x in base_decoder_src if "_test.cpp" not in x] - ext_modules = [ - extension( - 'torchvision._C', - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - if compile_cpp_tests: - ext_modules.append( - extension( - 'torchvision._C_tests', - tests, - include_dirs=tests_include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ) - if has_ffmpeg: - ext_modules.append( + combined_src = video_reader_src + base_decoder_src + videoapi_src + + extensions.append( CppExtension( - 'torchvision.video_reader', - video_reader_src, + # This is an aweful name. It should be "cpu_video_decoder". Keeping for BC. 
+ "torchvision.video_reader", + combined_src, include_dirs=[ + base_decoder_src_dir, video_reader_src_dir, - ffmpeg_include_dir, - extensions_dir, + videoapi_src_dir, + str(CSRS_DIR), + *ffmpeg_include_dir, + *TORCHVISION_INCLUDE, + ], + library_dirs=ffmpeg_library_dir + TORCHVISION_LIBRARY, + libraries=[ + "avcodec", + "avformat", + "avutil", + "swresample", + "swscale", ], + extra_compile_args=["-std=c++17"] if os.name != "nt" else ["/std:c++17", "/MP"], + extra_link_args=["-std=c++17" if os.name != "nt" else "/std:c++17"], + ) + ) + + if USE_GPU_VIDEO_DECODER: + # Locating GPU video decoder headers and libraries + # CUDA_HOME should be set to the cuda root directory. + # TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the locations + # to the headers and libraries below + if not ( + BUILD_CUDA_SOURCES + and CUDA_HOME is not None + and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE]) + and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE]) + and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY]) + and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir]) + ): + print("Could not find necessary dependencies. 
Refer the setup.py to check which ones are needed.") + print("Building without GPU video decoder support") + return extensions + print("Building torchvision with GPU video decoder support") + + gpu_decoder_path = os.path.join(CSRS_DIR, "io", "decoder", "gpu") + gpu_decoder_src = glob.glob(os.path.join(gpu_decoder_path, "*.cpp")) + cuda_libs = os.path.join(CUDA_HOME, "lib64") + cuda_inc = os.path.join(CUDA_HOME, "include") + + _, extra_compile_args = get_macros_and_flags() + extensions.append( + CUDAExtension( + "torchvision.gpu_decoder", + gpu_decoder_src, + include_dirs=[CSRS_DIR] + TORCHVISION_INCLUDE + [gpu_decoder_path] + [cuda_inc] + ffmpeg_include_dir, + library_dirs=ffmpeg_library_dir + TORCHVISION_LIBRARY + [cuda_libs], libraries=[ - 'avcodec', - 'avformat', - 'avutil', - 'swresample', - 'swscale', + "avcodec", + "avformat", + "avutil", + "swresample", + "swscale", + "nvcuvid", + "cuda", + "cudart", + "z", + "pthread", + "dl", + "nppicc", ], - extra_compile_args=["-std=c++14"], - extra_link_args=["-std=c++14"], + extra_compile_args=extra_compile_args, ) ) - return ext_modules + return extensions class clean(distutils.command.clean.clean): def run(self): - with open('.gitignore', 'r') as f: + with open(".gitignore") as f: ignores = f.read() - for wildcard in filter(None, ignores.split('\n')): + for wildcard in filter(None, ignores.split("\n")): for filename in glob.glob(wildcard): try: os.remove(filename) @@ -197,28 +523,43 @@ def run(self): distutils.command.clean.clean.run(self) -setup( - # Metadata - name=package_name, - version=version, - author='PyTorch Core Team', - author_email='soumith@pytorch.org', - url='https://github.com/pytorch/vision', - description='image and video datasets and models for torch deep learning', - long_description=readme, - license='BSD', - - # Package info - packages=find_packages(exclude=('test',)), - - zip_safe=False, - install_requires=requirements, - extras_require={ - "scipy": ["scipy"], - }, - 
ext_modules=get_extensions(), - cmdclass={ - 'build_ext': BuildExtension.with_options(no_python_abi_suffix=True), - 'clean': clean, - } -) +if __name__ == "__main__": + version, sha = get_version() + write_version_file(version, sha) + + print(f"Building wheel {package_name}-{version}") + + with open("README.md") as f: + readme = f.read() + + extensions = [ + make_C_extension(), + make_image_extension(), + *make_video_decoders_extensions(), + ] + + setup( + name=package_name, + version=version, + author="PyTorch Core Team", + author_email="soumith@pytorch.org", + url="https://github.com/pytorch/vision", + description="image and video datasets and models for torch deep learning", + long_description=readme, + long_description_content_type="text/markdown", + license="BSD", + packages=find_packages(exclude=("test",)), + package_data={package_name: ["*.dll", "*.dylib", "*.so", "prototype/datasets/_builtin/*.categories"]}, + zip_safe=False, + install_requires=get_requirements(), + extras_require={ + "gdown": ["gdown>=4.7.3"], + "scipy": ["scipy"], + }, + ext_modules=extensions, + python_requires=">=3.8", + cmdclass={ + "build_ext": BuildExtension.with_options(no_python_abi_suffix=True), + "clean": clean, + }, + ) diff --git a/test/_utils_internal.py b/test/_utils_internal.py new file mode 100644 index 00000000000..1a32e6f2b25 --- /dev/null +++ b/test/_utils_internal.py @@ -0,0 +1,7 @@ +import os + + +# Get relative file path +# this returns relative path from current file. +def get_relative_path(curr_file, *path_components): + return os.path.join(os.path.dirname(curr_file), *path_components) diff --git a/test/assets/damaged_jpeg/TensorFlow-LICENSE b/test/assets/damaged_jpeg/TensorFlow-LICENSE new file mode 100644 index 00000000000..c7563fe4e5b --- /dev/null +++ b/test/assets/damaged_jpeg/TensorFlow-LICENSE @@ -0,0 +1,13 @@ + Copyright 2019 The TensorFlow Authors. All rights reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/test/assets/damaged_jpeg/bad_huffman.jpg b/test/assets/damaged_jpeg/bad_huffman.jpg new file mode 100644 index 00000000000..ef5b6f12c55 Binary files /dev/null and b/test/assets/damaged_jpeg/bad_huffman.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt.jpg b/test/assets/damaged_jpeg/corrupt.jpg new file mode 100644 index 00000000000..5e2fe6c56f5 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_2.jpg b/test/assets/damaged_jpeg/corrupt34_2.jpg new file mode 100644 index 00000000000..4211155c455 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_2.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_3.jpg b/test/assets/damaged_jpeg/corrupt34_3.jpg new file mode 100644 index 00000000000..c1c2a9d1e1e Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_3.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_4.jpg b/test/assets/damaged_jpeg/corrupt34_4.jpg new file mode 100644 index 00000000000..b8e7308ba00 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_4.jpg differ diff --git a/test/assets/damaged_png/sigsegv.png b/test/assets/damaged_png/sigsegv.png new file mode 100644 index 00000000000..3ecff65ec60 Binary files /dev/null and b/test/assets/damaged_png/sigsegv.png differ diff --git a/test/assets/grace_hopper_517x606.jpg b/test/assets/encode_jpeg/grace_hopper_517x606.jpg similarity index 100% 
rename from test/assets/grace_hopper_517x606.jpg rename to test/assets/encode_jpeg/grace_hopper_517x606.jpg diff --git a/test/assets/encode_jpeg/jpeg_write/grace_hopper_517x606_pil.jpg b/test/assets/encode_jpeg/jpeg_write/grace_hopper_517x606_pil.jpg new file mode 100644 index 00000000000..0f37ea0d9e1 Binary files /dev/null and b/test/assets/encode_jpeg/jpeg_write/grace_hopper_517x606_pil.jpg differ diff --git a/test/assets/expected_flow.pt b/test/assets/expected_flow.pt new file mode 100644 index 00000000000..403784b1db1 Binary files /dev/null and b/test/assets/expected_flow.pt differ diff --git a/test/assets/fakedata/draw_boxes_different_label_colors.png b/test/assets/fakedata/draw_boxes_different_label_colors.png new file mode 100644 index 00000000000..72178930602 Binary files /dev/null and b/test/assets/fakedata/draw_boxes_different_label_colors.png differ diff --git a/test/assets/fakedata/draw_boxes_util.png b/test/assets/fakedata/draw_boxes_util.png new file mode 100644 index 00000000000..ee5dac329e0 Binary files /dev/null and b/test/assets/fakedata/draw_boxes_util.png differ diff --git a/test/assets/fakedata/draw_boxes_vanilla.png b/test/assets/fakedata/draw_boxes_vanilla.png new file mode 100644 index 00000000000..bbc7112deb0 Binary files /dev/null and b/test/assets/fakedata/draw_boxes_vanilla.png differ diff --git a/test/assets/fakedata/draw_keypoint_vanilla.png b/test/assets/fakedata/draw_keypoint_vanilla.png new file mode 100644 index 00000000000..6cd6d943b6c Binary files /dev/null and b/test/assets/fakedata/draw_keypoint_vanilla.png differ diff --git a/test/assets/fakedata/draw_keypoints_visibility.png b/test/assets/fakedata/draw_keypoints_visibility.png new file mode 100644 index 00000000000..8cd34f84539 Binary files /dev/null and b/test/assets/fakedata/draw_keypoints_visibility.png differ diff --git a/test/assets/fakedata/logos/cmyk_pytorch.jpg b/test/assets/fakedata/logos/cmyk_pytorch.jpg new file mode 100644 index 00000000000..16ee8b2b4bc Binary 
files /dev/null and b/test/assets/fakedata/logos/cmyk_pytorch.jpg differ diff --git a/test/assets/fakedata/logos/gray_pytorch.jpg b/test/assets/fakedata/logos/gray_pytorch.jpg new file mode 100644 index 00000000000..60c9c7cf705 Binary files /dev/null and b/test/assets/fakedata/logos/gray_pytorch.jpg differ diff --git a/test/assets/fakedata/logos/gray_pytorch.png b/test/assets/fakedata/logos/gray_pytorch.png new file mode 100644 index 00000000000..412b931299e Binary files /dev/null and b/test/assets/fakedata/logos/gray_pytorch.png differ diff --git a/test/assets/fakedata/logos/grayalpha_pytorch.png b/test/assets/fakedata/logos/grayalpha_pytorch.png new file mode 100644 index 00000000000..3e77d72b904 Binary files /dev/null and b/test/assets/fakedata/logos/grayalpha_pytorch.png differ diff --git a/test/assets/fakedata/logos/palette_pytorch.png b/test/assets/fakedata/logos/palette_pytorch.png new file mode 100644 index 00000000000..2108d1b315a Binary files /dev/null and b/test/assets/fakedata/logos/palette_pytorch.png differ diff --git a/test/assets/fakedata/logos/rgb_pytorch.avif b/test/assets/fakedata/logos/rgb_pytorch.avif new file mode 100644 index 00000000000..ea1bb586957 Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch.avif differ diff --git a/test/assets/fakedata/logos/rgb_pytorch.jpg b/test/assets/fakedata/logos/rgb_pytorch.jpg new file mode 100644 index 00000000000..d49e658b94f Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch.jpg differ diff --git a/test/assets/fakedata/logos/rgb_pytorch.png b/test/assets/fakedata/logos/rgb_pytorch.png new file mode 100644 index 00000000000..c9d08e6c7da Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch.png differ diff --git a/test/assets/fakedata/logos/rgb_pytorch.webp b/test/assets/fakedata/logos/rgb_pytorch.webp new file mode 100644 index 00000000000..e594584d76d Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch.webp differ diff --git 
a/test/assets/fakedata/logos/rgb_pytorch16.png b/test/assets/fakedata/logos/rgb_pytorch16.png new file mode 100644 index 00000000000..b5e9e35d989 Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch16.png differ diff --git a/test/assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic b/test/assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic new file mode 100644 index 00000000000..4c29ac3c71c Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic differ diff --git a/test/assets/fakedata/logos/rgbalpha_pytorch.png b/test/assets/fakedata/logos/rgbalpha_pytorch.png new file mode 100644 index 00000000000..5a9ff14ba5e Binary files /dev/null and b/test/assets/fakedata/logos/rgbalpha_pytorch.png differ diff --git a/test/assets/fakedata/logos/rgbalpha_pytorch16.png b/test/assets/fakedata/logos/rgbalpha_pytorch16.png new file mode 100644 index 00000000000..df1db4d6354 Binary files /dev/null and b/test/assets/fakedata/logos/rgbalpha_pytorch16.png differ diff --git a/test/assets/gaussian_blur_opencv_results.pt b/test/assets/gaussian_blur_opencv_results.pt new file mode 100644 index 00000000000..d68f477fb44 Binary files /dev/null and b/test/assets/gaussian_blur_opencv_results.pt differ diff --git a/test/assets/interlaced_png/wizard_low-interlaced.png b/test/assets/interlaced_png/wizard_low-interlaced.png new file mode 100644 index 00000000000..3badd9264dc Binary files /dev/null and b/test/assets/interlaced_png/wizard_low-interlaced.png differ diff --git a/test/assets/interlaced_png/wizard_low.png b/test/assets/interlaced_png/wizard_low.png new file mode 100644 index 00000000000..7b1c264f030 Binary files /dev/null and b/test/assets/interlaced_png/wizard_low.png differ diff --git a/test/assets/labeled_image.png b/test/assets/labeled_image.png new file mode 100644 index 00000000000..9d163243773 Binary files /dev/null and b/test/assets/labeled_image.png differ diff --git 
a/test/assets/masks.tiff b/test/assets/masks.tiff new file mode 100644 index 00000000000..7a8efc6dd0e Binary files /dev/null and b/test/assets/masks.tiff differ diff --git a/test/assets/toosmall_png/heapbof.png b/test/assets/toosmall_png/heapbof.png new file mode 100644 index 00000000000..e720d183342 Binary files /dev/null and b/test/assets/toosmall_png/heapbof.png differ diff --git a/test/assets/videos/hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi b/test/assets/videos/hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi new file mode 100644 index 00000000000..979cd3901af Binary files /dev/null and b/test/assets/videos/hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi differ diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py new file mode 100644 index 00000000000..ef5d5e1ec96 --- /dev/null +++ b/test/builtin_dataset_mocks.py @@ -0,0 +1,1582 @@ +import bz2 +import collections.abc +import csv +import functools +import gzip +import io +import itertools +import json +import lzma +import pathlib +import pickle +import random +import shutil +import unittest.mock +import xml.etree.ElementTree as ET +from collections import Counter, defaultdict + +import numpy as np +import pytest +import torch +from common_utils import combinations_grid +from datasets_utils import create_image_file, create_image_folder, make_tar, make_zip +from torch.nn.functional import one_hot +from torch.testing import make_tensor as _make_tensor +from torchvision.prototype import datasets + +make_tensor = functools.partial(_make_tensor, device="cpu") +make_scalar = functools.partial(make_tensor, ()) + + +__all__ = ["DATASET_MOCKS", "parametrize_dataset_mocks"] + + +class DatasetMock: + def __init__(self, name, *, mock_data_fn, configs): + # FIXME: error handling for unknown names + self.name = name + self.mock_data_fn = mock_data_fn + self.configs = configs + + def _parse_mock_info(self, mock_info): + if mock_info is None: + raise 
pytest.UsageError( + f"The mock data function for dataset '{self.name}' returned nothing. It needs to at least return an " + f"integer indicating the number of samples for the current `config`." + ) + elif isinstance(mock_info, int): + mock_info = dict(num_samples=mock_info) + elif not isinstance(mock_info, dict): + raise pytest.UsageError( + f"The mock data function for dataset '{self.name}' returned a {type(mock_info)}. The returned object " + f"should be a dictionary containing at least the number of samples for the key `'num_samples'`. If no " + f"additional information is required for specific tests, the number of samples can also be returned as " + f"an integer." + ) + elif "num_samples" not in mock_info: + raise pytest.UsageError( + f"The dictionary returned by the mock data function for dataset '{self.name}' has to contain a " + f"`'num_samples'` entry indicating the number of samples." + ) + + return mock_info + + def load(self, config): + # `datasets.home()` is patched to a temporary directory through the autouse fixture `test_home` in + # test/test_prototype_builtin_datasets.py + root = pathlib.Path(datasets.home()) / self.name + # We cannot place the mock data upfront in `root`. Loading a dataset calls `OnlineResource.load`. In turn, + # this will only download **and** preprocess if the file is not present. In other words, if we already place + # the file in `root` before the resource is loaded, we are effectively skipping the preprocessing. + # To avoid that we first place the mock data in a temporary directory and patch the download logic to move it to + # `root` only when it is requested. 
+ tmp_mock_data_folder = root / "__mock__" + tmp_mock_data_folder.mkdir(parents=True) + + mock_info = self._parse_mock_info(self.mock_data_fn(tmp_mock_data_folder, config)) + + def patched_download(resource, root, **kwargs): + src = tmp_mock_data_folder / resource.file_name + if not src.exists(): + raise pytest.UsageError( + f"Dataset '{self.name}' requires the file {resource.file_name} for {config}" + f"but it was not created by the mock data function." + ) + + dst = root / resource.file_name + shutil.move(str(src), str(root)) + + return dst + + with unittest.mock.patch( + "torchvision.prototype.datasets.utils._resource.OnlineResource.download", new=patched_download + ): + dataset = datasets.load(self.name, **config) + + extra_files = list(tmp_mock_data_folder.glob("**/*")) + if extra_files: + raise pytest.UsageError( + ( + f"Dataset '{self.name}' created the following files for {config} in the mock data function, " + f"but they were not loaded:\n\n" + ) + + "\n".join(str(file.relative_to(tmp_mock_data_folder)) for file in extra_files) + ) + + tmp_mock_data_folder.rmdir() + + return dataset, mock_info + + +def config_id(name, config): + parts = [name] + for name, value in config.items(): + if isinstance(value, bool): + part = ("" if value else "no_") + name + else: + part = str(value) + parts.append(part) + return "-".join(parts) + + +def parametrize_dataset_mocks(*dataset_mocks, marks=None): + mocks = {} + for mock in dataset_mocks: + if isinstance(mock, DatasetMock): + mocks[mock.name] = mock + elif isinstance(mock, collections.abc.Mapping): + mocks.update(mock) + else: + raise pytest.UsageError( + f"The positional arguments passed to `parametrize_dataset_mocks` can either be a `DatasetMock`, " + f"a sequence of `DatasetMock`'s, or a mapping of names to `DatasetMock`'s, " + f"but got {mock} instead." 
+ ) + dataset_mocks = mocks + + if marks is None: + marks = {} + elif not isinstance(marks, collections.abc.Mapping): + raise pytest.UsageError() + + return pytest.mark.parametrize( + ("dataset_mock", "config"), + [ + pytest.param(dataset_mock, config, id=config_id(name, config), marks=marks.get(name, ())) + for name, dataset_mock in dataset_mocks.items() + for config in dataset_mock.configs + ], + ) + + +DATASET_MOCKS = {} + + +def register_mock(name=None, *, configs): + def wrapper(mock_data_fn): + nonlocal name + if name is None: + name = mock_data_fn.__name__ + DATASET_MOCKS[name] = DatasetMock(name, mock_data_fn=mock_data_fn, configs=configs) + + return mock_data_fn + + return wrapper + + +class MNISTMockData: + _DTYPES_ID = { + torch.uint8: 8, + torch.int8: 9, + torch.int16: 11, + torch.int32: 12, + torch.float32: 13, + torch.float64: 14, + } + + @classmethod + def _magic(cls, dtype, ndim): + return cls._DTYPES_ID[dtype] * 256 + ndim + 1 + + @staticmethod + def _encode(t): + return torch.tensor(t, dtype=torch.int32).numpy().tobytes()[::-1] + + @staticmethod + def _big_endian_dtype(dtype): + np_dtype = getattr(np, str(dtype).replace("torch.", ""))().dtype + return np.dtype(f">{np_dtype.kind}{np_dtype.itemsize}") + + @classmethod + def _create_binary_file(cls, root, filename, *, num_samples, shape, dtype, compressor, low=0, high): + with compressor(root / filename, "wb") as fh: + for meta in (cls._magic(dtype, len(shape)), num_samples, *shape): + fh.write(cls._encode(meta)) + + data = make_tensor((num_samples, *shape), dtype=dtype, low=low, high=high) + + fh.write(data.numpy().astype(cls._big_endian_dtype(dtype)).tobytes()) + + @classmethod + def generate( + cls, + root, + *, + num_categories, + num_samples=None, + images_file, + labels_file, + image_size=(28, 28), + image_dtype=torch.uint8, + label_size=(), + label_dtype=torch.uint8, + compressor=None, + ): + if num_samples is None: + num_samples = num_categories + if compressor is None: + compressor = 
gzip.open + + cls._create_binary_file( + root, + images_file, + num_samples=num_samples, + shape=image_size, + dtype=image_dtype, + compressor=compressor, + high=float("inf"), + ) + cls._create_binary_file( + root, + labels_file, + num_samples=num_samples, + shape=label_size, + dtype=label_dtype, + compressor=compressor, + high=num_categories, + ) + + return num_samples + + +def mnist(root, config): + prefix = "train" if config["split"] == "train" else "t10k" + return MNISTMockData.generate( + root, + num_categories=10, + images_file=f"{prefix}-images-idx3-ubyte.gz", + labels_file=f"{prefix}-labels-idx1-ubyte.gz", + ) + + +DATASET_MOCKS.update( + { + name: DatasetMock(name, mock_data_fn=mnist, configs=combinations_grid(split=("train", "test"))) + for name in ["mnist", "fashionmnist", "kmnist"] + } +) + + +@register_mock( + configs=combinations_grid( + split=("train", "test"), + image_set=("Balanced", "By_Merge", "By_Class", "Letters", "Digits", "MNIST"), + ) +) +def emnist(root, config): + num_samples_map = {} + file_names = set() + for split, image_set in itertools.product( + ("train", "test"), + ("Balanced", "By_Merge", "By_Class", "Letters", "Digits", "MNIST"), + ): + prefix = f"emnist-{image_set.replace('_', '').lower()}-{split}" + images_file = f"{prefix}-images-idx3-ubyte.gz" + labels_file = f"{prefix}-labels-idx1-ubyte.gz" + file_names.update({images_file, labels_file}) + num_samples_map[(split, image_set)] = MNISTMockData.generate( + root, + # The image sets that merge some lower case letters in their respective upper case variant, still use dense + # labels in the data files. Thus, num_categories != len(categories) there. 
+ num_categories=47 if config["image_set"] in ("Balanced", "By_Merge") else 62, + images_file=images_file, + labels_file=labels_file, + ) + + make_zip(root, "emnist-gzip.zip", *file_names) + + return num_samples_map[(config["split"], config["image_set"])] + + +@register_mock(configs=combinations_grid(split=("train", "test", "test10k", "test50k", "nist"))) +def qmnist(root, config): + num_categories = 10 + if config["split"] == "train": + num_samples = num_samples_gen = num_categories + 2 + prefix = "qmnist-train" + suffix = ".gz" + compressor = gzip.open + elif config["split"].startswith("test"): + # The split 'test50k' is defined as the last 50k images beginning at index 10000. Thus, we need to create + # more than 10000 images for the dataset to not be empty. + num_samples_gen = 10001 + num_samples = { + "test": num_samples_gen, + "test10k": min(num_samples_gen, 10_000), + "test50k": num_samples_gen - 10_000, + }[config["split"]] + prefix = "qmnist-test" + suffix = ".gz" + compressor = gzip.open + else: # config["split"] == "nist" + num_samples = num_samples_gen = num_categories + 3 + prefix = "xnist" + suffix = ".xz" + compressor = lzma.open + + MNISTMockData.generate( + root, + num_categories=num_categories, + num_samples=num_samples_gen, + images_file=f"{prefix}-images-idx3-ubyte{suffix}", + labels_file=f"{prefix}-labels-idx2-int{suffix}", + label_size=(8,), + label_dtype=torch.int32, + compressor=compressor, + ) + return num_samples + + +class CIFARMockData: + NUM_PIXELS = 32 * 32 * 3 + + @classmethod + def _create_batch_file(cls, root, name, *, num_categories, labels_key, num_samples=1): + content = { + "data": make_tensor((num_samples, cls.NUM_PIXELS), dtype=torch.uint8).numpy(), + labels_key: torch.randint(0, num_categories, size=(num_samples,)).tolist(), + } + with open(pathlib.Path(root) / name, "wb") as fh: + pickle.dump(content, fh) + + @classmethod + def generate( + cls, + root, + name, + *, + folder, + train_files, + test_files, + num_categories, + 
labels_key, + ): + folder = root / folder + folder.mkdir() + files = (*train_files, *test_files) + for file in files: + cls._create_batch_file( + folder, + file, + num_categories=num_categories, + labels_key=labels_key, + ) + + make_tar(root, name, folder, compression="gz") + + +@register_mock(configs=combinations_grid(split=("train", "test"))) +def cifar10(root, config): + train_files = [f"data_batch_{idx}" for idx in range(1, 6)] + test_files = ["test_batch"] + + CIFARMockData.generate( + root=root, + name="cifar-10-python.tar.gz", + folder=pathlib.Path("cifar-10-batches-py"), + train_files=train_files, + test_files=test_files, + num_categories=10, + labels_key="labels", + ) + + return len(train_files if config["split"] == "train" else test_files) + + +@register_mock(configs=combinations_grid(split=("train", "test"))) +def cifar100(root, config): + train_files = ["train"] + test_files = ["test"] + + CIFARMockData.generate( + root=root, + name="cifar-100-python.tar.gz", + folder=pathlib.Path("cifar-100-python"), + train_files=train_files, + test_files=test_files, + num_categories=100, + labels_key="fine_labels", + ) + + return len(train_files if config["split"] == "train" else test_files) + + +@register_mock(configs=[dict()]) +def caltech101(root, config): + def create_ann_file(root, name): + import scipy.io + + box_coord = make_tensor((1, 4), dtype=torch.int32, low=0).numpy().astype(np.uint16) + obj_contour = make_tensor((2, int(torch.randint(3, 6, size=()))), dtype=torch.float64, low=0).numpy() + + scipy.io.savemat(str(pathlib.Path(root) / name), dict(box_coord=box_coord, obj_contour=obj_contour)) + + def create_ann_folder(root, name, file_name_fn, num_examples): + root = pathlib.Path(root) / name + root.mkdir(parents=True) + + for idx in range(num_examples): + create_ann_file(root, file_name_fn(idx)) + + images_root = root / "101_ObjectCategories" + anns_root = root / "Annotations" + + image_category_map = { + "Faces": "Faces_2", + "Faces_easy": "Faces_3", + 
"Motorbikes": "Motorbikes_16", + "airplanes": "Airplanes_Side_2", + } + + categories = ["Faces", "Faces_easy", "Motorbikes", "airplanes", "yin_yang"] + + num_images_per_category = 2 + for category in categories: + create_image_folder( + root=images_root, + name=category, + file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + create_ann_folder( + root=anns_root, + name=image_category_map.get(category, category), + file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat", + num_examples=num_images_per_category, + ) + + (images_root / "BACKGROUND_Goodle").mkdir() + make_tar(root, f"{images_root.name}.tar.gz", images_root, compression="gz") + + make_tar(root, f"{anns_root.name}.tar", anns_root) + + return num_images_per_category * len(categories) + + +@register_mock(configs=[dict()]) +def caltech256(root, config): + dir = root / "256_ObjectCategories" + num_images_per_category = 2 + + categories = [ + (1, "ak47"), + (127, "laptop-101"), + (198, "spider"), + (257, "clutter"), + ] + + for category_idx, category in categories: + files = create_image_folder( + dir, + name=f"{category_idx:03d}.{category}", + file_name_fn=lambda image_idx: f"{category_idx:03d}_{image_idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + if category == "spider": + open(files[0].parent / "RENAME2", "w").close() + + make_tar(root, f"{dir.name}.tar", dir) + + return num_images_per_category * len(categories) + + +@register_mock(configs=combinations_grid(split=("train", "val", "test"))) +def imagenet(root, config): + from scipy.io import savemat + + info = datasets.info("imagenet") + + if config["split"] == "train": + num_samples = len(info["wnids"]) + archive_name = "ILSVRC2012_img_train.tar" + + files = [] + for wnid in info["wnids"]: + create_image_folder( + root=root, + name=wnid, + file_name_fn=lambda image_idx: f"{wnid}_{image_idx:04d}.JPEG", + num_examples=1, + ) + files.append(make_tar(root, f"{wnid}.tar")) + elif config["split"] == 
"val": + num_samples = 3 + archive_name = "ILSVRC2012_img_val.tar" + files = [create_image_file(root, f"ILSVRC2012_val_{idx + 1:08d}.JPEG") for idx in range(num_samples)] + + devkit_root = root / "ILSVRC2012_devkit_t12" + data_root = devkit_root / "data" + data_root.mkdir(parents=True) + + with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file: + for label in torch.randint(0, len(info["wnids"]), (num_samples,)).tolist(): + file.write(f"{label}\n") + + num_children = 0 + synsets = [ + (idx, wnid, category, "", num_children, [], 0, 0) + for idx, (category, wnid) in enumerate(zip(info["categories"], info["wnids"]), 1) + ] + num_children = 1 + synsets.extend((0, "", "", "", num_children, [], 0, 0) for _ in range(5)) + synsets = np.array( + synsets, + dtype=np.dtype( + [ + ("ILSVRC2012_ID", "O"), + ("WNID", "O"), + ("words", "O"), + ("gloss", "O"), + ("num_children", "O"), + ("children", "O"), + ("wordnet_height", "O"), + ("num_train_images", "O"), + ] + ), + ) + savemat(data_root / "meta.mat", dict(synsets=synsets)) + + make_tar(root, devkit_root.with_suffix(".tar.gz").name, compression="gz") + else: # config["split"] == "test" + num_samples = 5 + archive_name = "ILSVRC2012_img_test_v10102019.tar" + files = [create_image_file(root, f"ILSVRC2012_test_{idx + 1:08d}.JPEG") for idx in range(num_samples)] + + make_tar(root, archive_name, *files) + + return num_samples + + +class CocoMockData: + @classmethod + def _make_annotations_json( + cls, + root, + name, + *, + images_meta, + fn, + ): + num_anns_per_image = torch.randint(1, 5, (len(images_meta),)) + num_anns_total = int(num_anns_per_image.sum()) + ann_ids_iter = iter(torch.arange(num_anns_total)[torch.randperm(num_anns_total)]) + + anns_meta = [] + for image_meta, num_anns in zip(images_meta, num_anns_per_image): + for _ in range(num_anns): + ann_id = int(next(ann_ids_iter)) + anns_meta.append(dict(fn(ann_id, image_meta), id=ann_id, image_id=image_meta["id"])) + anns_meta.sort(key=lambda ann: 
ann["id"]) + + with open(root / name, "w") as file: + json.dump(dict(images=images_meta, annotations=anns_meta), file) + + return num_anns_per_image + + @staticmethod + def _make_instances_data(ann_id, image_meta): + def make_rle_segmentation(): + height, width = image_meta["height"], image_meta["width"] + numel = height * width + counts = [] + while sum(counts) <= numel: + counts.append(int(torch.randint(5, 8, ()))) + if sum(counts) > numel: + counts[-1] -= sum(counts) - numel + return dict(counts=counts, size=[height, width]) + + return dict( + segmentation=make_rle_segmentation(), + bbox=make_tensor((4,), dtype=torch.float32, low=0).tolist(), + iscrowd=True, + area=float(make_scalar(dtype=torch.float32)), + category_id=int(make_scalar(dtype=torch.int64)), + ) + + @staticmethod + def _make_captions_data(ann_id, image_meta): + return dict(caption=f"Caption {ann_id} describing image {image_meta['id']}.") + + @classmethod + def _make_annotations(cls, root, name, *, images_meta): + num_anns_per_image = torch.zeros((len(images_meta),), dtype=torch.int64) + for annotations, fn in ( + ("instances", cls._make_instances_data), + ("captions", cls._make_captions_data), + ): + num_anns_per_image += cls._make_annotations_json( + root, f"{annotations}_{name}.json", images_meta=images_meta, fn=fn + ) + + return int(num_anns_per_image.sum()) + + @classmethod + def generate( + cls, + root, + *, + split, + year, + num_samples, + ): + annotations_dir = root / "annotations" + annotations_dir.mkdir() + + for split_ in ("train", "val"): + config_name = f"{split_}{year}" + + images_meta = [ + dict( + file_name=f"{idx:012d}.jpg", + id=idx, + width=width, + height=height, + ) + for idx, (height, width) in enumerate( + torch.randint(3, 11, size=(num_samples, 2), dtype=torch.int).tolist() + ) + ] + + if split_ == split: + create_image_folder( + root, + config_name, + file_name_fn=lambda idx: images_meta[idx]["file_name"], + num_examples=num_samples, + size=lambda idx: (3, 
images_meta[idx]["height"], images_meta[idx]["width"]), + ) + make_zip(root, f"{config_name}.zip") + + cls._make_annotations( + annotations_dir, + config_name, + images_meta=images_meta, + ) + + make_zip(root, f"annotations_trainval{year}.zip", annotations_dir) + + return num_samples + + +@register_mock( + configs=combinations_grid( + split=("train", "val"), + year=("2017", "2014"), + annotations=("instances", "captions", None), + ) +) +def coco(root, config): + return CocoMockData.generate(root, split=config["split"], year=config["year"], num_samples=5) + + +class SBDMockData: + _NUM_CATEGORIES = 20 + + @classmethod + def _make_split_files(cls, root_map, *, split): + splits_and_idcs = [ + ("train", [0, 1, 2]), + ("val", [3]), + ] + if split == "train_noval": + splits_and_idcs.append(("train_noval", [0, 2])) + + ids_map = {split: [f"2008_{idx:06d}" for idx in idcs] for split, idcs in splits_and_idcs} + + for split, ids in ids_map.items(): + with open(root_map[split] / f"{split}.txt", "w") as fh: + fh.writelines(f"{id}\n" for id in ids) + + return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()} + + @classmethod + def _make_anns_folder(cls, root, name, ids): + from scipy.io import savemat + + anns_folder = root / name + anns_folder.mkdir() + + sizes = torch.randint(1, 9, size=(len(ids), 2)).tolist() + for id, size in zip(ids, sizes): + savemat( + anns_folder / f"{id}.mat", + { + "GTcls": { + "Boundaries": cls._make_boundaries(size), + "Segmentation": cls._make_segmentation(size), + } + }, + ) + return sizes + + @classmethod + def _make_boundaries(cls, size): + from scipy.sparse import csc_matrix + + return [ + [csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] for _ in range(cls._NUM_CATEGORIES) + ] + + @classmethod + def _make_segmentation(cls, size): + return torch.randint(0, cls._NUM_CATEGORIES + 1, size=size, dtype=torch.uint8).numpy() + + @classmethod + def generate(cls, root, *, split): + 
archive_folder = root / "benchmark_RELEASE" + dataset_folder = archive_folder / "dataset" + dataset_folder.mkdir(parents=True, exist_ok=True) + + ids, num_samples_map = cls._make_split_files( + defaultdict(lambda: dataset_folder, {"train_noval": root}), split=split + ) + sizes = cls._make_anns_folder(dataset_folder, "cls", ids) + create_image_folder( + dataset_folder, "img", lambda idx: f"{ids[idx]}.jpg", num_examples=len(ids), size=lambda idx: sizes[idx] + ) + + make_tar(root, "benchmark.tgz", archive_folder, compression="gz") + + return num_samples_map[split] + + +@register_mock(configs=combinations_grid(split=("train", "val", "train_noval"))) +def sbd(root, config): + return SBDMockData.generate(root, split=config["split"]) + + +@register_mock(configs=[dict()]) +def semeion(root, config): + num_samples = 3 + num_categories = 10 + + images = torch.rand(num_samples, 256) + labels = one_hot(torch.randint(num_categories, size=(num_samples,)), num_classes=num_categories) + with open(root / "semeion.data", "w") as fh: + for image, one_hot_label in zip(images, labels): + image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image]) + labels_columns = " ".join([str(label.item()) for label in one_hot_label]) + fh.write(f"{image_columns} {labels_columns} \n") + + return num_samples + + +class VOCMockData: + _TRAIN_VAL_FILE_NAMES = { + "2007": "VOCtrainval_06-Nov-2007.tar", + "2008": "VOCtrainval_14-Jul-2008.tar", + "2009": "VOCtrainval_11-May-2009.tar", + "2010": "VOCtrainval_03-May-2010.tar", + "2011": "VOCtrainval_25-May-2011.tar", + "2012": "VOCtrainval_11-May-2012.tar", + } + _TEST_FILE_NAMES = { + "2007": "VOCtest_06-Nov-2007.tar", + } + + @classmethod + def _make_split_files(cls, root, *, year, trainval): + split_folder = root / "ImageSets" + + if trainval: + idcs_map = { + "train": [0, 1, 2], + "val": [3, 4], + } + idcs_map["trainval"] = [*idcs_map["train"], *idcs_map["val"]] + else: + idcs_map = { + "test": [5], + } + ids_map = {split: [f"{year}_{idx:06d}" 
for idx in idcs] for split, idcs in idcs_map.items()} + + for task_sub_folder in ("Main", "Segmentation"): + task_folder = split_folder / task_sub_folder + task_folder.mkdir(parents=True, exist_ok=True) + for split, ids in ids_map.items(): + with open(task_folder / f"{split}.txt", "w") as fh: + fh.writelines(f"{id}\n" for id in ids) + + return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()} + + @classmethod + def _make_detection_anns_folder(cls, root, name, *, file_name_fn, num_examples): + folder = root / name + folder.mkdir(parents=True, exist_ok=True) + + for idx in range(num_examples): + cls._make_detection_ann_file(folder, file_name_fn(idx)) + + @classmethod + def _make_detection_ann_file(cls, root, name): + def add_child(parent, name, text=None): + child = ET.SubElement(parent, name) + child.text = str(text) + return child + + def add_name(obj, name="dog"): + add_child(obj, "name", name) + + def add_size(obj): + obj = add_child(obj, "size") + size = {"width": 0, "height": 0, "depth": 3} + for name, text in size.items(): + add_child(obj, name, text) + + def add_bndbox(obj): + obj = add_child(obj, "bndbox") + bndbox = {"xmin": 1, "xmax": 2, "ymin": 3, "ymax": 4} + for name, text in bndbox.items(): + add_child(obj, name, text) + + annotation = ET.Element("annotation") + add_size(annotation) + obj = add_child(annotation, "object") + add_name(obj) + add_bndbox(obj) + + with open(root / name, "wb") as fh: + fh.write(ET.tostring(annotation)) + + @classmethod + def generate(cls, root, *, year, trainval): + archive_folder = root + if year == "2011": + archive_folder = root / "TrainVal" + data_folder = archive_folder / "VOCdevkit" + else: + archive_folder = data_folder = root / "VOCdevkit" + data_folder = data_folder / f"VOC{year}" + data_folder.mkdir(parents=True, exist_ok=True) + + ids, num_samples_map = cls._make_split_files(data_folder, year=year, trainval=trainval) + for make_folder_fn, name, suffix in [ + 
(create_image_folder, "JPEGImages", ".jpg"), + (create_image_folder, "SegmentationClass", ".png"), + (cls._make_detection_anns_folder, "Annotations", ".xml"), + ]: + make_folder_fn(data_folder, name, file_name_fn=lambda idx: ids[idx] + suffix, num_examples=len(ids)) + make_tar(root, (cls._TRAIN_VAL_FILE_NAMES if trainval else cls._TEST_FILE_NAMES)[year], archive_folder) + + return num_samples_map + + +@register_mock( + configs=[ + *combinations_grid( + split=("train", "val", "trainval"), + year=("2007", "2008", "2009", "2010", "2011", "2012"), + task=("detection", "segmentation"), + ), + *combinations_grid( + split=("test",), + year=("2007",), + task=("detection", "segmentation"), + ), + ], +) +def voc(root, config): + trainval = config["split"] != "test" + return VOCMockData.generate(root, year=config["year"], trainval=trainval)[config["split"]] + + +class CelebAMockData: + @classmethod + def _make_ann_file(cls, root, name, data, *, field_names=None): + with open(root / name, "w") as file: + if field_names: + file.write(f"{len(data)}\r\n") + file.write(" ".join(field_names) + "\r\n") + file.writelines(" ".join(str(item) for item in row) + "\r\n" for row in data) + + _SPLIT_TO_IDX = { + "train": 0, + "val": 1, + "test": 2, + } + + @classmethod + def _make_split_file(cls, root): + num_samples_map = {"train": 4, "val": 3, "test": 2} + + data = [ + (f"{idx:06d}.jpg", cls._SPLIT_TO_IDX[split]) + for split, num_samples in num_samples_map.items() + for idx in range(num_samples) + ] + cls._make_ann_file(root, "list_eval_partition.txt", data) + + image_file_names, _ = zip(*data) + return image_file_names, num_samples_map + + @classmethod + def _make_identity_file(cls, root, image_file_names): + cls._make_ann_file( + root, "identity_CelebA.txt", [(name, int(make_scalar(low=1, dtype=torch.int))) for name in image_file_names] + ) + + @classmethod + def _make_attributes_file(cls, root, image_file_names): + field_names = ("5_o_Clock_Shadow", "Young") + data = [ + [name, *[" 1" 
if attr else "-1" for attr in make_tensor((len(field_names),), dtype=torch.bool)]] + for name in image_file_names + ] + cls._make_ann_file(root, "list_attr_celeba.txt", data, field_names=(*field_names, "")) + + @classmethod + def _make_bounding_boxes_file(cls, root, image_file_names): + field_names = ("image_id", "x_1", "y_1", "width", "height") + data = [ + [f"{name} ", *[f"{coord:3d}" for coord in make_tensor((4,), low=0, dtype=torch.int).tolist()]] + for name in image_file_names + ] + cls._make_ann_file(root, "list_bbox_celeba.txt", data, field_names=field_names) + + @classmethod + def _make_landmarks_file(cls, root, image_file_names): + field_names = ("lefteye_x", "lefteye_y", "rightmouth_x", "rightmouth_y") + data = [ + [ + name, + *[ + f"{coord:4d}" if idx else coord + for idx, coord in enumerate(make_tensor((len(field_names),), low=0, dtype=torch.int).tolist()) + ], + ] + for name in image_file_names + ] + cls._make_ann_file(root, "list_landmarks_align_celeba.txt", data, field_names=field_names) + + @classmethod + def generate(cls, root): + image_file_names, num_samples_map = cls._make_split_file(root) + + image_files = create_image_folder( + root, "img_align_celeba", file_name_fn=lambda idx: image_file_names[idx], num_examples=len(image_file_names) + ) + make_zip(root, image_files[0].parent.with_suffix(".zip").name) + + for make_ann_file_fn in ( + cls._make_identity_file, + cls._make_attributes_file, + cls._make_bounding_boxes_file, + cls._make_landmarks_file, + ): + make_ann_file_fn(root, image_file_names) + + return num_samples_map + + +@register_mock(configs=combinations_grid(split=("train", "val", "test"))) +def celeba(root, config): + return CelebAMockData.generate(root)[config["split"]] + + +@register_mock(configs=combinations_grid(split=("train", "val", "test"))) +def country211(root, config): + split_folder = pathlib.Path(root, "country211", "valid" if config["split"] == "val" else config["split"]) + split_folder.mkdir(parents=True, exist_ok=True) + 
+ num_examples = { + "train": 3, + "val": 4, + "test": 5, + }[config["split"]] + + classes = ("AD", "BS", "GR") + for cls in classes: + create_image_folder( + split_folder, + name=cls, + file_name_fn=lambda idx: f"{idx}.jpg", + num_examples=num_examples, + ) + make_tar(root, f"{split_folder.parent.name}.tgz", split_folder.parent, compression="gz") + return num_examples * len(classes) + + +@register_mock(configs=combinations_grid(split=("train", "test"))) +def food101(root, config): + data_folder = root / "food-101" + + num_images_per_class = 3 + image_folder = data_folder / "images" + categories = ["apple_pie", "baby_back_ribs", "waffles"] + image_ids = [] + for category in categories: + image_files = create_image_folder( + image_folder, + category, + file_name_fn=lambda idx: f"{idx:04d}.jpg", + num_examples=num_images_per_class, + ) + image_ids.extend(path.relative_to(path.parents[1]).with_suffix("").as_posix() for path in image_files) + + meta_folder = data_folder / "meta" + meta_folder.mkdir() + + with open(meta_folder / "classes.txt", "w") as file: + for category in categories: + file.write(f"{category}\n") + + splits = ["train", "test"] + num_samples_map = {} + for offset, split in enumerate(splits): + image_ids_in_split = image_ids[offset :: len(splits)] + num_samples_map[split] = len(image_ids_in_split) + with open(meta_folder / f"{split}.txt", "w") as file: + for image_id in image_ids_in_split: + file.write(f"{image_id}\n") + + make_tar(root, f"{data_folder.name}.tar.gz", compression="gz") + + return num_samples_map[config["split"]] + + +@register_mock(configs=combinations_grid(split=("train", "val", "test"), fold=(1, 4, 10))) +def dtd(root, config): + data_folder = root / "dtd" + + num_images_per_class = 3 + image_folder = data_folder / "images" + categories = {"banded", "marbled", "zigzagged"} + image_ids_per_category = { + category: [ + str(path.relative_to(path.parents[1]).as_posix()) + for path in create_image_folder( + image_folder, + category, + 
file_name_fn=lambda idx: f"{category}_{idx:04d}.jpg", + num_examples=num_images_per_class, + ) + ] + for category in categories + } + + meta_folder = data_folder / "labels" + meta_folder.mkdir() + + with open(meta_folder / "labels_joint_anno.txt", "w") as file: + for cls, image_ids in image_ids_per_category.items(): + for image_id in image_ids: + joint_categories = random.choices( + list(categories - {cls}), k=int(torch.randint(len(categories) - 1, ())) + ) + file.write(" ".join([image_id, *sorted([cls, *joint_categories])]) + "\n") + + image_ids = list(itertools.chain(*image_ids_per_category.values())) + splits = ("train", "val", "test") + num_samples_map = {} + for fold in range(1, 11): + random.shuffle(image_ids) + for offset, split in enumerate(splits): + image_ids_in_config = image_ids[offset :: len(splits)] + with open(meta_folder / f"{split}{fold}.txt", "w") as file: + file.write("\n".join(image_ids_in_config) + "\n") + + num_samples_map[(split, fold)] = len(image_ids_in_config) + + make_tar(root, "dtd-r1.0.1.tar.gz", data_folder, compression="gz") + + return num_samples_map[config["split"], config["fold"]] + + +@register_mock(configs=combinations_grid(split=("train", "test"))) +def fer2013(root, config): + split = config["split"] + num_samples = 5 if split == "train" else 3 + + path = root / f"{split}.csv" + with open(path, "w", newline="") as file: + field_names = ["emotion"] if split == "train" else [] + field_names.append("pixels") + + file.write(",".join(field_names) + "\n") + + writer = csv.DictWriter(file, fieldnames=field_names, quotechar='"', quoting=csv.QUOTE_NONNUMERIC) + for _ in range(num_samples): + rowdict = { + "pixels": " ".join([str(int(pixel)) for pixel in torch.randint(256, (48 * 48,), dtype=torch.uint8)]) + } + if split == "train": + rowdict["emotion"] = int(torch.randint(7, ())) + writer.writerow(rowdict) + + make_zip(root, f"{path.name}.zip", path) + + return num_samples + + +@register_mock(configs=combinations_grid(split=("train", 
"test"))) +def gtsrb(root, config): + num_examples_per_class = 5 if config["split"] == "train" else 3 + classes = ("00000", "00042", "00012") + num_examples = num_examples_per_class * len(classes) + + csv_columns = ["Filename", "Width", "Height", "Roi.X1", "Roi.Y1", "Roi.X2", "Roi.Y2", "ClassId"] + + def _make_ann_file(path, num_examples, class_idx): + if class_idx == "random": + class_idx = torch.randint(1, len(classes) + 1, size=(1,)).item() + + with open(path, "w") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=csv_columns, delimiter=";") + writer.writeheader() + for image_idx in range(num_examples): + writer.writerow( + { + "Filename": f"{image_idx:05d}.ppm", + "Width": torch.randint(1, 100, size=()).item(), + "Height": torch.randint(1, 100, size=()).item(), + "Roi.X1": torch.randint(1, 100, size=()).item(), + "Roi.Y1": torch.randint(1, 100, size=()).item(), + "Roi.X2": torch.randint(1, 100, size=()).item(), + "Roi.Y2": torch.randint(1, 100, size=()).item(), + "ClassId": class_idx, + } + ) + + archive_folder = root / "GTSRB" + + if config["split"] == "train": + train_folder = archive_folder / "Training" + train_folder.mkdir(parents=True) + + for class_idx in classes: + create_image_folder( + train_folder, + name=class_idx, + file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm", + num_examples=num_examples_per_class, + ) + _make_ann_file( + path=train_folder / class_idx / f"GT-{class_idx}.csv", + num_examples=num_examples_per_class, + class_idx=int(class_idx), + ) + make_zip(root, "GTSRB-Training_fixed.zip", archive_folder) + else: + test_folder = archive_folder / "Final_Test" + test_folder.mkdir(parents=True) + + create_image_folder( + test_folder, + name="Images", + file_name_fn=lambda image_idx: f"{image_idx:05d}.ppm", + num_examples=num_examples, + ) + + make_zip(root, "GTSRB_Final_Test_Images.zip", archive_folder) + + _make_ann_file( + path=root / "GT-final_test.csv", + num_examples=num_examples, + class_idx="random", + ) + + 
make_zip(root, "GTSRB_Final_Test_GT.zip", "GT-final_test.csv") + + return num_examples + + +@register_mock(configs=combinations_grid(split=("train", "val", "test"))) +def clevr(root, config): + data_folder = root / "CLEVR_v1.0" + + num_samples_map = { + "train": 3, + "val": 2, + "test": 1, + } + + images_folder = data_folder / "images" + image_files = { + split: create_image_folder( + images_folder, + split, + file_name_fn=lambda idx: f"CLEVR_{split}_{idx:06d}.jpg", + num_examples=num_samples, + ) + for split, num_samples in num_samples_map.items() + } + + scenes_folder = data_folder / "scenes" + scenes_folder.mkdir() + for split in ["train", "val"]: + with open(scenes_folder / f"CLEVR_{split}_scenes.json", "w") as file: + json.dump( + { + "scenes": [ + { + "image_filename": image_file.name, + # We currently only return the number of objects in a scene. + # Thus, it is sufficient for now to only mock the number of elements. + "objects": [None] * int(torch.randint(1, 5, ())), + } + for image_file in image_files[split] + ] + }, + file, + ) + + make_zip(root, f"{data_folder.name}.zip", data_folder) + + return num_samples_map[config["split"]] + + +class OxfordIIITPetMockData: + @classmethod + def _meta_to_split_and_classification_ann(cls, meta, idx): + image_id = "_".join( + [ + *[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()], + str(idx), + ] + ) + class_id = str(meta["label"] + 1) + species = "1" if meta["species"] == "cat" else "2" + breed_id = "-1" + return (image_id, class_id, species, breed_id) + + @classmethod + def generate(self, root): + classification_anns_meta = ( + dict(cls="Abyssinian", label=0, species="cat"), + dict(cls="Keeshond", label=18, species="dog"), + dict(cls="Yorkshire Terrier", label=36, species="dog"), + ) + split_and_classification_anns = [ + self._meta_to_split_and_classification_ann(meta, idx) + for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10)) + ] + image_ids, *_ = 
zip(*split_and_classification_anns) + + image_files = create_image_folder( + root, "images", file_name_fn=lambda idx: f"{image_ids[idx]}.jpg", num_examples=len(image_ids) + ) + + anns_folder = root / "annotations" + anns_folder.mkdir() + random.shuffle(split_and_classification_anns) + splits = ("trainval", "test") + num_samples_map = {} + for offset, split in enumerate(splits): + split_and_classification_anns_in_split = split_and_classification_anns[offset :: len(splits)] + with open(anns_folder / f"{split}.txt", "w") as file: + writer = csv.writer(file, delimiter=" ") + for split_and_classification_ann in split_and_classification_anns_in_split: + writer.writerow(split_and_classification_ann) + + num_samples_map[split] = len(split_and_classification_anns_in_split) + + segmentation_files = create_image_folder( + anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids) + ) + + # The dataset has some rogue files + for path in image_files[:3]: + path.with_suffix(".mat").touch() + for path in segmentation_files: + path.with_name(f".{path.name}").touch() + + make_tar(root, "images.tar.gz", compression="gz") + make_tar(root, anns_folder.with_suffix(".tar.gz").name, compression="gz") + + return num_samples_map + + +@register_mock(name="oxford-iiit-pet", configs=combinations_grid(split=("trainval", "test"))) +def oxford_iiit_pet(root, config): + return OxfordIIITPetMockData.generate(root)[config["split"]] + + +class _CUB200MockData: + @classmethod + def _category_folder(cls, category, idx): + return f"{idx:03d}.{category}" + + @classmethod + def _file_stem(cls, category, idx): + return f"{category}_{idx:04d}" + + @classmethod + def _make_images(cls, images_folder): + image_files = [] + for category_idx, category in [ + (1, "Black_footed_Albatross"), + (100, "Brown_Pelican"), + (200, "Common_Yellowthroat"), + ]: + image_files.extend( + create_image_folder( + images_folder, + cls._category_folder(category, category_idx), + lambda 
image_idx: f"{cls._file_stem(category, image_idx)}.jpg", + num_examples=5, + ) + ) + + return image_files + + +class CUB2002011MockData(_CUB200MockData): + @classmethod + def _make_archive(cls, root): + archive_folder = root / "CUB_200_2011" + + images_folder = archive_folder / "images" + image_files = cls._make_images(images_folder) + image_ids = list(range(1, len(image_files) + 1)) + + with open(archive_folder / "images.txt", "w") as file: + file.write( + "\n".join( + f"{id} {path.relative_to(images_folder).as_posix()}" for id, path in zip(image_ids, image_files) + ) + ) + + split_ids = torch.randint(2, (len(image_ids),)).tolist() + counts = Counter(split_ids) + num_samples_map = {"train": counts[1], "test": counts[0]} + with open(archive_folder / "train_test_split.txt", "w") as file: + file.write("\n".join(f"{image_id} {split_id}" for image_id, split_id in zip(image_ids, split_ids))) + + with open(archive_folder / "bounding_boxes.txt", "w") as file: + file.write( + "\n".join( + " ".join( + str(item) + for item in [image_id, *make_tensor((4,), dtype=torch.int, low=0).to(torch.float).tolist()] + ) + for image_id in image_ids + ) + ) + + make_tar(root, archive_folder.with_suffix(".tgz").name, compression="gz") + + return image_files, num_samples_map + + @classmethod + def _make_segmentations(cls, root, image_files): + segmentations_folder = root / "segmentations" + for image_file in image_files: + folder = segmentations_folder.joinpath(image_file.relative_to(image_file.parents[1])) + folder.mkdir(exist_ok=True, parents=True) + create_image_file( + folder, + image_file.with_suffix(".png").name, + size=[1, *make_tensor((2,), low=3, dtype=torch.int).tolist()], + ) + + make_tar(root, segmentations_folder.with_suffix(".tgz").name, compression="gz") + + @classmethod + def generate(cls, root): + image_files, num_samples_map = cls._make_archive(root) + cls._make_segmentations(root, image_files) + return num_samples_map + + +class CUB2002010MockData(_CUB200MockData): + 
@classmethod + def _make_hidden_rouge_file(cls, *files): + for file in files: + (file.parent / f"._{file.name}").touch() + + @classmethod + def _make_splits(cls, root, image_files): + split_folder = root / "lists" + split_folder.mkdir() + random.shuffle(image_files) + splits = ("train", "test") + num_samples_map = {} + for offset, split in enumerate(splits): + image_files_in_split = image_files[offset :: len(splits)] + + split_file = split_folder / f"{split}.txt" + with open(split_file, "w") as file: + file.write( + "\n".join( + sorted( + str(image_file.relative_to(image_file.parents[1]).as_posix()) + for image_file in image_files_in_split + ) + ) + ) + + cls._make_hidden_rouge_file(split_file) + num_samples_map[split] = len(image_files_in_split) + + make_tar(root, split_folder.with_suffix(".tgz").name, compression="gz") + + return num_samples_map + + @classmethod + def _make_anns(cls, root, image_files): + from scipy.io import savemat + + anns_folder = root / "annotations-mat" + for image_file in image_files: + ann_file = anns_folder / image_file.with_suffix(".mat").relative_to(image_file.parents[1]) + ann_file.parent.mkdir(parents=True, exist_ok=True) + + savemat( + ann_file, + { + "seg": torch.randint( + 256, make_tensor((2,), low=3, dtype=torch.int).tolist(), dtype=torch.uint8 + ).numpy(), + "bbox": dict( + zip(("left", "top", "right", "bottom"), make_tensor((4,), dtype=torch.uint8).tolist()) + ), + }, + ) + + readme_file = anns_folder / "README.txt" + readme_file.touch() + cls._make_hidden_rouge_file(readme_file) + + make_tar(root, "annotations.tgz", anns_folder, compression="gz") + + @classmethod + def generate(cls, root): + images_folder = root / "images" + image_files = cls._make_images(images_folder) + cls._make_hidden_rouge_file(*image_files) + make_tar(root, images_folder.with_suffix(".tgz").name, compression="gz") + + num_samples_map = cls._make_splits(root, image_files) + cls._make_anns(root, image_files) + + return num_samples_map + + 
+@register_mock(configs=combinations_grid(split=("train", "test"), year=("2010", "2011"))) +def cub200(root, config): + num_samples_map = (CUB2002011MockData if config["year"] == "2011" else CUB2002010MockData).generate(root) + return num_samples_map[config["split"]] + + +@register_mock(configs=[dict()]) +def eurosat(root, config): + data_folder = root / "2750" + data_folder.mkdir(parents=True) + + num_examples_per_class = 3 + categories = ["AnnualCrop", "Forest"] + for category in categories: + create_image_folder( + root=data_folder, + name=category, + file_name_fn=lambda idx: f"{category}_{idx + 1}.jpg", + num_examples=num_examples_per_class, + ) + make_zip(root, "EuroSAT.zip", data_folder) + return len(categories) * num_examples_per_class + + +@register_mock(configs=combinations_grid(split=("train", "test", "extra"))) +def svhn(root, config): + import scipy.io as sio + + num_samples = { + "train": 2, + "test": 3, + "extra": 4, + }[config["split"]] + + sio.savemat( + root / f"{config['split']}_32x32.mat", + { + "X": np.random.randint(256, size=(32, 32, 3, num_samples), dtype=np.uint8), + "y": np.random.randint(10, size=(num_samples,), dtype=np.uint8), + }, + ) + return num_samples + + +@register_mock(configs=combinations_grid(split=("train", "val", "test"))) +def pcam(root, config): + import h5py + + num_images = {"train": 2, "test": 3, "val": 4}[config["split"]] + + split = "valid" if config["split"] == "val" else config["split"] + + images_io = io.BytesIO() + with h5py.File(images_io, "w") as f: + f["x"] = np.random.randint(0, 256, size=(num_images, 10, 10, 3), dtype=np.uint8) + + targets_io = io.BytesIO() + with h5py.File(targets_io, "w") as f: + f["y"] = np.random.randint(0, 2, size=(num_images, 1, 1, 1), dtype=np.uint8) + + # Create .gz compressed files + images_file = root / f"camelyonpatch_level_2_split_{split}_x.h5.gz" + targets_file = root / f"camelyonpatch_level_2_split_{split}_y.h5.gz" + for compressed_file_name, uncompressed_file_io in ((images_file, 
images_io), (targets_file, targets_io)): + compressed_data = gzip.compress(uncompressed_file_io.getbuffer()) + with open(compressed_file_name, "wb") as compressed_file: + compressed_file.write(compressed_data) + + return num_images + + +@register_mock(name="stanford-cars", configs=combinations_grid(split=("train", "test"))) +def stanford_cars(root, config): + import scipy.io as io + from numpy.core.records import fromarrays + + split = config["split"] + num_samples = {"train": 5, "test": 7}[split] + num_categories = 3 + + if split == "train": + images_folder_name = "cars_train" + devkit = root / "devkit" + devkit.mkdir() + annotations_mat_path = devkit / "cars_train_annos.mat" + else: + images_folder_name = "cars_test" + annotations_mat_path = root / "cars_test_annos_withlabels.mat" + + create_image_folder( + root=root, + name=images_folder_name, + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_samples, + ) + + make_tar(root, f"cars_{split}.tgz", images_folder_name) + bbox = np.random.randint(1, 200, num_samples, dtype=np.uint8) + classes = np.random.randint(1, num_categories + 1, num_samples, dtype=np.uint8) + fnames = [f"{i:5d}.jpg" for i in range(num_samples)] + rec_array = fromarrays( + [bbox, bbox, bbox, bbox, classes, fnames], + names=["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2", "class", "fname"], + ) + + io.savemat(annotations_mat_path, {"annotations": rec_array}) + if split == "train": + make_tar(root, "car_devkit.tgz", devkit, compression="gz") + + return num_samples + + +@register_mock(configs=combinations_grid(split=("train", "test"))) +def usps(root, config): + num_samples = {"train": 15, "test": 7}[config["split"]] + + with bz2.open(root / f"usps{'.t' if not config['split'] == 'train' else ''}.bz2", "wb") as fh: + lines = [] + for _ in range(num_samples): + label = make_tensor(1, low=1, high=11, dtype=torch.int) + values = make_tensor(256, low=-1, high=1, dtype=torch.float) + lines.append( + " ".join([f"{int(label)}", 
*(f"{idx}:{float(value):.6f}" for idx, value in enumerate(values, 1))]) + ) + + fh.write("\n".join(lines).encode()) + + return num_samples diff --git a/test/common_extended_utils.py b/test/common_extended_utils.py new file mode 100644 index 00000000000..a34e15629bb --- /dev/null +++ b/test/common_extended_utils.py @@ -0,0 +1,310 @@ +import os +from collections import defaultdict +from numbers import Number +from typing import Any, List + +import torch +from torch.utils._python_dispatch import TorchDispatchMode + +from torch.utils._pytree import tree_map + +from torchvision.models._api import Weights + +aten = torch.ops.aten +quantized = torch.ops.quantized + + +def get_shape(i): + if isinstance(i, torch.Tensor): + return i.shape + elif hasattr(i, "weight"): + return i.weight().shape + else: + raise ValueError(f"Unknown type {type(i)}") + + +def prod(x): + res = 1 + for i in x: + res *= i + return res + + +def matmul_flop(inputs: List[Any], outputs: List[Any]) -> Number: + """ + Count flops for matmul. + """ + # Inputs should be a list of length 2. + # Inputs contains the shapes of two matrices. + input_shapes = [get_shape(v) for v in inputs] + assert len(input_shapes) == 2, input_shapes + assert input_shapes[0][-1] == input_shapes[1][-2], input_shapes + flop = prod(input_shapes[0]) * input_shapes[-1][-1] + return flop + + +def addmm_flop(inputs: List[Any], outputs: List[Any]) -> Number: + """ + Count flops for fully connected layers. + """ + # Count flop for nn.Linear + # inputs is a list of length 3. 
+ input_shapes = [get_shape(v) for v in inputs[1:3]] + # input_shapes[0]: [batch size, input feature dimension] + # input_shapes[1]: [batch size, output feature dimension] + assert len(input_shapes[0]) == 2, input_shapes[0] + assert len(input_shapes[1]) == 2, input_shapes[1] + batch_size, input_dim = input_shapes[0] + output_dim = input_shapes[1][1] + flops = batch_size * input_dim * output_dim + return flops + + +def bmm_flop(inputs: List[Any], outputs: List[Any]) -> Number: + """ + Count flops for the bmm operation. + """ + # Inputs should be a list of length 2. + # Inputs contains the shapes of two tensor. + assert len(inputs) == 2, len(inputs) + input_shapes = [get_shape(v) for v in inputs] + n, c, t = input_shapes[0] + d = input_shapes[-1][-1] + flop = n * c * t * d + return flop + + +def conv_flop_count( + x_shape: List[int], + w_shape: List[int], + out_shape: List[int], + transposed: bool = False, +) -> Number: + """ + Count flops for convolution. Note only multiplication is + counted. Computation for addition and bias is ignored. + Flops for a transposed convolution are calculated as + flops = (x_shape[2:] * prod(w_shape) * batch_size). + Args: + x_shape (list(int)): The input shape before convolution. + w_shape (list(int)): The filter shape. + out_shape (list(int)): The output shape after convolution. + transposed (bool): is the convolution transposed + Returns: + int: the number of flops + """ + batch_size = x_shape[0] + conv_shape = (x_shape if transposed else out_shape)[2:] + flop = batch_size * prod(w_shape) * prod(conv_shape) + return flop + + +def conv_flop(inputs: List[Any], outputs: List[Any]): + """ + Count flops for convolution. 
+ """ + x, w = inputs[:2] + x_shape, w_shape, out_shape = (get_shape(x), get_shape(w), get_shape(outputs[0])) + transposed = inputs[6] + + return conv_flop_count(x_shape, w_shape, out_shape, transposed=transposed) + + +def quant_conv_flop(inputs: List[Any], outputs: List[Any]): + """ + Count flops for quantized convolution. + """ + x, w = inputs[:2] + x_shape, w_shape, out_shape = (get_shape(x), get_shape(w), get_shape(outputs[0])) + + return conv_flop_count(x_shape, w_shape, out_shape, transposed=False) + + +def transpose_shape(shape): + return [shape[1], shape[0]] + list(shape[2:]) + + +def conv_backward_flop(inputs: List[Any], outputs: List[Any]): + grad_out_shape, x_shape, w_shape = [get_shape(i) for i in inputs[:3]] + output_mask = inputs[-1] + fwd_transposed = inputs[7] + flop_count = 0 + + if output_mask[0]: + grad_input_shape = get_shape(outputs[0]) + flop_count += conv_flop_count(grad_out_shape, w_shape, grad_input_shape, not fwd_transposed) + if output_mask[1]: + grad_weight_shape = get_shape(outputs[1]) + flop_count += conv_flop_count(transpose_shape(x_shape), grad_out_shape, grad_weight_shape, fwd_transposed) + + return flop_count + + +def scaled_dot_product_flash_attention_flop(inputs: List[Any], outputs: List[Any]): + # FIXME: this needs to count the flops of this kernel + # https://github.com/pytorch/pytorch/blob/207b06d099def9d9476176a1842e88636c1f714f/aten/src/ATen/native/cpu/FlashAttentionKernel.cpp#L52-L267 + return 0 + + +flop_mapping = { + aten.mm: matmul_flop, + aten.matmul: matmul_flop, + aten.addmm: addmm_flop, + aten.bmm: bmm_flop, + aten.convolution: conv_flop, + aten._convolution: conv_flop, + aten.convolution_backward: conv_backward_flop, + quantized.conv2d: quant_conv_flop, + quantized.conv2d_relu: quant_conv_flop, + aten._scaled_dot_product_flash_attention: scaled_dot_product_flash_attention_flop, +} + +unmapped_ops = set() + + +def normalize_tuple(x): + if not isinstance(x, tuple): + return (x,) + return x + + +class 
FlopCounterMode(TorchDispatchMode): + def __init__(self, model=None): + self.flop_counts = defaultdict(lambda: defaultdict(int)) + self.parents = ["Global"] + # global mod + if model is not None: + for name, module in dict(model.named_children()).items(): + module.register_forward_pre_hook(self.enter_module(name)) + module.register_forward_hook(self.exit_module(name)) + + def enter_module(self, name): + def f(module, inputs): + self.parents.append(name) + inputs = normalize_tuple(inputs) + out = self.create_backwards_pop(name)(*inputs) + return out + + return f + + def exit_module(self, name): + def f(module, inputs, outputs): + assert self.parents[-1] == name + self.parents.pop() + outputs = normalize_tuple(outputs) + return self.create_backwards_push(name)(*outputs) + + return f + + def create_backwards_push(self, name): + class PushState(torch.autograd.Function): + @staticmethod + def forward(ctx, *args): + args = tree_map(lambda x: x.clone() if isinstance(x, torch.Tensor) else x, args) + if len(args) == 1: + return args[0] + return args + + @staticmethod + def backward(ctx, *grad_outs): + self.parents.append(name) + return grad_outs + + return PushState.apply + + def create_backwards_pop(self, name): + class PopState(torch.autograd.Function): + @staticmethod + def forward(ctx, *args): + args = tree_map(lambda x: x.clone() if isinstance(x, torch.Tensor) else x, args) + if len(args) == 1: + return args[0] + return args + + @staticmethod + def backward(ctx, *grad_outs): + assert self.parents[-1] == name + self.parents.pop() + return grad_outs + + return PopState.apply + + def __enter__(self): + self.flop_counts.clear() + super().__enter__() + + def __exit__(self, *args): + # print(f"Total: {sum(self.flop_counts['Global'].values()) / 1e9} GFLOPS") + # for mod in self.flop_counts.keys(): + # print(f"Module: ", mod) + # for k, v in self.flop_counts[mod].items(): + # print(f"{k}: {v / 1e9} GFLOPS") + # print() + super().__exit__(*args) + + def __torch_dispatch__(self, 
func, types, args=(), kwargs=None): + kwargs = kwargs if kwargs else {} + + out = func(*args, **kwargs) + func_packet = func._overloadpacket + if func_packet in flop_mapping: + flop_count = flop_mapping[func_packet](args, normalize_tuple(out)) + for par in self.parents: + self.flop_counts[par][func_packet] += flop_count + else: + unmapped_ops.add(func_packet) + + return out + + def get_flops(self): + return sum(self.flop_counts["Global"].values()) / 1e9 + + +def get_dims(module_name, height, width): + # detection models have curated input sizes + if module_name == "detection": + # we can feed a batch of 1 for detection model instead of a list of 1 image + dims = (3, height, width) + elif module_name == "video": + # hard-coding the time dimension to size 16 + dims = (1, 16, 3, height, width) + else: + dims = (1, 3, height, width) + + return dims + + +def get_ops(model: torch.nn.Module, weight: Weights, height=512, width=512): + module_name = model.__module__.split(".")[-2] + dims = get_dims(module_name=module_name, height=height, width=width) + + input_tensor = torch.randn(dims) + + # try: + preprocess = weight.transforms() + if module_name == "optical_flow": + inp = preprocess(input_tensor, input_tensor) + else: + # hack to enable mod(*inp) for optical_flow models + inp = [preprocess(input_tensor)] + + model.eval() + + flop_counter = FlopCounterMode(model) + with flop_counter: + # detection models expect a list of 3d tensors as inputs + if module_name == "detection": + model(inp) + else: + model(*inp) + + flops = flop_counter.get_flops() + + return round(flops, 3) + + +def get_file_size_mb(weight): + weights_path = os.path.join(os.getenv("HOME"), ".cache/torch/hub/checkpoints", weight.url.split("/")[-1]) + weights_size_mb = os.path.getsize(weights_path) / 1024 / 1024 + + return round(weights_size_mb, 3) diff --git a/test/common_utils.py b/test/common_utils.py index 9c0c3175ef1..99c7931587d 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -1,13 +1,35 
@@ +import contextlib +import functools +import itertools import os +import pathlib +import random +import re import shutil -import tempfile -import contextlib -import unittest -import argparse import sys +import tempfile +import warnings +from subprocess import CalledProcessError, check_output, STDOUT + +import numpy as np +import PIL.Image +import pytest import torch -import errno -import __main__ +import torch.testing +from PIL import Image + +from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair +from torchvision import io, tv_tensors +from torchvision.transforms._functional_tensor import _max_value as get_max_value +from torchvision.transforms.v2.functional import to_image, to_pil_image + + +IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) +IN_RE_WORKER = os.environ.get("INSIDE_RE_WORKER") is not None +IN_FBCODE = os.environ.get("IN_FBCODE_TORCHVISION") == "1" +CUDA_NOT_AVAILABLE_MSG = "CUDA device not available" +MPS_NOT_AVAILABLE_MSG = "MPS device not available" +OSS_CI_GPU_NO_CUDA_MSG = "We're in an OSS GPU machine, and this test doesn't need cuda." 
@contextlib.contextmanager @@ -22,20 +44,12 @@ def get_tmp_dir(src=None, **kwargs): shutil.rmtree(tmp_dir) -ACCEPT = os.getenv('EXPECTTEST_ACCEPT') - -parser = argparse.ArgumentParser(add_help=False) -parser.add_argument('--accept', action='store_true') -args, remaining = parser.parse_known_args() -if not ACCEPT: - ACCEPT = args.accept -for i, arg in enumerate(sys.argv): - if arg == '--accept': - del sys.argv[i] - break +def set_rng_seed(seed): + torch.manual_seed(seed) + random.seed(seed) -class MapNestedTensorObjectImpl(object): +class MapNestedTensorObjectImpl: def __init__(self, tensor_map_fn): self.tensor_map_fn = tensor_map_fn @@ -64,90 +78,442 @@ def map_nested_tensor_object(object, tensor_map_fn): return impl(object) -# adapted from TestCase in torch/test/common_utils to accept non-string -# inputs and set maximum binary size -class TestCase(unittest.TestCase): - def assertExpected(self, output, subname=None, rtol=None, atol=None): - r""" - Test that a python value matches the recorded contents of a file - derived from the name of this test and subname. The value must be - pickable with `torch.save`. This file - is placed in the 'expect' directory in the same directory - as the test script. You can automatically update the recorded test - output using --accept. - - If you call this multiple times in a single function, you must - give a unique subname each time. - """ - def remove_prefix(text, prefix): - if text.startswith(prefix): - return text[len(prefix):] - return text - # NB: we take __file__ from the module that defined the test - # class, so we place the expect directory where the test script - # lives, NOT where test/common_utils.py lives. 
- module_id = self.__class__.__module__ - munged_id = remove_prefix(self.id(), module_id + ".") - test_file = os.path.realpath(sys.modules[module_id].__file__) - expected_file = os.path.join(os.path.dirname(test_file), - "expect", - munged_id) - - subname_output = "" - if subname: - expected_file += "_" + subname - subname_output = " ({})".format(subname) - expected_file += "_expect.pkl" - expected = None - - def accept_output(update_type): - print("Accepting {} for {}{}:\n\n{}".format(update_type, munged_id, subname_output, output)) - torch.save(output, expected_file) - MAX_PICKLE_SIZE = 50 * 1000 # 50 KB - binary_size = os.path.getsize(expected_file) - self.assertTrue(binary_size <= MAX_PICKLE_SIZE) +def is_iterable(obj): + try: + iter(obj) + return True + except TypeError: + return False - try: - expected = torch.load(expected_file) - except IOError as e: - if e.errno != errno.ENOENT: - raise - elif ACCEPT: - return accept_output("output") - else: - raise RuntimeError( - ("I got this output for {}{}:\n\n{}\n\n" - "No expect file exists; to accept the current output, run:\n" - "python {} {} --accept").format(munged_id, subname_output, output, __main__.__file__, munged_id)) - - if ACCEPT: - equal = False - try: - equal = self.assertNestedTensorObjectsEqual(output, expected, rtol=rtol, atol=atol) - except Exception: - equal = False - if not equal: - return accept_output("updated output") + +@contextlib.contextmanager +def freeze_rng_state(): + rng_state = torch.get_rng_state() + if torch.cuda.is_available(): + cuda_rng_state = torch.cuda.get_rng_state() + yield + if torch.cuda.is_available(): + torch.cuda.set_rng_state(cuda_rng_state) + torch.set_rng_state(rng_state) + + +def cycle_over(objs): + for idx, obj1 in enumerate(objs): + for obj2 in objs[:idx] + objs[idx + 1 :]: + yield obj1, obj2 + + +def int_dtypes(): + return (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64) + + +def float_dtypes(): + return (torch.float32, torch.float64) + + 
+@contextlib.contextmanager +def disable_console_output(): + with contextlib.ExitStack() as stack, open(os.devnull, "w") as devnull: + stack.enter_context(contextlib.redirect_stdout(devnull)) + stack.enter_context(contextlib.redirect_stderr(devnull)) + yield + + +def cpu_and_cuda(): + import pytest # noqa + + return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda)) + + +def cpu_and_cuda_and_mps(): + return cpu_and_cuda() + (pytest.param("mps", marks=pytest.mark.needs_mps),) + + +def needs_cuda(test_func): + import pytest # noqa + + return pytest.mark.needs_cuda(test_func) + + +def needs_mps(test_func): + import pytest # noqa + + return pytest.mark.needs_mps(test_func) + + +def _create_data(height=3, width=3, channels=3, device="cpu"): + # TODO: When all relevant tests are ported to pytest, turn this into a module-level fixture + tensor = torch.randint(0, 256, (channels, height, width), dtype=torch.uint8, device=device) + data = tensor.permute(1, 2, 0).contiguous().cpu().numpy() + mode = "RGB" + if channels == 1: + mode = "L" + data = data[..., 0] + pil_img = Image.fromarray(data, mode=mode) + return tensor, pil_img + + +def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu"): + # TODO: When all relevant tests are ported to pytest, turn this into a module-level fixture + batch_tensor = torch.randint(0, 256, (num_samples, channels, height, width), dtype=torch.uint8, device=device) + return batch_tensor + + +def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None): + names = [] + for i in range(num_videos): + if sizes is None: + size = 5 * (i + 1) + else: + size = sizes[i] + if fps is None: + f = 5 else: - self.assertNestedTensorObjectsEqual(output, expected, rtol=rtol, atol=atol) + f = fps[i] + data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8) + name = os.path.join(tmpdir, f"{i}.mp4") + names.append(name) + io.write_video(name, data, fps=f) + + return names - def assertNestedTensorObjectsEqual(self, a, 
b, rtol=None, atol=None): - self.assertEqual(type(a), type(b)) - if isinstance(a, torch.Tensor): - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) +def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None): + # FIXME: this is handled automatically by `assert_equal` below. Let's remove this in favor of it + np_pil_image = np.array(pil_image) + if np_pil_image.ndim == 2: + np_pil_image = np_pil_image[:, :, None] + pil_tensor = torch.as_tensor(np_pil_image.transpose((2, 0, 1))) + if msg is None: + msg = f"tensor:\n{tensor} \ndid not equal PIL tensor:\n{pil_tensor}" + assert_equal(tensor.cpu(), pil_tensor, msg=msg) - elif isinstance(a, dict): - self.assertEqual(len(a), len(b)) - for key, value in a.items(): - self.assertTrue(key in b, "key: " + str(key)) - self.assertNestedTensorObjectsEqual(value, b[key], rtol=rtol, atol=atol) - elif isinstance(a, (list, tuple)): - self.assertEqual(len(a), len(b)) +def _assert_approx_equal_tensor_to_pil( + tensor, pil_image, tol=1e-5, msg=None, agg_method="mean", allowed_percentage_diff=None +): + # FIXME: this is handled automatically by `assert_close` below. 
Let's remove this in favor of it + # TODO: we could just merge this into _assert_equal_tensor_to_pil + np_pil_image = np.array(pil_image) + if np_pil_image.ndim == 2: + np_pil_image = np_pil_image[:, :, None] + pil_tensor = torch.as_tensor(np_pil_image.transpose((2, 0, 1))).to(tensor) + + if allowed_percentage_diff is not None: + # Assert that less than a given %age of pixels are different + assert (tensor != pil_tensor).to(torch.float).mean() <= allowed_percentage_diff + + # error value can be mean absolute error, max abs error + # Convert to float to avoid underflow when computing absolute difference + tensor = tensor.to(torch.float) + pil_tensor = pil_tensor.to(torch.float) + err = getattr(torch, agg_method)(torch.abs(tensor - pil_tensor)).item() + assert err < tol, f"{err} vs {tol}" + + +def _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=1e-8, **fn_kwargs): + transformed_batch = fn(batch_tensors, **fn_kwargs) + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] + transformed_img = fn(img_tensor, **fn_kwargs) + torch.testing.assert_close(transformed_img, transformed_batch[i, ...], rtol=0, atol=1e-6) + + if scripted_fn_atol >= 0: + scripted_fn = torch.jit.script(fn) + # scriptable function test + s_transformed_batch = scripted_fn(batch_tensors, **fn_kwargs) + torch.testing.assert_close(transformed_batch, s_transformed_batch, rtol=1e-5, atol=scripted_fn_atol) + + +def cache(fn): + """Similar to :func:`functools.cache` (Python >= 3.8) or :func:`functools.lru_cache` with infinite cache size, + but this also caches exceptions. 
+ """ + sentinel = object() + out_cache = {} + exc_tb_cache = {} + + @functools.wraps(fn) + def wrapper(*args, **kwargs): + key = args + tuple(kwargs.values()) + + out = out_cache.get(key, sentinel) + if out is not sentinel: + return out + + exc_tb = exc_tb_cache.get(key, sentinel) + if exc_tb is not sentinel: + raise exc_tb[0].with_traceback(exc_tb[1]) + + try: + out = fn(*args, **kwargs) + except Exception as exc: + # We need to cache the traceback here as well. Otherwise, each re-raise will add the internal pytest + # traceback frames anew, but they will only be removed once. Thus, the traceback will be ginormous hiding + # the actual information in the noise. See https://github.com/pytest-dev/pytest/issues/10363 for details. + exc_tb_cache[key] = exc, exc.__traceback__ + raise exc + + out_cache[key] = out + return out - for val1, val2 in zip(a, b): - self.assertNestedTensorObjectsEqual(val1, val2, rtol=rtol, atol=atol) + return wrapper + +def combinations_grid(**kwargs): + """Creates a grid of input combinations. + + Each element in the returned sequence is a dictionary containing one possible combination as values. 
+ + Example: + >>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham")) + [ + {'foo': 'bar', 'spam': 'eggs'}, + {'foo': 'bar', 'spam': 'ham'}, + {'foo': 'baz', 'spam': 'eggs'}, + {'foo': 'baz', 'spam': 'ham'} + ] + """ + return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())] + + +class ImagePair(TensorLikePair): + def __init__( + self, + actual, + expected, + *, + mae=False, + **other_parameters, + ): + if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]): + actual, expected = [to_image(input) for input in [actual, expected]] + + super().__init__(actual, expected, **other_parameters) + self.mae = mae + + def compare(self) -> None: + actual, expected = self.actual, self.expected + + self._compare_attributes(actual, expected) + actual, expected = self._equalize_attributes(actual, expected) + + if self.mae: + if actual.dtype is torch.uint8: + actual, expected = actual.to(torch.int), expected.to(torch.int) + mae = float(torch.abs(actual - expected).float().mean()) + if mae > self.atol: + self._fail( + AssertionError, + f"The MAE of the images is {mae}, but only {self.atol} is allowed.", + ) else: - self.assertEqual(a, b) + super()._compare_values(actual, expected) + + +def assert_close( + actual, + expected, + *, + allow_subclasses=True, + rtol=None, + atol=None, + equal_nan=False, + check_device=True, + check_dtype=True, + check_layout=True, + check_stride=False, + msg=None, + **kwargs, +): + """Superset of :func:`torch.testing.assert_close` with support for PIL vs. 
tensor image comparison""" + __tracebackhide__ = True + + error_metas = not_close_error_metas( + actual, + expected, + pair_types=( + NonePair, + BooleanPair, + NumberPair, + ImagePair, + TensorLikePair, + ), + allow_subclasses=allow_subclasses, + rtol=rtol, + atol=atol, + equal_nan=equal_nan, + check_device=check_device, + check_dtype=check_dtype, + check_layout=check_layout, + check_stride=check_stride, + **kwargs, + ) + + if error_metas: + raise error_metas[0].to_error(msg) + + +assert_equal = functools.partial(assert_close, rtol=0, atol=0) + + +DEFAULT_SIZE = (17, 11) + + +NUM_CHANNELS_MAP = { + "GRAY": 1, + "GRAY_ALPHA": 2, + "RGB": 3, + "RGBA": 4, +} + + +def make_image( + size=DEFAULT_SIZE, + *, + color_space="RGB", + batch_dims=(), + dtype=None, + device="cpu", + memory_format=torch.contiguous_format, +): + num_channels = NUM_CHANNELS_MAP[color_space] + dtype = dtype or torch.uint8 + max_value = get_max_value(dtype) + data = torch.testing.make_tensor( + (*batch_dims, num_channels, *size), + low=0, + high=max_value, + dtype=dtype, + device=device, + memory_format=memory_format, + ) + if color_space in {"GRAY_ALPHA", "RGBA"}: + data[..., -1, :, :] = max_value + + return tv_tensors.Image(data) + + +def make_image_tensor(*args, **kwargs): + return make_image(*args, **kwargs).as_subclass(torch.Tensor) + + +def make_image_pil(*args, **kwargs): + return to_pil_image(make_image(*args, **kwargs)) + + +def make_bounding_boxes( + canvas_size=DEFAULT_SIZE, + *, + format=tv_tensors.BoundingBoxFormat.XYXY, + num_boxes=1, + dtype=None, + device="cpu", +): + def sample_position(values, max_value): + # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high. + # However, if we have batch_dims, we need tensors as limits. 
+ return torch.stack([torch.randint(max_value - v, ()) for v in values.tolist()]) + + if isinstance(format, str): + format = tv_tensors.BoundingBoxFormat[format] + + dtype = dtype or torch.float32 + + h, w = [torch.randint(1, s, (num_boxes,)) for s in canvas_size] + y = sample_position(h, canvas_size[0]) + x = sample_position(w, canvas_size[1]) + + if format is tv_tensors.BoundingBoxFormat.XYWH: + parts = (x, y, w, h) + elif format is tv_tensors.BoundingBoxFormat.XYXY: + x1, y1 = x, y + x2 = x1 + w + y2 = y1 + h + parts = (x1, y1, x2, y2) + elif format is tv_tensors.BoundingBoxFormat.CXCYWH: + cx = x + w / 2 + cy = y + h / 2 + parts = (cx, cy, w, h) + else: + raise ValueError(f"Format {format} is not supported") + + return tv_tensors.BoundingBoxes( + torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size + ) + + +def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"): + """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" + return tv_tensors.Mask( + torch.testing.make_tensor( + (num_masks, *size), + low=0, + high=2, + dtype=dtype or torch.bool, + device=device, + ) + ) + + +def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): + """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" + return tv_tensors.Mask( + torch.testing.make_tensor( + (*batch_dims, *size), + low=0, + high=num_categories, + dtype=dtype or torch.uint8, + device=device, + ) + ) + + +def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs): + return tv_tensors.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)) + + +def make_video_tensor(*args, **kwargs): + return make_video(*args, **kwargs).as_subclass(torch.Tensor) + + +def assert_run_python_script(source_code): + """Utility to check assertions in an independent Python subprocess. 
+ + The script provided in the source code should return 0 and not print + anything on stderr or stdout. Modified from scikit-learn test utils. + + Args: + source_code (str): The Python source code to execute. + """ + with get_tmp_dir() as root: + path = pathlib.Path(root) / "main.py" + with open(path, "w") as file: + file.write(source_code) + + try: + out = check_output([sys.executable, str(path)], stderr=STDOUT) + except CalledProcessError as e: + raise RuntimeError(f"script errored with output:\n{e.output.decode()}") + if out != b"": + raise AssertionError(out.decode()) + + +@contextlib.contextmanager +def assert_no_warnings(): + # The name `catch_warnings` is a misnomer as the context manager does **not** catch any warnings, but rather scopes + # the warning filters. All changes that are made to the filters while in this context, will be reset upon exit. + with warnings.catch_warnings(): + warnings.simplefilter("error") + yield + + +@contextlib.contextmanager +def ignore_jit_no_profile_information_warning(): + # Calling a scripted object often triggers a warning like + # `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information` + # with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore + # them. 
+ with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning) + yield diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 00000000000..a9768598ded --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,121 @@ +import random + +import numpy as np +import pytest +import torch + +from common_utils import ( + CUDA_NOT_AVAILABLE_MSG, + IN_FBCODE, + IN_OSS_CI, + IN_RE_WORKER, + MPS_NOT_AVAILABLE_MSG, + OSS_CI_GPU_NO_CUDA_MSG, +) + + +def pytest_configure(config): + # register an additional marker (see pytest_collection_modifyitems) + config.addinivalue_line("markers", "needs_cuda: mark for tests that rely on a CUDA device") + config.addinivalue_line("markers", "needs_mps: mark for tests that rely on a MPS device") + config.addinivalue_line("markers", "dont_collect: mark for tests that should not be collected") + config.addinivalue_line("markers", "opcheck_only_one: only opcheck one parametrization") + + +def pytest_collection_modifyitems(items): + # This hook is called by pytest after it has collected the tests (google its name to check out its doc!) + # We can ignore some tests as we see fit here, or add marks, such as a skip mark. + # + # Typically, here, we try to optimize CI time. In particular, the GPU CI instances don't need to run the + # tests that don't need CUDA, because those tests are extensively tested in the CPU CI instances already. + # This is true for both OSS CI and the fbcode internal CI. + # In the fbcode CI, we have an additional constraint: we try to avoid skipping tests. So instead of relying on + # pytest.mark.skip, in fbcode we literally just remove those tests from the `items` list, and it's as if + # these tests never existed. + + out_items = [] + for item in items: + # The needs_cuda mark will exist if the test was explicitly decorated with + # the @needs_cuda decorator. 
It will also exist if it was parametrized with a + # parameter that has the mark: for example if a test is parametrized with + # @pytest.mark.parametrize('device', cpu_and_cuda()) + # the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark, + # and the ones with device == 'cpu' won't have the mark. + needs_cuda = item.get_closest_marker("needs_cuda") is not None + needs_mps = item.get_closest_marker("needs_mps") is not None + + if needs_cuda and not torch.cuda.is_available(): + # In general, we skip cuda tests on machines without a GPU + # There are special cases though, see below + item.add_marker(pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG)) + + if needs_mps and not torch.backends.mps.is_available(): + item.add_marker(pytest.mark.skip(reason=MPS_NOT_AVAILABLE_MSG)) + + if IN_FBCODE: + # fbcode doesn't like skipping tests, so instead we just don't collect the test + # so that they don't even "exist", hence the continue statements. + if not needs_cuda and IN_RE_WORKER: + # The RE workers are the machines with GPU, we don't want them to run CPU-only tests. + continue + if needs_cuda and not torch.cuda.is_available(): + # On the test machines without a GPU, we want to ignore the tests that need cuda. + # TODO: something more robust would be to do that only in a sandcastle instance, + # so that we can still see the test being skipped when testing locally from a devvm + continue + if needs_mps and not torch.backends.mps.is_available(): + # Same as above, but for MPS + continue + elif IN_OSS_CI: + # Here we're not in fbcode, so we can safely collect and skip tests. + if not needs_cuda and torch.cuda.is_available(): + # Similar to what happens in RE workers: we don't need the OSS CI GPU machines + # to run the CPU-only tests. 
+ item.add_marker(pytest.mark.skip(reason=OSS_CI_GPU_NO_CUDA_MSG)) + + if item.get_closest_marker("dont_collect") is not None: + # currently, this is only used for some tests we're sure we don't want to run on fbcode + continue + + out_items.append(item) + + items[:] = out_items + + +def pytest_sessionfinish(session, exitstatus): + # This hook is called after all tests have run, and just before returning an exit status. + # We here change exit code 5 into 0. + # + # 5 is issued when no tests were actually run, e.g. if you use `pytest -k some_regex_that_is_never_matched`. + # + # Having no test being run for a given test rule is a common scenario in fbcode, and typically happens on + # the GPU test machines which don't run the CPU-only tests (see pytest_collection_modifyitems above). For + # example `test_transforms.py` doesn't contain any CUDA test at the time of + # writing, so on a GPU test machine, testpilot would invoke pytest on this file and no test would be run. + # This would result in pytest returning 5, causing testpilot to raise an error. + # To avoid this, we transform this 5 into a 0 to make testpilot happy. + if exitstatus == 5: + session.exitstatus = 0 + + +@pytest.fixture(autouse=True) +def prevent_leaking_rng(): + # Prevent each test from leaking the rng to all other test when they call + # torch.manual_seed() or random.seed() or np.random.seed(). + # Note: the numpy rngs should never leak anyway, as we never use + # np.random.seed() and instead rely on np.random.RandomState instances (see + # issue #4247). We still do it for extra precaution. 
+ + torch_rng_state = torch.get_rng_state() + builtin_rng_state = random.getstate() + nunmpy_rng_state = np.random.get_state() + if torch.cuda.is_available(): + cuda_rng_state = torch.cuda.get_rng_state() + + yield + + torch.set_rng_state(torch_rng_state) + random.setstate(builtin_rng_state) + np.random.set_state(nunmpy_rng_state) + if torch.cuda.is_available(): + torch.cuda.set_rng_state(cuda_rng_state) diff --git a/test/cpp/test_custom_operators.cpp b/test/cpp/test_custom_operators.cpp new file mode 100644 index 00000000000..5178575d21b --- /dev/null +++ b/test/cpp/test_custom_operators.cpp @@ -0,0 +1,65 @@ +#include +#include +#include + +// FIXME: the include path differs from OSS due to the extra csrc +#include + +TEST(test_custom_operators, nms) { + // make sure that the torchvision ops are visible to the jit interpreter + auto& ops = torch::jit::getAllOperatorsFor( + torch::jit::Symbol::fromQualString("torchvision::nms")); + ASSERT_EQ(ops.size(), 1); + + auto& op = ops.front(); + ASSERT_EQ(op->schema().name(), "torchvision::nms"); + + torch::jit::Stack stack; + at::Tensor boxes = at::rand({50, 4}), scores = at::rand({50}); + double thresh = 0.7; + + torch::jit::push(stack, boxes, scores, thresh); + op->getOperation()(stack); + at::Tensor output_jit; + torch::jit::pop(stack, output_jit); + + at::Tensor output = vision::ops::nms(boxes, scores, thresh); + ASSERT_TRUE(output_jit.allclose(output)); +} + +TEST(test_custom_operators, roi_align_visible) { + // make sure that the torchvision ops are visible to the jit interpreter even + // if not explicitly included + auto& ops = torch::jit::getAllOperatorsFor( + torch::jit::Symbol::fromQualString("torchvision::roi_align")); + ASSERT_EQ(ops.size(), 1); + + auto& op = ops.front(); + ASSERT_EQ(op->schema().name(), "torchvision::roi_align"); + + torch::jit::Stack stack; + float roi_data[] = {0., 0., 0., 5., 5., 0., 5., 5., 10., 10.}; + at::Tensor input = at::rand({1, 2, 10, 10}), + rois = at::from_blob(roi_data, {2, 
5}); + double spatial_scale = 1.0; + int64_t pooled_height = 3, pooled_width = 3, sampling_ratio = -1; + bool aligned = true; + + torch::jit::push( + stack, + input, + rois, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio, + aligned); + op->getOperation()(stack); + at::Tensor output_jit; + torch::jit::pop(stack, output_jit); + + ASSERT_EQ(output_jit.sizes()[0], 2); + ASSERT_EQ(output_jit.sizes()[1], 2); + ASSERT_EQ(output_jit.sizes()[2], 3); + ASSERT_EQ(output_jit.sizes()[3], 3); +} diff --git a/test/datasets_utils.py b/test/datasets_utils.py new file mode 100644 index 00000000000..8ea5e12610f --- /dev/null +++ b/test/datasets_utils.py @@ -0,0 +1,1056 @@ +import contextlib +import functools +import importlib +import inspect +import itertools +import os +import pathlib +import platform +import random +import shutil +import string +import struct +import tarfile +import unittest +import unittest.mock +import zipfile +from collections import defaultdict +from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union + +import numpy as np +import numpy.typing as npt + +import PIL +import PIL.Image +import pytest +import torch +import torchvision.datasets +import torchvision.io +from common_utils import disable_console_output, get_tmp_dir +from torch.utils._pytree import tree_any +from torch.utils.data import DataLoader +from torchvision import tv_tensors +from torchvision.datasets import wrap_dataset_for_transforms_v2 +from torchvision.transforms.functional import get_dimensions +from torchvision.transforms.v2.functional import get_size + + +__all__ = [ + "UsageError", + "lazy_importer", + "test_all_configs", + "DatasetTestCase", + "ImageDatasetTestCase", + "VideoDatasetTestCase", + "create_image_or_video_tensor", + "create_image_file", + "create_image_folder", + "create_video_file", + "create_video_folder", + "make_tar", + "make_zip", + "create_random_string", +] + + +class UsageError(Exception): + """Should be raised in 
case an error happens in the setup rather than the test.""" + + +class LazyImporter: + r"""Lazy importer for additional dependencies. + + Some datasets require additional packages that are no direct dependencies of torchvision. Instances of this class + provide modules listed in MODULES as attributes. They are only imported when accessed. + + """ + MODULES = ( + "av", + "lmdb", + "pycocotools", + "requests", + "scipy.io", + "scipy.sparse", + "h5py", + ) + + def __init__(self): + modules = defaultdict(list) + for module in self.MODULES: + module, *submodules = module.split(".", 1) + if submodules: + modules[module].append(submodules[0]) + else: + # This introduces the module so that it is known when we later iterate over the dictionary. + modules.__missing__(module) + + for module, submodules in modules.items(): + # We need the quirky 'module=module' and submodules=submodules arguments to the lambda since otherwise the + # lookup for these would happen at runtime rather than at definition. Thus, without it, every property + # would try to import the last item in 'modules' + setattr( + type(self), + module, + property(lambda self, module=module, submodules=submodules: LazyImporter._import(module, submodules)), + ) + + @staticmethod + def _import(package, subpackages): + try: + module = importlib.import_module(package) + except ImportError as error: + raise UsageError( + f"Failed to import module '{package}'. " + f"This probably means that the current test case needs '{package}' installed, " + f"but it is not a dependency of torchvision. " + f"You need to install it manually, for example 'pip install {package}'." 
+ ) from error + + for name in subpackages: + importlib.import_module(f".{name}", package=package) + + return module + + +lazy_importer = LazyImporter() + + +def requires_lazy_imports(*modules): + def outer_wrapper(fn): + @functools.wraps(fn) + def inner_wrapper(*args, **kwargs): + for module in modules: + getattr(lazy_importer, module.replace(".", "_")) + return fn(*args, **kwargs) + + return inner_wrapper + + return outer_wrapper + + +def test_all_configs(test): + """Decorator to run test against all configurations. + + Add this as decorator to an arbitrary test to run it against all configurations. This includes + :attr:`DatasetTestCase.DEFAULT_CONFIG` and :attr:`DatasetTestCase.ADDITIONAL_CONFIGS`. + + The current configuration is provided as the first parameter for the test: + + .. code-block:: + + @test_all_configs() + def test_foo(self, config): + pass + + .. note:: + + This will try to remove duplicate configurations. During this process it will not preserve a potential + ordering of the configurations or an inner ordering of a configuration. + """ + + def maybe_remove_duplicates(configs): + try: + return [dict(config_) for config_ in {tuple(sorted(config.items())) for config in configs}] + except TypeError: + # A TypeError will be raised if a value of any config is not hashable, e.g. a list. In that case duplicate + # removal would be a lot more elaborate, and we simply bail out. + return configs + + @functools.wraps(test) + def wrapper(self): + configs = [] + if self.DEFAULT_CONFIG is not None: + configs.append(self.DEFAULT_CONFIG) + if self.ADDITIONAL_CONFIGS is not None: + configs.extend(self.ADDITIONAL_CONFIGS) + + if not configs: + configs = [self._KWARG_DEFAULTS.copy()] + else: + configs = maybe_remove_duplicates(configs) + + for config in configs: + with self.subTest(**config): + test(self, config) + + return wrapper + + +class DatasetTestCase(unittest.TestCase): + """Abstract base class for all dataset testcases. 
+ + You have to overwrite the following class attributes: + + - DATASET_CLASS (torchvision.datasets.VisionDataset): Class of dataset to be tested. + - FEATURE_TYPES (Sequence[Any]): Types of the elements returned by index access of the dataset. Instead of + providing these manually, you can subclass ``ImageDatasetTestCase`` or ``VideoDatasetTestCase`` to + get a reasonable default that should work for most cases. Each entry of the sequence may be a tuple, + to indicate multiple possible values. + + Optionally, you can overwrite the following class attributes: + + - DEFAULT_CONFIG (Dict[str, Any]): Config that will be used by default. If omitted, this defaults to all + keyword arguments of the dataset minus ``transform``, ``target_transform``, ``transforms``, and + ``download``. Overwrite this if you want to use a default value for a parameter for which the dataset does + not provide one. + - ADDITIONAL_CONFIGS (Sequence[Dict[str, Any]]): Additional configs that should be tested. Each dictionary can + contain an arbitrary combination of dataset parameters that are **not** ``transform``, ``target_transform``, + ``transforms``, or ``download``. + - REQUIRED_PACKAGES (Iterable[str]): Additional dependencies to use the dataset. If these packages are not + available, the tests are skipped. + + Additionally, you need to overwrite the ``inject_fake_data()`` method that provides the data that the tests rely on. + The fake data should resemble the original data as closely as necessary while containing only a few examples. During + the creation of the dataset check-, download-, and extract-functions from ``torchvision.datasets.utils`` are + disabled. + + Without further configuration, the testcase will test if + + 1. the dataset raises a :class:`FileNotFoundError` or a :class:`RuntimeError` if the data files are not found or + corrupted, + 2. the dataset inherits from `torchvision.datasets.VisionDataset`, + 3. the dataset can be turned into a string, + 4. 
the feature types of a returned example match ``FEATURE_TYPES``, + 5. the number of examples matches the injected fake data, and + 6. the dataset calls ``transform``, ``target_transform``, or ``transforms`` if available when accessing data. + + Cases 3. to 6. are tested against all configurations, i.e. ``DEFAULT_CONFIG`` and ``ADDITIONAL_CONFIGS``. + + To add dataset-specific tests, create a new method that takes no arguments with ``test_`` as a name prefix: + + .. code-block:: + + def test_foo(self): + pass + + If you want to run the test against all configs, add the ``@test_all_configs`` decorator to the definition and + accept a single argument: + + .. code-block:: + + @test_all_configs + def test_bar(self, config): + pass + + Within the test you can use the ``create_dataset()`` method that yields the dataset as well as additional + information provided by the ``inject_fake_data()`` method: + + .. code-block:: + + def test_baz(self): + with self.create_dataset() as (dataset, info): + pass + """ + + DATASET_CLASS = None + FEATURE_TYPES = None + + DEFAULT_CONFIG = None + ADDITIONAL_CONFIGS = None + REQUIRED_PACKAGES = None + + # These keyword arguments are checked by test_transforms in case they are available in DATASET_CLASS. + _TRANSFORM_KWARGS = { + "transform", + "target_transform", + "transforms", + } + # These keyword arguments get a 'special' treatment and should not be set in DEFAULT_CONFIG or ADDITIONAL_CONFIGS. + _SPECIAL_KWARGS = { + *_TRANSFORM_KWARGS, + "download", + } + + # These fields are populated during setUpClass() within _populate_private_class_attributes() + + # This will be a dictionary containing all keyword arguments with their respective default values extracted from + # the dataset constructor. + _KWARG_DEFAULTS = None + # This will be a set of all _SPECIAL_KWARGS that the dataset constructor takes. + _HAS_SPECIAL_KWARG = None + + # These functions are disabled during dataset creation in create_dataset(). 
+ _CHECK_FUNCTIONS = { + "check_md5", + "check_integrity", + } + _DOWNLOAD_EXTRACT_FUNCTIONS = { + "download_url", + "download_file_from_google_drive", + "extract_archive", + "download_and_extract_archive", + } + + def dataset_args(self, tmpdir: str, config: Dict[str, Any]) -> Sequence[Any]: + """Define positional arguments passed to the dataset. + + .. note:: + + The default behavior is only valid if the dataset to be tested has ``root`` as the only required parameter. + Otherwise, you need to overwrite this method. + + Args: + tmpdir (str): Path to a temporary directory. For most cases this acts as root directory for the dataset + to be created and in turn also for the fake data injected here. + config (Dict[str, Any]): Configuration that will be passed to the dataset constructor. It provides at least + fields for all dataset parameters with default values. + + Returns: + (Tuple[str]): ``tmpdir`` which corresponds to ``root`` for most datasets. + """ + return (tmpdir,) + + def inject_fake_data(self, tmpdir: str, config: Dict[str, Any]) -> Union[int, Dict[str, Any]]: + """Inject fake data for dataset into a temporary directory. + + During the creation of the dataset the download and extract logic is disabled. Thus, the fake data injected + here needs to resemble the raw data, i.e. the state of the dataset directly after the files are downloaded and + potentially extracted. + + Args: + tmpdir (str): Path to a temporary directory. For most cases this acts as root directory for the dataset + to be created and in turn also for the fake data injected here. + config (Dict[str, Any]): Configuration that will be passed to the dataset constructor. It provides at least + fields for all dataset parameters with default values. + + Needs to return one of the following: + + 1. (int): Number of examples in the dataset to be created, or + 2. (Dict[str, Any]): Additional information about the injected fake data. 
Must contain the field + ``"num_examples"`` that corresponds to the number of examples in the dataset to be created. + """ + raise NotImplementedError("You need to provide fake data in order for the tests to run.") + + @contextlib.contextmanager + def create_dataset( + self, + config: Optional[Dict[str, Any]] = None, + inject_fake_data: bool = True, + patch_checks: Optional[bool] = None, + **kwargs: Any, + ) -> Iterator[Tuple[torchvision.datasets.VisionDataset, Dict[str, Any]]]: + r"""Create the dataset in a temporary directory. + + The configuration passed to the dataset is populated to contain at least all parameters with default values. + For this the following order of precedence is used: + + 1. Parameters in :attr:`kwargs`. + 2. Configuration in :attr:`config`. + 3. Configuration in :attr:`~DatasetTestCase.DEFAULT_CONFIG`. + 4. Default parameters of the dataset. + + Args: + config (Optional[Dict[str, Any]]): Configuration that will be used to create the dataset. + inject_fake_data (bool): If ``True`` (default) inject the fake data with :meth:`.inject_fake_data` before + creating the dataset. + patch_checks (Optional[bool]): If ``True`` disable integrity check logic while creating the dataset. If + omitted defaults to the same value as ``inject_fake_data``. + **kwargs (Any): Additional parameters passed to the dataset. These parameters take precedence in case they + overlap with ``config``. + + Yields: + dataset (torchvision.dataset.VisionDataset): Dataset. + info (Dict[str, Any]): Additional information about the injected fake data. See :meth:`.inject_fake_data` + for details. 
+ """ + if patch_checks is None: + patch_checks = inject_fake_data + + special_kwargs, other_kwargs = self._split_kwargs(kwargs) + + complete_config = self._KWARG_DEFAULTS.copy() + if self.DEFAULT_CONFIG: + complete_config.update(self.DEFAULT_CONFIG) + if config: + complete_config.update(config) + if other_kwargs: + complete_config.update(other_kwargs) + + if "download" in self._HAS_SPECIAL_KWARG and special_kwargs.get("download", False): + # override download param to False param if its default is truthy + special_kwargs["download"] = False + + patchers = self._patch_download_extract() + if patch_checks: + patchers.update(self._patch_checks()) + + with get_tmp_dir() as tmpdir: + args = self.dataset_args(tmpdir, complete_config) + info = self._inject_fake_data(tmpdir, complete_config) if inject_fake_data else None + + with self._maybe_apply_patches(patchers), disable_console_output(): + dataset = self.DATASET_CLASS(*args, **complete_config, **special_kwargs) + + yield dataset, info + + @classmethod + def setUpClass(cls): + cls._verify_required_public_class_attributes() + cls._populate_private_class_attributes() + cls._process_optional_public_class_attributes() + super().setUpClass() + + @classmethod + def _verify_required_public_class_attributes(cls): + if cls.DATASET_CLASS is None: + raise UsageError( + "The class attribute 'DATASET_CLASS' needs to be overwritten. " + "It should contain the class of the dataset to be tested." + ) + if cls.FEATURE_TYPES is None: + raise UsageError( + "The class attribute 'FEATURE_TYPES' needs to be overwritten. " + "It should contain a sequence of types that the dataset returns when accessed by index." 
+ ) + + @classmethod + def _populate_private_class_attributes(cls): + defaults = [] + for cls_ in cls.DATASET_CLASS.__mro__: + if cls_ is torchvision.datasets.VisionDataset: + break + + argspec = inspect.getfullargspec(cls_.__init__) + + if not argspec.defaults: + continue + + defaults.append( + { + kwarg: default + for kwarg, default in zip(argspec.args[-len(argspec.defaults) :], argspec.defaults) + if not kwarg.startswith("_") + } + ) + + if not argspec.varkw: + break + + kwarg_defaults = dict() + for config in reversed(defaults): + kwarg_defaults.update(config) + + has_special_kwargs = set() + for name in cls._SPECIAL_KWARGS: + if name not in kwarg_defaults: + continue + + del kwarg_defaults[name] + has_special_kwargs.add(name) + + cls._KWARG_DEFAULTS = kwarg_defaults + cls._HAS_SPECIAL_KWARG = has_special_kwargs + + @classmethod + def _process_optional_public_class_attributes(cls): + def check_config(config, name): + special_kwargs = tuple(f"'{name}'" for name in cls._SPECIAL_KWARGS if name in config) + if special_kwargs: + raise UsageError( + f"{name} contains a value for the parameter(s) {', '.join(special_kwargs)}. " + f"These are handled separately by the test case and should not be set here. " + f"If you need to test some custom behavior regarding these parameters, " + f"you need to write a custom test (*not* test case), e.g. test_custom_transform()." + ) + + if cls.DEFAULT_CONFIG is not None: + check_config(cls.DEFAULT_CONFIG, "DEFAULT_CONFIG") + + if cls.ADDITIONAL_CONFIGS is not None: + for idx, config in enumerate(cls.ADDITIONAL_CONFIGS): + check_config(config, f"CONFIGS[{idx}]") + + if cls.REQUIRED_PACKAGES: + missing_pkgs = [] + for pkg in cls.REQUIRED_PACKAGES: + try: + importlib.import_module(pkg) + except ImportError: + missing_pkgs.append(f"'{pkg}'") + + if missing_pkgs: + raise unittest.SkipTest( + f"The package(s) {', '.join(missing_pkgs)} are required to load the dataset " + f"'{cls.DATASET_CLASS.__name__}', but are not installed." 
+ ) + + def _split_kwargs(self, kwargs): + special_kwargs = kwargs.copy() + other_kwargs = {key: special_kwargs.pop(key) for key in set(special_kwargs.keys()) - self._SPECIAL_KWARGS} + return special_kwargs, other_kwargs + + def _inject_fake_data(self, tmpdir, config): + info = self.inject_fake_data(tmpdir, config) + if info is None: + raise UsageError( + "The method 'inject_fake_data' needs to return at least an integer indicating the number of " + "examples for the current configuration." + ) + elif isinstance(info, int): + info = dict(num_examples=info) + elif not isinstance(info, dict): + raise UsageError( + f"The additional information returned by the method 'inject_fake_data' must be either an " + f"integer indicating the number of examples for the current configuration or a dictionary with " + f"the same content. Got {type(info)} instead." + ) + elif "num_examples" not in info: + raise UsageError( + "The information dictionary returned by the method 'inject_fake_data' must contain a " + "'num_examples' field that holds the number of examples for the current configuration." 
+ ) + return info + + def _patch_download_extract(self): + module = inspect.getmodule(self.DATASET_CLASS).__name__ + return {unittest.mock.patch(f"{module}.{function}") for function in self._DOWNLOAD_EXTRACT_FUNCTIONS} + + def _patch_checks(self): + module = inspect.getmodule(self.DATASET_CLASS).__name__ + return {unittest.mock.patch(f"{module}.{function}", return_value=True) for function in self._CHECK_FUNCTIONS} + + @contextlib.contextmanager + def _maybe_apply_patches(self, patchers): + with contextlib.ExitStack() as stack: + mocks = {} + for patcher in patchers: + with contextlib.suppress(AttributeError): + mocks[patcher.target] = stack.enter_context(patcher) + yield mocks + + def test_not_found_or_corrupted(self): + with pytest.raises((FileNotFoundError, RuntimeError)): + with self.create_dataset(inject_fake_data=False): + pass + + def test_smoke(self): + with self.create_dataset() as (dataset, _): + assert isinstance(dataset, torchvision.datasets.VisionDataset) + + @test_all_configs + def test_str_smoke(self, config): + with self.create_dataset(config) as (dataset, _): + assert isinstance(str(dataset), str) + + @test_all_configs + def test_feature_types(self, config): + with self.create_dataset(config) as (dataset, _): + example = dataset[0] + + if len(self.FEATURE_TYPES) > 1: + actual = len(example) + expected = len(self.FEATURE_TYPES) + assert ( + actual == expected + ), "The number of the returned features does not match the the number of elements in FEATURE_TYPES: " + f"{actual} != {expected}" + else: + example = (example,) + + for idx, (feature, expected_feature_type) in enumerate(zip(example, self.FEATURE_TYPES)): + with self.subTest(idx=idx): + assert isinstance(feature, expected_feature_type) + + @test_all_configs + def test_num_examples(self, config): + with self.create_dataset(config) as (dataset, info): + assert len(list(dataset)) == len(dataset) == info["num_examples"] + + @test_all_configs + def test_transforms(self, config): + mock = 
unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args) + for kwarg in self._TRANSFORM_KWARGS: + if kwarg not in self._HAS_SPECIAL_KWARG: + continue + + mock.reset_mock() + + with self.subTest(kwarg=kwarg): + with self.create_dataset(config, **{kwarg: mock}) as (dataset, _): + dataset[0] + + mock.assert_called() + + @test_all_configs + def test_transforms_v2_wrapper(self, config): + try: + with self.create_dataset(config) as (dataset, info): + for target_keys in [None, "all"]: + if target_keys is not None and self.DATASET_CLASS not in { + torchvision.datasets.CocoDetection, + torchvision.datasets.VOCDetection, + torchvision.datasets.Kitti, + torchvision.datasets.WIDERFace, + }: + with self.assertRaisesRegex(ValueError, "`target_keys` is currently only supported for"): + wrap_dataset_for_transforms_v2(dataset, target_keys=target_keys) + continue + + wrapped_dataset = wrap_dataset_for_transforms_v2(dataset, target_keys=target_keys) + assert isinstance(wrapped_dataset, self.DATASET_CLASS) + assert len(wrapped_dataset) == info["num_examples"] + + wrapped_sample = wrapped_dataset[0] + assert tree_any( + lambda item: isinstance(item, (tv_tensors.TVTensor, PIL.Image.Image)), wrapped_sample + ) + except TypeError as error: + msg = f"No wrapper exists for dataset class {type(dataset).__name__}" + if str(error).startswith(msg): + pytest.skip(msg) + raise error + except RuntimeError as error: + if "currently not supported by this wrapper" in str(error): + pytest.skip("Config is currently not supported by this wrapper") + raise error + + +class ImageDatasetTestCase(DatasetTestCase): + """Abstract base class for image dataset testcases. + + - Overwrites the FEATURE_TYPES class attribute to expect a :class:`PIL.Image.Image` and an integer label. 
+ """ + + FEATURE_TYPES = (PIL.Image.Image, int) + + @contextlib.contextmanager + def create_dataset( + self, + config: Optional[Dict[str, Any]] = None, + inject_fake_data: bool = True, + patch_checks: Optional[bool] = None, + **kwargs: Any, + ) -> Iterator[Tuple[torchvision.datasets.VisionDataset, Dict[str, Any]]]: + with super().create_dataset( + config=config, + inject_fake_data=inject_fake_data, + patch_checks=patch_checks, + **kwargs, + ) as (dataset, info): + # PIL.Image.open() only loads the image metadata upfront and keeps the file open until the first access + # to the pixel data occurs. Trying to delete such a file results in an PermissionError on Windows. Thus, we + # force-load opened images. + # This problem only occurs during testing since some tests, e.g. DatasetTestCase.test_feature_types open an + # image, but never use the underlying data. During normal operation it is reasonable to assume that the + # user wants to work with the image he just opened rather than deleting the underlying file. + with self._force_load_images(): + yield dataset, info + + @contextlib.contextmanager + def _force_load_images(self): + open = PIL.Image.open + + def new(fp, *args, **kwargs): + image = open(fp, *args, **kwargs) + if isinstance(fp, (str, pathlib.Path)): + image.load() + return image + + with unittest.mock.patch("PIL.Image.open", new=new): + yield + + +class VideoDatasetTestCase(DatasetTestCase): + """Abstract base class for video dataset testcases. + + - Overwrites the 'FEATURE_TYPES' class attribute to expect two :class:`torch.Tensor` s for the video and audio as + well as an integer label. + - Overwrites the 'REQUIRED_PACKAGES' class attribute to require PyAV (``av``). + - Adds the 'DEFAULT_FRAMES_PER_CLIP' class attribute. If no 'frames_per_clip' is provided by 'inject_fake_data()' + and it is the last parameter without a default value in the dataset constructor, the value of the + 'DEFAULT_FRAMES_PER_CLIP' class attribute is appended to the output. 
+ """ + + FEATURE_TYPES = (torch.Tensor, torch.Tensor, int) + REQUIRED_PACKAGES = ("av",) + + FRAMES_PER_CLIP = 1 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dataset_args = self._set_default_frames_per_clip(self.dataset_args) + + def _set_default_frames_per_clip(self, dataset_args): + argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__) + args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)] + frames_per_clip_last = args_without_default[-1] == "frames_per_clip" + + @functools.wraps(dataset_args) + def wrapper(tmpdir, config): + args = dataset_args(tmpdir, config) + if frames_per_clip_last and len(args) == len(args_without_default) - 1: + args = (*args, self.FRAMES_PER_CLIP) + + return args + + return wrapper + + def test_output_format(self): + for output_format in ["TCHW", "THWC"]: + with self.create_dataset(output_format=output_format) as (dataset, _): + for video, *_ in dataset: + if output_format == "TCHW": + num_frames, num_channels, *_ = video.shape + else: # output_format == "THWC": + num_frames, *_, num_channels = video.shape + + assert num_frames == self.FRAMES_PER_CLIP + assert num_channels == 3 + + @test_all_configs + def test_transforms_v2_wrapper(self, config): + # `output_format == "THWC"` is not supported by the wrapper. Thus, we skip the `config` if it is set explicitly + # or use the supported `"TCHW"` + if config.setdefault("output_format", "TCHW") == "THWC": + return + + super().test_transforms_v2_wrapper.__wrapped__(self, config) + + +def _no_collate(batch): + return batch + + +def check_transforms_v2_wrapper_spawn(dataset, expected_size): + # This check ensures that the wrapped datasets can be used with multiprocessing_context="spawn" in the DataLoader. 
+ # We also check that transforms are applied correctly as a non-regression test for + # https://github.com/pytorch/vision/issues/8066 + # Implicitly, this also checks that the wrapped datasets are pickleable. + + # To save CI/test time, we only check on Windows where "spawn" is the default + if platform.system() != "Windows": + pytest.skip("Multiprocessing spawning is only checked on macOS.") + + wrapped_dataset = wrap_dataset_for_transforms_v2(dataset) + + dataloader = DataLoader(wrapped_dataset, num_workers=2, multiprocessing_context="spawn", collate_fn=_no_collate) + + def resize_was_applied(item): + # Checking the size of the output ensures that the Resize transform was correctly applied + return isinstance(item, (tv_tensors.Image, tv_tensors.Video, PIL.Image.Image)) and get_size(item) == list( + expected_size + ) + + for wrapped_sample in dataloader: + assert tree_any(resize_was_applied, wrapped_sample) + + +def create_image_or_video_tensor(size: Sequence[int]) -> torch.Tensor: + r"""Create a random uint8 tensor. + + Args: + size (Sequence[int]): Size of the tensor. + """ + return torch.randint(0, 256, size, dtype=torch.uint8) + + +def create_image_file( + root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], size: Union[Sequence[int], int] = 10, **kwargs: Any +) -> pathlib.Path: + """Create an image file from random data. + + Args: + root (Union[str, pathlib.Path]): Root directory the image file will be placed in. + name (Union[str, pathlib.Path]): Name of the image file. + size (Union[Sequence[int], int]): Size of the image that represents the ``(num_channels, height, width)``. If + scalar, the value is used for the height and width. If not provided, three channels are assumed. + kwargs (Any): Additional parameters passed to :meth:`PIL.Image.Image.save`. + + Returns: + pathlib.Path: Path to the created image file. 
+ """ + if isinstance(size, int): + size = (size, size) + if len(size) == 2: + size = (3, *size) + if len(size) != 3: + raise UsageError( + f"The 'size' argument should either be an int or a sequence of length 2 or 3. Got {len(size)} instead" + ) + + image = create_image_or_video_tensor(size) + file = pathlib.Path(root) / name + + # torch (num_channels x height x width) -> PIL (width x height x num_channels) + image = image.permute(2, 1, 0) + # For grayscale images PIL doesn't use a channel dimension + if image.shape[2] == 1: + image = torch.squeeze(image, 2) + PIL.Image.fromarray(image.numpy()).save(file, **kwargs) + return file + + +def create_image_folder( + root: Union[pathlib.Path, str], + name: Union[pathlib.Path, str], + file_name_fn: Callable[[int], str], + num_examples: int, + size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None, + **kwargs: Any, +) -> List[pathlib.Path]: + """Create a folder of random images. + + Args: + root (Union[str, pathlib.Path]): Root directory the image folder will be placed in. + name (Union[str, pathlib.Path]): Name of the image folder. + file_name_fn (Callable[[int], str]): Should return a file name if called with the file index. + num_examples (int): Number of images to create. + size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the images. If + callable, will be called with the index of the corresponding file. If omitted, a random height and width + between 3 and 10 pixels is selected on a per-image basis. + kwargs (Any): Additional parameters passed to :func:`create_image_file`. + + Returns: + List[pathlib.Path]: Paths to all created image files. + + .. 
seealso:: + + - :func:`create_image_file` + """ + if size is None: + + def size(idx: int) -> Tuple[int, int, int]: + num_channels = 3 + height, width = torch.randint(3, 11, size=(2,), dtype=torch.int).tolist() + return (num_channels, height, width) + + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + return [ + create_image_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size, **kwargs) + for idx in range(num_examples) + ] + + +def shape_test_for_stereo( + left: PIL.Image.Image, + right: PIL.Image.Image, + disparity: Optional[npt.NDArray] = None, + valid_mask: Optional[npt.NDArray] = None, +): + left_dims = get_dimensions(left) + right_dims = get_dimensions(right) + c, h, w = left_dims + # check that left and right are the same size + assert left_dims == right_dims + assert c == 3 + + # check that the disparity has the same spatial dimensions + # as the input + if disparity is not None: + assert disparity.ndim == 3 + assert disparity.shape == (1, h, w) + + if valid_mask is not None: + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + +@requires_lazy_imports("av") +def create_video_file( + root: Union[pathlib.Path, str], + name: Union[pathlib.Path, str], + size: Union[Sequence[int], int] = (1, 3, 10, 10), + fps: float = 25, + **kwargs: Any, +) -> pathlib.Path: + """Create a video file from random data. + + Args: + root (Union[str, pathlib.Path]): Root directory the video file will be placed in. + name (Union[str, pathlib.Path]): Name of the video file. + size (Union[Sequence[int], int]): Size of the video that represents the + ``(num_frames, num_channels, height, width)``. If scalar, the value is used for the height and width. + If not provided, ``num_frames=1`` and ``num_channels=3`` are assumed. + fps (float): Frame rate in frames per second. 
+ kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`. + + Returns: + pathlib.Path: Path to the created image file. + + Raises: + UsageError: If PyAV is not available. + """ + if isinstance(size, int): + size = (size, size) + if len(size) == 2: + size = (3, *size) + if len(size) == 3: + size = (1, *size) + if len(size) != 4: + raise UsageError( + f"The 'size' argument should either be an int or a sequence of length 2, 3, or 4. Got {len(size)} instead" + ) + + video = create_image_or_video_tensor(size) + file = pathlib.Path(root) / name + torchvision.io.write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs) + return file + + +@requires_lazy_imports("av") +def create_video_folder( + root: Union[str, pathlib.Path], + name: Union[str, pathlib.Path], + file_name_fn: Callable[[int], str], + num_examples: int, + size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None, + fps=25, + **kwargs, +) -> List[pathlib.Path]: + """Create a folder of random videos. + + Args: + root (Union[str, pathlib.Path]): Root directory the video folder will be placed in. + name (Union[str, pathlib.Path]): Name of the video folder. + file_name_fn (Callable[[int], str]): Should return a file name if called with the file index. + num_examples (int): Number of videos to create. + size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the videos. If + callable, will be called with the index of the corresponding file. If omitted, a random even height and + width between 4 and 10 pixels is selected on a per-video basis. + fps (float): Frame rate in frames per second. + kwargs (Any): Additional parameters passed to :func:`create_video_file`. + + Returns: + List[pathlib.Path]: Paths to all created video files. + + Raises: + UsageError: If PyAV is not available. + + .. 
seealso:: + + - :func:`create_video_file` + """ + if size is None: + + def size(idx): + num_frames = 1 + num_channels = 3 + # The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and + # width of the video to be divisible by 2. + height, width = (torch.randint(2, 6, size=(2,), dtype=torch.int) * 2).tolist() + return (num_frames, num_channels, height, width) + + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + return [ + create_video_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size, **kwargs) + for idx in range(num_examples) + ] + + +def _split_files_or_dirs(root, *files_or_dirs): + files = set() + dirs = set() + for file_or_dir in files_or_dirs: + path = pathlib.Path(file_or_dir) + if not path.is_absolute(): + path = root / path + if path.is_file(): + files.add(path) + else: + dirs.add(path) + for sub_file_or_dir in path.glob("**/*"): + if sub_file_or_dir.is_file(): + files.add(sub_file_or_dir) + else: + dirs.add(sub_file_or_dir) + + if root in dirs: + dirs.remove(root) + + return files, dirs + + +def _make_archive(root, name, *files_or_dirs, opener, adder, remove=True): + archive = pathlib.Path(root) / name + if not files_or_dirs: + # We need to invoke `Path.with_suffix("")`, since call only applies to the last suffix if multiple suffixes are + # present. For example, `pathlib.Path("foo.tar.gz").with_suffix("")` results in `foo.tar`. 
+ file_or_dir = archive + for _ in range(len(archive.suffixes)): + file_or_dir = file_or_dir.with_suffix("") + if file_or_dir.exists(): + files_or_dirs = (file_or_dir,) + else: + raise ValueError("No file or dir provided.") + + files, dirs = _split_files_or_dirs(root, *files_or_dirs) + + with opener(archive) as fh: + for file in sorted(files): + adder(fh, file, file.relative_to(root)) + + if remove: + for file in files: + os.remove(file) + for dir in dirs: + shutil.rmtree(dir, ignore_errors=True) + + return archive + + +def make_tar(root, name, *files_or_dirs, remove=True, compression=None): + # TODO: detect compression from name + return _make_archive( + root, + name, + *files_or_dirs, + opener=lambda archive: tarfile.open(archive, f"w:{compression}" if compression else "w"), + adder=lambda fh, file, relative_file: fh.add(file, arcname=relative_file), + remove=remove, + ) + + +def make_zip(root, name, *files_or_dirs, remove=True): + return _make_archive( + root, + name, + *files_or_dirs, + opener=lambda archive: zipfile.ZipFile(archive, "w"), + adder=lambda fh, file, relative_file: fh.write(file, arcname=relative_file), + remove=remove, + ) + + +def create_random_string(length: int, *digits: str) -> str: + """Create a random string. + + Args: + length (int): Number of characters in the generated string. + *digits (str): Characters to sample from. If omitted defaults to :attr:`string.ascii_lowercase`. 
+ """ + if not digits: + digits = string.ascii_lowercase + else: + digits = "".join(itertools.chain(*digits)) + + return "".join(random.choice(digits) for _ in range(length)) + + +def make_fake_pfm_file(h, w, file_name): + values = list(range(3 * h * w)) + # Note: we pack everything in little endian: -1.0, and "<" + content = f"PF \n{w} {h} \n-1.0\n".encode() + struct.pack("<" + "f" * len(values), *values) + with open(file_name, "wb") as f: + f.write(content) + + +def make_fake_flo_file(h, w, file_name): + """Creates a fake flow file in .flo format.""" + # Everything needs to be in little Endian according to + # https://vision.middlebury.edu/flow/code/flow-code/README.txt + values = list(range(2 * h * w)) + content = ( + struct.pack("<4c", *(c.encode() for c in "PIEH")) + + struct.pack("" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ascent = scipy.misc.ascent()\n", - "plt.gray()\n", - "plt.imshow(ascent, interpolation='nearest')\n", - "cropped_ascent = ascent[:100, 300:]\n", - "plt.imshow(cropped_ascent, interpolation='nearest')\n", - "print(cropped_ascent.shape)\n", - "print(cropped_ascent[90,90])\n", - "print(cropped_ascent.dtype)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch.Size([100, 212])\n", - "117.0\n", - "\n", - " 117\n", - "[torch.DoubleTensor of size 1]\n", - "\n", - "torch.Size([1, 100, 212])\n", - "\n", - " 117\n", - " 117\n", - " 117\n", - "[torch.FloatTensor of size 3]\n", - "\n", - "torch.Size([3, 100, 212])\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "img = torch.from_numpy(cropped_ascent.astype(float))\n", - "print(img.size())\n", - "print(img[90,90])\n", - "img = img.clone().view(1,100,212)\n", - "print(img[:,90,90])\n", - "print(img.size())\n", - "img = torch.cat((img, img, 
img), 0).float()\n", - "show(img)\n", - "print(img[:,90,90])\n", - "img.div_(255);\n", - "print(img.size())" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW0AAAC+CAYAAAD+3F4XAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztvVuspNl13/ff51p1+vRtejjNy4gzIq0RKQYSZSNGEMmW\nZRGGoQCikAdChhFIVgIEiBU7cZDo8mL4IYBlwDCchzzEl4AxHFiyDIeEESSKJFimYkgkFcoWSQ2v\nIufGvnefe52qU/XloXrt/n3/WnW6yeGc4SH3Ahqn+qv6vm/f91r/9V9rl67r1KRJkyZNzoesvNkF\naNKkSZMmTy5t0W7SpEmTcyRt0W7SpEmTcyRt0W7SpEmTcyRt0W7SpEmTcyRt0W7SpEmTcySva9Eu\npfzFUsqLpZTPl1J+/ptVqCZNmjRpkkv5RnnapZQVSZ+X9GOSXpP0CUk/1XXdi9+84jVp0qRJE8rr\n0bT/tKQvdF331a7rJpL+maQPfnOK1aRJkyZNMnk9i/Y7JL2M/7/y8FqTJk2aNHmDZO2NfkEppcXJ\nN2nSpMk3IF3XFb/2ehbtVyW9E/9/9uG1BXnuuef0rne9S9PpVM8//7ze/e53azQaSZJWV1fr76bT\nqSRpNpvV613XaTab1evxN35bStHKytxgODw8rM8ppdTvB4PBvLJra/W3a2vzqm9tbdV3ra2t1fvi\n+5WVlfp5dXVV4QMopWhjY2OhrvHbyWRSP8c94/G4/u74+LiWZXV1tdYn7on//6t/9a/0kz/5kzo5\nOZGk+k62wfr6en1HtFFcj/fHb1dWVupn3hPPPTk5qc+IdpnNZrWsXdf12iCE793c3KzPyr5nG3u9\nWa74LsrNseLPpW/m5OREq6ur+uhHP6qf+ImfqHW5du2aJOny5cu1vjE2VlZWeu0VZYx3Rv1dVlZW\naj0nk4mked/u7+9Lku7cuaM7d+5Ikl566SVJ0oMHD2p5T05O6rNLKbXtYqwMBoNaBo71qPfKykp9\n1urqam98xzM4b+JZ6+vrtdzZOPqd3/kd/fk//+drfeIZHN8cv2z7uMY+4fjx9j44OKj9sbKy0qtb\nSPTHyclJb9zFd8fHx/X/W1tbtQ05rkLi2ng8XijXyclJHROsYymlrllx/9HRUe2vyWRS2+FjH/uY\nfviHf1jT6bTWcW1tTZcuXZIkvf3tb++tL/H8T3/60/rMZz6j4+Nj7e7u6rd/+7cXyi69vkX7E5L+\nRCnlOUlfk/RTkv5S9sPnn39eP/ZjP9ZbtJo0adKkySN53/vep/e9733a2dnRq6+++s1ftLuum5ZS\nfk7Sr2uOjf+jruv+6Bt9XpMmTZo0eby8Lky767r/S9L3Pu53zz///Ot5zXe0vPDCC292Ec6tfO/3\nPnZoNlkizz333JtdhHMr73znOx//o9chZxIR2Rbtb1zaov2NS1u0v3Fpc/Ybl2+LRbtJkyZNmnxz\n5A2n/IWEZ13qe8zD27yyspJ6oyeTyQLbgcyNo6OjBc/+2tpa9UBPp9P6+fj4WBcuXOj99uTkpJYh\nYzPQAz6bzXpedJbdr21sbNQ6855wxrpXO/5PlkW0w8nJSX1WvIusBbYdWSLxrul02mO6uPd9dXW1\n1z/xXrI3MvYHy0BWAduQXv94Zlxje3JM0KNOFkfUJ/vey5i1V3j/h8NhvU6WRpR7dXW1PpcMG44z\nXifTKa4FA+HSpUs6OjqSJD311FOS5myJYDqdnJxUBsJsNqvl4b
Wo13A47LEz4nuWNX5L5gTrxXaJ\n+8hIIjuF/R8siChfKaXX9/GZzCOyWnw+TafT3lyJ+kwmk/qMeOfq6mqPiRTXfX6FcM7Gb8gQY3tw\nvvDZUcd4Npk7LEe0R9d1tc/i/Wtraykr6vbt27XN4/3Hx8fa2dmp37/22mtaJk3TbtKkSZNzJGei\naZdSepxH7ozUhDIt058jzbUT8j0zjjJ52qFhbW9vL3BAZ7NZvf/4+Lin3Ur9nff4+LjukNx5Mw4p\nedqxG1ObJHe6lNLTDrxepZRaDmrt8fy1tbWqzfF3LGvWRnxn1p4sCzUo1pvtE+8ndzqe5X+luZZB\nKyXagxYENbOMH05NiRaA13cymdRxsL+/v6AVTSaTnmaXvYvXOD7ZdnF/PGs4HOry5cuSVN9/cHDQ\ns0aCY+wWYrRFaO3UfKnlUgsm59o57NPptKf5ucZKYXuOx+Oephvfx/18Ljn4UV9agvEcarEs02w2\n6z03ys928XfNZrPan9PptNanlLJgbXDMjcfjBYuK1iP7PpuDbMPZbJZa57wW1tXR0VFdE+LanTt3\ntLe3J0na2dk5lR59Jot2mF0cXN6JJycnvUWMZkkIzXaazW4C8fPh4WFvoXRTajqdpsE1HBDx/LW1\ntV4QSggXixAuQpzMfFZc52bBewiZxH1uVkVZ3ETnpsA6DwaDhaAKX/B8Y+MG5e/yzZWTopTSK2cI\nF5N47/Hx8QJkxE1ufX19AdZhGdmPnECs68HBgaR528cEiYAHBh3FbyjcgPhbn9hSHw7Y2NjQcDjs\nvWtnZ6cu1Hfu3OltBj6WCNWcnJwswGhd19Wx4UqBw0kcn1ywuDhGG66urtZ3EOqLDYTKjo/rKEuM\nVYffoq7srygDYSqOw2zzzAKrXCkgVMF3x7uosMXvOH45d7zPHf7zvptOp73NIhbou3fv1uCrGIfT\n6VT379+XNB/rseFl0uCRJk2aNDlHcmaOSGq53IXj73g87mmxdBaE0OwJYTg4nTfUvmnmuZlOjZah\nqHSG8Hs33eIZ8Uya226a0WlEa2FjYyM1FWmmMcw3rhHKcQ2MJj5N5Uz7oIaXhf5Si6VTiLBL5hzk\nfVkbUfMjpJFpOqPRqBfOHc/Pwsu7rltw2FHrPzo66mnC8b3DXdG2cY2mLuE117D429XV1appR3j1\nU089VbWu4XDYM9HdkqIjknMoc5DSUdl1XdWKqXHHvFhfX18I4WbfUTOeTCY9Z6W3zTJn7GlwKPuI\n84r1zaxsPi9LgcB0BxkU6JaLj0+OOfZzlkKAbZhBnHSe7+zsVK361q1bFc4M5yMdu8vGdciZLNox\nGDlgvGM4acfj8ammLgev5ziQ5hM8Bix/y8FHzzU9uPE981CEqcJ8DVwECG3wPg6UqAsnSIabc+Ei\nROOefvoIiIVyUrDdOND53CgLF2UvS9wX17IFi/ANJ65vkj74+V43sd0P4pAazXnmnshweTIFxuNx\n7VOaxcwH4eOTk9InqJvrfH+MQ+nRor29vV2ZJKPRqLfRO4zAvmUduBlx3hCiyWAGLgZu7q+vr/fe\nH+WdTqcLUAsXV/o5qCjw+Q5tcRz64hfCucbFNZ5LRhTHwWmMNPrD2HbcdDhmuYB7riGuF5kfZTwe\na3d3V9K8n2PR3tvbq+NuGWMp8zOENHikSZMmTc6RnImmHeC+78bSI03HNcNMY6TXlztv7KzkuFKr\np9lBrm68K2AVSalplrEhMt6mO5Lc0bO2ttbbQWkKuznv73W+M6/R6RnCsrIN4ju2pzsM3aPOerHv\nNjc3a5vTWsg4r6wLrRyyIKhxxnPo1Im+o6lOTZkS92UaSymlljuck+vr6z32CNvW60AnXZaJjmOD\n4zfG3MWLF2u5R6NRdUoFy8rrkcFrhLjins3NzVNZQBwTJycntb6E/AjTRXtn1hktrszZTPaDM6Gi\nDdlurGPUh3M0c6pnmSudhc
YMh/Fewhu0Ulg+lj3uy8ZECGHDGFORrU+Sdnd39eDBA0lz9pLPEVoA\nzshyOZNF2wdixjqQHnUCF1HCH8vMUw+k4EQhRsughyzYwzeO+J54WzZQQhjQsrm5ubABcNJxoLon\nXerDDLzOSZlNoGWLP2EMxyedruTlJsZGytVkMlmYzG7O+2TI+i3K7ROIkAVxYk46BlhkgRAZrW02\nm9V2CpN1fX29B9U4Ts3F2RdP37wdn48yMB3s9va2pPkEvnLliqQ5q8AVFF+AnKrIzdtx5GyB9CAZ\nqQ99ceOKNuO8YN+GOHTkZeFzyXTJFBRSGDnXMjiI84dQyjLWU9zDuZD1c9Z33DgIS4YcHBzUugVe\nfe/evYpZP3jwoEfj9A3PN5BsfQxp8EiTJk2anCM5E02b3M6QMA9JiqemlHFy6TQgTODmhAfpUIul\n9uDP2tzcXIBP1tbWek4MOjKdTE/tYjKZ9JKpR714D7U115A9hDjaK+OEsw7USPg5vh8OhwvwCB2G\n1LrpXMycStSayZShduMBCTTRCevQKUnWQ3bYBTUtWi7U5vwwAQ8sCaETOjt4gNpVZsllMIGPP3ci\nD4fD+q4rV65Uzezw8LCa1lmoNevDvs2cj9TWqLmeFgtB6IDjm1ZdFkdwcHCwoH16f8R9Ma/IQ3Zr\nNwvk4fikAzPKmgm1VfYdmWVuQRCiouPRYzCkeX9xnWI/SnNIJKAvOom5TvB+zu3T2CNN027SpEmT\ncyRnFhHZdV0Pv4mdhEf4UCPgZzorpb4Di5F01OCogYVkzkViy9QMM6cpaUzUsOIex9rdScfvPAkO\nozKlPt3N8b94Fo86ciyUmvzx8XFaxsxHkNH06Kxl5Be1vCdpW6nPSafWzfBl4s9Rr+Pj4wVHkN/P\nvolxRfw7owrG/QcHBz3utlt6zr1eFursnzOK2snJScW0x+NxjZQ8PDys2lqGzzMiN8p9fHzcC3On\n0zv6LLR3tn3UI8oT7U4rh5qhjz9q9YwiDKGWynG/zJfk459CTTtL5uVWQWaNcsxFu7AO9N94PEDU\nx4+VOzk56VFHw+l448YNSXNNO5515cqVGvEoLa4NtOLdgexyJot25E8gy8Mbi05CmjBOcJf6i3oW\n8MIFgIEU0uKgcGdFvCPMmsFg0DvnjaZk5hDJ+J6cKIQLuMn4JpENXrYB38vr2fv5fA5qDnpCJpkz\nkxtnxi2lyUkWSDYZM+iL76V4JkWWO+PZehm44GXmKc91jDqOx+MFh1/c5/XOhPdwEcgW1O3t7VrH\n0WjUC3Vm28T743vWNWu3lZWVBWXHFQF3dhIGc+iLY5h1id/6fB6Pxz0HbKYoZGMmc6o7BJoFomXw\n3fHx8cL4cCeyL9DMWEi4iA76aFdma9zd3dXNmzclPVo7mJ5ie3u7B//5wuwBcKeNrwaPNGnSpMk5\nkjPRtCN8lk4KpwiNRqPeLp9xhJ1y45/j+VtbW1VToalLig9NKGpznhhHUs85Q03WLQB37rjGwHza\nHkUWz13GUaZWLPUpQtRUGLmWUb7o8M046Vm5CbXQxM6iVfncLIEXv++6rjqmaH1l4ceZxuoc6cxp\nyL7jOHJNfDKZ9ELLCSfF32UpDHwsZ9AK24AO2I2NjZrj/cqVKwvZ3xjpSY2XpnSmkZK/TRM8C8HO\nokpZRlpqtIAzCzOjTXLeZO3JeUPLmUJNPBvrpDIyUZX3BWmiJBlk6w1poIyiDbhpf3+/cq/v3LlT\nr7Mu0Te3bt1KtXZaw1Gf4XDY6wuXM8s94pPKOc5cyJlPhFh3ZrYwKCNkOp1WzJBpEH3BkRZNRuYv\nkfoZCWnWZAEFTKPJZ3BBZl2yYA5uIBn+d9rGx3eRI+34u0MwpZSKpWah0h6eT6jFTVUOemKhWd6Y\n2WxWJwInfhZEtbm5uTCQuTE5Fu91cB63Y7ScYIeHh7W+Ph7i+RxzDoM9Dj7h4jscDuvny5cv
136I\n8VvKo9TCJycnC9AV00Nww6Us2wRD+CyHMeK3rjRwnGxtbS1kjiT8QlYWxyFx3QzqyNqYG1O2kLM9\nCYlxs8lCz0NKKb0+Z8BMbKSBTe/u7lblcGdnJ/WDURGhX8d/S6WV0FImDR5p0qRJk3MkZ8YeGQwG\ndbelOc+dO3ZFZj6bTqdV++CulXlYs5Bnev03NjaqCZM5yKgJMzQ4hBr+1tZW1fxoZjK8mU6ZKEvm\nUHOOupebGq1DRdGGWf5nmqTx+2UWApko7sVmW5PzzetZ5GCmzcU74rfss4xlQ3FWALnXHFN0NmVR\no+Trh1D7PTw8rBZTaFduzmcQXcYfXxbyToso2n4wGNToyIjUnEwm9fPq6mrVwBl9R6uU0IFr+4ws\nzJgVhCmWpT4gu4RpGPxgAj6L9WUbUaNlWT0DI6EzZx95+ah1ez9Eu2XvZbvQQo3PR0dHFQqJKMe7\nd+9WxgitSkKgUZfRaFShQM5HOkUzjnwmTdNu0qRJk3Mkj9W0SynPSvrfJF2XNJP0D7qu+59KKVcl\n/Yqk5yR9RdKHuq7bWfIMjUajnjPDnTa+M2c0OX9mPMuv0eHi1Dzi5vFscm4dh+Zuvr+/X3fL/f39\nBbx2c3Ozp3GQly4tYvnEll17YFmdrhZCzdGddNRcqKmTD0rsj89yDjLxYEbC8Td0ZlFrybQi4vqZ\n9hvPJw2KTs9MO6GzlOVZNqZcaL2Nx+OKW2a+FV7PrAmWhQ5UaoOMII3vL1y4UDX74G6T/0urk86y\nTLMk9hvteuHChV7SLbfa6ByfzWY9rjX58nF/lmc+S59MJ3BIKaVq7WFJhLg/wpNXRX3Je2e92bbU\n1qO9WKeMMkp/VpTt/v37tW+C2sc0sGx7OtdDnB7rDnyPU8ks4lrupd88khNJf6Pruj8opWxL+v1S\nyq9L+iuSfqPrur9TSvl5Sb8o6ReyB0SBMzMsCr21tVU7hg3PRC0MOeXgcueiO6WyyeyDUMpz4tJR\nSUckF+Awt8g+4WQK4aLBAcPQ7wwaoNMxc9IxuU8IByzv47MoNIV90mQDmr/jb5YFIGWbledcdsgs\ng4UozqMlg8CTiBFuWgafcJGL6zFpydd3mMGde+7kzKAv1oGbaCzWdIoGJBKLN5/J8cnrZH9QueCC\nlDFkuBlwoXNHIhWjUkpd0Fj3LLc82S8BVXIuMe0Ec8fTeejtG+/wtuVn1jXj69OBGp93d3drGe/f\nv18/M1w95j5hVCqEhHUiPzmPQOQ6yLH6usLYu6670XXdHzz8vC/pjyQ9K+mDkj788GcflvSTj3tW\nkyZNmjR5ffJ1OSJLKc9Ler+k35V0veu6m9J8YS+lPHPavc7jdcjj8PAwTTrEXSdO+7h//35PI/Dw\nUkackT7ICKl4F/msmfnsO17GG6b2kTnDWO8Qag8ODcW1ZY5AadHx5qdqkPZGpyR5vYQQqMmzH0Ko\n3bA8HgpNymPW3rz/cZq4t6FDQCwTISa3YvxZNN05zhit6illDw8Pe6caZe3B+zONL9No4/dRbkZK\nSnPudmixJycnVdu+ePFivZ91JLTkDmOe6ETKHmFLOqQJN7oTmM/NHNKj0ajew2horgOZdZlp0VwD\nMvjOYx4csvN3cO2JuU+6XVhXdD7u7e0tkBgI362vry9YRCQY8B20yAnxcM06TZ540X4IjfyapL/e\ndd1+KcVt7KUB87/5m79ZF6Hnn39ezz333AJv1+ETmvuOHXPAHR8fL5ifnJSEDjxEVernKM4CRMbj\ncc+kc6hGetQZhHg4MWmqk+3Ad7m55GwFn4AOcTgc4ItUCAd1djK8B0FF/YhlZvBFCDOfLQuo4SbH\n/OaOf3ueCTeBOUbIlsjgJG6o2YZIPjQ3kHgHGSVkBSxLZ0A2jffVMshuc3OzPiMCbi5evFg/b25u\n1jGRHZN2fHzcy56X4cgsg7cn/TCrq6s9M9+hslJKXcQG
g0H9bXxP7jbHN5WlEIfBXAEhBErIYVnK\nBuLEIRn0lW3uOzs7lR1y48aNWsfRaNRjlUS54127u7s9DNufP5lM6gbAYCDW5d69e9rf3+9BW5k8\n0aJdSlnTfMH+J13XfeTh5ZullOtd190spbxV0q1l9//oj/5oD7daRulq0qRJk+9U2d7e1vb2tgaD\ngSaTSU085fKkmvY/lvTZruv+Pq59VNLPSPplST8t6SPJfZJy89+F2gfZEDTzw2yhFkttjBp5xgfl\nDsZdkeYWw8SjLNRefWem0Ivu7A3WM96ZOVqoWTqbgu+liZcl/1nmcGS4LJ9BCyLzqPNd7oBlHdmP\n7Dvnw0cZme867vMcx/FbH0OEtuI38V6W1+/PnJrk5HJ80SEY9WUUoqRelj0pP6g5e1+8i053wkvS\nfCJfvXq1liGSEUVdyGAopX8CkrOEyP8djUZpUiyOT2qJPJFJmo8N8o69DRi67uHz8Tvmsad1F1p7\nFjWajQO3XEKya7R2GS0d7Xr37l3duXNH0hyG5Rx0dIBOU1rG0W7D4bBq7Zubm/W+cGRGeUJ4vNpp\nPO0nofz9kKS/LOkPSymf0hwG+SXNF+tfLaX8rKSvSvrQ457VpEmTJk1enzx20e667v+VtOzAsg88\nyUs2NjZ64Dox6RB3gFHjdccDNbjNzc00BzHxWOYdcExwZWWlh2l7XmpGNjLVaJYEhxxSat1ZYhxq\nARlPm+3hqVNDMn4t25XOtux7wlWZVpKlRXXqnOOLTlfKIjkz3J1WELXzLAo24zgv42PTAiCW6fet\nra1VStbR0VH6rsCL9/f3exqnW0R0CFKWWZLU3Nw5uLm5qcuXL0ua578IjDWszpWVRylYNzc3e84s\np+m5tujY77L+oMONc4FzxOMDfA56DhdS97K1gOVyWm5IZh1yjmcRmZyDPFSZiZ9CO55Op1X7jXaQ\nHllWw+Gw3k9LLSyc6Cup7y/LLLGu63ocfDqaXc4kjD08yZmjMMQbmN5mN5dpzjGYg0dy0cSPzuWi\nu8xpGYMiS0hOp0+ULe6T+iYQf8fnZ0E75KXznoxl4QM6fusMGCaloWPXPe3x97TNiGY3y83wZbY3\nFwM6AuMaJ3O24bE/mWPdF3UuFnT4cZMidz/bLEKcIcFyxftjUg0Gg97BAhmbhQuKwzZ0XvI62zkm\nNqGBK1eu1OcGo+Tg4CANVmJ5+H6OI4fyuNl42gGHyrjZLHOckaOcOdKzOTqbzermyXfxvmxDjDba\n2trqlcWVtK7reptvJHwKSGR/f1/Xrl2rn2Mxp5M2xsF4PO4lgWJiL2k+TrI4hK2trd585N8o42kM\nkhbG3qRJkybnSM5E0w6QniaCc1edgkQTJ3bRMLHW19erpkOnIzXIjH5FjjI1HWqnUR4e8krzNXbA\nra2tBccDtdTNzc2l9Lx4lmut3mZuKkfdWWZvz9NyEcdnt1icU5xZQRkdk05N17ilxZD2EJroGT0w\nrlFDy+roYdunWXK8n+ODFgatGFpH0QbUcsMSYyIgwkmEtk5N/mNOaodt1tfX67i/evVqfUdAJoeH\nh+kpT3wGocSMLskoYY5DWh4ObzAfd5aYiePXnxX/J4WOTlEfnxwDtBRZT3LR4/tsTE2n05qi4P79\n+7p1a056C42a6WkvXLhQOdeZU50WiMdgRF2zyG3GjpDY4HEHy+RMFu1gQmQk/BCaD05K9wAO5qKm\nWU2uMbmefgI7P9MU5qJObJBHl2Vn6HHwcqFmMva4xsmcHWhALDVb7LO6+OEM8Zd1zPDJEOeBe70c\nTiB05KwXhs8fHR31TO+4Jwv0Yf/znswszoJ+sgWC7eHYs0MWGxsbdREhsycz+w8ODur3Gxsbdayw\nDRya4LuI6/M6IR4qBAxZD6yTi/bt27drvUL4LC4sXKx9UR8Oh732jLKT0cLNjvf7fPaFyZW02WxW\n6+IMIJ4bG9dOy7dN
BcnTCnAxl/o8bMIj8cz79+/XhfrChQu1PRivwfNHmZEwCz1nubgoM6gm6hBt\ns7e3t8D5pjR4pEmTJk3OkZzZcWPUnt2zLPV3LfdMh3DXI4DPyC1pvssTWqC55L/lyTTU4EM2NjZS\nvnLm7BoMBqlZToYC24Ahw64VUdNh5NWy0F+2Tfwus1ao1WScc2d/xDVqjB79xs9ra2u9U3Dciezw\ny+O0Z+ej8x7ncdPioRke9SaE42wIZ054f0wmk96YCm1rPB738idL/chGQg4sU2ZFObwRzySTJBJK\nhbNsOp1WBgO1wdlsVmGbjEVEh1wGYTkjxC1B7xvvR9eMPaMf4RE6Ih2mimtZG2XEBI6DyWRSx2K0\n0f3792vQys7OTnXokulFyyYLu2e5MiuaZaXFzzryaDGpz5Dx+ebSNO0mTZo0OUdyZmdEOhfYMS5q\nhuvr673vHeMi3/To6GjBETQcDnu7eAb2L6Nk+e7OstBRSWob8Snuxl5HYqVSnyse9zFHQsbrXcZt\ndZ6s1yvTgMiDjd9ubm72ckbEO/l8ctmdhufORdfQXCvKMHxqhsQv3Qpyyh/bxN/nFlvUwRNDsWxS\nP6KSvpXQ3DY3Nxdoek5rdNyf/ekWRhb1SUw5yhMn3Ozv79fPN27c6I0/Wl3xXvYz6ZTSfC5lVFX6\nFugIZ9u41u6WC3PXRJl4P60JWqasc9znecI5b+knuXfvXm3fyIG9s7NTHZGHh4cLY73r+odNL8Ok\no66sr1vB9KOwjjw4mnTksLyHw6GefvppfepTn1ImZ7Zor6ys9MK23enkjqhojNFotAANeC5gXxyZ\nFIaNGf+XFp2bcX/GKuA9fJebafSoj8fjBY87zWZfvNzJwTZg2/iJ2V7HzONO51+2cXpwjnvfnUPN\n624S0mQkxJMl6SE3Ogu7Js+bMBYXOk7srJ+5OC9zGPszOfHpEGT4f3y/t7dX+zlMcY4DjikqKKc5\n5flbfr++vl7bKJx4Tz31VDXxd3Z2eiHg2fg8zVlKCJOLIxWUbJPNxgzLz/bOIFC2tztT4y+5zx6T\nwE2Sc//4+Lg6Gu/duydpzsdmexCejWdlAWjr6+u9hGHSfMPkPIs+4bkADO/nWA7hJk0FiLnTXRo8\n0qRJkybnSM5E0w6uKjVmd0TSAXB0dJSGaNNUyY4LI9+amjZPk8nobtT8XCOI98WzYuckFzeeOR6P\ne7uxWxN0hjnlyzVDOlxca47fZU7HDNahKcuoTk+A4+2RQR7LNFqajhm9j+WnVsb7PfUloRhqrDQt\nmciIFDSvA7VbN93jnVldOM6ycUJHYETy8fvhcJhGTGb008xBSYuL4y/k8uXLNWbhwYMH+trXvlbL\nHb9l+ls6zrLI2IxPT3orIZVom8FgUK0MOiSzfia9MDvhJf4vqQeL0lnq7TmdPkpVMRqNaln29vZ6\naVajDQgVelSow6lMceEkBnLo9/b26jjI0AOpvz7RKSn1YbaVlZVTKX9nsmgTVpAW+blS36yhZIEQ\ncT3udyavMj7fAAAgAElEQVQAJ7hPDjfffKH2M+WCDxvvzyY5zW6eF+kLMX/reK3DCA4XublO6IGe\n+GUh82z/bHHKsF1OarJtKN7OnKx+nJiLM1xCiHvSlI02YGZAtj2ZHhkjYtlCGX+52WTsDm4ghLEC\nnohJu76+XlkeWVmWQSMOm8Rvo4xZVsbt7e36rre+9a11wTo4OFiA1Lqu6ykwjkM7/EfoyTF+Yu0s\nI+d1thBnbDFf4H1t4Phmbm9mLIw67u7uVsz67t27FRahgsPN23F9MtMozhWX5uOUqQ0yuInzg+kZ\nsndkATqZNHikSZMmTc6RnJkjkppQFprM3Zre4PF4XI/xyZgEDA+N3fLg4KDesyxRUMYLnU6nPa1E\nmkM1PGYqHASMHssgFYY6U5vysO9og8wZRdjHPel0vvAd1AaXaXSuZZLhQGcUzU86os
Ic397e7r0v\n3kNz253E1GjdAeoh+rRm2OdudUhz8zQ0zgyqISRH+Cwkez/bzcdOaFh0koWGNx6P6/gaDAYVNglT\nfXt7u8ciWsbNj3qzjXm4cPx95zvfKWl+gnswH1599dVaHo4ZaoauzREe8YyAzlYiJ5wwQZZ6geOA\nDLCwUBhNyPnK/ias6NAXIQ9q6uTWE7bMMvPFekGYjnBSlI3CTJ4eN+HtxvnIfOwcc4TX/PQeypnl\nHnHTNCqTZdMjfkjaTGZSECcOc4+UPw4C4sA0wTLzk1gXyfqkb2V4bUjW2Y5PhhAqyShXxOJp2hFj\nzcqSBbQ4gyDeycnmpi5hjrW1tXr8FTffjMXBiUs8OmNsZAulBztlmeKirBcvXkyxYSoCbA+H3JZR\nBkNWV1d7kAgVCY6PeBahsWibOOOU/hCnSPJzlCOrF+dHFrTBoJ/4nmd2Zuln2c8cUx7EFM/kguWL\nctQznuVjbjKZLBycEPe4csczJtkvoUDt7u7WoJ0bN27U66RmRh0uXLjQOxXd/WHcGBlYR8Ulrm1t\nbaXwrtNg4zPng8OFHJ+eNsKlwSNNmjRpco7kzDRtJ8hTA6qFsYCH+K1DBzSHqL1mR4Q5sZ8kfmmu\nnWeMjiypkR9q6wED1GiXOR+pxdLKcA2KDizu8oRcqC26U5R18B3fd3F3vrhG4YmbMosi4766BpVd\ni2dduHBhAZpyp1TGdsicqpkTMeOyx3PjGr/3g5LpiGJ7UDuOuh0cHPTCkz34i5YT76eGtSx+wWEw\nQlvD4VDPPPOMpD6bIWMf0SKKsuzv71criqwrsjc47zjm+I64RuvN29MPAmAd3dpwaDUsHv4NPvb+\n/n6FXdxZL/X5+hwz5NgTDg0ocGVlZaEfuTaMRqOFALjhcFjLQuuKVgzhIqIDWbbO2rZLv2nSpEmT\nJt9yciaaduwc1CicK5w5hqRFrEfqp2Kk4yzDe1dWVnpadUQt8Wgflss1QmrH7njw3ZBlZXkzKhmd\nIKQSEjtehvP6tUxTdxyS//ffsg0yjJbcVY9Oc/ETYMjlXVbWeK9bAN7emQOVfZBFPNJ6Y5/6CS30\nVzCsn/Viil62vY+Z0WhUNazhcFjHWmCpHsZO7TPD4jMndVyjU2swGNTx/ba3va22/SuvvFLfyzoE\nLk+u8DKKpkcl08JlZCy194yjzEjnmJebm5tptCk13ixBV+TAfu2116qmTQcp60DHHt/l5aaPzNeR\ncHTHe6m107lNHxvHSRa/QAuAdMjTKH9nlk+bAzVzXIzH455HNQYcnYr+TP/MHNiZ86/rHh0ZxU2B\ncIBvHjShaM7TdKdnOu7PEqQ7lJNxzTlIspPGo9zkgTtjw69xweEikIWW04nGumeOkYyrzjLSqcMJ\nnk0qtvuy/Cp8b/wu21hoTmfQAicm0wKQ2eMQC4PD2I+EGfj+cIbxpHHCaFwsPDCKbeCQifcDHXdr\na2t1IT48POyd4i7Necsx/rlQ8r0xh+J5UR9XBhggUkpZYEOwvh5YF8/k5s6x6PNmd3e33v/gwYPa\ntsGOOTg4qHUknJSxxZj/nAodxz/7k8qGB89Q8VpZWelt+nGNCzE3jgyqyxgwmTR4pEmTJk3OkZwZ\nPOKhrMuob9Iihc3NQzcdYrdkjlqeLsFyhFDjzSh9WYIacrPpSMzqkGnP1EgIu1BboraWwUlsN3fu\nUMg3pXbm1Ddvm0y7dq57FoVIk5JtRPOPZfM6ZhpH5sxlWakVuTPMrQm2Pd+RabFsW9JBM8uG8Aa1\nyajv7u5uj0IWz6QVkVkxLPdpmnbXdb0Tw6NcFy9erJp2aIiHh4dV0z48POylZJAWj48jx9md03Re\nZ+/n2Dg+Pj71pBZqoexDUhVDu97b29Nrr71W6xD1y6Av79MoH61Szw0/m816bcC2d1qtQ68OcW5s\nbPS0bpbBI4lns0fH2S2zMEOeeNEupaxI+qSkV7
qu+4lSylVJvyLpOUlfkfShrut2snudcZARx7mI\nSY86j8cWZTjfcDhcmPjMKeCYd/yWncUFxc0Svt/rhLaR1F9cl/EsM7yM7+WCmOFhHDgc6B54sgyO\noslG0zHKQpyOgzDjdMd3FH+vwwxc0Dgp1tfXF1KJstzeHlH+jNvq0FD8NjNJuWF7gAjbgKlynQHj\nm3OUR+ovjsHMIAvJ/QmOdXqMgwt52txYLly4UHnhwYDY3d2tC/h0Ol3Y1D2jZbx3NBqlsEy8N4Mv\nXUFyDvPKSj/FKmGKeF4syoeHh7XcDx48qPWJ77lxcXxl7BGO5Y2NjYXNgmHynn6DeUiijllqASoq\n0YaEkxicRcabs+CWydcDj/x1SZ/F/39B0m90Xfe9kn5L0i9+Hc9q0qRJkybfgDyRpl1KeVbSj0v6\nHyX9jYeXPyjpRx5+/rCkf635Qp4KzVfn/Ur9XZF5lOP3/C21KmoB8f3h4WEKWVADC+2HB/dSq+Gh\nvA4vSHnGQOcKu0XB32fOsihDXGO9l/Gc/b6Mb02NN+Mr83tCNSwLtW4yLrxcHm1Ix6zUdzTxHTS3\nM4cMTXdqxPTeM4+xH5xBrSi+YxvRqZk5F9kHNHu97UKo6YdGSCcgHZGZhZCNCUpmjZBxNJvNqmYf\nR5Pt7u7WugfbgkIGBPvRnfkhWU77eD7HPudVMGim02lvTFADj2cwT3gkfrp161Ztx7iffb+xsZFG\nNod4jEeUMxy4KysrvbUhY3FkLKPMwe7QGfvWLWPex9QdmTyppv33JP33kminXe+67ubDQtyQ9MwT\nPqtJkyZNmnyD8lhNu5Tyn0i62XXdH5RS/twpP10KvIUGkB2llVFtuCsRoGdaVO6WrmVSGyBeRk0m\n3rW1tdXTmlzDOjk56fGRT8sPQI2BmuEyZxbpQJ66cmNjo4fxehswoY+0qOG4Rp6ljCWWmSWMIu5P\n7Zc8V7ZzfJ9R19h3GZecfePOo7iPTrC4h5TR7D72J9vQT0pxnwv7Jv4+ztoJoTVC7Dg0x83NzfRo\nMlpXbL8Mw+fYYx2ZSyXqFseRXbt2rYcHR//Gs0KDlebzglaIW0FsL1pUdLayP9wnQz42y7C3t1fH\nclD6Hjx4UDVtOrdZhtDgSR2mtUDsmuWK9mDcBq0zOk7daUm6MNGDrN5O8zwtgpR5bjJ5EnjkhyT9\nRCnlxyUNJV0spfwTSTdKKde7rrtZSnmrpFvLHvCxj32sHjH23HPP6YUXXkjzacfiSnMqM83oVCLT\nZFliKf6fxzHxr9QPVOAgy5IhsVw0h8jycOeibzYhXLBCyGQhrzjEif8+kOkU5fuyXL5+oIIHN5Bz\n7m3PZEP+Li4yy5yLIaurqwsLoLeLtxEXOeczZwssF5yMhcH29L5bX1/vLQaZozAbU9KjTSaSGg0G\ng+o484AWryM3gPjNaRLlXV1dXVAEtre3e4ySmAvczMhf9/v5fG58dGBm84YbE98Vz2JAzGg0qtkQ\nY5O7detWuhAyDQTnLZVDJqiS5u3NPuUYD+H4J3/ck0udnJzUNSsL3uE44gbCsUp4+OjoqDJhTuvn\nxy7aXdf9kqRfevjiH5H033Vd95+VUv6OpJ+R9MuSflrSR5Y94wMf+EDvNJomTZo0adKX7e1tbW9v\nV//O7du309+9Hp7235b0q6WUn5X0VUkfWvbD0LJ5KozvNK7JhCzTZLizuWbjWmZoFDRL6QQ5jV5Y\nSulRmmiOO0+Vv3UHptTXTOnco/YQQsdYKWVB82OIN+ubORfdcnHxyDFPjUkHl4eIR9u6JeDvohZL\npxO/Z59Ii4nDHAZz2iTr42OGTiFCDv7OuObc/+Pj4x6dk/3sUIpDGv6ug4OD+nxGX9JCYBsuc1TH\nX47VjFJKTTvC3C9dulRpdAENkG5Hq9O1RLaz1O9Tjklqx1EvUvuoXce77t69Wy2SO3fu1D5gXZwX\nz3ZxC5bQZN
yTWVQcW7QqmZoghHMtS22QJbryQ4Qz9IBtexrt7+tatLuu+21Jv/3w8z1JH/h67qdZ\nw+xY0iITIsNmucBzYXGcmjg4G4KYXYjn+iXOG2XhQM4S8PP9DLhheVk3LyOxdJaLg8A7mQOO5ifN\nOS7wLGsGM/C5vsCT+8ocMtyIMy58lIPl5gbEySz1k9pLc5wyw1CzTZRl8HaK7zL4gos6r3su6mWL\na8YFdrgpJDa4vb29yg7gZsAFiYsI2zuDdTI4yjMJSn14ZHd3twasxBxkEAyhPm4cma8py//iSouP\nz83NzR5OHZ9v375d52lg2lToqDRkWD6hqywX+3A47I0D5+Zvb2/3Uhtw/DrOzNiCUsqCQsg56qk1\nvG+opLG8mbQw9iZNmjQ5R3Jmp7HTjJxOH2Vny2AOj9rzo7a4UznTROo7HqmJUXul44IaJTnd8T3v\nCSYJy0ANLWNJEA7INEeyO/x0aC/3MsjDLQuWj2Zcpmk7POJtQA2DZaWFkLEaqD1Qm+TnZU7JqDet\nicyJS+2c8FuWrCtL+ETLiuPGYwOWcd0zrrxz4T28mZrl5uZm7wSXDB5xZ65/n1kA1D6pcUamuqef\nfrrCEPH36OhoaZoGQmLxriwfPC29jNMdwujMw8PDmjnvwYMHPU1X6jsqGUnJ9mamQ4q3GfnhmQX8\n4MGDXr0zDd2JDyFZVPMyR3kI243rkJMFenVa+k2TJk2aNPmWkzNLzcqdmadPkCJHjIynOIRwJ8ry\nQ8QOR2yOGhZ5ldSwMpoeqWy8RjzNtUt+T+4qaVLkGvO+jFnjjjdJPY3aHYZR3vg+y13iDk9vz+xd\nrJfnOfG2p1VA7ZVYrScfijKGZFzjjAbl91Dz8b6hxcb8E9SOqD27U5xtuCymgFYdky251k5tb39/\nvzf+3Kr0CFXHll2TdAuO5ZpOpxXD3draqpGSPJCY/hs6W91yWVl5lCyJ502y3UPrZRRiaNH37t2r\na8De3l51Oh4eHi5g5R534WOZODbnxXg8XtDwqak7MSDemeHjrGOW1Io5/mPtOTg46FmiWRmz9Lf0\nnWVyJot2LAqstE9mmmDBNonrmam6LPzTr3loMBdVqc+T5YLECUxnAct1Wma+zGnEwev3OcOAAR4Z\nT5uQCB2RXBAzOIqbCZ2qHFC+INDM43PZDxmXl2Vc5sTLNm3ew7Jmp6XzvRmXN1v0sxQE3h7OcCEU\nRAfsaDTqKQjxvScXojBh0N7eXl3cNjY20sMEQpyVEu/KHH58RsYouXTpUp0v169flzRfMHlUVwTl\n7O3tnRocQz4zg4aoLBEKkebwSLz//v37vQN4fS5I6sGSmVOU850bnm+obK+tra2UX86EZqexwTY2\nNnr96PnL3dGZMbc432NDY1xEJg0eadKkSZNzJGcGj3RdHg3ocIW0CPC704ifsyOaaBKR40mhIzI0\nJXIt6XyklstdnhpMvNfNSKmf3pNRZrQ8HJ6gNZJRhFyrd6iEVEZqaGwLOoGzJFAZvZHmeAZdUfvO\nzGp3dGbaEjXuzPnGurDtSQtz5x8/sz0zDY2OSmpqtIxCw6Ip61py1CuL9Aw5Pj7uhbdHn9DJF+LO\n0igrLYxsjjACkNS2+Bxa7s7OTq+/yFX3OcSxTNpi3H98fNyzNqK9wuF469at6oxlP3Mss78yi8qd\n3nGNlrFr0rSsmU4j3k+rgXAS3x/jjOvU6upqDYnnddYrg05ZlgyazOTMFm0KsTsyETjZM8ghm8D0\n8GbwCgdqxg7ggklucwYzkKxPXNQDM6S5OefskVL6hyBwUXR+rh9v5eYyB3fXLQamEL5xZobDEM6m\n8PZyHjfbxnH7DDZivWie+mbD6/78DAtn8IIHJvgBEYRXPG+L1J80GauF/g4ulJkJ7qkCHIphvcfj\nccWUB4NBXSTI+WV8QwaPsJ2zAB8uHISYIijs8uXLkuYwSWwgBwcHtVwc64Ry
2DY8CCHaIp41mUwq\nfh0L23g8ru1JVhZD4glpZIE+UT8eqOD15VGAUh+bHg6HvfUn6kp4hX3nUBfLwngOloNtlM3XLJ82\n2zmTBo80adKkyTmSM9G0pUVHjmfMcrMmdunM+UIhKyCeSUeTazlulvAaNZZl2gu169gNqbXTgsi0\n50xDyt7FXZzaHJNq8Xvnn1NLde+7s1KoXWSajB/JltWB97C/HEJy/u4y+CKeRW3LnZ6EObw+Do+w\nXqxDiEcTugZGDrNzswkJxPcZM4htwPfHdTolo/zMvZ21t8MhGW98WXtGHSO0/emnn67JmkajUQ8y\nC7jGQ7W9viGERPb29qrW/rWvfa0+P0slwRPSsyyXPHiY7yKER+sti18IGY/H9XSf4KrToqJTdHNz\ns3dweDyfFl1meVM7J0zl12j9zWazU7P8NU27SZMmTc6RnBnlT8rxNmplWfRQpjWRfpNpRZ4Xgc9y\nLZP4t/Oso1ykqDHhTVaHLL8zubfcZTM6Wvxl8qplmmnGNV9mFVAjdU4trSA6ZllvWg1sI6fGMTrt\nwoULVWNg/YhDZ9Fh/C01MC83P7PtmSsls86IKWeWjdc92pD3L7OOQjIHe4i/J+7f39+vbU+KJrVa\n9+9w3tDiou+AYz6jJYamPR6PK/1vMpnU021Yd45fWppRxnBq7u3tVWv59u3bNc9JZpH558wHRX+V\n18spqdSOQytmWdl24QyNa+PxOE3XSj4940GWxUXE89n/GYU3xmnXdT2qYXaObsiZnsaeNTzB+8yE\nznJCe9avuM6Kulnk78iEHcYgA3YCPf1uLrvJ6JsVFx4uhMsmZTaoaVJmbcT6MziBEzsLi+aC6Gwd\nBrksO+iYkgUQhdDk5GaxLMCH9clYQFFHBoasrKwsTDxu/l6G+MsAEP9+GZOAv+Fiwjb2/OTO1Wc7\nx6IXR4Vx0eYm6U5bFy4Yy7jbni97a2urhrlfvHixwhtsD/Ytr0U7x+J8dHRUF8Q7d+4s5LHnJuuK\nkW9Ms1k/a6czqegUZTtTIaPznQ5aZ1113aPEUByHVA5DCEGORqOFYCAqOE5ycGWHZVzWpyENHmnS\npEmTcyRn5oh0SpZzW/1g10wryfjQWW5uapnUqtxxFd9TA8tCaDO+sztP4n5aC645DgaDXnrYTJMO\n8RBuOgXjmTx5xM1qUiEJSayvr/fyJ2fljuuEGJbBCA4zOAzjWigdPVH2+N7bgM/iIar8m3FiOb6W\nQUtZKD01z9NgiPX19V5SIyZ88mcRhuDpKTweLspIx1Zo3Dyo1i25KBfHf6bBhVCjdY68NI+SpFM9\nynLnzp3eocfSYkpcP/Hp/v37KWWQ97BchDvjuyyMnu1AOIHzkrRbP22G1h3HCbV6vp+WnMNghNkI\nw7JtaNlkKYXJ+Q7aJGmemZxplj9OBD9nkIOIJnyGzTEIZhmjhIsFB7DDH74IubBjGXBAEj4X3ygP\n8VpyRDk4s/wSLNdpQSrM8cHryzaoLECJ9Sbu7tDCYDDoLVLZAOd3bv77X5qJGRMlO2CAfZqlPlhZ\nWel59bOcExlXnH6BrCzZtczspjiP2/OYcDE4Pj6ui/5oNFpYGBjTQBM7g7h4X8ZO4e/JJGEgT8Ay\nly9frmHsBwcHCwsSIbfJZFL515FD5N69e/UaITcygFhW9qPDfj7GfENlHZhbZDQa1YUwWzCJU2cw\nLMsiLeZC4ZhhGbgGcGOMfu66rseOi2tUQE5bkxo80qRJkybnSM5E0w5YgSwP37WWaVWZNznTYqU8\n1JRarHNa4/ncGT2Mnc5JOhJHo1GPSRJ/M09+9nyWl1ogy5rt6MueFZJpBnT8ZlGEPFqKsI9bJf58\nWkTuDfffZ4wPN3kdYmEdM+chWQMsA7V5OrsIy7hlQjiL1kLGDKFQo2cMADUo1zKXcXpLKb2kQfFb\nHgLszm+Ob4qPa68Dx07m0HvqqadqGUaj
UeVXE6aLkPT9/f3qdIxrR0dHPa5xjKXQNhn67tz/OFmd\nEFe8d2NjYwEOZfSgO7Qd/uD4LaUsaMd0tJOfTlgz+pyHBK+urvbWjCgrrXRq0g55EXIj/JvJmVH+\nnJbmIdacUKurq73J7xVkA/FZ2QTlQGVGtmzBZNBDHAf14MGD3oLHBOxupjm1LsPHuUhkWHoWdn1y\nctIbCCG+gPIaNw0uiFkGO5r7lBhEw+GwFwzFRd0XUi76UTf+dajncZ7yzBPPOrIPuOn7ZCVOyM3G\nGQFRRodiiJ8Tssj8ET4OMiyez8+ohlQOYsHY2tpayHToMMNplL/MbxDlDSFUEkySa9euVYw9II+9\nvb36eTQaVXogM9xFePz9+/d7LIt4fozpS5cu1TIeHBws+COIc1OJ4+KdwanZ/ODYyCBO+nxWVlZ6\nxyG6QuebiQe4ue8mS3eRZSvlmpZJg0eaNGnS5BzJmWjaoclRE6ZDTeo75uhkownMBDNhMmY7L0PI\nPdGPa7/UwCaTSdWwl3EtqbU4e4S743A4rDt2/KXG4KYqoZB4Vhamnt2TaaHLeLSnOQnjs/PeaTJS\ns2NipIwXT+2C5Yrfbmxs9IKcMn5u9qxMy2S5qe0/LmE92zXToJY5hDLHalYW/61/75+9XMfHxxV6\nGAwGvTkkLYZEZw69kMyZy3IR/pMeBd1cu3atMkF4NFlAIWSKLMtlnQWehKbtyboyyI2EBYd9lvUX\n51BozKyjWznR3lxH2M50JMb7yf92q5JrE2Gf3d3dBfiu67q6ph0fHy8cuUZpmnaTJk2anCM5E007\nnDw8pcF3/K7rUscDw6rJc+Vu5toeNfmu63oONdfmPCEVuc/xPXdLlsExLD6XjgvmGiYuRm2HNDc+\nJ97r2JuHJGdtQO0jw/kyjJfaLfMhZwmKWE5aM+w7x4xL6UfC0XLxMlJT8WjSaFdq+OwPUvmkRa3e\no0g9JJmOxBCOE1pMGXfZKYNSn97l/RXXSVeLeod2u7Ozs0BR41xyPrPLMg2fY41c9ej/K1eu1KPJ\nXn31VUlzX0/g2FE+Pmttba3OJXeqR7nZBvFbOhqZSpfimjbTwK6urqbOzqy/aCXx2fSd0SEcfRLt\nsrLy6Mi12WxWy8AxHZb7/v5+jRZ1n1z8llz306KOn2jRLqVclvQPJf0HkmaSflbS5yX9iqTnJH1F\n0oe6rtvJ7o9FmJMmJDujj5VhGHDGRMjy73rS9iyogiHPdIz5cWN04pFTy0bmRIp3uUc77uGAo2l1\nWi6MDP7gUUcZE8BDZR0W4m99YfKjsrixueMrc/iSlcIgkviO93CgZmwctq3nMSFPnOOEk4L1Yr19\nEyJk4jCY1A9TznK9SI/GNbMijsfjNKaAmySZQwwYiXJHWXd3d+u4ClN6MBikvHfK41hGIbyXm+TG\nxkbNhheLEPNtc5EhsYBjMb4PyIUc6clkUqGDyWTSg02keUg/N6aQWOg5L6koTCaThbzVg8GglnE4\nHPb615/vRxE6xEOnZTZHV1dX63PZz3QYZ0E0VDQzeVJ45O9L+j+7rnuvpB+Q9KKkX5D0G13Xfa+k\n35L0i0/4rCZNmjRp8g3KYzXtUsolSX+m67qfkaSu604k7ZRSPijpRx7+7MOS/rXmC3kqbu67KUwt\ngZAItS1q3JmpSS01ozll2gW1hIy+xWOTGHW3srKyYKrG+6KspNlJfT60m2EhWeg5d/l412g06mlw\nrokwdHjZzp5xTB8Xcux0TM/ASO2Xdcj6hlo7aXLUZKg1OeXKhZZctDkhD44vhy+8XF5uhx04pnx8\nMa3A2tragoXpNNMYX6PRaMFxy/F5dHRUaXah8YaGGpJZdRmktkyyKMOtra0Fq+zw8LDn5PW0ALTC\naK3SmmEfBbRAbjOhIlphbjWWUnpWJ5/rsB+jqY+OjnoQTvyNdx0eHvb6KSS0Z4+M5XyKayQ2MDoz\n5lDm
VM8sRcqTwCPfLelOKeV/1VzL/qSk/0bS9a7rbj4s0I1SyjPLHhAVYmPThI6KcKJkiwAXHqZJ\ndLxsZaWfmjPLW5BhVX4+nLSIp5Ep4pxZcq/X19d7EEy8kxsEFzIf1CwDP2cYKRdVZlPjIsTF1XNl\nLMOGswnOxZVYOMtK77uX3yEwlsvb2jcPx2Dpo+DGxMmU4f7ZQusso2hHbsyZiU44iSyk7DAKLmhs\ne3KXva6EpmazWcVFYwEgd5sLdcY48k0yW9QzfnnXPTq0JHDsO3fu9HLY8LiweD/HbHwmXMH2Irzh\nMCjzr2TQKscpN4soB+tI+ISbJ+tKxYprzrPPPitpfr5ltCcVM1+IuY55jpmQGBOXLl3qpWl9vcE1\na5L+pKS/2nXdJ0spf09zjdrV1qVREh/72MfqwvTOd75T7373u5/gtU2aNGnynSN7e3va29v7piza\nr0h6ueu6Tz78/7/QfNG+WUq53nXdzVLKWyXdWvaAP/tn/2wPEqEzqhYEmgx3QE80FZJppjQtGSVJ\nTdghC2p7GVOFGgOdXdROCd9Qe84808vYEoQEpH5mP2qB2aEAfFemqTv7xJ0fh4eHqQUQ4kwVikce\nOsc+JOPU0sKgczAkS5DEMjD0l1oixxKvEV5zDYvlXsZk4bWM9cQxtyxqTlo87ox1pHUUkqVnuHnz\npqS5s+zq1av1OZmmnUUiZwwFZ1NQU/YEXG4ZO6xTSulBRHyH1IcdV1cfnWS+vr5eIZ9sTBJuIkyX\nxc5FE2UAACAASURBVGXwsOG4//DwMI2VYJ05vgh1vPbaa/Udfn9mTTj7hBaoO7KPjo509epVXb16\ntVqKwdRxeawj8iEE8nIp5YWHl35M0mckfVTSzzy89tOSPvK4ZzVp0qRJk9cnT8rT/muS/mkpZV3S\nlyX9FUmrkn61lPKzkr4q6UPLbg6ciCq/cy3dmZZFohHP4o7O/AB8djw/fktHz7IoSc/x4Rhy5pig\nxkHc1DVO4ojUqlh24sVZgqxM2yN2xmts70yjzd5FPDjDgJ3X7ri6R9U5duyO0AyjD6Fzh9oetfYs\n5zhzKsfzydnNND/ivfSjkOa3LGeKa2yuqbMM/n72LduZmhhzOjtWPxqN0mi/aF8KtW9vey/rdDqt\nc+HOnTv65Cfnhvbv/u7v1rLQf0ONMsrHue3Hv5HSNpvNakpYH9fxfFpEpMp6XUho4Ik7WYQh582y\n/CtxfXNzs+Zf4buYatatDUm9cUj0gLTDeH98v7m5uZB4jfJEi3bXdf9O0n+YfPWBJ7xfw+EwXSh9\nEY3vfeGN69LiIQnOaWSgBDvROZjxbDZQmGZ0khCyyLjNmUPP4R7WLa5li0Q22bkA01yjWe6bINkt\ndIA+jjlB5yIXWjqCYoJl51iurj46nMHN7XhXxozgosmJz2CMLPApg9TY9vw+O36Ni+Qyhgt/50IT\nOOu7eDaftUxB8fuijZgJzp1Z9+7dq2OZJ5lnicV8M3UlhwFwo9GoBs28/PLL+uxnPyvpUb5swiXc\nXLOYAcIBZIQwpoGbuYfq+xz2vuE7OQfZThwHGYSzzNEfv/VQe29bj1+IsmQKBt/LectYhgy6CWlh\n7E2aNGlyjuTMDvYdjUa9ncg1He5U8Zu4NzOxeR/fI/UdCNTgaWZlfGs6HeP+LH/vsjpk5eezeI/X\nJ4M/+CzX9FhWpnF1Xmrcn52CQ60tC0Mn35VULZ4Q41YMw/ddS5T6SabiGXG/O0D5Xr7L6+rtSSpg\n5pycTCZVO3WaVjzHtbjHpRWgEIbI6IV8HjXDjHJKbS9LeSupRiaurq72Igu9vailsp85l6I9jo6O\ndPv2bUnSpz/9aX3uc5/rvZfOflqrbC/OK7ckB4PBwulVUW72WTyfMIdr2owK5elQ7Ac68Dn+vR+9\njdgfnmJgGURJOIsWKsPy3ZnZdV0venKZZSed4RmRNEsyPJCYpXMlfQ
FmKCpDTZdhqTRrHC9bXV3t\nmS1ZGDFNeJowjueyYxwGiDaglztb7Jct5BlkwYmdsTi4gGf4NBemjPWShYWzPU5OTiq/lrlT6CWP\n52b5uGnOU+Ke0WjU6zs3/bnZsD7xHgrhJAZJhbANoh38b+Z/yWA8b8+MicIxm7Fp/P1RJ/eTrKys\n9JgXMf6uX7++AE05Y8Q3g/F4XDfdW7du6dOf/rQk6eMf/3iFSgi5RRuvrKwsnIPJNqDPhPk7Mmhs\nMBj05pDUh+HI7ya8w1iL+DwajdKNyRf9KE88M+qwtbXVO7gl2obHw2WxI5mPgmkSCLMyjQOzmDq7\njtLgkSZNmjQ5R3ImmnbXdb182XFN6u/MvEaHXrbrMLzZTXQ6Pmh20wFKs4fX3OHnTjp+7zAC68Vn\nLWO1EM7J6khrwLVfWiDUXjPNlfXOohidS+wsB7YxtVE6uzLeMsP2GflKDStz7Gb8XLZxluyLFgAt\nNfbjsmjUeFYWOkytKeMlk1VCq4Dmtodlu5WUWRNZVB7nCK2o+Ly7u1vb+dq1awsMLM4FapTxdzwe\n19zdX/7yl/WJT3xC0jwK0hOh0XLhOCDzIoRjhlx2zn1aT24J8rDpjY2NBSbKaDTqRWRy7pNZE+/K\nTq7hOAqtm/14cvLodPoQjt+MeEA4djAY9NrEj1tkaPvjTmNvmnaTJk2anCM5E007+KUZhkXthVoA\ncScH5Z0e6LiVY4PUErkLxvfU6l3bIs2KWPpsNqvJYDwXRVw7zZnA35IbTU05ywdCJ0f2WzoMl50/\n5+8nxkoNnho5rQVqJZ4Tgv3hBwpHuTgOXHNkWUmny9rS60JrxDFhUuuW4cwZxYyaFv0ZtEzc+Uuq\nV2ZdOqWVuGfG5Y7PxHbZB+yvSLx0//79npYXz4lr4/F4Icr2zp07ldr3b/7Nv9GXvvSl+l7XDKkd\n01LLrE63rqKspNKGZFYjn390dFQ1Yb6HaXs5pnz8cI5nVinHCXFo9hlTMrM93crY2trqlYX1Da06\n/BHkn5+GZ0tnCI+w4SlZuC4XAzIfQmiSkv+YmYw8yJYLDp0cNFsdhpDUOwYo3rW1tVXfQS94lmDI\nvcrxfi4CrJu0GFzjG9MyniwnOCcYze6MEx737+/vL0xQDlhnjMQikR0NxUXXGQJ8f4hzvn1B8vYq\npZ/TPAtrzoI9MqeR88R9rLpywLZztgPLSEd3dlgG7+OY44LFMeELw8rKo+RodJbt7+9XPn2WGuHk\n5KQudHFs2Je+9CX9/u//viTp85///KmHzw4Gg97GGGXgXIs2IFmA7Z1lziObhk7LjHHEBTGe5WHq\nWY52KjDkVEt9B+ve3l4arMbNjnxrsoCib3hoSrZmhYOXcBPncyYNHmnSpEmTcyRndtwYnUOZFspr\nUt9BmTnZsgg/aurkiGbPonZEulC2W3pCoHhHCJ0cNAnppJAWoQU6y8Is5rPoPPQwYVooa2uPjnai\nk87DfeO3TsPzPN9OqaJ27s4wP+nH29gTcNFK4n1s36yOdKKxLI87TYYwRca5zrRr18qlvrZIp6XT\n6LxetDaiLQ4ODhaiJOO9bHOpzyumtp/xoXn/0dFRz8Eo9bXY8Xhcoxu/+MUvSpL+7b/9t/Xz4eFh\nL4rxcfCUQ2pSDmlxPIQlcHR01HOwe3Iq54Gzz+LdzKedRQJnlFemS2VYOed7Vq7MQZ+tTdT019bW\nahj8+vp6tW7CUmUZ6JzO5EwW7dFo1AtSce+61Oc40+SjCcPGyBYON/ulRe++L9o0/Zl5LOMtT6fT\n3onMzun2jIIuvphkJnIWXJPlcvZQfTfdnEucDcQQ3yx90rA9aQZm/HFCW+Sesj2zNqIPgO3ExckZ\nIfytsxE81/IyeCPjVrNszE+etQdhGY5T+hg8NoBMAkJ5HGtZ3pmMueMpHehPCGUkxiwPUbh7966+\n8IUvSJrzsKU5YySDaKTFbI7LQr
wJxbCdIugnnrO1tdXL5UGYwTn0HP8cy5yXHkQXEtBmLOo8X7aU\n0jtPMupC/xHHgceD8D3cLOKZnN8bGxv1+sHBwQLWTqWCrJNMGjzSpEmTJudIzkTTDsYANTpP8jSd\nTpdqWNxl47dZmPoyRyedN/EMavXxrtFotADF8NBQloWOFHp96WH2OjpXmM4m31mpiWeZ+TxU3800\nRhMSqqGDMoS/ZZtS+8+8/jQ1abbz+d7ey1gv1ND5XRZNyjYmJLIsgtOfT+dhdvoJYQSG8nsfRBmz\n6EtaTG49sY3ocGZ9M0531I3C57slGdpcaNcOiUTE48svv1zfT5ZFlIWwIduAVo5zwsmEoSYc9+zs\n7FT2FetIpzgz/4WmnMFgnCu+xjg8R5lMJrVtGEfA8ZnBLlyHMniXp/CEhcET69nPtFSpmV+7dq1C\nVS5nsmhHIbMw9CwEnZAF06kyFDrDbon9MXcIWR7ODmG4d4Yj0QykJ5/n5tEEd1YHn8uFhwtHRv+i\niZ+llJUeDSgu6iw3n0kzzKlLg8GgZ65FGUk/o+nPCehmMfsxgyGIj3IisQ6cHFyQfGJ6u9G8dCjF\nF78s+IbPi0lMX4FPWr+PQRvZuCZ0kEFMxDK5CRMa8+cybYD7GGKBJvwXC8FnPvMZ3bhxQxT2x9ra\nozMcl1EkMzYEF7yAJrI5dunSpZQmurW11cPd428s8NPptJdFMr7PICauDTyf0ecln8UFlTlCuu5R\n0A3DzWNj4QbBDTfmKM/U5IZK31t8vnr1qt73vvfp937v95RJg0eaNGnS5BzJmQXXUIsYj8d158uI\n8ATiqSmH0GtLriTNTJr4NEEyB1RmyjqvNMpCTd3NeWrlmdbumiHL4jxrah9RTj7DHX5er67rehpF\nZobRdKTl4vAJoR6WlRZNFtTjLItotxAGOmSsA2qRGXTgbezMC39uxlShU4n8X2cRucPQx4lL9l7y\ncGnlZA52PpcWBtkh0lwzpRVExlI4+oKp8Nprr+mll16SNE8IRQ086spnBXTAfqIFQivLudM8QZ39\nyHFKgkCWloLXAl7IGGCcVxwTPu7jL61Oavgh5JxnztiQgEG8DNE36+vr1Rm8v7+fji8iAleuXJEk\nfd/3fZ+ee+45LZOmaTdp0qTJOZIz0bRj96QW4SdGSIs8XamvkWZ0o2Vh30x5SFrPaaeMsFzEJ+P7\no6OjquU5bzfKku36Wag+tSbSkBzriudn2gU1NM/1G++Iv/zMnNfxfOKD3jfEod1CYMQX2zU+u6OP\n2rdTtbJ+Po3mR02KyYEYUZY5tIlvUzun38C54hy/pBcSz80cY3xvtNHR0VEvDoCaH5MVxfMzq5DH\nVPH7uG93d1e7u7uSHh0C/PLLL9cc2RnOzPvZL5PJpGqi1MqjvZm/POo1HA574zu+j/oR46Wvib4v\navfR3vQHsA/pAyBtN/NdMAw+6hVWyWw2q23LccLxl60X7If47e7ubm0jphGm8zrKsr29rXe9612S\npGeffVbXrl3TMjmzMHbyYOlsyIIMnKftE4yLCCd+Bl10XVcHGlkQvJ+DNn5LxggHXMbZzvJEZB3O\nBY2Ll3ND4x4ulA4jOIzhphsHGXNW0GyPNmZCei7K7mCL7z00WFIvm9pp3GrfkDM4aRlvOcoR72dC\n/CzQg3Ugo4Nsh4wP7XWLcnvZ4/kZt57XnMHARZ/jLzPnSym9MUUIkO+R+gwXQgpxiviDBw/q4uQw\nQVwjl5xjzckAZBy5EzfKGsLgLDrPuQkSqiHTyJ/FsnCcEKrkepD1c0h2oALv97Uh7iXrhetMlCtg\npexM2/gcz41N4/nnn9fb3/52SXNHZLwjkwaPNGnSpMk5kjPTtI+OjuruMhqNFjJ8UZucTqdVc7t4\n8eJC9BCdINQCCRdkFDPm4g2hNsdoQNKVKMsi6KJ8jJJcRiGM51BLdCdblhktnuvPokMlc0RG3byM
\nWcgz6XKk25GaxKPaHIagRjGdThdoUq6RUQvNOMi0yFyrdkdRlrSKVlp2H8vLvnWNlp+p8S6zFlhP\nP5DYNfZMa6am7do3241t3HWPUiM8ePCgHkMWjsigALoQlqJGffHiRUl9PnOmsdK6yrj9rDsd1wzx\nZvSmt5P3kfOhCSs5hfLy5cuSVKEictEz5zdjKwiJZc7njY2NXkj7zs6OpEfWBGMiaMFubW3V9SW0\n62effVbPPPOMpLkm75kMKWeyaHsSeHYCBz8Hb3ZaOk0VmpfRAB4QIam3SLsJHM9yorvUN+NCuBBn\nXHNioQ7R+PN9cLGMIdmGRhM7K3eGqXPwRJpcPst5w84I4WJAJoBDOPEsBtR4iDdhIy8rF7P4LWEd\nh9Jo/kp9szjjcvN+1iHKn+WYoSKQQV/0N3Chzw4DIOxEaIBt50dteZt4uTh+eYL6/fv36yIdizfv\nI2TGhYo+ET9ijPXhOOGYYLY9boweaEbOOY8Fc/8Kf+/vChkOh/X5TN1aSql1z3B/xiewbzKojZAG\nM32GMPSe/rQQZ4jFAv3Wt75V0py3HhvMcDhcqCOlwSNNmjRpco7kiTTtUsp/K+k/lzST9IeS/oqk\nC5J+RdJzkr4i6UNd1+1k98eOE04QahdZrl+pn8UvgwkyT77zYeNaxoem2c0d3fnjNKWPj497iW/c\nLHYecBaqSg0vcwBlTktqtNRo6JijV1+aaxxZJB21vHjXwcFBz/SL+maaOpkurDP7izCCsyGoyVDr\nyWAAskeizaPucX/GxskgD2p6Hu4t9WMDHFqKezLHMJ+V/TbqxjailkohlJdpfoSu+OzQjg8PD6tm\neePGjeqApAVKC9KTmjEKkqkPJFUOcRxHFu0g9UPLoyyEmAhpZLAPxxznIplcdIp7UjdGoDps6TAY\nNWI6U5l8jWsLj1fzctHiZx1pIcf7NzY26ju2trb09NNPS1JliWxvby+NP3B5rKZdSnm7pP9a0p/s\nuu77NV/o/5KkX5D0G13Xfa+k35L0i497VpMmTZo0eX3ypJj2qqQLpZSZpKGkVzVfpH/k4fcflvSv\nNV/IF4TUGqnP36X2xF0x44syJwDFaXyzWf+Ukwwfit8eHR31NBHX2j2iMmQymfQSw0hzLYGpW91p\nQ02ckV/LNO3MeUdqFK2G+ByayubmZs3Vu76+3nOGEUuU+oeWZomRXOunAzSzDMjZzVKkLktb6xZA\n1/WTPLmVRMoVedY8vo1aC8vi1hGtIJaPeS6yOmaccI45OuQyXNZ57x5/QPxd0gLmPZ0+OrrqwYMH\neuWVVyTNNW2fQ84pdwuB2iTLMhwOFyw5x7SdIklLM57nQguCKXCJb/s1tg0jF4ljM7lUtFd2CDUt\nPfp3Mn/BYDCo7+BfWgtOPWYbDwaD6th929vepqtXr0pSvXbp0qUFa3CZPHbR7rrutVLK35X0kqRD\nSb/edd1vlFKud1138+FvbpRSnln2jFJKL+EOJzsX6ugEJienCULPNAef84oJv9A5SJiAjj0uvhlv\nPDv6bHt7O3VccaHzyermLc18N8cJ5TzOBOchCJm3mmyHk5OTdFHm8wPGCqiIjJHV1dXeBOFgj7IQ\npvIUAzQ/OfE5qX3wh/hhAtxQWYbsXr4341uzjXzT97LSaUnnYYzfra2t3vf+Li4MXHDodOe8YHm5\naUtzSCQ2552dHd29e1fSHMZwOIlwE9kQhDboRM4WvGV52T1mYWdnp7ehexCLJyGLNlhfX6/1cUgl\nvvcYD0JEnkoiy+5HJcyVJXeIMxFW/CaSRDErKGFYBhVFGz711FO6fv26JOn69esVHiEfm+XONrla\npqXfPJRSyhVJH9Qcu96R9M9LKX9ZUmc/9f9X+exnP1sH6tNPP11pLk2aNGnSZC4vvviiXnzxxZRJ\nRXkSeOQDkr7cdd09SSql/EtJ/7Gkm6Ftl1LeKunWsgf84A/+
oB48eJCG3hKO4DWaKEyqIvVTMdKE\nDqE5RccbD+alycnwZ6cNOqWLtC53cvBdpC5lGhY1cY+Wiu/ZHpk5n3HVeY0OVL7XtTVP1xq/DW7r\n2tpa7bvNzc1eyG9oCtTmaPby0ONoA5YxgxTYBrRWHBogR9rbPoMkMi2RdFRCTK7NnZyc1PbiIa1u\n9cXfrL055li+05yWHnXn5vze3l51Dr700kv1cwYnrKysVOhgbW2t9l1o59Soj4+Pq/NxZ2dngbJH\nTvdwOKx1X3aavJ+iQwuCjm6OZVod1PpD0yV3nDRVOi3dSuf4zFIosL3X1tZ6YfnxjmhDOt3j3Xzu\n1tZWLetb3vIWveUtb5E0dz5G23Nev/e979V73/veauX82q/9mjJ5kkX7JUn/USllIOlY0o9J+oSk\nfUk/I+mXJf20pI8se8A73vEOHR0d9TzLvuCtrPRPmuYi4qR2DkgPwZb6C4DnzfBwbs8Z4JiiL5js\nJGK3Un/xW8Z0ITxDiCabrBkuykU7W7CyABHnFXve34ODg3Rj4z0cqDHgxuNxNWVjcDrOnLF1MvYI\n24N9n8EE3BROY5/4e9kfvpA6k8U3XG7CznTyUH1uqNyos4Av5+A7ZMGyBMwYbS/NN9bILXLjxo26\nobLsDCHn4hhBN7Ehs7+2trZ63zuf2fHgeAfZQo6Rs0zT6bQ37wi/OXTKjY8waubTIfxXSqnc5xin\njK/I+OWEwZhfZWtrqz6D61SUhXM/oKALFy5UHvbTTz9dN0EG5XBtyRSnTJ4E0/54KeXXJH1K0uTh\n3/9F0kVJv1pK+VlJX5X0occ9q0mTJk2avD55IvZI13V/S9Lfssv3NIdOHis/8AM/oN3dXX3ta1+T\n1N+ZadYwUinjcGYnppDXmXnkufM6BBPvp6ns0YB0yMQzQvibeD9DXJ1zywx61B7oIM3yWrPc1OTJ\nLfU2oFbm3OuMIUOPu/cN+ax00tGxmlkW5NSSz0rzNtMoMkclNVLCDNTgyCrwMHRnN3h7sg2pubGP\nqa2HmZ+dpk3nuX+W+kmRJpNJ793MMBeS8ccjWvHg4KBq2t7PDKf2NqAFSS2XUb7UZB0CWllZSTX0\nuH97e7vCF4TBOCapWYbMZrPe3I+25lh1h/Th4WGt22Qy6WWe9OhEjm8SFuLv+vp6DwpitLVDfePx\nuD6fmTODe33p0iW97W1vkzTnuRNidOHaIfX55C4tIrJJkyZNzpGcSe6RF154Qffu3auOLdL/mJuC\nWFRG36MjKMMXeU92GgcdlHSMETd1pyYjD4kH09nEnZ0WgDs9uYtPp9MFLJS/pRAvC3GtKISaA9sl\ni77MHDLr6+vVMcV2ic/UoNbX16vWTMyTfGjPt02NhXXY3NxcoKjRIiNmvSwfCX0HGUWS93NMxLVM\nw19GDwxNl3lGQvh+WoXURt2HEe9lutx4J63D6JvIK3Lr1q2aqIhURGkx2RnrQMdrlH9zc7P2LR2s\nxJmpKbPc8aywPBizcHR0tEBh47z2vmHe8Xg+x6r7dyicox5fEEKKr/cD/W0sI9cktlHcd/ny5V5u\nbGnufIz5cfHixV7OEk87S7qnRx27nMmiffnyZb33ve/Vq6++Kkn68pe/3HNKujDbXtd1C789Pj6u\npgYl476ysWl60TMd37OxeHhnPJdsiMFgsOB0ZMPT2eAc7KgXM9z5IuKLtIfuuqPHHWueFCkmUyml\n17ZRr9jk9vf3FzIhzmb9ww6iLIQhsk02g3DcDOTG5yHn7v33TdAXbdadsEr8zZy4Id5eviH6whyS\ncc3pzKXzmYsUj4o7bYMgvHJ4eFgXjFu35mStV155pQd/ZGHgGbOC5eVY5+IWCw4T+Mc1Jh5zbrQ0\nH1N0TvocYDmZ9ZObVIwzLqSEMwmrkKdNp6SLp4SIz8yUSIIB+9bnKA9F2d7ero7GcH5evHhRly5d\nqnVgVkJXQAghjUajlk+7
SZMmTb5d5Ew07c3NTV2/fl0/+IM/KGmuMYSmEGYmtZ/Nzc0e6O9aEfNi\n00lHeCVkMBj0dmZyVqV+7mWmdcxCqQmJeMRivJdmlkMxNHnoVHJal7ToJPFwcGpo1E4y7ccdSVl+\nZx795PnL2V77+/tVYxiNRjUMl/xcakqEZeL7kAzyiTKEsO08yRO1UGpYtELoaGQbU+uJ8mcc/gyK\n4Xu9PP5+On5Zd7Y929ktl+l0Wtv24OCgzpuXX35ZknoUP7YbNbdszNDqpMXHcRLaK++jgzc0yv39\n/Z41Gr+j6e8Qp/cRLROHLKj9TqfTajU6lVLq86n39/frc6O/Nzc3exZErD+0sjiHwqJ/6aWXFo5c\nI9d9fX29fg7tmnmxGSlMuCeEdeR8zORMFu3ZbJ7tLU4YPj4+1ic/+UlJjzAwnvhNPJiHJxDby3Cn\nEEIiPGl8WQBHForKgU5ogAPFPc+c2PyeDAR6hVmvLCezL+QUckw5AbNrDD1nnmNOVm42fnDB0dFR\nD9+mKRp4agxYLmasLycgcWy2YdY3XHQ9HwMHv8MuhBfifgZMeXizR6FleSQyBgIhiSwPC8cqJyUX\nzIwpEnJ0dFSx3bt379ZFm4v+MhaRzxvWj7z4+H44HNbnbm1t9WCb6HPmCAlcnaeSEyqkH8P7Y9lm\nxfMgQ5jF0u+T+orV4eFhT/FxhheVtNlsVsPJY/HmvF1dXa0+BId7or1igb58+XJVYGJxHw6HPRyb\nuWCcbeM+FW7GLg0eadKkSZNzJGeiaYf589RTT0mSvud7vqfunLET3rx5s8dAyMx5Rt25syzuk/ra\nnDt9qPHFPcs+S4vaCTVxN/m5W5KnTQ0vM9dpSlIry/jXdFZkWgvfH/f4KfRks8Rfh4Wk/sk1bEOe\naOJaHDWlnZ2dnjMpnkmOMq0r9/p7/b0NqGmzz8mM4DjIHMK0Rqj1ZBYXzfnsWDiG52dZ2vx3rFt8\n73EEk8mkd5p6aLdMvESuOPuUMIHU5y2TZUHtmBx812j5W0ZJElIgu4oEAnL3oyyEQwlP+PhkYiZa\nPIRcGEXJvg8rICzB6XTag3343hDyqOP6cDis94V2PRwOa7a+q1evLiSUcsucY8qTthEezpJcUZqm\n3aRJkybnSM5E05b62tq1a9f0vve9T9Ij58n+/n5NdjMYDHoYFilg8Sxif64xeOQXf+t5SqjlkuJD\nah9x5uwzcVVe8++ddpa1Teb8I+bN3Tqju/k7ow2Ix7rWw8jG1dXV1CEXWgZpjwcHB5XmFPePRqMe\nJh71yhIJsV6O6cW1jJObRbSRXkitOISaNNuYbZil+2U0bmYFETeNMtIBS6HvI6MtRjmkR76evb29\nimPfvXu31j3wU0biXbhwofYTz3rMInp52kv018bGRk9L5Ryks17q5/xZFt3I3CSeT4R+K9JIM78C\nLeTMiXfp0qXUorp06VLvVB9/Pn0bnH/UeDm3A6uOsl66dKlq8qurq/V7+gKySOJsbJBCSUsxkzNZ\ntGNCcqBHqOezzz4raZ5QJRqYJidhiLjfA2Y4cOOeMIdoTjnbICQL1AkhFDMYDHqQSNawdChm35MT\n7kl4WBaHPDix41rGlsmSRPmi4Kbs7u5uHXCz2awmCuLgpfc+rg8Gg16gjdTP2EYTPXOq8gDTrut6\nwVPRBpygbhYz6CgLF6dMp9Pe+IlneP7qKLcHkWSbdNSHTvH4nmyKbMPiBsMxF2M5HLw3btyokMje\n3t5Cud7ylrfUNtje3u4t+uFEC1l2bBcd0sGW8DnIkHK//+DgYGHR5YYrLSoTVJY8tUK0Uyy0TCiV\nEQ8YLzAej3uMINZHWuToexswodRsNusFOcUcYUKoYNCsrq4u5BT3d2Xc+3gXlR3m4c6kwSNNmjRp\nco7kTDTt0LgymCA07e///u+vO+utW7d6IZ2uQTGfNtNzMjQ9NEBqXXR8ZRF6NOd5aguFu6
hTuiIX\n9QAADaZJREFU1KbTaZrwJ8S1+8wJwu8Zzp1pqrzPoy8ZXs2EUDSL+UxyakMYaUenUVxnqLJrySHu\nVOERZNT8HPKKspAfHO9gP7MNydf3JGNsr4yHzbajVRj3kO/PvqGWlznuSH3je6Ksg8GgWpiTyaSO\n29CSX3755fr5+Pi4F+krSc8880wty6uvvtrTaMPxH8+kxcSDjEmJpZMuo1NSA4zvnc4WbUHIgilM\no95ZSohSyoI1TIpl5jxk37BtMguWJ9DQoUwLJD4fHBxUTfrChQsLTk1GPbvVyL/xW843UhSjXWml\nvOnwSHQK+aSM2Zekd7/73dUs39vb63nPuVhL8wYkRus4seOXHAQOSTAghjzVjFGSQRQUToQsSICD\nc2VlpZd3xU1/Lgz0qGc4tAfHSPNBQkySE8DhEbZB13Vp7gc/NEDq509huTnxPaSYky7+H+JhwmRx\n+H3x/tgkmYmQEA79Fexbx7ypSHhwlreb580I4UKfMZnIHmFoefTz/v5+hUXiVPWDg4NeTueQuPbi\niy8u8OrjHf7erut6G4xnrSM+z4VlNBot5L9f5ocJ4bwhBMSTzJklkBklM98DNyPPHEm2DscJA3GY\nBiLa8fDwcCEoaDab1XJtbGz08rLE9fi7tbXVy63tbDFukqzjyclJ7T8u+nxvpsiFNHikSZMmTc6R\nnImmHdzcTBOOHeVtb3ub3v/+90uaaxyf+MQnJPW9zLErMqqQzgJCHr6zh2RME2qBNL2jfMtC1z0M\nmBrayclJz/yL8lNTyZyGvMdDwKV8F+ezQkajUc/ZxrZx08u1RSaXimvUIqmVBAc42AzULGluh0lJ\nTq07S7MkT+ynzJmVZRfMtExCLR4FG0KNNBtrmXbM8mb3L+PQU6snPBIMqsg9f+fOnR7sE31D52dc\n297ertozI4k5tuhUz6w7jm/OC7c2efwaoRS2CR2zDkPQUck4gc3Nzep4ZbsTEvG0EGxDQguMfqSz\nNdpgOBwuWFeEMTY2Nuq43d7e7jkg4/4QEhOYnzwjVBweHi4wdwgX0TLO5EwW7cCJnO5FGQ6H9Wie\nF154QV/+8pclzc3DqCDzFrDDnEJD73+2GGTvj3JmZmCGUXHRjoWDnUjTnmYiBz3NJYd4HDbwDYAD\ngp7+DKPlwsHzInmiN/NjxLt4hmQIqWQ04wJKmU6neuaZZ2pdHIPl4smNjewMQkCE1OjHiGvsGy83\n23OZT4WYPP0BnjqYWG7Xdb3+8YnPcZLRMQnf0FS+ceNGxa95vFsID40gNBZtPBqNem0U72Aif8IE\nGdyX+QWeeuqpHiMj3puxl+L+o6OjXpZBnyOuPJBF4VAg+46/zY61I/TEjSPaczgc9tol3kEljYty\nLNSDwaAqJqwX29BpuXxulDPa0FNJsNwMWsukwSNNmjRpco7kTDTtCEPNwrm5W4cW8F3f9V36nu/5\nHknzfMHhoHQnWPx1DScLIInvXYPhTslyhTB8mtoBvcyZxkznYqYFU6vJAmoI27AeGdeYpiwTQ4W4\ng8mz/HFnzxxyrAvziDMggHxXamvxfTicvX2zUHm2UXbga+ac9DiALCsinUJubVAry9gl1CypYS1z\nerIs3v+0fA4ODqrzkSerM/sc68D+j+cz42W0EZ3itLgIq8Rc4D0hZHHQCmHfhJCnvSwBEllA8X5a\ndzzIwRNwjcfjqqFnc4Ljk6wpwjLxTB71Ross7rlw4UKPjx2W2HA4rJ/JwKHz0ccBrWmGz29sbCwE\n6tBqzZhUlKZpN2nSpMk5kjMLY6f2m2m00qOd9erVq/pTf+pPSZo7KL/4xS9Kkl577TVJfQyWO3Ps\ncExGc3x83ItUit2ZOzeTT3nkF3d5Os4YlsodNuOIhrjWTivjtARHGd2N2gU1R+KA5I9Tg3fnDLFS\n1pGaUuB8V69erbzfo6OjBcx3e3u7px2HZhf3kCZFbI
+aclZW53RHueggi/eynTMfAS0LvpM48Wl0\nNlIseTJN5qTmqS3swyjr8fFxz/lIWmu0MS0j72emeaBzmuOLfGZG4pFuFkKLLotGjb/j8bgX3UtN\nN8Qddd4GDI3nAbwMtY+/tAo9ZJ6HLtP/c+nSpWqxMDEZx1SUMa4NBoMeDzvuIw+b91OT9jnM+Ar6\nb8jvpnZNf9VpmvaZLNrBa+UiFBXMOnMwGOi7v/u7Jc2DB97+9rdLmicil+aQSXiYb9++veBMoNle\nSunxjR0+oNeWphWZClz8okMz5wsJ8lxQOJE42djhPim4KDOXbwbFcIHmok2n52kbJt+VQQPM3Pfy\nyy/XNrpy5Uo18+K9d+7c6eWUiEnDZPAh29vbPUZGvINn+NEcdtYLIQuyS7gAk8HA+vrmSt472Qgc\nG2RAEE5yqI0cZy5i7KMYf/fu3avOx5s3b9bNjewBwmxeH57mTmf/yclJ7RvPcR7lcibV8fFxrTfP\nO51Op7256/XhospxFG3Lgwm4oFFR4FxxR6QfnBH3xcZGOIHzjqHhWfg825mZ++K5pTw6LIOsFMKS\nrJfHZXBME/6gUsk5nDGdMjkTeOTjH//4Wbzm21I+//nPv9lFOLcSFlqTr1/iEO4mX7984QtfeEOf\nfyaa9u/93u9VuEPq71o0HbmD8fSHcGK95S1vkSS94x3v0Je+9CVJc9MqBlgWjk7NktpHiEd7ecTk\n+vp6z/nDndMj+EgPzHjaNPcZIUUNklpq13X60pe+pPe85z0LWibhBNceTpNSygKcFM+I9zpMQToe\nTdG1tbWFrHKz2azmf37qqaeqBsOITXK26UB1xxnrRcdWFkVJR2LwqP/4j/9Y73nPexZ+y/ERzxqN\nRr0UB34aDJNXeXZB14qo+WWc78lk0svi98orr9TP/tu1tbWU4kiLKEu5wDpEe4fVE+IZL0t5lGjt\nzp07NVf0ZDLpHTEX7cb+Yni6l5VUWvYnIRGnWHq94l1MSEaNlg5Bxje4A5SJnaS+QzfaKn67sbHR\nS0Hg1F+PJI7rn/vc5/Sud72rB8PxWbSCafmT2HDafD6zMHapP8G8ATxfBKEB8kWleSBHcCb5/Aj9\nJXZ9eHjYW3A83Htzc7PCJ/T0E9Mk7sRyx7M8JWPcn0EOWY4EwiMMHY7viZty0HPAOO/TeceZEK/l\nZ8/bwUWKmOLdu3fr4hITjMEe5BBzg4pNdmtrawGjZVk4KQgnLbvH8UEumITnCG847BSSMX+oFGTl\nCuEC7/lmpLmicfv2bUlzqI/YrMOGPKmcz8hYRhzfFy9erP2QBTDRn8Bne72jDjFHOF/j82QyWYDJ\nOE6ysH9u2K5kObREuIApYQkRMRsfF11nVTHd6qVLlxYCZQaDQe84Ma5ZGQ+fm7rHi0ynjw5hGAwG\nvc2C7RBtxLHypsMjTZo0adLkmyMl845/U19Qyhv7giZNmjT5NpWu6xZoJG/4ot2kSZMmTb550uCR\nJk2aNDlH0hbtJk2aNDlH8oYu2qWUv1hKebGU8vlSys+/ke/6dpBSyldKKf+ulPKpUsrHH167Wkr5\n9VLK50op/3cp5fKbXc5vBSml/KNSys1Syr/HtaVtVUr5xVLKF0opf1RK+QtvTqm/NWRJ2/3NUsor\npZT/7+G/v4jvWts9lFLKs6WU3yqlfKaU8oellL/28PrZjb2gsX2z/2m+IXxR0nOS1iX9gaT3vFHv\n+3b4J+nLkq7atV+W9D88/Pzzkv72m13Ob4V/kn5Y0vsl/fvHtZWk75P0Kc0prs8/HJflza7Dt1jb\n/U1JfyP57Xtb2/Xa462S3v/w87akz0l6z1mOvTdS0/7Tkr7Qdd1Xu66bSPpnkj74Br7v20GKFq2f\nD0r68MPPH5b0k2daom9R6brudyTdt8vL2uonJP2zrutOuq77iqQvaD4+vyNlSdtJ8/Hn8kG1tqvS\ndd2Nruv+4OHnfU
l/JOlZneHYeyMX7XdIehn/f+XhtSbLpZP0/5RSPlFK+S8eXrvedd1NaT5gJD3z\nppXuW1+eWdJWPhZfVRuLmfxcKeUPSin/EOZ9a7slUkp5XnOL5Xe1fJ5+09uvOSK/teSHuq77k5J+\nXNJfLaX8Gc0XckrjaD65tLZ6cvmfJb2r67r3S7oh6e++yeX5lpZSyrakX5P01x9q3Gc2T9/IRftV\nSe/E/599eK3JEum67msP/96W9H9obkbdLKVcl6RSylsl3XrzSvgtL8va6lVJ34XftbFo0nXd7e4h\nCCvpH+iRCd/azqSUsqb5gv1Puq77yMPLZzb23shF+xOS/kQp5blSyoakn5L00TfwfedaSilbD3dv\nlVIuSPoLkv5Q8zb7mYc/+2lJH0kf8J0pRX0cdllbfVTST5VSNkop3y3pT0j6Tk892Wu7hwtNyH8q\n6dMPP7e2W5R/LOmzXdf9fVw7s7H3hiWM6rpuWkr5OUm/rvnm8I+6rvujN+p93wZyXdK/fBj2vybp\nn3Zd9+ullE9K+tVSys9K+qqkD72ZhfxWkVLK/y7pz0m6Vkp5SXP2w9+W9M+9rbqu+2wp5VclfVbS\nRNJ/Ba3yO06WtN2PllLeL2km6SuS/kuptZ1LKeWHJP1lSX9YSvmU5jDIL2nOHlmYp29E+7Uw9iZN\nmjQ5R9IckU2aNGlyjqQt2k2aNGlyjqQt2k2aNGlyjqQt2k2aNGlyjqQt2k2aNGlyjqQt2k2aNGly\njqQt2k2aNGlyjqQt2k2aNGlyjuT/B5o356YoiwKpAAAAAElFTkSuQmCC\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "show(transforms.Compose([\n", - " transforms.ToPILImage(),\n", - " transforms.ToTensor(),\n", - "])(img))" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch.Size([3, 256, 542])\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXQAAAC/CAYAAADuOyeQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvV2sbdlV5zfWPmeffT7uvVU2pGyE7TISDaaRWs6LeWgE\nNmnlQ2o1ch6sTkcRxEoUgVpCCg+N+wUlikSIlEYhUktRhyA6ogUkkEB4aLktxEcLAt1RghK5Gyyb\ncuGyq1xVt87n3vt8rjzc+1/nt//nP9c+dlE5t50zpa2991przjnmmGP8x5hjfqyu7/u6T/fpPt2n\n+/SvfprcNQH36T7dp/t0n/5i0j2g36f7dJ/u0zdIugf0+3Sf7tN9+gZJ94B+n+7TfbpP3yDpHtDv\n0326T/fpGyTdA/p9uk/36T59g6R3DNC7rvu3u677l13X/WnXdX/nnarnPt2n+3Sf7tOT1L0T69C7\nrptU1Z9W1b9RVV+uqn9WVX+z7/t/+Rde2X26T/fpPt2nqnrnPPSPVNXn+r7/Yt/351X1S1X1g+9Q\nXffpPt2n+3Sf6p0D9G+tqj/H/y89vXaf7tN9uk/36R1K95Oi9+k+3af79A2SNt+hcl+pqg/g//ue\nXhtS13X3h8jcp/t0n+7T15H6vu/S9XcK0P9ZVX1713UvVtVXqupvVtW/5w999KMfrR/4gR+oq6ur\n6vu+uq6ri4uLurq6qq7rquue0Hx1dTU8M5lMquu66vt++OgefzP/crkcytC1rutqNpvVZDKpjY2N\n6rpu+L2xsTHc07WquvFs13W1sbFRk8mkNLmsa8qjtLGxUX3f16/92q/Vxz/+8SHP5eVlXV5eVlUN\nbWc5ao/yez3Ku7n5pCtVZtd1K9dYLq/rI56Sl2rzZDKpy8vLlbqZ2B9+X/X+5m/+Zn384x9fuea/\n1Wdqt/imxD5WUj9MJquDTbbN+aJnu66r6XRa29vb9fDhw9ra2qrZbFbT6bQ2NzeHvtWHckUZ8KT7\nl5eX9bM/+7P1Iz/yI3V+fl6Xl5e1WCzq7Oys9vf3a39/vxaLRb322mu1XC7r5ORkoPny8nKFTsnU\n+fl5bWxs1HQ6Hernh7ojesmjyWRSV1dXdXFxMfBfekVZVn+z7Syn67o6OztbkU/p2GQyqd/7vd+r\nj370owNPLi4uhrxciJFo3dzcrIuLizo7Oxv+U8ao27quvnUZuLy8HORla2trkOUkM2wDy9a1ra2t\nAaPIc/Hh93//9+t7v/d76/T0tLa2tmK79RG/Nzc3a2trqx49elTf9E3fNPCb5fd9X+fn53V2dlaP\nHz+uH//xH78hc0rvCKD3fX/Zdd3frqpP15Owzs/1ff8v3om67tN9uk/36T49Se+Uh1593//jqvrO\nd6r8+3Sf7tN9uk+r6U4nRT/4wQ/eZfV3kj70oQ/dNQl3kr7jO77jrkm4k/SRj3zkrkm4k/SBD3xg\n/UPfYOnFF1+8axLuFtC/7du+7S6rv5N0D+j//0rf8z3fc9ck3El6FsDt/+v0LLT5HQu53CZxworf\n/ruqVial+AwnvvhcmiThhAzzagKKkySceNPEXGtXrU8IpglCTjJy8lHXOBHj32kSUhNgyscJ38QX\n/vaJz9bEXuKFl6f2Om0+2ao8aTLKyydtbFeijxPniXanl5Oq7AtNnvEjOp2XXm6ajCMdnFjU5OZ0\nOq3pdFoXFxc1m80GGdTEuNcvPnDiUs/wOZc3p8vbnnRCtCfZIy18riVrSbeVOLnc6h+1jfpC3iZ+\n32b3O9vhbUg67DLr9ZAe6jNlPpVJ+V0ulyuTopTN5XJZy+WyHj9+PNquZwrQ072qm8pLwPKZbwGd\nl9kyBGnmnECf6GmBlZetejmLzw50QSSNrTazTAJ6AtiWkqldTpPT4O1KtDiPHdBdIR0oyMdUvhtr\n1sP2clWIJy+PdUppCOoqj4Y88SrJaEtuucpEKxs2NzeH31oNk
eSHBlirrLxfyG/S6zS1+jwZ+dRH\nLI9laVVMC9xbNDgt7uis0+mxvhnDFfJrDNDHZM77Wb99RYvnVdsI6AJtX2l0fn5eFxcXdXx8XMvl\nst58880bbWK6c0CnZ+WCWHUTYNz6uUDrOu+3Ovfi4qImk0lNp9MbAksB0hIv1e2CwyV2LcFS+RQQ\npyt5t0xjXgm9NQqPL9dUOxLI+e9Uj/932glebsxIZ6uPRDPLdD6zTO8r0ui0pT6T0pyenlbf9wPI\n6p63K/XLWF3ihWRDsjabzWpvb68mk0mdnp7WZDKpxWIxGBbxiv3Ydd2wfJX9Tf7Ru3XD731Dfrh8\nsG/YFk+6zxFF4hWXPNKh4LJYtrVqdWScljKz/0k7607LnL2fWIY/R93xUTT712WA3+p/r5vOznK5\nrPPz8xUHY7FY1MnJSZ2fn9fx8XGdnp7W0dHRjT5gulNArxoPs7jlXFeOe70JpNgZEqK0npgKQsWm\n4Oo59w69Q0m/8idvND3v/PCUhEnXZICurq5qc3NzxfNpAffXm5LH5YJPnrYMiK5R6D2s5Hwa88QS\nLW64Ly8vB+PuozLv79vywelie+SlT6fTury8HDz02WxWy+VyeP7s7Kyq6sZIgQDn9VC+UsiG17yN\nY3KbeCmv3NupsqQXBNh1Mq57/iyvJQekZWhTyIY0JsxIIUbiSXIyVYdf02/fqyIaRPvFxcWwJ+Xi\n4mLwyo+OjgZAPzs7q9PT0ybfqp4BQGfDUiyVYYWqDL4JSBSP5P2qGjbjnJ+f37DcLuTKJ4VyD0Ee\nhuKi9J6cJtKlOlg2jYb4oWG4nqfXocSNHfyvMunJ0utwRRTgt4R5zLDSqOk5N3ouzB4q4m/l7/sn\nGyq8LO+bNHLyZ8lzyhs3ssnwKcbNZxWOUX1jiTxnmwnmm5ubNZvNqqpqb2+vqqp2d3fr7Oyszs7O\nhk1Gys9yqlb1gA6Fgz49ftFEo+X9yw1F1D13QqqeyIyeT6NclaHf5HFVrci+g63rpTbqtEKVbtyp\nL36P4T8fVVCPq65H8f5sMirEGD5DLGA+ydTFxUWdnp7WycnJsPlMo8blcrkC5MK0VrrzkIv/J+PU\n0dwl2bLWfo1CPJ1Ob3QqlUOW0Yd9FEKWS6ViCCGBuYO6C6N7By7YKZEegoWSGxbftUpvLXk2Trcb\nEwdhKg1pE4+8zQRZL0/0u+FkfvItzXO4opG/bCNBS8NdAXrLoKcY9boJM58U7bpuiJ133bUnvlwu\n6/T0tLquq9PT0xWj5+EV1pNkKNHPtrvBaekVUzIEah/BlSEV8kn/0wjV+zbF0hNN7sjRKUkOlcrn\ndQdq5k36NeacqG+pn9Qz0iAgPz8/r5OTk9rf36+Li4sB0M/OzgaPXbjTcqqU7txDV1JHUJl9Jt47\nSyl5lXyulZ+goW91hgucKyj/O6irfBdmPst2+28KqK57e5OweF6GlJIHm9rJ615n8jSSYV2X3GtJ\nxs3b479Vl8e5CWatur2NyiNv6eLiYuXYhRR2WdfOZCirVldraXSn8IsmSuUlujFszX20gFP1ucfr\n8kdQcrBPbeY96iufawF1knfVq2vkO3mWaEq64uDpvOfzLIeyQJBP7U8y6g5WSxZVz+XlZZ2dndX5\n+fmwikUgr5Fa1fXRJy2ZZrpTQHePj7FCMkvg5BOo9D4ImMrHemQRfTik6ypLQywpNO+xLIIJPWLR\nIvrcK3GBS0LGPMkjIQ+qridd1P4U7kgeiYMwadZz3g+tfG7I9DvxiPnS3IPLh/dpih8TlPgs+y8Z\nB4YFdH6IwgcEV49js5wWeDtf2SaNGieTyRB62d3drYcPH9bGxkadnZ0N/JJhZgjNQ3f87c7KOqPo\n9LlBEG91TefIiB6FfJKBIVC6vCd+pfCqOw1ufNwIcXECaU+hUI9/u9HyunlP+XjmD/mh9ia8IYgr\nxKaQy+np6RAxYLuS4+rpT
jcWuRVteYu65mCQPEkvS6nVoVV1o1z3ImQh0/WUb8wTSe130EwK4c87\n3yRQref4PwkZ4+r+SX3FtrW8J7bd86d8rpi6lvjZ8pCcDrYrtSGBBUNwPt8w1tZWch7IwOijmLq8\n9K2trZpOpwNQEJxT/QnQWXcKW7UAyhONh5JkLTkGLUBnXs+zjldJl8ccHc/bkg+/5zru5Xpy/aBe\npfySrfPz8yGkot/8UO5SnWPpTj1095poiWiJ5Q1wA1Dfr67JFRNo1TmM7vt+WFVQtTqho+fOzs5q\nNpsNwkj63JPRtaTw7llUrU6MufekiVq1V/VKAFxRqQycbyAPVW4CQ5W3ubkZQcK9tZZn4DSpr9gn\nnERsDWOVl32TJnsZQqJHKB5pYlfXkoypfioMvVgNg1XedDodymH/i7/OUzoNiU8ESOWfzWbVdd2g\n0PLQl8vl4KX55CZlyGPANFKUgxY4utHkiI+6Jdo1MceYuhuNrltd5aKyPDbO/H6NckR9pt6nyfUU\nHmwZBd3zOLeePz8/r5Soz2pj4jP7Tqe+KqRyfHxc8/l8mBBdLBZD+Rw5SM/TghBPdwroaagmxvhQ\n0b1Pt/RJiQiSusdhUVIyXk/eA4dBFBgakzHAHZtAc6BzwBwDabaTxsWNR/LmSH/iK4fJXq//dq+F\nfGAbuLLB+Uyl59I4rUxKSs9vX6GRZIR8IQ2aIBVwyPBrdQrlbx0vUn38VnkyGtvb23V2dlZd19Vi\nsajd3d2aTCY1n89vOAPuYLDvvB+8b5wW5fcRJ/PQKxdvHVyTwVAZlAW2QW2SXvkSYMmmH1stmp0f\n3hecb0p9kgxC4hH1yg2e67pW8VCWtGqFSxCPj4+HMIu8ddfRpI9j6c5XubgyO/BWra7K0DNk8Lry\nHehdSHSdcUGl1kSU7umavp2u5AElOtn2BAjJe3B6WtdTHbzm91t9Mka/K+pY8jZ58hGBy4F7Xet4\n5DS1gEfPSQY0QUrvkAqc2jymcA4m9PA4QcodpO6Vr5sgTNdbfHa9SLLLdrU8xBagJ3oSLWNy4/3m\n+Vr93HIwUt4x2U/1uAPo+u8Gg2E8AbuWImpDm5fjfHKj0Up3CuiaGCITW6sWqsbjuPTM3KvQM0qt\nGKBCM6qLs8sMyVC5qq4nRvTb7+l5tUOeWWv4ycQhl3hBT1c0O00qU3kYBqJhZNgneTuigfznMN/7\nx5NPOum3h028H3Vdz/Z9P3iyDAGQLuZJXhk9bXpPzjeVoUnSquv1yPT8U/3Jg6Qcsb+VT5Ow3B/x\n8OHDOjs7q42NjSH0ohcpKD9lnRP43seUA8qCe9zsh8QXttuXlnq5VavhCnmtyuv9OJvNhjZQBt0h\n8pFoAmUPV7g8EkfUftHj7aP++ijb+aPRs0ZZqlsTncvlcmX3p3aHOg9TaDLpd0p3vmyRneSgpUQF\nZ1jDwbvrupVJBW88FU+gWlU3FIPgS2V0ul2BGfflPbe0FH7S5mXrOjs5eak+9OSQz/OLnx6TptKQ\nHvd4vO1uyJxP7FPn31g+KnQyzF5uGn67YdJ/huLc4JAOPxJA9x1MGE5yQ0jQ9285L5LFy8vL2t7e\nrslkMih81z0Jv2i47vFh1e9hAA8BulEmkHmfJGeKoT7yOtGhslKbNzc3V/Q5yb2DOelN9fp/N6Kt\nj7fVDZLut5xKtYdhG+mVNqxp8lPrzBUvV9t0MJs2DrkzkqITY+mZCLm4INELoheh/3rGvTF2hO+O\nc+BLnjv/kzb3cJl4LYEIBUYpeT98lpNN7klw2NXyjP1+a6SSDFUrJeVT3Qkc1pVLQPcDp1K9Lb6w\nXt9Aw3qUaLxVjhsPXaPHrCWvWsrIuhMotYx1uiaAm0yulzPqNEa9Fs839bAMgblA13deJgeB18cc\nHzopbJ9+J6CkTjoAjo1Ek0fdypf0qtUPLRD35PME7gyMOSwcAQnIJT8Kr8gw69V6kidNwpN
u9lFq\nayvduYcuQZFFc2t4dXU1DEkJ9A7iBF6VpxUWnCXXSgjfUu6eBUGQCqI6tBWZnpF7asmqsxzdEx/Y\naVIiV/xUFsth2bqeAJ2K7QpOAeXz7vEIPHwVBPtlbPTlRralZOxzttHByEcX5Bl/u6I7AKssArrk\nZWtr60b4xEdcqR3JsJAn9Pa6rqudnZ169OhRTafT4fAuTaSxb1WWQCTxTfW3nCHKLEdAHhpzr9dB\nnv3kXraSVvF03fVksPOHoSOWlwyGj3TFFwdfdxZdrhN4S7bIN/0n3ii/5lwUXjk/P6/FYlHL5bIO\nDg7q6OhoCJ1NJpPhgDatBmN7W3q7Lt05oFMJ3JMYeyaBmVtR/eZwJeVnOS1l1Me3NbNep1v30vXb\nJgfvFo23rYNAlLwXp7tVvvOyVXdq/xi/Ul3u/bfoW1dOS/ETffqmwSIY3KbtY2lMVieTm4d36XuM\nz4mWsVGE60FqV3IeyD+vs9Uvrt9+fV1K9XjfOSa0ym7hB8twZ0cgrnusX+AuQOcacwH6YrFYOYBL\nsuXGyelKo9+xdKeAzsZworDqSWPEgJZHwXKUR98+WaHhzdHR0Q2v31cS0JrruhitN4dXra6IqLq5\ntNK9QLf8XFfPQ4CSd5AmvFg347G6Rg+KS6LEJ4+5jgEV83k/pN238giTAfS+UWJef2s6PUA+y5EB\nJ9RJs7fH4+9ePtutSTqdgqgYML1R7mlQf7acDCb2Az1gtX97e7u67skE6XQ6HQBCIweVwWG/Yv3O\nL4ZldJ/HG1A2uaFJIMUyJPfcY8C+Sx608vneET6jj+/K9glD6mMyTq5n0nEep8D+oD6RB26w9M3l\nq9wMJNCez+fDcbcHBwfDXIzAnCuntKgh6Z/q9P0ZY+nOt/5X5Tg2hYyekpSmKg+FOInjHV9VQzxS\nxiIpAYWtanXzBu+5ZR0TMhd+go3CQ6qfk1wuTGqv08jtzglEqDQEWXqFDmgOmt5vKrc1yaX7qUzR\nwhMleZ+nYbqRJYgLlDzM5PW5J5pkTXk9jKdwhgym5IUbmbysJHstYNc9GmP172QyGcIUWpvOMz58\nRyvlJ8WyW2DIRDlw45Seq6ob+lK1uoqNMqsyp9Pp0AbqCYG16uYGpATges69XBkRPyPKnSIZklZ7\n1T8uFwyvnJycDBOgh4eHdXZ2VkdHR02jIyND/PJ+YXzdeZHSMxFySQqWEhXjNuWmvOxwMZKWjx6d\n8hHQGDdLtKf8NEKp7f6s7nkbCGpJmMf4khSadboxcN61vBWns+UVp/JIv9/3eL3zTNfdmKayEpi7\nEWH5qQw5AXIE0ua3VmrxwRPbpBFl3/fDq+p0JAA9O29jiw8qv2XYnF6Xq1Y/jbUzGQ8+x75LTgh5\n4vW1RjypX1sy2srfaq/PA1EmfH25QJ4j4xSr93i+85NGmY5lK70tQO+67qWqOqiqq6o67/v+I13X\nvauqfrmqXqyql6rqE33fH6T8PoSi18ZOp2UlWGgilbvMWh2hcmXxZJFlebV0iJ5u1bUyqxxd8yNV\nlccPyaIB8c70VRuuAGk7P+twT1sCoGukgclXgzgtnp/C5EcZE9jpPXu7eJ98IC3qd8rF5eXlynGk\nKTm4iwZ5XIx/63kHF7aBPBDvJGvT6XSQIdVHz45yQ3n1CXKCWQpB9f31yFHhpwcPHtSjR49qPp8P\nrybz1TcCGdcf6kfqHw97iDbKk0+qkk9yiq6urlctUX5Upuuo+okypclSB3l+syzV7wCpMlpOV9Xq\nsSKqlzJMPFCYS9jDc8z5fXBwMOwAJQ94HIXK4WS76xB5QnrH0ts9nOuqqj7a9/2/3vf9R55e+4mq\n+kzf999ZVb9VVZ9qZXagG/O0Usfy3m1S6nCn5TYC4fl80syHhVSUljKlstmppIVAsM6rGeMPhSN5\nCYlup8EFzvO4d5X40KKbtLW8s7FykzyN1d/KJ7lR6EV
gyjDgOr6kOrwtTh8nSAXafnhXS/FTeyj/\n3me34QNpTPM9+u0GzmVqjF46Wut4mWSSPEx0E0N8ziv1gd+TA8kDtvTRefZ+wBZxwEOQ3o50z+dl\nxtLbDbl0ddMo/GBVff/T379QVb9dT0D+RnJr3WqQg5qSBIfDT3k+/rYiB3OPk6oOPz6Xnhi9G/2X\nUmtVgh/RKxo9xi0axoa07i2xXvdsPIasZ1mGC7Q+9KxUpuhzxeL8gXudyeiqfxj39qE2+5N10LOs\nup6M4uFdzEvPraq9sSfFWVvJwXCxWFTXdSvzHjpO1kEueVWig54b+5YTeQLz7e3tqnoy/6PzXfhS\nDIVgGHv2eLz3lepQn3N+ymXOZcrndrquW1lbzaXIejZNjnMxAuvhTl53eAiMbAd11eeqWLbaJlrl\nIbf6zp2Sq6snO8Y1Sjo8PBzCK2+99daKwee8xnQ6vbEblu2Q3HrEgqPTMQdB6e0Cel9V/6Trusuq\n+u/6vv/vq+o9fd+/9pQBr3Zd90Irs69w0DcnK8V0DlcoEBIwNdbBouWVuPEQQKosDltJI4FEgMH1\ntK7Ivha+NUmo51OM3wFI+QXGare8BwkFQzNsr08QsTwXmrERRzptMAFBMgy6zlCP8nGykXzyzWLe\nP2wf62LIzcHNh7C8TyOk/33fD283Ur9r5QsPkErzLk6rt8FlUcsWr66uhjXLFxcXtbm5Oax22d7e\nXjmlj33FMiVDNCAEEhpB6iUNK8OJ0jvuQfC6q2owPGy7QknOc/FIu3MZQtM9D8dxD4g7KSp7zLNN\nCw8o4wRlhbPm83ktFouVExN5+Nbp6enKWfoqV0cCEMME2hsbG8N+A9bnjpjzztPbBfS/2vf9V7qu\n+9eq6tNd1/1JPQF5plGT4p5n1c1h2VgSgzxG6oDpAu6g5XWvSy36kgC1wKdV5ttJboBcWHmvlW+s\n3NvS5+3lqguWk+pt9cFteMj8CSi9Da4wySslGFbVikfsG6vGnIbbJPd8BWDyuvVCaYZeBIA0Xi2+\nJjng6I488f9uYN1A87rq038af7bR9T/Rk/iS7pFez9N6nuUlnoke9bmvNdeHHnnLeeJEP5/hJ6Wx\ne57eFqD3ff+Vp9+vd133v1bVR6rqta7r3tP3/Wtd1723qr7ayv9bv/VbQ8NefPHFevHFFwerpqE6\nd5a55ZTn9ODBg+HkMloyeR18a41WCWioJW9ciQqUPPuqVa+TySfjWqEFj12zc6+urgaPzwGiZaRI\nExWEw3bW594LvQL3LFVeKtsFjffdi6J3R8PrO+SYv+rmaXYtoBbv3Oumt+4eJssjD9hPzlfFTyeT\nyXD+hrbpqz6Nkkhby6g7oFDZRQtHWjs7O1X1ROYVehG4XFxcDPcdHCUHbCe/tRyT9XNCWXpDurkC\nIxlFBy3pmniqXbfyRjnaZH6GUVhWctz4jK9n5/Np3opLizkvJuOtd30ul8s6OTmp5XJZ8/m85vP5\nMALn6qSqGkKxm5ubNZ/PVzxv9jMxx8NxWtfuCw5S+roBveu63aqa9H1/3HXdXlX9m1X1n1XVb1TV\nD1fVT1fVD1XVr7fK+NjHPnbD0q/zAJMHRYDQM+yQlJ9KlmLkrsjMT2+stepDzxJsPHmMN4VjSKtS\nmlgZ41fyHll+67rzSf/ZHrYh8UrPJMOU6tPzfuxBCxBT8rDYOpkaSwn8dN2XMoovaRWR09DyBv0Z\nhXIkawIIfTi012jBjQRB8+3wIelbkl+uVqKOVV2H09xLpfFMBrtqNaySnCSXUedlSq1wJtvtyxJP\nTk5qPp8PxtT3AYg+xdvpLNGBUD5598QItkWrm3Tt1VdfbfbT2/HQ31NV/0vXdf3Tcn6x7/tPd133\nz6vqV7qu+2RVfbGqPjFWSPJydT0NT9xLEMMpJFI2ChOZ7GUxZKPnKIyMEbIcKhtHDFWrSkxPIXls\nFGQKY5o4cy9D7Uw
GQ3TIY3RQdE8wATc9MdWp++Sxe0EEPgoy+9yNOHnr8XmWl7xB5nfvx/nmz7Jc\nL7OqVhSW13ScLdeHczTiAOj8S3UxeayVMrq7u1tVVTs7O8M7SXl+u4982Mcuj9527yuXV9HhbfX6\nNEpm/4tPLrMeKqJDRVn0ZcEESO9rl+WWDrUcPxlBvVXo6OioTk9P6/Hjx8MLnQX2PlrR/9PT07q4\nuBhGcGoDQd4Xdei+yyzloZW+bkDv+/7PqurD4frjqvprX0M5TSKT5+jg3Pf9sPKgajX2qPw+0UmB\nEwO1Rp0HHFGBOHQlTRwiEfxJo1bOeJjGDQAFkhNrrizclUfg6rqbu9mSV9XyBNNKB283+ewKTiPH\nNqYRlIN81equPZ53oWc4GiLt3j5XUueVG2ryz3kjpabnq/xnZ2d1efnk1WI8JZF9zQ1I3t8pudfK\n9gq4CegPHz4cjtk9PT1d8cYpO+4YcKKX/3kchRs8jsrUNwpXbm1tDUaE4CW+MPxJQFP5klvxejKZ\n3OAlwxUKn5JGDwu6fLnM8D9DHgJp9e3R0VEtFovh+Nujo6NhYlz960dz0HArnKXndcJi13XDHgOB\nvxLzcz9Fag/TnW/9T8rp19iIdRZKHckVFO4F8lkPe3jdHrtzL69qFQySgXIAcU9kjD/Mn7ymxCc+\n7zTrfrrmvxOvx/jvvE31tHid+ieV6XnH5KdVR3q+RZcDW8pLo+3xeZeNdQqZ2u/yIiXn8brn5+fD\neS9V+cRNN36873zy/0n2XA6pe62yUn/y+STHY0a8JbdJ7m+DI/SaeXKi1phrVEbg1odLlH2VHOVA\nIRanubUSijLl4TxPdw7o/E1PmPfpmfmuTaXkqXrs05nFNdWqh56Ney2cNBF9VFrV456C6FLSBBXr\nSKDhEzdqA9drU4nIw+RpJ8Bz5UgezhiAprLozRGQ+Ell+8QkPVy2IXm6BJKWwXKjQaD1NvA/J3g9\nDKbQi+Rta2tr5YUVUsKWU+F0pDbQqZDscPfocrkcHBgdPudhAPdy6Zkno5UAhs9Wre54pNzxftI9\nb6+HTqgzlAd6633frxxQ5mDtZVHWWoZVfNM6cy1LPDw8rPl8PoRZ1KfqF2GDRinT6XTYHyB6FHbx\nDUec1+DoWjTyGeHaWLrz0xar2jFV/new8uGHCyOBmZbcwcPLctDTs7TITARSB2cHWyUKVsubUDk+\nVE58UZkJNJ2nHqt3GlL7+Z9gTD46bYkuKiavOd3sBwo/DSzXH7sxT3Q4n90Y8RlvM/strY5QmECe\nGecs0twmxUuYAAAgAElEQVQKeeC0OS/JQ3qDPOdFB87NZrNhy7n3l8ecx4wr6yE4Ug+cvhbN3ib2\naXI4nB6W6XQmY55GB24IvBzJJZclyhvXenMBMjdNcSJcmCOaGL5k6JZy77iTDJAn56WnO39jUQKQ\nZEGTIuo/78mCOqAruRFx4Uj0OQiyTllnB7CWErkie/taKRk90kBlcyBPQ2y20fmaeMG2tGj3YSQB\ngvTTayPAkefOz1TPGM9SG1pG23nLMpKsuZHU7sCqJ5tHdMSye78OQt6GZGR4j/MTs9msJpNJ7e7u\nDuEWeuja0EOg8bYkg8W6CN7uuHTd9TtaXR9czr091CuXzzFd9Gf4rOul5/G+V99wJCMAPz09HXaA\nHh4eDpu4Hjx4MNB9eHg4zB/4u4gVXyefeahbOqUz8SvhxTqcuFNA50SEUgJNpnTOgyznZDIZXqhL\nrzwpLK26/qeYHAVWHaNOoccuxdEyMr9O2pNws+1V7ZUnvMd8virE+ekjCz3L364kFKYEAK3y6IG4\nB8/r3s9+eJR7g7qezoUeA4jb0k/jn0DfAZf8UmxVcW3xT+1i2ETXPB6a5IBtp7yJzu3t7UHuTk9P\na29vr7quG87fdiMruuglU6dcb3S/5VFqpQvzOY0uf6nvEhCLb1zqyIlZn+h3XdWH3
rIbID6zWCzq\n8PCwTk9Pa39/f3ihs55Xn81ms5XjEhxX9JzT4eEn3tf+hhbu3TbduYfucbeq7Hk54LhipXghy1Oe\nFHtvKXoSMhfWqtWD8V3wlY/XfI0160oAruutjiaIUbCSJ6vynb4USvDy1gkaeStwbXneKbXobNHk\ndCXe3QbEW/T5qMJpZOJEGidIx+ZIvE3r5NA9ZgKy1qdrJ6lWgHCo3ypX9Lf433IIWqNNynxqY3qW\nyU9bbPGI3nai20NGqRyNZhhm0ZpzvpRCeXgevgyMAzZ57oa8pfepj4llNGBj6ZmIoa8D86qKw0bG\nxThZwTMmJPT0rsVsHpLjNLhXqfqooGSwH5Tl3passO6lw7pUF/O6t+kA1AJoF2wu//N4ZWozy2C5\nych4nzkQ0JjS0/T2Je+KNLAsz+NtSTxLypTKYzlUJrZBfFQdUvzT09Pa2tqqvu+HI1E5ouDErvN7\nLN5ede0I0HlRDH0ymdTe3l49fPhwOP9IzyZD1fKKKcOq271NPZdGha2RBfnEfvQ+77pu4J9/0hyM\nl+8T6i6jolO6r41CZ2dndXh4WMfHx3V+fl7Hx8e1WCyq7/vhsC3tWt/Z2YkOlO/LYB87sPNaq799\nxHEbUL/zVS5cWuieFgWXjUkeuO5zskwAurm5ubIGnfkIeHpWKwFUB2OFKpuAnD6sww87Uhv17ZMh\nCuekUyQT2LgyKrlAKJ8DCY2b0hhwel4lGiy2UwKq2CLjy3zWwV1lJt5pJYnyCWAcdLx8/+8AS4V0\nXrpxSAZJiq/4rPqfIzv32lv9xL5w+RI9WlUhQH/uuedqNpsNowSBkcqpWt3invqWp2PqVY2kh16p\nG3z2jeiTnrsu6P5kMqn5fD7wmccDSAY4Cm6NJltGyvtL4Q2FWfS6uMePHw9vGtLr5KSL8so1EmM4\nS2UrNES8UN95eI3zIXT2eK/rVs9gpy610p2/sWhdogLRytFqOeiJ6WQkGc4yqnKctTVJ4UNT1dEC\nPJbvAKZ7NERVNw8LY9uSl87nXMi9fckzZDnkwxivvGyWTz62DEOrDE+JXi9fzyU+3IZup81pTsnb\npGsJ0FseZ5IFd2oSQFEXuFtTS+cuLy9Xjgbw5X3uydKoiZekg84NafM5J79ftbo02NtEI0c99bkX\n6nErzOOGzq+rHoXENOfBNeY6LZHb+dPSYF9FlJy4FhbovujhNT+1k/NNqpsnu6Z05zF0ByT+duDy\nDpOl5AQlLbCGogJ3Crd7ZLrWGgUQqFog5qDvXtkYmLHMtNaetCWPM4Ewy3XvTveSR0ild76nIZ/6\nZyxOybJJvxtIT25M6KWIHnptKbU8SeeL/qdQIMticrnQ4U0Cqaur67XpKp90e9lpjoYGTP+l9DIW\nXdfVbDYbVmLs7u5W3/fD+mkvy2UpGQg9d3Z2Fud9COjukKhsgZ8fEEda6Mkn2tRelzPnkeusrhMc\nBdxaPLG/vz9MhCrMwhM0RS9lJK3jd0eGOklsIn2uX/zQa+d9nY3fSncecqm6OdGYQIZ53EPQx5dR\nOdh5eCKVn7aX63nmSbFQB47UuWwfrf468GBoKpWZPIkW31qgxmcT/7xPfMg5llLcnjQkA+s8Ykpx\n2BSyaU1murOg3/7x66m9fp1Den68jev6PPE19ZOU3XeP6phdGRYvn2Uk54nPu/OgelNIjG1JDpu3\nVX3sOuZy4LRRpl2/kqMjDNBcx8XFRS2Xy2GtuR+XoLxjcltVK6MHevWkRc6k84PlcwNRS/48NOrp\nzkMuHpuuugmGCbz945OH9CjJ/DSBwmtJmNxK8pqXTy9Ldbjyc0iZvC8m5wXpZXJhYHtIh75bysK2\nJ0X3OljGmHfkdSWeOwh7+5PnxpQ8N5epBAypLrazBUYqg5PrVdfGVeeUz2azIc7NMn0kNwbeKSkv\n3z2qMvV+XC290w5IyZ5i17cBdMoP6W05BgT5J
FstB8afEUCmkSjpoFdLfSOA69wZhVd0/O3+/v5w\nNouHb5VSmCfhBPnju7mZR88kfRRoU08oB880oHvnuvdBBRDDqdTJS0vbrVU+jwNwQHeL6JZd15V8\nIotDVRcw0ctht77V6VQw8ifVn4CPdPnxpSkWp8SJZwKweybOj3QteXbeHu97/qeCUqDdaLpiKD9P\no1N+Lt9j35NffN7Lb3n9foiVlyFAv7x88lYhhf+qrsMaPtne8gJb4CH6/WTFruuG89Ln83mdnJwM\nKzm8r9zgsx+8rxzkFcqsWt22Lj77VnfxLhkwhajoybbi4aKVc1cMaVCv+n71EDW9yPn4+LiWy2Ud\nHBysHGpG3FGbPJSoxRNV1xO25KGWNnICVYdxed+Sv35omcpKRrSV7jzkkhrJzk/AqmfUeGe4ynbv\nwPPyv3tKrficBM/B2wWQ5RNI+IwrM4VxzFOkgnnYw70l1Z3mBlqA4cYytYmAkoSM+WiIvf7klbY8\nOJ+HcH55W50/znf3jPh9m2dIl9rClUnyVDXRxhip5Jby633nv8l38s49VYVeFH7Z2dkZAM03w7Cf\nKOMt5yXR5HxLI8hWn3ib3JHQfR99Jf0mPxj60ESowiunp6fD99hBWWnEx0Sd5X+FouRkJEeqhRPO\nh3Wy5+nOAd09Pe9sFzrvyLQES8nBaF0cd6xO3aNX4h3qh32RXnqIyftodeKY8iSA9d8sx/OMpcTH\n5Nkl+tO8AHkmhSMPSacbRiqKGyqCIYf6uqa1wzTWySBUrX/ZAa+5MVYiDVoyOJ/PazK5frEzl8hK\n+RU68TBIMsD87fwWiHBSdjKZrLxcWsDO9lA2fRScgEvfNNrer6k/Ux73rt2paRl954n6UPcE6lp5\n5G8akmfuhoxl8vx18j7JNmVV/KWH7aP2pD+pjYknrfRMhVyc2HXEyxvhs2loIibqzAulBHw0LC58\nLItD7qprJfVzXdjJPjPO5F6Ag3kCWDdQLY9Bda5TlmQ0ldcVLqW0Xn9MoZMRovHhvZYRdv4S5DXk\ndWAc47Eb1uQxsg/8mu8arqoVEFWcW0mAruE219cnAzM29yCj0HXdEOKRI6HNRtosw2Ngu64b1q37\nCYr+20GJvPB5Gj8CYwyY3Vgnudd/LzPJsOhJOEDnivV6+KSqhnX4kiWWm+SJ4c2xthJfnLfJqfDR\nXCvd+aQovQKPYzFRKRNDvVN9Y5A6yL+prAJq0uV08L2Mes6F2+lheW4g9BzjZylRIDx8QWXi8jE/\nQ0Z0JGCkB+ttkOAncGl5iqQ7tUWJoJLa7kZS7UphtqQIzJvi5W4Y/H9KDjY+ieXt0vG6VTXEsTmf\nohAeDWYyng5mTg/7Q0B0cXExrHjhhiM3SCk0Rf4RyJL8eBn6zzXVzjd3SDjCYNnMx/DSGKAqrHJ5\neTnMHxwcHNTx8fEQfpFRS5uHfDKTc3IyfI5HctT83a2izXnO/97PyYm4TXomAH1dcg+KFsvBht4C\nJ2q6bvWMdNbN3aFOE4VIZZB292Rv25bWfwq50+ICkLxIppZxGUstgGNdLJNluyJ6uS2jktqpspOR\nUEqemf/WQVnr2ut0tXiWJkmZeJIePT8BDb1xtY/7KFrArvpcF5wf+qTQjceF6akSDKtubq+nwXU6\nyEcCMq87YCW+67p46Lx2Gadx07MMcx0fH9fZ2Vnt7+/XcrkcdoT2fb8SP1ed4hP1jM6D+sadG5+c\nVRSA/CUvUrudVy77NDBj6c5DLhxGJFDS9aqbbyjhZBTv81rqAIZJvB7FHf3kP6el1R4KVwLmludF\nwExx02TN6TVx4oWGLyUHZ6ePXlvLsLTAORkc8TwBubdRz+o/z0FJxiUBXGqLtyH9dx61rrnBb/Gb\n/xXL5bp00uEhvpYurKOXsi4Dsbm5WbPZrLa3t4ft7K3yCawyRn3frxyYxf5NspwcDraJI2zvx+QU\neV0t2Zbec
pni2dnZcOCW+J9kNPFf/ab/Am6Fr8hntYttopNIrOLplWyf89XnA6TnY+nOPXTvGF96\npu90Td9p6MayyRQlCr6ssE6qa3mDTis7jnXfxgN1oWS+FK9u8cCvu/K495PANtHHb2+beOllkO6W\nZ838AuxWkvDTcLnSjRkuluNtdJmgrDh/Wg4DDXBL7qTo2mik94LqtXHsjxRGYnm83jI49MDF3+3t\n7drY2KhHjx7VxsbGyuvU6LiIzwonuGy5LI6FRjyPOyGt58RPOmIOZHRedF9grXYdHR0NR+AqzML2\nOhg7XdQ9XaeHrusbGxvD7k29E1Qg7qHYseOSmdJ8VyozpTtf5VJ1MxZLK+7KkwBMjOVQNj3nQKt6\nFW/XutkEpqKNMf7k6eq51uRsaktSFgmxW38vk0KXlKf10XOt+tkXXqeHhVhW1erhT6LdjVRrBOQ0\nJOVnbLbVHoYXkuJQiV0m9J3ALhm+5HGRf5Rnnfgp7033EoB6mMT50AJ1yZ9kRyEMnfOys7Oz4ry0\njJLocr7xt8uC9zHBiTzzvhaPudjA9a2lF/xoRctisRiOPeDZ8C4rPikqOnm2DHEmyZxCZjLQ7tjQ\naKuNrnPscz4rmn2U2kp3HnLRt1tidiBBlpbVO4hl6TcFmqfIJdBlTM07sermG9JZr36zI6iQ7Jw0\nKZUmedhedjbXznp9HOG0PDuCJWlObWm1c11yRfVr4kGa/3C6vc/H6mQbud3e26vn3RPSMwQAyp97\nWYlG56GAp6pqsVjU1dXVsCW/769jxkn5qcwsPyk2HSTG5AU4e3t7NZlM6sGDB8NSSh5ToLzUFY/r\n6xl3TKpuvnyG7SbfPBatetwo8r6/5YfnrZyentZ8Pq/z8/MBxI+OjoZNRIqbJ4eGdXGimmEp6q3a\nx0lRLrpQ33BEqfL9SABvq/cj+991qZXWAnrXdT9XVX+9ql7r+/6vPL32rqr65ap6sapeqqpP9H1/\n8PTep6rqk1V1UVU/1vf9p1tlu7fjCp2sOi00DYALftXNpVCbm5s3YmiqR4qr/xRgxvlpxWmE1oFc\nAjYKV/KGEiC6p5piau4pOh00WuJTAlPW7b89eR+xjBaou7eTynYBbgm/P9OSLbU30U8aU5sIUCpH\nBpZtpQJ6eVrlopMQJZdcaZHkxPkxxjPRpg1GkhWFex48eFCLxaImk8mwg3E+n98YffqokP+dz+5R\nuvGjLHv/0RumDriXy7K0UuXq6mqYAD09Pa2jo6PhJc+LxWKInSdniXykXLWeFXB7aMy347vTpOjB\n2dlZjLd7f+o/PynEmdL6ZRlVP19V/5Zd+4mq+kzf999ZVb9VVZ962pC/XFWfqKrvqqp/p6r+frcG\n6RwcW5bUgUDX3Stmg+mxSnAcSPxDIUqgw+cSTQ56qezURtHvI4SWcrsyJa9uHS/9fwKPFtB7SqBD\nuhKNTifvuyFw3rsMjNWb7jntLV4luum9+/NjskzHQbtHOVE6pgMtHqSPaJH8y5uUt67Du7a2tgbD\n0nWr67BbckU5Tfwhb0RD8uj1LPlSdXOJbUtnFLLSGTVa679YLIYwi0Ye63iW+owLI0ivT2y7c6Qy\nOQ/hvEzn6LT6PunKWFrrofd9/0+7rnvRLv9gVX3/09+/UFW/XU9A/m9U1S/1fX9RVS91Xfe5qvpI\nVf1ho+wbCuzA4Yqt37SKBHROqEg4uGmCAsmOVEqhjDSc8gOZxoCDMV9/nm1wehRr5T2OEFgfPRv3\nuD2skYDTy+N/tTm1y695Pjd8Xo/z28vTmScOmuyf2xiplM9HdG4klDedC9R1N9fNJ4OoNpJnfd/X\ncrkcjr6V58wRJOtMk+Ru7FI7COJd1w0vr97b2xu8xcViUVU1vNCBoEzZYQhR/OQyX3qtaqOHp5Lx\n03UuD2SdkgGVq1CQjsDVG4d09O3JycnwOjnRlY7m9X7yUZXrjUJCCacE0JR
TnnDp0QSGcDiPp3s8\n90U887mIVvp6Y+gv9H3/2tMKX+267oWn17+1qv4Az73y9Nra1LKWSklglC8pLxWOAul16b+eU/K6\nCTCu+N4Oz+9tTN7BGPCpQxPYsmwHRIG/QNFpHEs0AMnLSLR6X7jSp+fcYCWjt45e8shTcgbSqMDn\nZFwWEh8cbKvGt4jzmrai931fu7u71XXXk/KJr7fhQ3rWAW0ymdTOzs5wANZ8Pq+u61bOAU+es9dN\nUFK73PmisaXhpnFQ3tbmN9Z7dXU1jGy0jf/s7KyOj49X3jREg6S+TbKZ6NWzfl5Tkn0lH/lzMpgT\n30oe2mU7PVzqtKY5HKa/qEnR8XFAI7UY5ACt5IxNz7JzxBx1roZLypdiZvKM5NW3rHGavfcwDelm\n23zlQqJ/zANrgc46Piu1vBSvtwXmLWNIDyTR0FJSKp0DByckqaTu/fjEGZXZ2+xGJPWF081wGIFI\ndflcjpfhRlpDeh0cxZUyyUlp0Zb4rGf13z1fxdZns9mwfNLPTVdyWRXvSZPXmfTU+5m0CqiouyyH\ndYlfAnaFWpbL5RDCcuNBnSMvkgEhz7iDV/MbxBHXDe9Dl5FUl0cKkjPH/+8UoL/Wdd17+r5/reu6\n91bVV59ef6Wq3o/n3vf0Wky/8zu/M/z+4Ac/WC+++CSy0/JsqFRV2UtIed1wuFKTiRxStgCN5Sbl\npgByrS7ve7jCAdUVkgaDFr7lcbCdvt2b9FMJnd8sV8m9W9LZCsu49+H9onvkZcvbJn28xvqTt6jr\nvvog9bGXTR45uCalc1rTdXmbVdfrl6fT6YpX6Ou8WwZ/DJhUFunXRqOu6+rRo0dD6EWHh1E/+v46\nHMAQgfpLvGTfen+785TALp374rxSKEWTn5oI1bcfY+t1eUx8DIRZltMlo1K1umSVZ6CTj+SL677r\nLvtbz2jFDvuylW4L6N3Tj9JvVNUPV9VPV9UPVdWv4/ovdl33M/Uk1PLtVfVHrUI/9rGPrTSs5dnx\nvitSAn7lU2KHKCUBZH0S0hSqcMVi3YkmT+xQP6eEZUlI9F/g3lrHqrZyc5QELPGAgOZxdzcoHO24\nd5y85uQpkjdU8NSHLWD0NrAtaQ+Bh0D4vNedDLaD/bpJa+ZNbeKIgV66wmLutKQyEk9Sasm15E5e\nul7XOJ1OB+AcA2p3iLz9yblIG8icj5Rvf04TnPLOBeya/GxhBw2Qh5LGeMm+0DMMn5BGjQqoI84T\nr5fXWgswVO+DBw+GVwtubGzUq6++Gmmuut2yxX9UVR+tqm/quu7lqvrJqvovq+p/6rruk1X1xXqy\nsqX6vv9s13W/UlWfrarzqvrRfgTdCD60/kpiEk+gM9qi0CqvlEfLlrwMf57K+rQ9AxNdgdkR3sSW\nt0wB03Oqh6l1n55A1bWAtcI3t7kuAeW7Vr1uehzM794FDS9jkaxrrI0JMFIopmUExr5b9TtvWgZI\nyedvWkcTkMaWEyLZn8/nw0mMW1tbN/iY2uqx+7FET73v+5WJ0tlsVpeXl8NSRnqg7BPl5ft5Hex9\nHbrvylQ5et7nxVSXyuAkrYBba8tPTk7q4OAg1kMnSP1FfulZjl65Bl3/fV6Fc1KMDrixp2PB86TS\nkme23R0ql8WWoWe6zSqXv9W49dcaz/9UVf3UunLxfFRQpXX3VIY+rRAH8ziw+uyzW+7kSeh6msQQ\n6CaD0VJ2jy0y7xhI+fXWcK7FN9KXPFIHIS/DaVBK4RfmdV56PyXAJa1Of9XNkAi/U/kOzqnOsbJc\nJpJSit7Ud6JBE5TL5bJms1n1/ephUTpHxWVyDMxTf0k3BMYC9Kurq9rd3R1eLK31237eTCp3jAds\nO3XMHSEabiXxpe/7YSWLJkD5Cjk9mzx0B8oUVqR80gnxeQDKKQHd9V+J7W3pM40ajU4rrQPzqjve\nKUovjMypuo67OZiQGfo/JlQuiIxRqn4
pjEYMoonDYt13kBYtyapSUH0pWAIAJR45yqTnHLSdV35k\naQIglpmA3w2O87bVB3qGmzDcu24Bk5fLssUXB2JXjtTnXk7iBelk+W7sOTpyLy2VLXoTiOl5yZgm\n+sQ/LtdzMHKeUR7SdT7P0bBCL/5J8wXK5/znvIToVb3J4fHfvhJHZcpD16oWhVp49C2fd0+Z9SbH\nIvWvr4ihbFCnxQdOjnKuy6MDvO/Goe/7G+WwDylLf1Ex9Hc0qZFcYUIhIRMSw3mdCudgmUBeHoBf\np3fiE0SuwH5WMsuuWvWOWEYr/OLeMulQW5XHvQcXbgq586/lOXrfOH1ebjIIKY7ORDocSL1OHzb7\nNnDV4XsTUjtaMuHOhPNF31wv7EBJg+fGXv3NEKOSPNHFYlE7OzvVdd2wTtyXzyWgYnucfjfCNIpV\nNYR4Hjx4UMvlsrquG7bP65RCGRfW5W1neCjJvctpAldd18tAFC8/PDys/f394VhcrWph3ckTZpm8\n7qFI8liJTpE2XzEGL/ml4VH5vkzYr5FvfGlGMnT6n5b/pvRMAboLiAtqVZ7M4vWWoCRrp+TMJ036\n7d8thW+lVjw7JS+zRUe6x7a2Voq4x95KBD/nuQNiq5/SUJj3WRbrTQClUBYnaUmTe4NJFsba6vWt\nu9+SxdSGlmyKbt+F2FoAkHj9tSQHOMWLFb/n7lEZGndUmD/xhbKYnI9EP8shH+iV8zRFn+h32XTj\n0ZI3tYejIQIqAT9NEKdVbpRPp8fbPNavSYeeaUBXI2iBJGAt0GbjfZkQPUYemOMAxw5k2VUVLaas\nsU+A6FvCxclbgpCEgvT7EM9X06g+XeeyLiWfZCGP2Cbygh4k62dfkH4+Tx4QdLhTzj0l70N5qvJ2\n6NV53FG0K4mHUnaV56sRRL+ui2aG0xyIvH2iY8zZSENjyqUDiIfA2B55ekdHR8O6cL2cw1c1tQzF\nmCFjvfL82Z7Ly8t6+PBhbWxsDOegyIOUIaVRpa5RPrsuvwRGvHLe0ztWyEm7VrWF//j4uI6PjweA\n56oSp8lH8KlfnW/qG8oUn9NowEfZVTWEqMijtEqHB8UxlEtsSo6NEvk3lu7cQ6dCq7FVqxMlVEp2\npBIFmWW5EPlkpQ/xpTQECFdwDn9TSlbWBUTfLJ984D3yJNXhgM78zLcuButejgO8jzDYBvHGwSV5\nL6mPvS6Cz5jHR3B1ME/leDvJZ+Z1r4nPJMPhsqj/aWJY9xz0qmoALIGI4tt8NZr6InmeSSe8Duej\nypxOp7W9vV1939fDhw+Hw7q4LJBy5fMj5Bc34KQ+ZHvEL20SWiwWw6QsT07UQVyi108VZf96v7j8\n0ZN2eRFdpI3fxCMaPIG2GxKGnBR24SoYyq6De6I3GWumOwd0J1zL57wBHnN2zy8pL5VRneQnwLUA\njRNRDi4t73OsfX7N6fPJplZepmRU3NNVOYzzrwPzVjtYh+pPhsQVxtvVinf6HEGiwfO1QjrM0zJW\nbJf3cQrfjPV5MioEH15XGxNdklN5opILb29LuZ2OMb7ww3g9Qy8bGxsrXjododSHpC05SwQ5JY1O\nOPkpgNdacz+73T19X1rr/ZR022mjDiYccX4xfs6+YX2UZRohlZv6jh8+721O6c4BnWGRNFyngvkw\ni/kSeIhhNBI0FBQSlUWLSstL4HWB4uhBAktrTlpIB+ltxYRJg+rSs9qQ4mU7kHRddyO/exEUSo5O\nyONW36kON3psH3nroMkylIc8TiDrfeMxzsTPBC6tttIwuXdKr83brskzemGUFR1n65NcusYXF2v1\nlbx0Hy0qeXzb5d9/Mw9Dgdvb29V13QCsk8lkWDBweno61Kk3IPGccRoE8YtHAfPIDPYNgVzLEwXm\nR0dHQ/zcR9OUXb6U2T/JYaJDw/ZX1Y3X87VCdjwv3vm6sbExxPnn8/mwgYvtIE2sL3nlDAc98x56\ny0t
0T2NMQd0zSt5Sem6MHj1Pg7GuzLGUnnNPOdHiBsfbn/ImL43KnvI5ENzW+2vxwRWL+QmW/u38\nIF0Otqmv17V53bOpLa2237bvk1eWDCQdF57xovhrGmHy+7b06FkaMgHRdDodjtdVLH86na54ogJR\nHRXA8lpt9vAM28sjcLn7s7UL1Ns7Vndqd+LVmPFL3nLKm+7TEU0brVo0jOnnWLrzSdEWICVA945t\nreKouhljo+cs5ialouDSMosGt6op/upemXuAtNAS6nRuB0++o4eRYmyctKKAcfTgiuA0kVaWIX4l\n74G8Eo9aid4wn2e7nC565ZxsShObCfjp4STPXzQ4+LPPUn+Tt6qv769DhtzKT1lyXiRQurq6GsCt\nqoZjdv1EQtXtO4YdkDzu7/0v+VCIRSOFjY2NOjk5GWiTbmxtbdXm5ubKYXfsS8XIz8/PVzzzvn+y\nRJiTvfLOj4+Pa7lc1uPHj+vw8PCGvrszoI+PEp2XqlvXeI+6Q10k9vjIQ8+ojWy/ViqRZsnfcrlc\nOZv3BVwAACAASURBVHKb/adrLRpcFsfSnQO6kisMAYhK4dfcOrPTfZhGZhOIfdKSgkJFEqi4Yrtl\n5VEDDgqtCQ+Pk7oCEwgdnF3IHWgS+CYQd6/alaBVJ+vT/cSb5OmwHP5P/E1K7b/HnlMIoGWcONfi\ndDlvvO+S16h84i2X2ume/9Z9bXOXl9z31285kowRbNhu5y/LT/0oZ0fhoq2trdrZ2amqqueff344\n4+X4+Liurq6G19fJeyZtdKQmk8lgJOhoKYyjFzefnp7WYrEYPHIZCsbt05lKHjokyE8mk5XwifcL\nV+XQixY/HQPoYTuPHWcoEzS6LqPikeMJjb3L1dhL1auegZALkys/lUkNpPK08imPK6SXNZbfy/eO\n8G8l71hed+Hy5ADFMpJ34eU7LaSPAuJtGcuTeOJ1e1kO9q08TGP30rNUYpXvoyJvSwukvW6XoRa/\nXMaYP7UnGa5WHyjs0nXdjeN1b5M/1T0m2zTqAnfFfq+uroYNSFWrB1cR9JKh52hMSfk1+cmQS1qD\nn/qS/9lG99jH9DyNsPx+y4gnnro+kjct3WxhQjJCt0nPFKCTCbRa+i/h5mTEmKDK0moYzAOFUlxO\nllTJhTX9rqqV4Zg8QQqLdtrRO6QSuYdLekgLQ0bigerUBBp5yGG+H2nA8inMHg5p9Y/azesOhFXX\na6jJL/WN2pJCPz7B6XznZBbDNirf+ae6RLcDu/izTvGo2PS60uS0rpH3lGn3MH2vgbxCLSHUmeV+\nThDbkY6poC61nCZek5fedU+O151Op0NIROvTq1Yn5UUX+4WyTT5rB+p8Pq+zs7M6ODioN998czge\nl0fTitfppc3sG1/7L9kjz0kHccZ/931fe3t7NZ1Oh9HIZHK9J8R1XRPd2oil0AudDJ//YBkqn210\nXFDd/v5ST3cK6C0F8pS84XWWkh3FPG7l0/UxjytZTnZMy6q2PMYx+v26C2LiUevamKezrhzS1YrT\nJk8l0Z88Hy/na5EJL5+//RkH5FaZX2tKNLTqGavDPUBdUyxa4ZeqWjn+wMHta6U9ORgqX29R2t7e\nru3t7SHuzbCAe5l0NPThzk9fkqizzNNxHet4to7XY7pMJ8np1QhJOteSLToydDC+Vg+bIZ1Ux5jO\nMt05oKf/3hEEMjaydYhV1ep7CMkkF0QHScZRHai901mGe18JTESP/rsAKDG2R96kTQlKSbD8zBgX\nQvdUSbd+J7pabSR/mIcereKsipM6ECRAp5JT4eQhEahTHh91cIet19PyfJPR91CC89hHFlWrrx/z\n1OpDLXeTN8hYtfMu0a/yXIZ13R0RvQBDMfXJZFLPPfdcnZ2d1XQ6rf39/WHSVInAyD4RnYqPz+fz\nYSni/v7+cI1g7g4c2+l6J76Rp5IPlud8ki5xtMP8ej0gR7S+wYvGl
jTxWmsTEelVP/MtaRyF87ev\n4fd055OiZL6ShwT0Tcvv4OLWnWDIs44ZcnHgcKOha2LmZDIZhpmpfgmIFE51ElR9CMr/BAABrpID\npfITLOn9Kr8DsHsl5BkBy++LBy74BB5dayUpt8JmbuTYdzw+lqES8kP1pr5kXW60ObFIJWP4h/zl\nRKnzvepavpIn6YZI+VK73Njzc3FxMRzepT6kI+FeIZODiPcheSb+qDyNCHi87unpaU0mk2HNvPLS\nw1TfqX/m8/mwokU7Pw8ODoZ16OpDgZbTmXjj9JP/vreCxoXeMMNeifdcqaRdvKxTRo80sB4estZy\neLyNPikqepPj6unOY+ju9SQwSfc9pfzu7biFvC1dCTBum9xbSv99U4zTkMrT/WT1vSxXAublfa/T\nn0k00RD4fQfAVrtIVzKqnpLxT8+4h+eeXSrT29vizVi/Oqglr8zl3Gnz8qTUisfzbBDS7vkTz8ZA\nXwZCRkbgvrW1NYRctra2Bnq8XuelgIinJ2oClKtZxmhPsptob7VXz5DPnEB3OfEykvPTwinP4zQm\nQHd6k2x5X7fSnXvoSu5dcjKBw+V0MJDyaDKy6np3qO7TO+U11Z0Ug8MfXXNvjLRSEdJkn5dXVcNm\njvRCAe9ACUNqA1cU8EW5XDqm5+lNi355WP76OtJJPornNB4UeO8nF0Q3JgIpelUOpk5X3/cra3up\nNB53pOfpspO+lc/bznukQzzUxJjy+TJT9p0vQdRmHQccTojN5/PhdXHiMyfQWss9W7x33jlwKfQi\nvgnUT05OVs4x4SiWZRG0dRSuDiBTmTTiCdTVd6qHowLSnsKluk5+cl055VP8Y58lZ0OhQ/1mfpcH\nRgd8U5jqY5vUHsXwq54YVX9DVCs9Ey+4ILAI2Hit6uZkhjM7bcV1a+i//VmBof63LGIKkwg4/XV5\n7GA9zzKurq6GDSQe92eIg2vbWQbjzgLrMXATTc4TPsfwAulm/DEZJ/LNT5xzYHNjTJo9fKN8zgOX\nAdXtYC7lSLxx/ul6y8h7XUoEJ9KVRkvih69rVjiv71c34Jyenq6EEHwHqR9XQfDyPnYZSG3migv9\n397ervPz85pMJrW3t1dVVYvFYugvByvJgI67PTg4qMViUYeHh7VYLFbCHuQFgc31k86L087nyGOC\ntGgU/7Q5ijqqMInjj/cd+esyRGdS9/0EWX5zEpqbkwj2Y3jEdOceuns6TrArjufnbyqPg3nK4/Ws\nq6OVr1XXusS2eRkJ6Jwm3k8eZmpDS6l9Yum2v52e1rV1fUhZaKVUT+u59Cy9SN7zetf1eXpmXftU\nR+IR/7dkVkZJYKPYrLxfevW3oX8ssSx9c3keQy++4qXq2rnSMkStaNFyRXr2SpxT8Mlmp6t1zw2X\nA/o6UJRB5Eg+5XGDzchCosUnnW/TP96Wlmx4ulNAdwukziKD6JEm8KfQtTxXCo97/2S+G5jUaayL\nE6CiUysBeDARafW6eZ3esBsnH8p6W1hWEnoCi4ZvXO1B3okerq8nWKjffJlVixbGW50ettPlgDLC\nZwkgHDLrw/6mUpPP7F/PS57TG/P6kufr8sf+dHBXO9PoyyeANZLTsj/xZTab1Ww2u8ErN1rsI/Jb\nzxAE+TwPvtrd3a3ZbFYXFxe1vb298kYjtlsHbckj15nmBPYUEuSIw+Wz71dfNOH6Lw9YvNY1ts9l\nvLXaSbLPfQTEgIuLi6E+7XqtqgELtra2hme5MEH9xj0pSQ+4Hp3PcZTRSnc+KVo17k07ELq36RY4\nWbRUPv+74HgeggJP0yOdLcFw8GAdLV64Ujk/SGsCwbFE0B3zuqvyhCO9lRRrbPGCYNNqjxuDsVHH\nujbyt+f3mK179AT3sfBFq8/TqKT1/FgbnG4aCMkgZTG15+146+5kCUx0WNdsNqutra2VNwxprbmu\nCcR1Tfn1nGjl5Kj6R/MEXXc9v
+Lylwyz0m09Ye8vhnRasp6iAS259+veP44NLby4TXueCUAnA52Z\ntMKJ8Q5Qnj+BnRsIt5y8X7U6GTmbzYYtyk6nx/vds0406Hkl99Td05XXpOTzEOQPaXEhIVDJk6CB\nYP1eNvnKNqRYvbdzTDjdUImGlMYMTQJd8ZBtpDdPOpMhS/KgxJh/yxjTS2zRyX6iYUm8UchluVwO\nk6Rcbki5cGOV2pPopTyr7I2NjdrZ2anJZFKPHj2q+XxeVVUnJyfDAVR6QYWWKOqcFtG+tbU1ALr4\nTSN7eXk5HDuga34QWNL1ljFrORmpb6Tnzn/Nr+kZes10+Dyfb1xS+9hu5mMIrSXHY2ktoHdd93NV\n9der6rW+7//K02s/WVX/cVV99eljf7fv+3/89N6nquqTVXVRVT/W9/2nW2X7agsqmRrK9a18nqGa\nq6vrFwL4yWl6XvnVAR5/FIMZjpDyScDScIcCwHIk/By28wCkRG/LAxGvJDQ+scq6PURQtbrKhxNx\n5LePOPjN376W1/uSfCaAsUzxRu3hK8WqanjrfFV+azqVn3KSJnWV+IwDiPPTZSfxksm9RF5zcF7n\nebGcVKZ4IIDkeSuUBz+YKnmJrfoI/C7zV1dXtbOzM6zI4YFdOvJW4H50dFQHBwfDEsVUto+s2ec8\njoEgyH5SvzqQjvGXfawVRinMq3weMmOITHMLblyIHbymZ1ie+DCfzwedoKHQfU6YttJtPPSfr6r/\ntqr+oV3/e33f/z1e6Lruu6rqE1X1XVX1vqr6TNd1f6lvmJWWF+K/U0epY/U/WWzWwbzMk7wWFzCV\nK4YSMFheUlh6mOzIZH2T1+RecvI01lntlMbyJMBiSh5kSv5My5Pi818L/d6HSY7IayqR97mXR6W/\nrXeUaLxNSv3ees5juVrT3XXdsD488fxr4S3rcxoFMPK0FcOX1ypQZ6zceZ1GXf4MaRdwV60uz00j\n9DHZpSG4jffrPEt1JGMtHtEg+OhChpLOquu4ymzpS0prAb3v+3/add2L4VaSkB+sql/q+/6iql7q\nuu5zVfWRqvrDVDa95ORtqlFkCBunCRsxww+oIsBX3ZxU9TXlCdjZUT6jnyyvHxrlS7PSpJt7Bz5s\nbwE6O5nDRd73drSANBm99DwVqQWi5GFaW19VN66PeZNJwcVHbscmD1LYxI0r28NRodrnSx3Vjwk4\nnF8CLo5GWC+ThxxdplgvJ+HkGWuiUuuVxQPKUwvUx/qPiZvfVMfe3l49evSouq6r119/ffDODw8P\nh4O8fBS+sXF9fC756PJP/rGfdZ+7sVv8F1DqHpcGsu9Vn4Mu9YZ6qf0Gek5zB4r56xm1gUd2kJaq\nGs6xcQxk3/vcwlh6OzH0v9113X9QVf+8qn687/uDqvrWqvoDPPPK02sxOXAwbuqC1oqXtSbufAbf\nn1VH+DBIHeUASEGjktCLczD35xy0WCZpT94VDUkCJim9Qhjknfjn9DI/jUjLm6Px8TIdMJIAkgcO\ndgnEEh/ZVqchAT9Bg/S5cXfayRMqIq/RWJNWB/ykrK2jEtwbY/nJoEhmdT6OhwdcFzyldrfu0wkR\nLQr58MRBHoXLMpLsyQlSmR6ucx1Sm93w67fPiThA0khQB+kY6r/6yfVUzzpviRM0Dokml/Xk0LiT\n1TLKTF8voP/9qvrP+77vu677L6rqv66q/+jrKciZXXWTAWxYq8EtxfA6eJ+7IlUvBc6HaK26uQ2b\n91rKkoDd84oeF1zlZ/vG6tI9gk7L2LCcxM/bAILTRXpodFqeIQ1vMm7kn363ACvxpNU+XeNzVGLK\nQGpb1fVOZk+M0XuY0BPLcyPMUaee1SRk3/fDiYjiiUYebHOr/euAQvSIRwKt7e3t6vt+CPfIM+c7\nSJNx8bq1coablAiujEm7LvqIkbzic0mufCSj8kQvlxaSh0kuKIe+4zrxWHn9zCXdU1vckRxLXxe
g\n933/Ov7+g6r6357+fqWq3o9773t6Labf/d3fHX6///3vrw984AOs44b16/vVdbFJqX34yrzJQ9Oz\n1r4b36648izYmdyqm2J8CVDd+xRt3nnJ6ybgK2anA4T4rHih513RWkbTlSR5DEqc3GQZ9BjZJ2x3\n4pHzgH3IHX7kiwu79yvf8rPOC0oA7vwaq5PAQR4KrOSVii4HCq1n7vt+WNHC/iCdiqFr/bcApTXC\nSE7KOkBnf/F8F+6APDs7q+VyubJEsaUrST/VbuqP3q2qcIlkvWr1FXfkC50GLgLQs1zNRhlSv2ku\nwB0q7twVT9gutdvpcf6JH5QFjbAUnqHMHB8fD6uJ/qIAvSvEzLuue2/f968+/fvvVtX/8/T3b1TV\nL3Zd9zP1JNTy7VX1R61Cv+/7vq+qMjgoialu5V0R+f2UxpUy/PdtPDR6Nyq/NVR2gG7dJ7ASwDxR\nYdO9ZHRS25W4asfr4O/beGqeHHydZ15nmpz09jldfr9FQ+Lnbdp023a3PN6UUpiwFS4g/frt3mGL\nVoKQluASyNaFp/SdHIFk0Jwf2uK/XC4HQGc4ynXJ5Zd95qMS8pF0EKBdT1zmeH1sQnZdjJr32V/b\n29srq7/4DOlJekus8JCw8GI2m9Xzzz9fVU/A/ytf+UqTxtssW/xHVfXRqvqmruterqqfrKqPdV33\n4aq6qqqXquo/eUrkZ7uu+5Wq+mxVnVfVj/aJs0+TCxM7xmO67p2QUWOhA15nGVQqfhNIVQ8nT33i\njPTx40NjdhDbJWVMwODt9Wv0dhJAULh4ZgUFiArH0Y/uOQB7Pfp2b1v5khHx+GISfJWb8qd5E+eb\n84L0et7UpkSD08P4v+eVF+a8SHMCDNMQxOipJZ47gMzn89rc3Kzd3d1hXbrKcgfBZTSFKMg/1y3W\nf3l5WYvFok5OToYdoWrrbDa7YWCc/9QBluttZd3ytG8D6GwrZdTl2501xwx/V7DkQSMVGTLvZ81z\nuGPpMpJWwmihR8u4e7rNKpe/FS7//MjzP1VVP7W25uvnbygCvWh6fS2PiNeT55eMQ5rc82dJYwLw\nloGhodF1B+1kTPxe1epBXV4HhZkC5HxNHh3LSrSzzORZON8Tz9bl9/80OFRgz+OxbfJK95MhIG+c\n/gQCqa1eRqKX+R1AUgjJJ+Ba/cY2sgzxWfH0yWSy8tpFruFWuR7m8zb5Ne9rd1IEbh7jdr0lGDJW\nrjI5Uc4ylYfn5Dsd1AVvC40a+0HPjjkJruceStVOWA83Ug44GSv5dGcmjdx9RLduFHHnh3M5we6h\n0TtwsE1KzfwJDJO3to5JLLcVTkkGx4E/tXUM5P0+29oC0nXXW6DlBoqAQ1qSgrfqbNHU6i9vWwJh\n0lJ1c+I4Aaordau/E2gloE5tSrwZ41ULNNM1d04SEGsOZ7lc1sbGxnCiIY+U9mOAnb4xA83EMMv+\n/n49fvy4XnvttTo+Ph4MCYHe9c6NYwqRpY+8cn9GbUl94t/eRo6cWs4Fy9d/tW8ymQybvHhaqhte\n9qP6y5087w86szQCY+nOAb3l6fBa1WoYwQWa12glfULOy9HzHErxOR9qy0PwRKvv1psC4MKdjEQL\nQAhcbq2Tx8KyCIxVq29zIdhVra4Pb3k9ThP56jzRUJ70tgCdefVNRSItzlf2qfeh0+N1d11+u5Py\nt7xq0dKK8fK3AwL7xJfGjRk8gjIPefIJdL1CjUc6p7BKCzy9PfQYtTRxf3+/Xn755fryl79cn/vc\n52p/f3/Fm2YolODkbXK6ONrsum5YEun8cX4mYG7xUx9O5FN23BmQrHE+gnNiBFuer+MGgNe4C1oy\nqPoUbiE2KHQ1lu78naJsRPIOqERjHpQDU/KmfYJVz7FsvoDAQZ15GWP2T6LbBVvtdeDmdRqjBBBu\n8JwniVctg5BA1elLXiX/O3Cv67NEO9uY6nFaCKw+ZG15hsn
weDyZeRJNSeFZpsq5zXNusFI9fJ73\n3BDIQeHxujIYLQfKdcjbreuKD+vNQ/P5vA4ODmp/f3/YRERgTuW5PqoPOanocurtdbqch36NqQXY\nNP76z9FQaoPo1bUx/WA+n1/zcC7703mRMJLpmTici8xwBt8mHKLE+JsrN+uqWj1hcd2RlK2OGlNs\n9zxSB7cmoVLZLWOn/BQ+Koq+fcfebYSPnnB6LvHWAcONnofBPN6dwF3f5Jfa40BAoEqKkBQ1eX4t\nJ4PtaN33Z5PhbYE4lV4enAOA10ueafJ7Pp/X1dXVytuqptPpjdhtCiG22qtJv+Pj41osFvXlL3+5\nXn755frSl75Uh4eHK6MVAqTz1WXQj8XVWvQx8G5dp97RORvTUfaHUppvSfVzNJG85wTK6b76iPKY\n2vlMh1w48111cyJJwteKG6aOciDgb8UTHUCoMFrHq3sULAK0h0kSGLTAmMqcQiXpWW9Tuk46xV8H\nVE+67p4VhZ3tIt1UWvE1bcTgSXkEdJXl15wup5/yQEOVlo6xnb7eOLWXNLTiqslrc3lMxi99e3n6\nJjAyzJIMc4rFanu9Do+qul5H7TLsH9IlHdTLKs7Pz2t/f78ODw/rC1/4Qr388sv16quv1tnZ2cAz\nTmpyR6WupclQ0iWAIyB7KNB57cbDvf7WpCdH7ex3lpf4RPmjE+k6z2s8eqAVxnMHg5O2qm8s3bmH\n7oJIRUuC5mBMUCH4Ugk8xqm6CEpJuDgzTqa6gUmjAQKFd5i3l20Z8w6dZ248UplUJvKPz7McluW/\n9T/F80mXgNFHBuIdaaABULnpyNHUNuZhf/mzDrDuYXl/pHqSMU39xZCBl8ty3Jh4HS7nDhQJ/Emf\n2qbDsrquq+3t7RvglIwP6ZGR5tuHFGZ544036uTkZNjMloy164rLmtqRdoDSe+amIDdkidcMxbkO\n89tlz/vfV9x03fVRwm5kWafrhfNbNFJuSTcNFfXkmQb0llC5gCeL7GBHAHEQT15e8pDEwDTEZUfx\nN0E/AZwbKW+7t4HJh4z+m/mTV0rl4THEY3xMQNJqiyun153Anu1seU40gKn8tLzvNjQlA+H0OSC7\n9+bfqd2JD6zb5Z797oc4OZ2pPoYMGS/XMkb1Pd+ko3KTLEi+5ZlrnfnBwUGdnJzUSy+9VK+//nq9\n9NJLtb+/PxiMrlt9EQXBykNUznt5uu700BGi1+4bp7we9mXVzQ1LaV13+u8jV+eR+FxVK4cDel+n\nD8NCSpxo1fyH+kHXxtIz4aEnj8YnGyhkTC6MyQMlgPhse8tTSvX55EULUNg2Nw4JyFsCo45tKbbz\nLJXJey2A9TLcuLrhaHkiVLBEp49w1EbfdDIGvE5r2pTTalvyDp0nHGo7qPtvPedhM+e3Ow/JkI+B\nNssjz13mfNJfHp0mSAXOKZTEbwEHQeXs7Kzm83kdHR3V48eP64033hgmQvt+NQbsDlqSCW8fQc7z\nJYfMJ09ddsgPdybolHnoT884XZ6XdHg4hP2VdIF10mlTHT4f5p+xdKeAnsDKgTUJiD+n/x5vSkbi\nNh5WSygpdOoonj2hOjg8bAlyMgBsV+JDAgZvY0vIEyC1gNL7wndEJh67gROPxoyOA1+KK3ocX8+m\nIxjSULfV7sR/v898bnBFGwGdbWdKwM52ers9HxWcDgn7uHVNYZeu6wZvfWtra6WP3HHguf/ayv/V\nr361vvCFL9Sbb75Zn//85+vNN9+so6Oj0Y1E5JnT1dLTFv9cZpOeVF0beBqtxFPihY/mvT+9/1y3\n1A5fhqh6nCf+rlOXC4a5NMnNHahj6U4B3d++sQ5gqlaH68yTlDGVkYZ+ybN06677rli02prsEMCn\n/K3zyj0lo5JCT3rWaWDbXHlaw/qW4XTPz8MybF8CSgdHXveJyGTM2MYEWGPg7G1hn5FPXo6XnxwI\ndxJcljha5HyNG96W0dJ
/hUzkLPAeaUhD+KoaznlZLBbVdd0QeuGZ4myjliVeXl7WwcFBLRaL+uIX\nv1h/8id/Uq+99lp95Stfqfl8Pky8+gQuHRx3CpKBFA/07ZPb1DGCJvuC8zbeHpXjIM/7lAkfibv8\n+AjHwz0e52e/iD8u15QX9ZeOIP5XBtCTkrjS+zUCKYHJlasqe+FkevIyydhUFgXPFco73wWSAueA\nkzwB0kAQcJ7wOQ/xkG8UCFfElqC4IiVlIj/IJ6ffhZe8YZ5EQ+IZE8t02sbyUvGpZCkM4rLptLXq\nS14YZcEBnW3ifTdCDiru2RKw6PH5uen0VgkoFxcXw/ksjx8/rsePH9f+/v7KWS3J2eK3G2fy0Pd8\nJB5Ura7Wcl54GMTBm7Llq+q4B8Z5Rlq8b7Ublv3pRom65e1yPKNOcE7O+3AdmFc9A5OiLU8qNUq/\nHTg8JusA44cc0VMgsCUDo+eVUrxUddELYRv57fS75+n1ujfM52h8vD5vp3sn5JHKSnwWj8UnAp0f\nqJS8I99u7nz0OY/EExqC1E6m1nXviwTkiceJdgd+z+vPJTq8nwSkpO028i8eu3datXq+v4yyVqpo\ndMzlpPosFos6PDys+XxeX/ziF+utt96qP/3TP61XXnmljo6O4lHOoqPFfwIx9YVyouf0jlSFG1rz\nZu7pOs8p+yyDIxpPYyEXlc+JSWIOZUp5UzhH18kDlan5Cp2cSf2gjrXSnQM6OzopmXtKBOp1YFC1\nGitrMd9BXM/5hE9V3QBtVzBe86G9exctsCKAqrNZLxWexi8JkwN6Enz3KFh/Gqbyt9OU6tdvKSiN\nn9OWDJ2Hr6icyRgrDxXF5YjtYDnKz2+WS5nhOSmkmTLAcviM5Esy1ff9cPSt1+ltF/+cVsp34qF4\nzeG7+p7XT09P6/j4uA4PD+urX/1qvfHGGytLFJ1P/jsZH44M3dA5fT7KJABzJNYakbccmdQPTrcD\nOufEKEu+yoZ1rptX8H7hOeoEdF+V5vSmdOcbi8iIqpvKo+GWmOzDJj3LPCrHlT0phgsTwYIC0YpP\n6jkHbiprUjBv99hwyoGCbRHtCiFx+zV56e3y6+4x87mWQNGwOKB7Xhoo3k/1EZTSiEh1kwZXbncI\nktHzPhzju1JqQwJv8dbDeno+barzc4IS8LHN/qw7BpyUpz7J++u6bgAOLXE8Pz+v119/vf78z/98\nmAB966236vDwcIjpstzkpIjH1DF64+Srjyy67vq9qTT+HnJRXucBecplf/683r/qoUS1Q3Wen5/f\nkK/kjLC/qPP0xPXOURoAbjbS3IW/6MLLGUt3HkN3a0yw4zkUSVmVEph4fN2TnzuRJkGVlwLhnqAz\nmIaBgEkakyD6fcYhE0AnJRF9EhINjRk/d09JZXJHn5LHWN0w8Tnyx9vmdCevIz3D/mU/uqftipwm\n09zzY2o5FYkG/h7zmJIcJaPMpH5L8qrkcV+n0Q2Ye7QEDtKyWCxqf3+/5vN5felLX6o/+7M/q7fe\neqteffXVYXkidcQdGOeb0+i6Rp7Q+20ZVpYpnfB5IdYnDKAMk3btoHW9T3k5IZ36sdXf7JPJZDLs\n1HXjp/t6YTTrdsdTZbTSnYdckvK6xXYBYcfofxIixg+ZxsDEQyG0wm4wUgezfAdNAkvy6Lx96b4L\nptfNshO/fIKJnhvLqWovK01pjNfJsKa+S+3p+9XYbOpn5h0bYfDZJE9epoNukhWGpZKj4fn42zhw\nvQAAIABJREFUnzKWjIMDJsvUdTeEiSduRNXfpH0+n9fh4WEdHR3VG2+8UY8fP66jo6OVOK7zyMvX\ndYIQr8vzTWE86pk26Hh/VF2f+6LdmmPGPxlNOkNpBOO0yNi4Y+bX3JhSH/UMnT/JNEcjLdwgn595\nD52W30MkSZCktBRyDl8pLGmopd8ECZ/k0HfK7/QnAUg73sYmTm7j6ZEPDiD0eqpW3z+a6
lJKgM3f\nCaw4YnLDS5pb7XKPRjxLQKV7+p2E2esSL1K4zb1Xp0l1u8fn5Xu/sz0pH8GPoxhe83IJVMyT5gLG\ngJy00BMUb9XeN954o1555ZXa39+vL3/5y/Xmm28Opzb60J8fl//Eg6rVuScdh8tRrI9CvX0EeTkm\nyidPm/3BsKfLHMvmNn7ijfLrvsfMSaeWfyr/xcXFEF7RPAvfIKVyLi8vh5dpK/SVQkRq/+bmZn3z\nN3/zDR4z3fnGIn27gCSPp+pmbF2McQ/NFUJ5fejnCp6sZLLyntxzaymUP98qM9XL5ySAaWUBJ2+8\nrhbtHmqh4LqykjYHRjfOeib1JcsioLlH2sqbgLaVRJM7CTTgaruDohtmB4CW10QD7BN6TG58SFML\nnPntoUDyxtvfddeHpUl3tN78zTffrLfeeqv29/eH89QF+DS8DAMyUd9Io8tDWi7o9Hk7FY4i78VX\nvZWJAO6AnvSPfUejldpF2SZv6cSx/ziSEKD7yaBcQuoToJSLrruOwT948OCG/DDd+cYiAhItGxun\nGWAyk0uWqm4u0eLKEAq8BNk7wEEhea6qR/dJS/KS9FwScAqFg6LXTSViHV6e7vl2Yjde7gE4D8Qb\nrldO9bAM5xUBVALPkRD57QajtfwtAQbbQqfAwUztSX2aAJttIL/9SFvnHWl1Y8HrpJEgpfK5gobl\nptEDyz89PV3ZDcp26KOzWS4uLuro6KgWi8WwYUj/2e/c2aiJQjkTSppgVB4/Epe6rBc3pBEeDQdx\ngJsQBYCike/LJTAqv+sc+44GS3wkuG9tbdXGxsYAvqrfy+HOZfHe9fb09HTFaJ2enq7wmm2RLCrW\n/+53v7sePXpU73vf+2os3fkql6qbw2+f7EggOfZf11woyHgHPoYfdE2JQ3gZBIE4jYR3ctX1MZ4E\noRSnT8YleRRuCBIgOg1evscDXelYJ/sheWW6TgViXhpVr7cFrK6QqU9TOE7JX5pCD4j1OAi3eNYy\n7myLeENwboXZyEsaDeZh3ycP3/nCMhkbFm1V16s+zs7O6vj4uJbL5bAc8fXXX6+Dg4NaLpcrb/Fx\nR0fyIEBVuMFXVxFkSSPXzEv/GFtX3ZRT1etyTHr42w0p6/N+InbQQHH+QuXz8C3nvcsJDajqq6ph\ndKS3Ssk4clGC+n86ndbOzk7t7e3Ve9/73nruuefqhRdeuCGDTHceQ3cFak10JOXz+yqn5S1RSCSM\nvN8q38Hdt+9zopHXk1cgegmOCUyrVr3ftJnDPW/nhf4nIPJ2qW0JdFl+8oBbZUlJWt62K3sr5OYA\n7MbBaWjV50Y75U0y2SrPrzvwu9zyeisunowAR0hjNNAjZL/TcF9dXdXZ2Vmdnp7Wcrms+XxeJycn\ntVgsbuwAdafDjQn7gEaHYM6+9Q8BfWtrawXMvd3uuNCwtwCdcpPi/jR2osN/kyeKHpBG5vF+8VG1\n2qF5Cd8PoDJleDY3N2s2m9Xu7m49ePCg9vb2amdn50Z9TM/EaYtV11aa/z2Wyg5kB9OrSd6ornus\nnRtdkqfqnrAmSah0rQ0/9AZa3njLUySN9Gy8jeSf06pERaQXIq/AjQqH2uqDtMGhqm5cT1u5xWMa\nAipFSyEc4HSd5Ts/uKbXy6JR9b5I/ZDA2j0/71uvL4Ewn+OLEdTH7qnR+Hu/O+/4DEe/lG8N9Y+O\njuro6KjeeuutAdQFXpQFttlHD5IntYOjoFab3biIVoYaqENKDHV5fNz54SGzlgxRLpVoIFQvDRj7\ny/t9NpsN5SqsRBkXkLvhYt2idzqd1qNHj+qFF16ovb29eve7310PHjx4+zH0ruveV1X/sKreU1VX\nVfUP+r7/2a7r3lVVv1xVL1bVS1X1ib7vD57m+VRVfbKqLqrqx/q+/3Qqmx2jIRcZnISDnqoLuTNH\n1/gcQZaKlADO69HEhK7rm2UqJUFKCg4+r9TnwMyhm
57l7xZY8ePJDZ97CrxGEG61gzxMnp0Dpo9a\nmFpAT7qdT6zPDaz/9nKTYUypdT0Z2dvkqconR7r88ZqXmwyIG7G+74cDn05OTuro6KgODg6Gw7eW\ny+UKSCa50W8/YIpzEx42ouMjmgmQLh9Vq7Ks+2ni2UeT+q2Qm4dZ2Lfc/el6QIfAZcnb6HXrOkf/\nDLFoY5cbjq7rhonT7e3tms1m9e53v7teeOGF2t3dreeff752dnZqe3v7Bh+YbuOhX1TVf9r3/f/V\ndd2Dqvo/uq77dFX9h1X1mb7v/6uu6/5OVX2qqn6i67q/XFWfqKrvqqr3VdVnuq77S31AlBSnI9BS\ncclsdZQDOC1uAjEyP4FJywOicmgZE/M5rfzvwsJOd9ocdLzDPW7s9FIhXFEIhu55OY3kqZ6V0SNw\nuJKk+2nISa+F9TH5nEXitbfb2+b0jRmBlpEg7d6e1PfJ0SDt3g43gi6zbmy8/50+3qMTIO9cbzDS\nmS76TU809YP6qu/7lV2WvvnP+9755nJNefB+E+ASpF3GUjnkE0f+5JOWAWpS0o0MeevHfbjRJH/c\ngIhH2s5/dXU17D5lPaJzY2OjZrNZ7ezs1O7u7vCZzWa1tbV1Yzexp7WA3vf9q1X16tPfx13X/Yt6\nAtQ/WFXf//SxX6iq366qn6iqv1FVv9T3/UVVvdR13eeq6iNV9Ydetla5EJyp7GKeN5xgQK+66joM\ncHl5ubJipurmsZZo4w1hpuKys7XDLgkqt/cyrsd2JcXTtys1+ZC8pgTuKY/o07eGraLN43nKyzWx\n3hdukMiHxDsHRSqRt8+BMSlvywC6A6A+SOWwXem/g3163vvM28RRp8CwVS7LdJ1QPekAuNQPKkcx\n8/Pz85rP58PBW/poYo70qhxfgSZ6tWbaeUt+MD/boyMH2Hdp4xhDouxTl1PR6vWrXC535KhZxx8I\nXNWWrlt9xZzKbIWAvW8EyHqek570zBOgb25u1s7OTj3//PO1t7dX73rXu+rhw4e1u7tbe3t7K2vZ\nW+lriqF3XffBqvpwVf3vVfWevu9fe9oJr3Zdp+nXb62qP0C2V55eu5FaAv20rvgcO0/M8TAKhYxW\nnsLuwyqvMykdDYh7arrm8c4WEDsfEm9S2GHseedXCyySMXGv3SeYWp5hal+LNjcubhhSGckAOi3s\n+1a9yQixDuchFdbp9nxOm67dlj/raEpGeV1blAgmZ2dnKxOivmmoxQOnQw6A75T2fP6RXDF84m2h\nDvsyV9JBT71VjrzlNGrj25tInzuMY05AMuwCZucZ//s1GkCtbFF4ZTqdDh8/DC6lWwN69yTc8j/X\nk5j4cdd1Lp23l9aniZMiAkIRn2aj0ySQOowgX3UdF9c91aPkQ0mCP2frdU9re6tqRTDlSdATUPn8\nsMNJJzuXQ2TvfAcPb4P+MzTCtd/T6XRQbBpHtp/AJ8+OfFWZoluvNlPsT33mB5LpeXo0vruuBYbK\n63xiXzr/XHZ4jeXpOmmk95f6i7xKxpsAxuV/aWLO63CnQ9fo1TtAVtVKOEJJ/7XWeblcDu8EfeON\nN+rg4GDwljc2Ngbv0ePfjAlTH/SZzWa1ublZx8fHKzLN8GjXPdniLj7o42Cq9vrcTdXqy3DGwni6\nx990jlgn4+jEIu17EW+VT/UJC+gsikcCXe7h4FwBZUsALh3a3t6uhw8f1nPPPVd7e3v14MGD2tnZ\nqdlsduOogla6FaB3XbdZT8D8f+z7/tefXn6t67r39H3/Wtd1762qrz69/kpVvR/Z3/f02o30x3/8\nx8Pv97znPfUt3/ItNyb3aAk5OUcme2yaoMTOFNigXdGCShjlvSRhcK+DAuE0j4Gc05KAzPN7aIH5\nE9A5kAqoBfgUPJXN119VXYerOHGWJn6V2I9K9M7GvOHEC3/O66RcuNeu32ljE/ns9NII+CiQckf5\n8T5xmpPRIgi51
8eyXZa9HZR/ORpa0XJyclJvvvlmHR8f1/Hx8eCdt8JRkiWBPGkgsFEWlF+ykiZC\nnf5kzH3E7KE/0uj96bF4OklcXUJQd+Ph9Hl4kKBKz1kxbh4XQL1KdGvj0t7eXj333HO1u7tbjx49\nGtaf7+7u1uc///n6whe+sMLTVrqth/4/VNVn+77/b3DtN6rqh6vqp6vqh6rq13H9F7uu+5l6Emr5\n9qr6o1Tohz70oRUhcCWhNScz0sQFO4PP+scBhcxV8k4gkFKxU30pz1hHJKVN11K7UpJXlGirurnW\nNs3yuzDyeYImvRd6rvzP/kxg5gDmfGVbnXaW6c96Oek/6SLw0xglcGZdY7LAfA6GiV4v38tKZbd4\n56C+WCzq5ORk5dVxHlZLhjQ5PaxTwO994+Cq5zl6GmufnnUa+XwCdKfXecpltBrRMiWj7Y4h+Swv\nW/F6etItfpJ2hWhms1nt7e3V9vZ2bW1tDZ/pdFrf/d3fXR/+8Ier655EMH71V3+1Wuk2yxb/alX9\n+1X1f3dd93/Wk9DK360nQP4rXdd9sqq+WE9WtlTf95/tuu5XquqzVXVeVT/aN9CHjHIvwzshebjM\nTyFSPj6fvA96RiqPMTcOcVQHldLDKcpDL4NlE9zoISSgdCDQPYJIKz6o31yOmZSA7XAw1nPuvVdd\nn3in+8kDo9Flv7AM8tS9a79edb3ihTLgxjqBYgJOz8skGmicvX/Hwncul9q/wBATeZwU/urqamVn\nosr1tf5uTHSetlau6BVyWqaoiVHf3Zw8dV3ns+w3LQBwvtI4ynvVaHC5XK7wS/dPT08HuSA4U88U\nDqF8iU4PryTdIJ2UZ9cxlk8ZdZwieE8mk2FJIRcZMLTEkORkMqnZ/9vetcVIdl3Vtaunu6u7q6en\nLb+k8TgGBWeMhRQ+4h9HAiGILJASxAeKQBFBQkKyeAgQhJiPSAgp8EEifvjhIUXhESEkk/BFEkV8\ngMgDYhND7NhWZCBhPJ6Znn5VV78vH1Xr9qpV+1S1o5gal++WSlV1H+fus88+e6+9z+MuLmJ1dRXt\ndhvr6+u466670G63sba2hvn5eSwsLAzppkdwGZ1nlss/Ayjh/B8t3PNRAB+dVPbg2tRoZdcAo0jL\nUZ6ToxgvM0NEGY+lsr1Tnae8rIxxqPu85ej3pGtLz1Jjo23iaJ3H2dEUTbkB9xkb3nnOW+9xqDFD\njtm9WZmZDo3jKTOkmdwph9I885IulxByxlMGVDhWwimKzKFztouj1KwuWRRBUoc+jh8tKyvPEW9J\nh9XplBB4Fs3rMzNZj3PoWb0U/OjzstSjl6t6RoM+Pz+PxcVFtNvtoYFQ5swn5cszmupKUQ4eqLcj\n+lFEQEXNhMowJJt2l22apYJVQ5U1ir7TkCGVblTkXhwYnovqpIaO5IMtfHYpraQdnrLzlBSRgCJz\nlaPL2aMMzpvVdIyOWzAHT9SpiIholO2i7apy8FSPIiBF4lnKRiMa6o475wwAaJuroXEDQ95YR/LB\n+vGZeoxy0KmxHn1lYzDOo8+7zvRH9YXXsd1oxHd3d3FwcIBbt27hxo0b9fJ+DoqzLPLLQdGI/uZS\np6en9Y6L1EvmoJeXlzE/P49utzukf6rfEf0BU7YH0zyaVyfvx8fHQ9eyjXQ6pSJzj6AvXLhQ56I1\nevKUoOoa7YU6Jz3u+sPn8aOLgPhc3dtc8/7UEfYNbrjFqYlLS0tYX1+vkTmnPWaLoibRVA366uoq\nbt++XTde1glUCdRY6Xk3fjQmWUNSMNoR3Fj6+WzAR41h5iyosFlUwN/qoNTQsA6KiJ0/N14lY5bx\nxkGbLJzmbB2+SFhRnxtZ5fHo6KjeZY4y8zRKtihLjUuGSrMoSHff9LZzNOiI15FTCVlnMi7dn8mZ\nclJZUS7Km/4uzT1Xp++IWq/nQDYHQnu9HjY2NrCxsYHDw8PaQPs9mpaLCOzv79f
8qHGjoybK95ld\nLvsMUJQGCF1mrr/876k+fR6BmbeTjgnxGI1maWsKlqN9WB0Iy2u320O2i+3FtlC5ckYQ0fj6+jou\nXbpU79ei0xNLQGuSYX/9mP67SEtLSyNe1I2wD4gAeZrGqYTMS8jNDVQJQTkPpf+ZIcr4Uf5L/JXI\nkes4mlRvVWTP+yvCUN55nCsQHdH7nhUlo1cy3trB/Hgmi0my8/aaROPaoGTE9D53tuchbx8tVzt4\nZtjZFoywdL65TlfVbzXm/Ci6zJwr2zart34ykJO1oeqMjjHoOZfRuH5FcpuijsQHRMf1iyy9onXw\nvqH9RZ06DbamWRYXF7G4uFgj/qxPah097ek0VYR++fJl7O3tYX9/v36fHjDa2Kenp+mgkoboFCaP\n6ZQ7R5ZEJqpsVGTNd2rDKIIl2tRwTYWvA2Y+q0Y7Ip/hoZ8qEuurfGlZigBYH6YJsoUZwFnoTHlo\n6M5rdY6uvhCB9yi6Z1tQPkyBkW/WWUNQDqjxutPT0zrs5vxoT09pe2lqo2RIPCWmGzy5Q83QuDqb\n7Ld2dt7Puis61ShI3/PqbUo94HVqNKlvmdNX3eWgJ18px/nhKjO2qTvYUr20f6g+sW31dXCaAvOo\nrtVqYWlpqY4UtH6a3tQ6kdSQ6XNVfzV9qlE/j1P2uj5D2zVLd6n+aAqXg7mK/ikH3RKBg70XLlzA\n6uoqFhYW6tWf3HCLA6BsY++zLotxNFWDfunSJXQ6HVRVVefLgFEEoYYkI/damVcHRtHGuOsy4j2e\na8+QfIZOtG56T1amU4b0JtUxq1+GZNy4+UcdFXnN0LCvICyVnf32OvjvktEtySJDcZl8Xw96znjz\nMh0NajuX+FejV+IrM+L8plHhviz60X1DtKwSCnbnqE7AQUmGJF3fSao7PgMl42GcTmTRySQd9vpl\nvPogrcvDx+48vaPghM+j/Gi0iciJypliIRjyNnfQkfV5paka9AcffBA7Ozu4fv16PQjnKJeeFxje\nXlINsu4JA2BEqNogjv60HCJvdSQ0ThqiKcpznvh8NrCGaXqMZfE+XS6sA22kUifhOf12xMVjeo5U\nUn4Nf1kuB9w8yqByHh4e1gqsg4msi5bHPDinZili0w6veXcf99D2oyxZJy2H1ymK4/V+jZNGAVpO\nyVHyHZFEbGosFJWrE/f8rbc3edDBQ9UVpla2trbq7XBv376Nbrdb8+PT6NjXVC9UV9mfaGxYDtvB\nZ3c4wnZDT6R6fHxcr7LMHBuP+3nKXfPeKnfWLwNq7pxKctY+SF3yNFcWSXsdld9Op4O5ubk6T764\nuIi1tbX6/9LSElqtsxkvmiUoOYdxNFWDfu+992J7exsXLlyo92RmOOakuVgN09iIFIS+eoukxpdz\ne9Uoadlq8LUzquIAGFIC7SRu2MmfzkihYnp9eA+VxtM/WWPy+WqIsxWR7rjUSOucfD5XQ2du/cmB\nUnWoLguG89xb2xGQG1934u6cNJ+r+53roLQ7ea23ys0dgz+L8tdv5Ul55v2eUlO5uwxZhgMS7bCs\noxp850/rdnR0hP39fezv76Pb7daGnAOhviEUy9QBbI8MVM+zNmF6QB0FZ8ao/NQBU07qDOnMPc3C\nY7o1gctJ+6im33TwU/nTPqDAwsGayoDARMEfja+mEdn/+Tk6OqrbcmlpCQsLC/ULKhYWFupvzpDh\ntZp3Vx7U3k2iqRp0Tqzf29tDp9MZmj8LjO6bAQznUUsG0SlDdY5O/P7smJflOd5xlPE1ydtm5Pd8\nJ2VkZWb1VUPo00L1HlVE7ZS8V52Nt5U/yymTvV/v7euOq1S/rEyPEEo8ud5Nagd/pj7Do43MOZTq\nrA6UbyDi9ERts1KUO453R+/j6qSgKZujrrLN6qV8utEtoWo/rve5TrgTZTmKuvUaOjRP7/AY66jT\nQFUeOsWR0yp19ae+KMfBSEnG56GpGvSVlRX
cd999aLfb2Nvbw/Xr11FVVb1xkBoJRaKKfPhRpMxr\nMlStjQig7hCeGuExonb3zIoO1VCpsdeOo/+VF96j3xli0jL0ejew2qHc+QHD0YYOZPG5nmqhLDhV\njbJwlM/oh+Fwu92uw2N9pg9uk6eso/t/31uGPHt9FARoZ1Fj4Q5dowwaGB0MdoNCorwdvWtbOjok\nqtTBNF6rz1O+vOOTv8PDQ/R6Pezu7uL27du4ceNGbdxZpr4URg2E8+T6q+cVUOmguuo368M0Qq/X\nq9tXUbU6F5av2xFof1WDWzLulLu+yFnL57xxbjBGHZ6bm6uNbKvVH7Bl2/Ml2q7rlA0jkps3b9Z2\nQW1Vu92uJwgsLCzUe7NwN0Weo91yebsOKLgaR1M16BFR7zJ26dIlHB8f1wjDwyVVLCqpKqcbQTV8\nelyv97mpTuMQpPKTefGsjOy4hm6l6zw3OM5rZ3yUEKnmCdW4ZwZWzwH5HuOaCqGx0fnH+jytv/Pp\nxzJDWorQvN4l1OvP1A6UofZSFOjXj4tIlNyZZihwXL1ovDTlcnBwMMJ3Jrdx0YTf46CEzsj7mAIB\nLjzTttG2yp6vz/M8vOuj12OckVPddD1Qh8L/7Is0ttnskogYGfMj7xFR781Cw879WVimGnEdCHUb\nkOnsHW3QW60WlpeXceHCBVy5cgVra2u1t97e3ka3260H1ChYndSvqEGRuXtV/2aHUIOehU2uhJkS\nuZJm4Z4+W6/Rc45Y3bho+arwTiU+spDOOwv/z8/PD40laETEtqD81GAfHBxgfn4eVVXVb48nGmH5\nmiNmeVonN2Rad3UmHt1kxl7PqwxKaNTbRo2YtrvLUw2ClqED+942jvwV5VJODlL0Xs4z393dxebm\nJra3t+vIVpEz73UDrbJxY6n3ettraqLVatWRtL6fl9GBbjetvPt+4c6Hpjv40YWC2i6eM3fdYN/S\nMZesD2vOvqqqevk9XxvnjpL6rXXhatV2u13nyTudTp1LJ2rn+AVnuVAurluusxExNBswo6kbdA6w\n3H333eh0OrWhuHnzJq5fv17nA0mqABx4W1paSlGRd3Y9T0V1dKr3ZPdr51Old4OfoV0NPfU8/yvK\n4fX8rWF9FjmwHgwn9X41XBoRaJjrPBFJVFU1hCg0LaPXE5npoLYiFhorDq7qUm0NVQHUHYXnPCR1\nZ+ht4s6QH0/L6cfDXC3fnao/Q+91Y6F65YZW+4Fe52k5dWS6cIiDn6+99hq63S62t7fR6/WG2ljb\nSTfKoq5Qt9SY6P7rfC5TBG78SUw9Me3GclmGOnLmnVWfCQ7U+aneaXSg+uaRNuvr60e8bXTpPsvz\niN0jEV/FSX7Zj1ZWVmpUvrq6Wo8R0nhzEJRrMLJoRG2AA4cMsTtN1aCTWOGqqrC6uor19XWcnJxg\ne3t7aF9uVs5H3jPUmhl2/VbvrMe887JM7SSu1CpoNzjZf0eg2W9/dnbcy8/QKv/7sczglcpRR+a8\nqzFj56JB1o6i0+QyQ5rxltWFx7LzzqeX4/Usle/HJ8lT20h50OfRsIyjTE+8fjoIyvw5XyPHfgSM\nOhS9vyRf1zV12DplTo0/MJom4bNUn7L/2cK3DHDps0ttqPYAwAhw8uM6hpb1O3XA2fGsXWngfQ8j\nB3JZf8z00PXnPDR1g87KcS7z/fffj9PTU3Q6HXS73RpJZnOZdYDNywPOUG1JcVyp1XF4GoLlckqS\nK7ij4XEfkip4qUE9PPY6eLjviM6PlRyYdnR+PEJQ463PV+RJtKJvbtEIS1cRAhiSZ2bo1WFrm2RO\nJzPgivCza8a1BcvInIaibh8UBYZX8fK/HishL41EPD2g00d3dnaws7OD27dvY2dnp0bFHKzTFMPC\nwkLtCBSZZ+k2jRIU1av8WI4aXZ94wDZWp8566ypNNXh6Dw0jeVDQoOWxHloej9G5kS+NNNh2OvCp\n/I9LAbo
T4KAqbRj3Ndd90pmJUCfD8vgMtqH2AT5L013jaOopF51n3Gr1c+rr6+totVq466676jCe\nA6XA6AwR5uW8A/jiiVbrbCSbAuNxXq8oyj2kNqw6FjV8jij0eld6/ua3rspz5Oo8qNHNULOWTYXI\nlITXqWFWPhiW0hDoZk5Ehdpx1bDT2DHcBs6crHZO5Z3Xq2GgomuomjlHr7sbF9UFlZW2nfKl6Evl\nzPs0peHGmfeqnD394Zs6AcOLW/Sb6ard3V30er367UPchEvz2Brqc4COxuv4+LjeidH1WFNf+hpI\nLgbyOqjh060cmP/f399Pp+d5TlrTngQO6mx0wRllwfZUvXV5qTP2VAYNri9Sos4QLLJe2sZ0DCQa\na5bJHLqWwWtUV1mmPreqzlJSTK/RObFdxtHUDXqGmjjN5+LFi+j1etja2hrK9/I6FYai9Ow8MDwY\nBQwPZmVoSY2E5/KcsjBKyyjdx3tLx93wj7vezytaccem/GmEUaIM2fO/ohr9qOHLDLCPGXiEoM91\nXhTpqIxLssjQu19XkrHz40Zb+VS983JKdXAe1bDxGB0oV4Xq24fUeEWc5Wsd1ZIfnb2RRbdaB+0D\nWftoXVxeWaok65ee69c8uZfrMszsBzA6+Kt1y8CUf0p9nYZW36PL37qMXz8Z6MraX/+ro3W+xtFU\nDbqiYUWvOvNlZWWlDjP39vaGNvTxXJ4qhs4S0OX8unRdOwsVieQGiqhqcXGx2BAeugFnhssHmvQ5\npbSK5zN5jYb7POa8kBTVKArk/UQ7ulAie6Z2GjfEalg5KMtrtHw1Ctp2Xr7y6h3VjR6vz+TCTqn8\nehRXkhtJ+VV0SX3Q+eR8pnfYjEoGT8smiDk6Oqr1f2trC7u7u7h582Zt2BXFRQTa7TY6nQ4iAgcH\nB9ja2hp6DtE302EuL0+T0IiVdMHrynNM/7hT13QQ/yto8r6iKRV3Mg4W1KlR53RGkfd+ZbMZAAAR\nWElEQVR91V/Nc2ta1aOAw8PDWob6QgruHquGnfaipE+6ylX5538dhFUnVaI7IoeugypU6Pn5eSwv\nL+P4+Bhra2vY2NioFVsFwntYFskRO89nysgyMjSo+fNxdRiHsrWMDCG4QXZ+vBNp3nOc4dP6amRT\nGnNwnifVRVG9lumIjuQ5f4/QxqEx58PrXeLVIzovP0Ngk9C1Gwbnye/1sYcSD14vRWs6EMrUF9MF\nPp6zv7+Pzc3NOqXJeeHKr7aFk/Lr9zlfyn9JFhm5bigi1qjHwZsb9Ux+486zXE/3kQfljSlDlsnz\nnKFFnn1VqJ5TynTW+7m2i45haLnjaKoGXZVDGWUF1tfXsby8XL9tpdVq1QspfEWdIy9tDFVQRWq6\nvwb5Ud54jrlQ3q8oZhKS1pAr+/iz3AirsdPOmJVBXnzgjQhLz7tzU5757Xlj1k/r74iY/FG2i4uL\nIyhPO67y4x2O/7PQOdMh51Pb3vlTmes93tHVeGu5Gh1425cMt59TRA5gBBWyjajzOzs72NzcxN7e\n3lC6hXuiMBo6Pj5Gr9dLZVWi0nWa9/ZrGRm43vJ+6mGGolWv3Zi7Qac8OI7jbaO/9XnaT9w5cPxH\nNwpzMKXPjzibzqk5cS4c0tWhXFTkdkH1Q3VR25l5cwD1hl0+QDyOpmrQdTDNO0Or1UKn08Hy8jIe\neughHB0d1cZ9f39/qLFoFBRx0PCqMqrxd09HYfFaNqiHg9oAmcfVjqGKqkrsDsQVKVPO7IXBej87\nAQ2380iZaCfTvT5KDoflMDxniO6ycQdDJLm8vDwUsvJZHFylzNWZqANSZ+poX2WYoXF3XFq+Xqe8\nZ8jN5anOic/mMR9U9/Z1OfG4ojHew+1vadC3trawsbGBXq+HnZ2dujwaAT5f9b7VOlumrukVPp8G\nVBdBlVBuSUbeF/xFzt5u2SCx6xsNKEHR/Px8nXp1fhQcsN21/6rcqfua3
iHfHNDNtvjgM8nP0tJS\nPVbBxXOcZ84dMckXy/GN/1QP+KYpBUdceETkr+1Woqka9BLS1E7KPFWn08HFixexvLyM3d3doQ6l\n3tjDFyUqI89nwvHOrOU5fxmV0KKez56pysbrSgZfyZ2TO4MMNWZ11JkH7JREfOoEaTiVFzWeGhbS\nqBBVcaCOfHsOmeTIyo/rMzK0nRlkXusy1/8ehfi5khFzWbrMvR5e3wwZsg10Owy+BMbbSsEMv1W2\nel7lr0BAoxCVZaZ7bBvOfJmk33q/gis97lGQEiM+d+L6e1wuXw2z9gsCQdUnR8EKsuhgdB9zfkob\nbmX6rf9VLiobB4MZEMhoqgbdQ3DvxKxUp9PBPffcg/n5eVy7dq1e9kzEwT0sFNGq8QbOOhXPK5LS\nAS83qNqgmVHX866QWfg+qZFVubMOlRkeRaeaDlFZcIBIZUIlZqhHnon2/A3xPuUwy5truottzGXS\n+iJglZPLQe8nyuQztc5aBuujRkPbhik2bzdNuWg6zufLO9+abiiRGzrVb0frqq9E09yfZXNzc2iK\nor9Ozl8Hp0abbaByUBlSd9QYuZ5mqDwihtJpdNqZvrJOp6enQwOK6jTUsagesAx9x62DLF7L56u+\nqOwzpK1AhOd8TYQ6MKZBmFbhjDyd9eJp1qyvqIzY3nQumZOYpGukiVdExAMR8YWI+M+IeC4ifnlw\n/CMR8a2I+Org84Tc8+GIeCkino+I95TKpuHwF9VqZdnBOLfz4sWLWFlZqcOckhcrodvShx5Sl/+6\nh5/00eec59latjZ2hghLz1H+3fHoteokSnVytJfJMkMK2omooCwnW7CkDiOrK3nOBsB4LuNzkrwz\np5vpm8qzVO+SrmWGW59dMuZ+3gdBfUC0dN+kY9nYiBt4YBTx6nnVFY0Y3Nhmr7nz52YfbXdGeJpm\nzPpQqX9p22ZyUH31tldno4jc0bkbcP/t8taP9hUdWM22PTgPnQehHwP49aqqno2IDoB/i4jPDc59\nrKqqj+nFEfEIgJ8G8AiABwB8PiK+r/Iei7MXVGhuVRuHAm+1WlhZWcHc3ByuXLmCiEC328WtW7fQ\n6/Wwvb1dN7oPxHiOmELlCkU9RtTlgtdOTF7Jt1/rOWUe4/283mQ29D97Nq+jwuu1PjKuZei9WVjr\nxtmf5Twp/yxPF02ogjJ81Z3ryB+RLxGbDoS5Y8/QuMvRDYHWjzLK6umpOuWhZHzcgLs8XG48xxAf\nwFBYr2XTgHEREacq6stf9BmKnDO9Ul4z45CF9uRR02Kqx9q3sm/XSW0bGkjVR19BqcZbF0xl6T+9\nx9tUoy/+18icdVQboe1DXn3hEFeE6jFPIWobeJrS+wHlwudwoNYBK8sbRxMNelVVrwJ4dfB7NyKe\nB3CZ/Ca3vA/Ap6qqOgbwSkS8BOAxAF/KyldlcBSkAuFUoStXrmB5eRm9Xg/Xrl3Dzs4Obty4gZ2d\nnRrJMOfGj4ftaniB4dyrDti4EgPDc2szr6t885x+Zwa0ZAQctZTQlT5P+fEO5XVV48dvfaOT10M7\nGpWdbcOXIyvq1ghsbm4uffkzB7mqqqrnx2unZznA8Au3FUkqKRBQ2fnYSeagvD091eMIz9vPDaie\nz1IM7nyYkuACIi7vv3XrVp1i5PoKdZyqG2qYnP9MZ2g4VB9Ylg+QaiqLpGk2X+GsS+xJfJ4adTda\ndHwEaZoq4/W6eZy2lSNrbR+CC+oi9xzyKIN10P3MuY+5o3MaX9UJ1TFP4bqc+E1jXkLl7shL9Lpy\n6BHxEIB3om+c3w3glyLiAwD+FcBvVFW1hb6x/xe57ds4cwBeXs1sds4909zcHJaXl3FycoKlpSWc\nnp5iZWWlNipcaHF0dIS9vb0R1OapFJ3Kx2f5pvYaJYwLnbwcfY4bA96fGQKViyNx/7gClcrJeMl+\n+zG91+ugz6MR4v2+Wu7kpP8CY07Bo
uwZZtIg6Lth1Ri7s8yc5zhFL8k+uy67T0nlmMnGnWRJdplD\nYWfXFaEHBwd13lydn+ue189z3s67OpdS3dWJax6+1Ae0PnyGR8fKqy6aIWnE5Hqr9cjOl56j/9Vg\nu8F3GUXEyEucfV64P6dUntZBwZXe7+WV7MY4OrdBj3665W8B/GrVR+p/DOB3q6qqIuL3APwhgF84\nb3kA8NRTT+Hy5cuoqgrvete78Nhjj414WhIbfXV1FUtLSzg5OcHa2hoODg5w99134+bNm+h2u7h5\n8yb29vbw6quv1mgxiwKA4TnEDK00rBvUe+jbqaR0ifxQVRVefPFFXL16tVhWZmTHKa2WD4y+ts9R\nh+cOnb/MWGpuVTsrr2VUxLEOzsVttVr1oCgXuhCJE9noniEaHXnKw1MLnsYi/97xSUTELi93FCrn\nrH2yZ/iAlcrxpZdewjve8Y76nMqRxLpRVpxzzo23tra2aqOaGV5tJ40os3QKefC9ypV31QUdU9L7\n+Qz+V72LCOzu7uLSpUtDxsr1W1G6b0Wg7U3H7m3jjoW8eDSufaMEbByw0WgvLi4O6TQBB5G+yvbF\nF1/Eww8/POSkPIL0Yzq/3FNP7BfPP/88XnjhhZFyMjqXQY+IC+gb809WVfXpgRBvyCV/AuDvB7+/\nDeCKnHtgcGyELl++jCeffLIYTnjn0YpSgTjSHhFYWVnB6Wl/xP7WrVsARufC+vJ+ntPGAc6Wn+s1\nWaqF5WT8ZkiCHdwNaKlMVzoi3nHoRXksGScnRS/OU2lgRo27XkdFZMrl5OQEm5ubuOeee0b2WHe5\naud2Q+nP9WNunNTAjHO8ej5rv5IT9bLdqEcEXn75ZVy9ejVtZzc0OsjIKHNvb6+OftwxqxHP+Ke+\nZAhYF3XpmIVeo/LS9AaPKS/eN7rdbv3CGvKideVvH8eKGH5pRdb2Wp4+W0nTg1n6IstNe15e0zs6\nM8f3aNG+/fDDD4/wrM9VoKXtoM5N2+zk5ASPPPIIHn300bqcp59+uviM8yL0Pwfw9aqq/ogHIuL+\nqp9fB4CfAvAfg9+fAfCXEfFx9FMtbwfw5VIFB2WlFedvT4GoYPl6p5WVFRweHmJlZQVbW1t1SM9V\npvocdh5XDh/I4bUlw+YIUvO0itxK9/i33qednc/WDq2oJevM0k5Diy5U0d0Iej2UvA5uyHmM6RXu\njqm8cvUiB6RVvlV1tssc0Rw7kw4csd1Lhk1l5vL02SEu95KDHuds1RGqzF1f9VqVHfml3HZ2drC3\nt4eNjY16IFTbUY2/11uNhfPC5+quiNk1qkeZzmZOz/PPPM59kzTaVQPuYI33+4I3BXDZ6m/gLAJT\nHdCFi9qeaoh9XIZAg1MT/eXOjOSzeqh81LZ4n+Nx5ueZziFfvvDqvKAMOIdBj4jHAfwsgOci4hkA\nFYCnAPxMRLwTwCmAVwD84qBSX4+IvwHwdQBHAJ6sMleLyfkgNzYuEFaUc0IptHa7ja2tLWxubg4p\nHNEtUWNmPLm5lA4+abiYIcYS36o8/O/GweuXlUU+HQV5Z/ZnkNipfEpm6ZklckSjhoLHdEriyclJ\n3TZAH23ohlO+fJ6pm3a7PVT/LOR3Pijr8+hUqSPqNVrnSfJwY6b8KH+OevVZ1L1er4dut1sP8meA\nhjKe1Mm9ndVY+KvqqEsamSp/Gfhie2c6xfpwcyrer5FwBnR0kNV1OksXsk46FqM64zwTEPAadwQ0\ntNniIRpeOkXKJWuHUjvzN/nQcv36LJKYRPF6OvR3kyJiOg9uqKGGGnqTU1VVqYWfmkFvqKGGGmro\nu0uT8wcNNdRQQw29Kagx6A011FBDM0JTMegR8UREvBARL0bEh6bBwxtFEfFnEXE9Ir4mx9Yj4rMR\n8Y2I+IeIWJNz59r35k6mGN3v51cGx2e93osR8aWIeGZQ748Mjs90vUkR0Yr+Pk6fGfyf+XpHxCsR\n8
e+DNv/y4NidU28fnX6jP+g7kZcBvA3APIBnAVz9/+bjDazfu9FfTfs1OfYHAH5r8PtDAH5/8Pv7\nATyD/myjhwZyiWnX4Tuo8/0A3jn43QHwDQBXZ73eg7osD77nAHwR/W0uZr7eg/r8GoC/APCZwf+Z\nrzeAbwJYt2N3TL2ngdAfA/BSVVX/VVXVEYBPob//y0xQVVX/BOC2HX4fgE8Mfn8CwE8Ofr8Xg31v\nqqp6BQD3vXlTUVVVr1ZV9ezg9y6A59FfUDbT9QaAqqr2Bj8X0e+4Fd4C9Y6IBwD8OIA/lcMzX28A\ngdHMxh1T72kY9MsA/kf+fwuFvV5miO6tquo6UG92du/guMuiuO/Nm4Ui4iH0I5QvArhv1us9SDs8\ng/4Gdp+rquoreAvUG8DHAfwm+g6M9FaodwXgcxHxlYjgVid3TL2n/pLotyjN5FzRGN3vx+s5c/Wu\nquoUwA9GxEUAT0fEoxit50zVOyJ+AsD1qr+l9g+PuXSm6j2gx6uquhYR9wD4bER8A3dQe08DoX8b\nwIPyv7jXywzR9Yi4D+hvmQDgtcHxc+97c6dTJPv94C1Qb1JVVdsA/hHAE5j9ej8O4L0R8U0Afw3g\nRyLikwBenfF6o6qqa4PvGwD+Dv0Uyh3T3tMw6F8B8PaIeFtELAB4P/r7v8wSxeBD+gyADw5+/xyA\nT8vx90fEQkR8D8bse/MmoJH9fjDj9Y6IuzmjISKWAPwY+uMHM13vqqqeqqrqwaqqvhf9/vuFqqo+\ngP4GfR8cXDZz9Y6I5UEUiohYAfAeAM/hTmrvKY0UP4H+TIiXAPz2tEas36C6/RWA/wVwAOC/Afw8\ngHUAnx/U+bMALsn1H0Z/9Pt5AO+ZNv/fYZ0fB3CC/oylZwB8ddDGd814vX9gUNdnAXwNwO8Mjs90\nvU0GP4SzWS4zXW8A3yM6/hxt151U72bpf0MNNdTQjFCzUrShhhpqaEaoMegNNdRQQzNCjUFvqKGG\nGpoRagx6Qw011NCMUGPQG2qooYZmhBqD3lBDDTU0I9QY9IYaaqihGaHGoDfUUEMNzQj9H7b50VnR\nNeivAAAAAElFTkSuQmCC\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "img2 = transforms.Compose([\n", - " transforms.ToPILImage(),\n", - " transforms.Scale(256),\n", - " transforms.ToTensor(),\n", - "])(img)\n", - "print(img2.size())\n", - "show(img2)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Files already downloaded and verified\n" - ] - } - ], - "source": [ - "import torch\n", - "import torchvision.datasets as dset\n", - "import torchvision.transforms as transforms\n", - "cifar = dset.CIFAR10(root=\"abc/def/ghi\", download=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "trans = transforms.Compose([\n", - " transforms.RandomCrop(32, 
padding=4),\n", - " transforms.RandomHorizontalFlip(),\n", - " transforms.ToTensor(),\n", - " # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", - " ])" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "import torchvision.utils as tutils" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(0.3371489570090489, 0.24515368371385993, 0.0, 1.0)\n", - "(0.44256409261470253, 0.2971765334316165, 0.0, 0.9960784316062927)\n", - "(0.4061938378436025, 0.32892546338681194, 0.0, 1.0)\n", - "(0.2704159075874486, 0.18337201969836966, 0.0, 0.9176470637321472)\n", - "(0.34992724462032737, 0.2732488478952251, 0.0, 0.9960784316062927)\n", - "(0.3060087387730164, 0.25710693466354395, 0.0, 0.9725490212440491)\n", - "(0.41604116667743557, 0.2388433838705675, 0.0, 0.9764705896377563)\n", - "(0.4606604996988608, 0.24625605326498523, 0.0, 0.9725490212440491)\n", - "(0.4938623460972546, 0.3129965597088279, 0.0, 0.9882352948188782)\n", - "(0.2621004459118315, 0.2239845061390575, 0.0, 0.8549019694328308)\n", - "(0.26454759721430793, 0.11071022852775213, 0.0, 0.5098039507865906)\n", - "(0.4611264388361936, 0.32001783467012906, 0.0, 0.9960784316062927)\n", - "(0.4666066774840753, 0.30674951653607474, 0.0, 0.9843137264251709)\n", - "(0.21249872842918194, 0.2636358923863605, 0.0, 0.9372549057006836)\n", - "(0.2946678490996722, 0.21798154353121305, 0.0, 1.0)\n", - "(0.4658573437985372, 0.28209593857100396, 0.0, 1.0)\n", - "(0.5015995290223145, 0.31443273237117386, 0.0, 1.0)\n", - "(0.3317019086171058, 0.19920514503802628, 0.0, 0.8823529481887817)\n", - "(0.3885838012647582, 0.27673680696400277, 0.0, 0.9254902005195618)\n", - "(0.38839997841690393, 0.22913308841635177, 0.0, 0.9490196108818054)\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW0AAAB0CAYAAABOr2PFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvVmsZdd55/dbez7zOffec8e6NQ8sjpIo0pKs0bZkeYrd\ntmPE6XTSDpAAAYIOEgToDvJAOQiCTpCHAP3UjW50HGRyp5PAhhPbktuaLGqiRHGuKtZ45+HM5+x5\n77XysNa9RdJFiyXJciu4f4Bkcdc+e6/xW9/3/4YtlFKc4AQnOMEJfjJg/U034AQnOMEJTvDecSK0\nT3CCE5zgJwgnQvsEJzjBCX6CcCK0T3CCE5zgJwgnQvsEJzjBCX6CcCK0T3CCE5zgJwg/lNAWQnxW\nCHFNCHFDCPH3f1SNOsEJTnCCEzwY4geN0xZCWMAN4GeBHeDbwL+llLr2o2veCU5wghOc4K34YTTt\nZ4E3lVL3lFI58L8Dv/qjadYJTnCCE5zgQfhhhPYasPmW/98y105wghOc4AR/TXD+ul8ghDjJkz/B\nCU5wgh8ASinxzms/jKa9DZx+y/+fMtf+Ej7xiU/w3HPP8dxzz/HFL34RpdT/7/957rnn/sbbcNLv\nkz6f9Psnp89f/OIXj+Xkc889966C94dxRNrAdbQjchf4FvDbSqk33nGfeu655/jd3/3dH+g9Py4c\njYMQf+lg+9cK73W+CqmwlMSSGQCvf+vrvPj8V1nsNIiyAoD6yik+/gu/gu1XsYT9I2nf0fgdtfPf\n/a0P6/bkgrKA7kIbKRMADg77NJotkizncL8HwNLSElEYMR6PEaU2BNdWVxgOh0ilkI5kb2cPgIrv\ncu7iBfYO++xu7+v3CnAcB8u2qFWqAAyHI5SSXL87/kvt/Umb9//lP/87ALzcSxjIVeadIQ13AsCl\ny4/wvmc+wc2bt/jOd14AYBRPWVrp8tj503h+DYCNvSHXbm1w4+ZNJqMxs8kUgDIvwBUUSMqyBMC2\nbYS0ubLQ4udPPQrAd/wWxaTPV1/8ClfPXgBgvjrPP/38Hx2385f/0w/odpeCTqdLZ65GUNVrbH5p\nkWbT5vqLA+58R7/n07/+iyw9WVBXd5lOJADDMOaRqzXqrYy9vX0ODg8BmMqIYbYDIuLU/EUAquVZ\n7rzs8Y0vvonj6Gdefv8yIRG1ap3zl+YAGIxvcvf2If/4P/w28BMx73+pgT8wPaKUKoUQ/zHwebTG\n/s/eKbBP8DcDpRQoBSgGBwcAXHvpRWaDHjLLmCZakPup5PFBn6XVCkqpv5YFXKu5AMRhSX88Za+I\nabQCABrNKmka4tgOC/MtABxbsbDQotmoIHJtCEpZsLa8SJqlZGQsdR8BIAxDKhWX+VaTeqAF0n7v\nkFanTZKmLM7PA+C6NkmcAH9ZaP+kIXZWAFha7/Dyt3cYOYqWp4XuBz65zuknn+brr73J67c2APjw\nRz7EL/7yz7Nz+xZzS8sAfOvGl+mlORevXmFna5ONe1pIpmlOnMcUSIKqniPXcXFnJb5Q1Cr6Posp\nVzoV3vAsVqpaARBi8LZ22rYWLUmaMgvH+BVFLvUz43yXldUG1aCLtajn6N7c+9kb93kiKKjYQwBk\nlnO4n6KERb26QD6n+5n0Bzh5E8Qa117V73fkIVY+z1y3xdKiXkvtrmIy3CSzJtzdCAHo92O2Nsrj\ndj75kacA8DyPOEnI8gJh6cPFtmxc18W1XeJhDMBkOOT85QuUFNQbes215uepOwF797YZTqfUl3Sf\nvE6FQb/HfLtDu94AYHd3j1lWMh0nTMe6P5ZVcPbCKkrB5vVdAMqoZOP2qw9cAz8Up62U+hPgyve7\n75Of/CRnLd2Yiu+R5wlhGDKdzXQDy5KSHM/3wGh849GEaqWOzdGGgyAI8H0fabsMR2M8RwuZuU4b\nyoIijcnKHIDeaMbhIGSjHzHI9H2ptGi35/jaV778w3T7X38Ig
W0JosmUV17QGsXuxj2yaMaNe/v0\npnrc1y+eZ3tri8WVFfS5+6MX2gf7+tDwvTpCgFSKg0OtVc/PdbCEZDYZ4Vf0BphMR/iej5IKmZjN\npaBeC3AdQVlIHFcL80rVp987YK65wFyzam6VzC12iZKYdKo3arVSoV6roX3l7w3/7A9eRUe1giPE\n8cjYtl6flmUhzPU3Xvo6jz/1ISxLgNLa29sOQAW2tHFcAUKvT2nr37pKIAotDBGC3ILShoLSvFti\nlenxo/qVswB05+e5cCbgzc0bpCP992FhEUqbm7t9ZubAG4UpO/sD9nb7HI604Ll5+y7PfuKTdKvw\n/+zcxbP0u/xGlXrpsn56jfMXtQa7f3DAi9/8NqEXUQ30YX9uNOWnWqfILl6msaI12NvTtwtt16ro\n9jcCClmQ5DleVV/LEkVvx6HGIpanBdwX/nQHSwiW/pbLWk0L7UYTZnGKGENSjins5GgSWGqtsLMl\n6G/qg3hxvsMs7lFZyFi8uKjf48xoW3MM90aMQv3McJZTpO5xOz/9a8+Y+RTkRcE0CUlS3c94kmAr\njzIpyQbamnHted7/7OPkqqDZ1oLY9318yyN9cpVZnLI3HAFw6942FUux1u3QaeqDpLe9TTqMcHGp\n+3o8pAops4yDXshsqvvole/OXP+1OyJBC+3/5p//lwCoMkfJnCgKj09j3/eIi4TM95Fmn/Z7fcaW\nRzWokaZ6UZaywHEskkJQlArX1h0b7/lUAw+hMoStN4tIJdHBAcO9KXf7eiAGCbQXFh6q7f/kD77A\n1rXvAHB45w3K0mHp9COcvnAVgM7yaYKKw43XnufezZcByKcz7NKh2WnhBFWe/emPA3Dx8iMk4wGv\nvfoi0tAWWZ7w+muvMBn1SDPdzzyzGfQjZlFCYTZstztHZ65+v2FKb3QlLIQCpKYGAKRQoAo2rr3K\ntW9/C4A4DNk66HPtzgG5eUR7dZW8yJGywLI8eAvzopBaWzeCSyAeINPVW/774EWWRFoTajeqWDgU\nSqEs/bsoTglcC99zkYWeeFmU5GQoKXGMgHQdl/7oECEEjm0x6GsBMZ5NaQQVkjAkRs+xY1sMR32K\nsiQPI906pQgqwQPb924oyxLLtLMUAgvxNmpKSmkENzzyxDMUstT3mLES6j6lYVk2JYLBaIxUej7r\nlTo2FpMkJQj05rUrHgqBlIrSCP0sy8jD6fF7v/31rwCwtHSBXBUstnL+3n/0D/S1c2sc9A/Z3tgi\nKfR7rl97DX8y5FzVJzcKzWe7beo332Sx7fAz2ZSio8fG9upUWx0W1k5R7ep98noWMqkq5iU0LQ+A\njy53uP3Sq1xtd5ja+rCdZffbCOBaeq1mMqJarWDZ4mgp4dgeo8OEN+59i71eE4Abm6/iNS5w65kR\nXc3C0Jq3yIDcitkf79CfasWvIerY2YzJMOXseheAy5dXcasNwmzK7oF2rb3y6g2ErFFOAtaWlgAY\nHd7DF/fXwsc+8xigD1qpFJnKkbmeNxmBnbnIqETlRpgGDl7Nx615WJ5en0WeI0sQsoPn17hzd0f/\n3gGk4PzZs7SMpp2GEenkJlFUUhiaslYPWOwscLCXYJtN7LnvLrRP0thPcIITnOAnCD8WTRugUtPm\n62w6psTCrdaPzc+gXoMsQCl1rFVXq218L8B1XGoNc2rnMY4rSPoRrusf0yOFkkyjlMB1qPj6FK3Y\nisXuArmyMMYnspdQcQSjh2j3ZDhgvq1NQNVdQjlNVk6fp5Raa7FkhIwKkmEfZWictYVFTq9fZP3i\nGVbXTrG4qE951/Up2lXWTy1TFFrTTpKY0XBGrzfA8YwGIGw68z5BLWY80WadHzhIVRy360jpkygs\nKRClQpnoSunCcPserz3/ZeJ9rZ30o4RXNw8YR+mxll5tNDl99ixGj77/bBRKlSglEcqYksL86y03\nirdp2g9Gmek5TqMUx7VxX
Jek0P3Is5TAtui0Oihlm/FIiKIIN/BJUqPdeDYil2RZxmwcg6/b5PkB\nNjaB41JI3YZcKSZhiO06OI6xEoR4S1vfO44oDktYCN7uBD7y+AshkOL4B2b0NH1iWfr9cZJw89Y9\nbt95E8/Tz2h4DZAWcZ5SNevbch08x0UoSKXeB7PpkDIJ+e1f0Q7dKNJWRmatsFwPWGytcfFx7Qy0\nGwG7b9ygq2Jqxq/cDUOe2u+zXs6gabhax8LqhdiOz6eEj23GSZaCfDqjfOMa9vVbALRkyeXWKnmz\nxuKS1mrv7e0SqpBLzdNM0M/s2u+wZIy2agOzwQjbs6gFWqv2PJul9RlekDG3bqyRNty6PUTmFn4g\nzX09pBxTFgmdZkKR6z3XP7xHPLWo1pdZPLMKQC++yf7dTabThPFYW23hsMR1Cup1RX9iuGKZMx3e\n30e1lnU8x5ZlIZSPr7RF4cSC+CACHKSx+oRv4TerEFi4NV9fswVJVpJmYBOwsqrHKQkFu1t7vP7y\nywSevne4PyA8PCSOJMKMe7XdRGTQ8QOCjrZcbLuEOzwQPzahnc608LGkBEuQ5Tm+EbBlmePZNnma\nYpkNXQ8qRGFIgaIS6E1a8S0ajSrjcUQhcxo1bXIUWcJkFJKMIY20SWxbEldKlhoujtMBIEwPyUXB\nQyHPyVK9WKIo4+zlNWZhSGbMpbmFFo5rcenSZT7yoQ8CsLZ0ilarS+6UVAMfx+x1URTE4Yw0z6ma\nyIZOe5EL5x/ljTeuH/OdaRrRanZwPRhPTFQEGVLeFxoFR5tCoIRCWtrxCJBOZrzy5a9x7/qbzIzT\n8ebGHoNBRJaXnD13BoCPfvyTdDoLIGxj1h9BoZQWkq5ZWLawwXqHeD/mbf8KLlweCeOMquVSYh0L\n6Farg0uJKiGoaIqgLBWBD5ZtUTecYRiFOLZLc67FeHeAb4RcmKXUHY/l+UW2dnVESVmUtNstsqIg\n8IzQjOJjAfpecUR/mNG4f0q+BUopSgnKMg46ZaItLEEYRfR6mru/fecOGzv7CEtSwzhmE0mSlsSq\nIDzUAsVWgkA42ApK88w8CwneEtjzqc98GoBap8mi6zBX9UgTvbc4mBLcvcvPrS5hD7XfwlMZNSvD\nLyX21PDkvkA0bNxpjpKS0vQtp6AMQ3wp8cz7Kpag4QdEQpEfjfHBIZdPncIOx9SmeoweMU65+wOo\n3+U5FuNZhHBsZlW9NxeXG5y9WGF13WN3T7dz+eI8h/9bnckQCqVz9qazPrgWrl2h7TfxO0b5UbvE\njYIstbm3dx2A8XRIfweKpA6llivZrGRhtUVQE+wb34rl+Ah5v5mucTpKpVBFiRyVDA76+towpndj\nkwouXkM/s7nYYe7sGp5Vod7UffYaFVIliFJBf3fMwXVNz7z+1ZfZ3dqjt3+AYxTUMitIpxlpaqEc\nveZHlmBwOKTi2Jxfa+s5aghe/C4PxPcV2kKIfwb8MrCvlHrSXOsAvw+cAe4Cv6WU+itd86OJdgpZ\nCDzXoRb4eK4+eaMioVILKLMEabTAKC1JZcEsnlBN9BJqVKvESU6e5chS4iizUBwHHEHFc4+1b6EU\ntm1Tr3gUlhaG3bpPjPNuB9gDUSQxwvCtvldh3Osxv3yK049pR83i+iqu60GRkxdakF/b7RPdPiS3\nMq6/8hLPXNUk3ceffQalFJPJmI17mvfy3ADPa7LQXWNj8019Lagyi0Mmk552XgHNZpU4jo7bNQi1\nQJirtsiUIkdSNdN5+8WXuf7VF5iNE94c6AW4cTiiTMCvBPzSr/wKAB/56Y8hcZDKRvvQjKauJGmW\nce2NN6gYB+Gli5dwcLR2eSSkj4T22yX+22CjD8lTy4tkWcLhKKLIzOFERlBvIFNJYgRPnhYoS2kt\nW+j+BLbDaDTh9PISa/Um00hv9FGkkAX4VcHakUNse8gsKlC2xf5Yr7kwDB86MkZKdXxIFkJ
qnlqI\nY+FtC47HQprOZ0XB6PCQw16fza0tRhO9JYqixMImqFSYZfoQLYuEWZSSqvKY4+9Ua3jYBNhk6Ptc\np4L1Fgsr6hnLS1mkFejvDQgjPceT126QjGa0HIf6glZUnLaDF3jg1PHu6d8GAqZtSTq9pa0QWx8k\neZ6jSoGrOPYRpK6FFIqiPyU3wqxSCZiJGXI8wzIOs8H07Yea45mDTECn1SFOUrJY895FLgi8Jdod\nxdqajoaJmefP/rzO7Zsp6hf1vFuqAZZD4Ncg95iM9fr3ShevIuglh+zu3wVAOClBvYtdaTPt6/dU\n/BSLPnlUPfaVzcKQ2fS+1M4T007bIo8yXv/iK0QbepyK/hgxmrHabBMYp6G3nEBZoXF6mZqZFzcr\nKfOSjWt3+dLnn2e4ZRTUmWI591lurYKRIaNyTD+QhLbNONbXxv0pti2pN21Eocfz2Q99iH/5+/+K\nB+G9aNr/HPhHwP/0lmv/APgzpdR/Z6r7/Rfm2rtiFupF2KjXqDTmcVSL/T1tAm70Nqi0qriqYBLp\nDZ3LkqafUypJdERwFDm2SlACVJmRJXrzKuEipMJ1JRVPC3gLC9exSbOUwKgqK90G09zlYZBGIXXj\nxGrOdfnAU+9j/fwlpsYiuH57k0kUMRuN6I/0ot7dG9JsdcFK+aPf/z9xf0tPxCc+/FFcN2d5eRWU\nFrqj4ZTvvvgyjutTa+hDrCgV2WyEbWkHJEBZZvQHveN2fe/aN3SfmqfILZtau4Xc18TPN/7k80z3\nB/TiGde2tXYU5gWecPjkpz7OL//qLwNQbzSIsxQbKCmx3uL8+tJXv8zzX3uepvF6/5u/8RucXT+D\nUgp5FOxgOwihiQfrXYRixYx9FE4pioJ6pUYU6X7EYczKfJc0i2kE2nz0HB+n6qPsnCTWHnvfr1Ak\nBXdv3uGpS+fpTXU/bWUhhUOvt4tjlnKR5oxnITkFg9HUvDtkbm7uvU24QamcYytBCE2VWFiI0nTe\nAtd1SbOMWaTX8a07t9nd3SGcxaRFSWmslFIpmo5HkimmWXk8xmma4nk+jbo+GPM8Jiug4gZIoSM9\n8jJ723m4/ZqOqpWLp1hYrtMY7TJ6UddoS6IZ7UsXqa2tYS9qIUM2Q712G2u+RjGv19d0NCM9HGPn\nNoXvk5pDo7LSJHDqRDduQk+PseO52M0KU1vQN05xpznPeNjjcG8Px9Lj3q+9nR6ZX9Z9UjkI6RHO\nIpbP6GuNto9KVyjcEZWGlgtBMKHWddjcqrO5ZcRSMaZSrzPfCCjCAqvUWqjKItI0pExtVKGtrkJB\nd9lmvN8jzfTYtSoORTzjYCdkEOpDyPYDbP++DCiPrFe3JO2FjF/eIN0z62Y8oe3aSNslirUCsLy4\ngBsponuHJLtae3cCj4PhhOsvvEx1lLLS0fRI6CmSJMZGkpnfV8qQhUqdApubZn3eGWXMVeaYr7qM\nh/pgml9q8W74vjajUuovgOE7Lv8q8Hvmz78H/Nr3e84JTnCCE5zgh8cPymkvKqX2AZRSe0KIxe/3\nA2HOh4rv0Kq4zHrgZPp0ft+Vp3j+9RcYT6bkJuTFFyWL8z5FDtuGp86siLpfstZdoNVsIowzsNVq\n4QqLLB6TGq1Hc8YWwoJGXWvfa1adSfZwXfZ9l9zW3GpcqXNnEvO9v/gWg77W8rd39nFtgWtJ0mPn\nYsZK1+Fg7x5N32M60hrjjTt3WFlZwHUdVtZ1osPq+jIbe5tcf2WTxRV9Qt/d6EEukZmkNNldgefj\nO/c1hGu3XgHgjtqkWm3SqTe49TVNgu28+ioiVry5t8cg1FpHqUqeeOpJ/u5/8O+wekpPl0Sy39un\n3W7TalXJjfVwMOjxxS99iVdff53mnOHYKi4f/uCzLHcXUaXWTlqdOebm5o1j88HjunZ6HYDJeILj\nuqR5jufpe+vVJgf7ezjYtFs6xMx3bXBtvKpHFOu2F6V
Ns9Vle2uHMIy5ekXTTa9cv0mWFESFpFHR\nz6xUPOzZlKzI6NS19u6onMW5xnudcj2Hysc23LujJI4Flu1w5DcI05jh3h5b21tME02D9AaHWJaN\nEBaOY1NyXysPycnykjjX67NIUxxh47s2hXG4JvEUP6gjLXXsNpXviLSs2EYrJuRcax6xE1GaDNOl\nJ6/QefYZBvsHRHd0TLpvFZCGcDCFKzopqfrkaYb/6ksEmYXw69Qu6moU1kKVXNmMd7axd3Q7G1mO\nH3TwV5fxm8aaqVcI+/v0Dvq4htcdviOh9tHHHtfv8ivYqorn1Gks6PlUToJQa0yTG0zC1wEYzzZo\ntB9l89YcX/3zewDMLX2X5VNLuNY5mt4yrq33sVdZBbfEHx0ghe774XCb0WBKv6fIEt1O6dQoUh8v\nzekaP0h/Mka8JckwMxRHYEFd+ARhwXB/cDz21XoNz7Pwm2Yt+YLhoAeiRErdH8f2SNOSU0GdxYUG\nM5NfMCsTiiwiiqf4hrZd7jbwJMRpycbQxLarBE8W1L0mhWlbr9fn3fCjckT+la75z33uc7xg+LQn\nbYfFdkKRx0QzLXTFyMexFEIoqoZf+/DFU/zm0+fZ2p3xj/5Up+RuxyVVpyAJx1w8s8BqV09Enic4\ntkW11TrmLsM4ReBQbVTABMsXIsJpPNzmrVaXOBjpib25ucnrr72K5TqUxjkZT0NsSxKnE0ZTLZyn\n4Yy7W29QqzS4cuEKGGH+ta9+iTPnznH5ymXmTQagHzi0mj5WMSZMjxxnKfFoSlkmBBU9HrPJlKah\nTwASvV4ImVIWJeNrt7jzghbaapayNU7YGk2MgxL8huDqM+dJ5JhvfudrANQbba7fvI3reayd6h5H\n7ty9u8lBv0dQq2HV9EZ58fqr3N26x0KzzZnVUwB87GMfpdWugQLLup9l9lZUTEJFrV6jlCXTaUJQ\n0RvAtQsmxQglC7Kj6HHLYjIZUicgjg33LaDZqJLncHjQO05PF9jYwqIsM2omDnZvcIBnKU5fOIss\nCzOeEaWUPAymcXKcSOMKiZAlSZowm2mTdmdnk8PDHqUssHzTdyGRZY6DjSUcMEK7pCQqChACaTh+\nzxFUPQ+BJMtMjLkD0iqIRXacJBZnKW+FV9Xr+wNPn2EhsBidatJe1/ltWQhb3/4ecv+A/EA7sNOl\nGl4gcMYZ2Y6hE+fOUvmpD/PyrdepOk0Cw7eq71yjgiDa3qaSmcQ3z+Ub4SE7mwmPn9Xz3mk0KTNF\nxa0hK3rc19fX4OX7pYcePfvTAIRFH9/1adY6lJ7O0tyf7JLFMWncIzPzUhQJlzsWeypGjgzH3oi4\nd2eDbGhz9dwSDUN9lqVLnsX4ToO1eR05kwws3rjzJrOJYr5hojpIkJlD0wvITUz1VApSE0QAOvcD\nwFE+B5u7bG7sUaR6PFzPJQiqWECnrmXN6PCQKNym0agS1LT4dP0qeQEIwSwMCRP9TN/3sUTAtAjJ\nzIatVxtYpSTPXQpD7agixRE5d3a22JlpBaD8owfvJ/jBhfa+EGJJKbUvhFgGDv6qmz/3uc9h3fxD\nADyvwiiVbE32uXeohZwcWggHGk6VpbbWAi81FwkGfRadjKYJAbKlg8JiMCm5vdVnafmceaZgNJyg\nXOc4vK8QNklaUFG6FgVAxQuozM0/VEfbcwvc3LwBwO7dO1TdlHE4ZDbRXRZSMprOGMUJjuHKFpYW\nqTRarJ19ivXA5s5LX9ftFxl5WXLY6/PEEzo55+Kl86yvdKl/6P28fE0v6jQJSF2JpHkc5re3t4Pn\n+8ft+uZXtYaSewlz0qJ9mFAO9XgOJ1NujEIiwBzwPPW+R6guOPzxn/0BO1u67bkEYQd4foDvCUqz\neceTmO3dfa4+/gTdC9pR1O8d4hSKjd1tWk29Ubd3NoiiCa7nURQ5D8LOnt7Ivu/jOA5Jkh/zoBLJ\n/FwLpSRRqhd1FM0
oVUkhY5ThlLEsJuMpSZLR703wHNs8s8lBb0iazRgNtVLg+hXOnV5hYbHLeKQ1\nmZpjEUbhe5twg3t3XiI3IWZplh2HIqZGwBYqBwSOY6HMfUe0flEKbOxjTrtQEmHpSIXAtN2xbGwh\n9TOM5VFQkKqULE0pjDCLk5iiuO+IfP/j2gHezTKSUUjr3AW2N7VmGl7fojNNEUlEZpLM6mUbOY3J\ny4xiU0ep7C528Vbm2XMcdm7cxDO6/PnhmLnDMT4zsPRaHpYBv98bcL2/wd+v6wN4cXUBT/h4nSaR\nEaTzq0tvG7881e2fhiPcrk0sthmNtKP93u41LNHCp0G1qn/Xck5ROJJvjl5g0dYVnivqp4nzHDVZ\nYryzQtDRFoFtu5T5DOHMoND1SIi38KOAeBIhpXFYtio4boAT+AxLPW+tTpPEi4/b6bgmeqSQ3Lmx\nyUFvSmCSiFxVMgtj6o6LY5iCcDxkOo6xVYmF3o+lFAjbJ88yiqIgz7WSJixYWZ6n6sLd27f1HGcZ\nSZoyTaqkpraOZTt0F1p4lktloN/9S59+mj/7/IPDR96r0Ba83Ur7Q+DvAv8t8O8Bf/D9HuBaJqoj\nz0FpT7sypl69AlWvgusELJjsrMMw4gs7ezheQceYYBdqDnEYkWQFYRryxm2tTTx66TL1jkeWJahc\nL3DLsXBsm3qtTrupPelS5hDUeRjcuvUtrt26CcDO7i3KaUijVePKpbMAPH71cXYPY+4dhnSX9QI8\nc+EcjflF9ochqneHjXtaGB+O+lx9FD59+SrhTC8cWYLKMl77xte5dOV9ACyttfnGt77C3v6E3PQn\niTOGw/tZZ+XMCI+4x3Qa46YOhbl3Y9RnmhfkQvH4I+cB+OTP/BTVjktcjfBqWnsbTyaMRxFJmNDb\nPiQ00RbDVJIrj3a9zaqJMY+nUyqVgMOtPk8+qUMbL58/wx/90R8QJiGV5jtCvgyEOUYFkixNkEWJ\nY6iMqh9gqYLzF8/hmJC/cJYzHY+ZhgPGpt5DGCaMhhMoc9rzdYTSh8ve7i5RVDKajo81/Z/91FMI\nJan4LrVFTbkMR8PjNfRe8dqLX7gfl+3XwfZ0TQpjuUhLx4zkJccRJbIsEQJcaSOkRS7ux45LWeJh\nUTVZwIWA0rKxLAslSnNfSlpo53tuImx0Jup97uGM6fvg2y/gJzkRgijR86YGY4ZJirAFVeMMm/V6\n2BVB7ewa41TvwTv7d7n+3a/hS0XSG3Jg6JV5W+BbKZEoCE3kzkhUmZSSQiUMYt2fURGjbIXn5FhK\nz9G4fLvkaXuKAAAgAElEQVTb6+7287pNwZCGlyBFTBJr61IUdVw/wFIdMpO5eWF5naG6RcfZpmFr\nhebc8oeozVWo2A1E6aIS44KzbXwlCeMRtqWtz5o9pFZMcK0Jfq7HKD0smWUzqnVFbsJpy7jATt+S\n2WqUoiwV7Gz36M1STIQxgWvRm07pduaPo992dw5BOdTqdfLMzK9IqdS0UhLHMdOJtlLml+do1j1E\nUaVZ0+tbCEEuLIbplMg4m6VTgtAUaKdqwlz773Qj3sf3dUQKIf5X4HngshBiQwjxO8A/BD4thDiq\n8vcPv99zTnCCE5zgBD88vq+mrZT6t9/lr37uYV6kTNpYXiQ4doHvpKwt6SPtkctrnDv9DLfe3CBL\nNQFflmMmFWg2u1xs6lPyvB9w/dY2STjFDwKGJmY1XIOF5S4iHuNyVBlOUiDxAxeTtIWwXZT1cCF/\n3/jKF3CWNGd44eoTVDLJ1UcvceWy5vfKxEZZMSE9HNfUcLDb5IVPOB3QygoK47jbOBgS1LdpNTuc\nv3BWjwsW8Sji2je/h4q1Vvr4z3+WJ548T/zChFs37wJQrdZpte9TO2fXtCMz3JdM+lukWcHBRNMj\nvTRDWorVtUU+/dlPAbDcXSDLZpQSGqauweq5VXY3Q4aW5PEL60wNvXLrcMwbt7aZ9
YbcflFr9wf9\nHr5b5d6dDXZ2NL3y9OOPMhlPePPOm7QWOw8cv6NKe3meYdk2liVITcIPStFpN3nisUdozmvN0BEB\nezs7PP/8F7n4lLYSylIxGYekac65lTo9U6Jzv39Dh+FZ7nHolu8LRK7I4xDXZOH6nntssbxX7L7y\nNY6yMObXzmO11xF+7TibFEtiuwpVOmRmG4lSYQud7GShjh2ZgeuRxiGdTouGq9fyvTvXqdSb1Dvz\nCLM3yrhACogd2BtoKzLLBJXaferh8MWXABhdf52a4xApRXXO5DtkY0SaULW9o4hSpiMJFQeZlGwY\n+i6MMpKDPdK5Kq1Gg5qjTfJs0EO2KyjHJUp0O7dFnVMrXWThEJmYZifPWLALPEuQHmUGq7f7DOaN\nv0m6MZPkBul0Sl3oGjxr3TqlPcEPqiS2XnP78atMkgSvVqXV1c7rTvMSFCWD3hRZZMeF4yoVD9vz\nmcUOeaE1WLd8lMWmzcbBCyhHz7XteAhlkQ1Spua3llQQ39e0yyMqQ3kk0mYqBZmxnNKiYCYLCsdm\na08P6GCcYAkPr5qAobqsIqdUFpYdEIbhMYXWbAY4tqReDZg3Dv3heMYoKdiL95i7oKlgETocDoZQ\nkcw19Vzm0/s5Ge/Ejy0j8khYekGALVIoRix1dAOfeuoqC90OC12f29f0ImjXVtjc26Ux38YZ6U3a\nadVZXXmK7Rvfohoobu0YB0oxJUwstrf2aBrzol6tImXKrIyP07vBQZU+D4ODzR7vf+qXAPD9LnM2\nrKw2j2OAN28OyKSPJUpsxzieVAqFQ5nGqFJSN5ER/VmI5dWQ6n72IhLqQZOzq+sEhi6ymPHE4+do\nt9v8Yfx5APZ2h6wtrh63KzQx6ju9PtE0xi8sdkb6WixsvFrA3/q13+AJEzFw9+ZrDPr7kCs8V7/n\n5gtvMh0q+uOM5Mk1fuajnwTg6foc/9V//d/zxrdfYGVVt92uVigDi3aryb3NuwCMp2P8akCUxDjj\nBxtt84b/LosSx3UpVEEW6E3VrLVZ7jZpt+q0W3ot9HfHpJMZ3UaTUyZWVUpF3KqSpTkLLZtWQwuZ\nG7e2qDQqjOOEwGTHhrMJdpEjcDkca6dOFEXMwofjtPtbd6mb6JOJsGj7bRAulnFcWSQUswjXbSAM\nB1pgkwkPVWsy312iajIqAyGwLAu36qNCzStfXFyilALb9vCMMA2wODg4oHAkXePbQXlU7PvO89um\nbnhe2tR8D9fzGQxMic+kwJOCPEtJTT1sZZeoqSCOffJHzwJwnjpqeo++jFm90CUycc0uArcWkAmJ\nNJz4QAVcffwKNdehuKMTwoIDCyceMSpKDqWeo9W4+rbxO7OuCzHlbLM53SYsU1zfODczxXS2wf7k\nJm7TZMIOY3ZDQWJ32T7Uvgjh3yGexUymU2zbOXYM12o1KrUqcZbeF+RuGzu4yPxiyJxxEHa7p3Wi\n1njC7kgLXTvPsaP7zt2jD2uVgF2tEwmbzCgAGRKqFXLbZq+vDxfbrVGWFoNxcjxGtbpFnOZg+cxm\nIanxQcgyZzYdIaRDy0SvjGcpvdmAxUtdPvq3fwGA117f5fn/+ctMihkV1zjpxbsHTPz4hLZ5VVoI\nRKFoNxu4rp7ord0Bys2oBRVOn9NCYrG7xumr55BWzmi0aq4t0evv0fLnuLA2R/KnOqngzZ27hGWX\n8TSmP9CTeOncGVa7Hco8IiuOCt0XCO/dvbIPQrU+h5FxjEYH+HNtokKSGAd0pdPAlwKSEmVGM8kj\ngoqDJTKk5VCf1+331AC70kF5NlLok1SUNSzbwa15VExoYpFO6W/vM1/r8qu/+PMAvPDSXWZxdtyu\n/UOt7R6OQ6IoR6SK2AgPfIfOwjw3b9w+Ls0q0hDPhuEoptnS/G4YTRn1EzJpE74RYhUm3LK5gKdg\nur9PYsqo5hZ0z6xTOBZ/8eUvAlD3JHc3bhOlI
UH6YKHtGcvHrQQoFJMkBrNRmo0G1aouudrb18Jo\neDghnkxYX1pmbVEL8rJURFFClhbUq1CaTNiV1S7beyFplnHhsnZKJ0lE3RJYlsPIFPdP0pQkfnsU\nxveDsKxjL45DTs0u6Y8OiI0C0QxKsnxGu7FIs661qJ6ok9U7NFcvsj0JGb2pqz7a4ZBzlx9llubM\n9rR/5EKrzjhMiGY5wvDk71teYjAecad/gFrWpQZ8u6CS3ec3vxfqfrQW1lnuzhFQkhr+s5aVZHHI\ncNqnYrRnq0zxS8HchbOc/5SO6Jj8xffwA5vznTns0ZhyoIVk0/cJJyGZKpkV+iAZ+xYfv3qF+brN\n4IZO7HEPchKZ8ZIFf2HqePxS+HZHdKG0kAvTQ4bjEYejAYmjf79zd8xodkjulZw+qz+W0HabHE7v\nMstbb1kL28ySjEajSbXeIDM8vzOYUmtVEG5GZiLDdmcleSGZqy6yuKYtk8eufJCSnDJNOZPo/SZn\nIXJ83zfkV/Q42SLAbvhMioKqbawELGK7Ti9T9COtkHUCh8BzyYqEkSlxLOwqtgMpKf0oRym95g+3\nx4hC4rk+gXnPpJhRu9Lms7/9s1x8RlvwrcUOd158jZ3XD6ibBKLl7gXeDT82oZ0biiDwPFzH4/SZ\nyzTmtYnfWmlS83OqlqDTNvSGnTFXC7Bp0DylO7ewvEbv+T/GbbZpr55jbU1rCPf2dtm9s0vpusTG\nhLu3sUnNXqJW9bBcPWD1eoBTfzhH5Mrpc3oDA0kyYX/i4LUXyAv9TOG6xLMZubJwHH1KFrZPtdlk\ncX6EGsRkxjQX0qJSqWDZ9x0gZVliuTbKtpiZEpxCSnzLYnK4T6WqM/k+/uEnuX7r3nG76i1NR4j9\nKbNiokPfTFEaZduEYcLzz3+LdlVfC1SJKkvKoEZsYr8rwRyJO6I9N8+Zsx0CRwvDTqPD+soaWzuT\no6g1vMBjuLdPVOTHYWuvf69F4UGj06DVfnAGV904GB3bIk0Tar5LzwjTra0dZNlFXL/HzZvaWbu6\ndIqmY7OwUMG2j3zfkna7geu42CpmZuL2z51e5s2bLzIdjiiNtmgJD8uxSJP0OJ48TTIs++FoMcsS\nx4K+8Cdk4312Ng+Jh1pohxWF7UhkbYZa0gJWrK2zeuVJJhlsH/YhNnXLmwH98S6pdAhN6NeoFWCv\ndJCeS2nWwtCTnPnAY2x+N8FZ1sqLpUry6f3D+nVjzjdxeWVzj5XlOtLQOI2aw1xniajnUqvpufRn\nUxpxRCuwufeGjjjK+ntIW8EsJi6meKlxximbTGZYwiI1BZ9Sx6Fbn8NfXyc3tENc5ryWe3xhr8f2\nxJQYnrx9X9051MrCnf0XmBSbhEnKVqgtn52tKeOe5NHHLiAs83EBmTCTBbPpJuNc04zS8igsh8AN\nUF5JzSh59WqN6XTMweCAKNVj7Lgu09mMOIAzK3rsmotVVFxSth3qpbZc7MJFvDXkzyzwVq1Kc6FF\nikAYTTtVkhvbPZLUwzM1SlQS0xIK27FJTWlVOZrRaNXxmhWUazMY6uff2hhAVmI7Fso3FtqKx8/9\nnc/y2IceRVn62sUrC3z2b3+CP/n9b5Ae6L1p+ffDe9+J9+KIPCWE+HMhxGtCiFeEEH/PXO8IIT4v\nhLguhPhTIcS7512e4AQnOMEJfiR4L5p2AfxnSqnvCSHqwHeEEJ8HfoeHqD9im3jILI1ZXO/y7C/+\nG1TaOgY4l0Padko0GGB5WiNqLs5TCgvP79JEn7D97R3qzhwv38ywai3WnvwEAIu7f0a40afSbjIy\nNQai2RjHXtW+AqG1gTxKiIuHo0eUsI+dWNF0il+pMJ0MyBKthUWTKa6ARs2n29FacXOuRrddoXRa\nxH7BwJSPTMtdyCPKIkMeOZ8siXBt2nMdZKk1yDIvaLUqeEIxMnU2VD7jfVeXj9s1mRyFw8Xg2ppv\nPXbCKpSEO
M7ITEZkt93E96qktkdokpqYZZQSRBzT3xb4HT1Ht+++RK8/wq82SE2ImRKSQDicatZR\n5gMO6WDEwuV1vPkagfvgpaSOih1JgShT6lWXQmm+7vU3dtndn9G8c8jAFFcq3RZXF6u0lmvYJuws\nnEXUmjVsyyaQCkwNjNXFNu1qhQtrSzQDQ8PYHtIWVJoep4zlELU6pLkEbry3STewzEc2iijicPM2\n6SjGPnK8CQfXFiRxRKq0Znjq0Q+xZ/kMhhtYZUjHOAjn2h73Blt0uqeputoxu3Z6Cek45MMxjol1\nVnnJ+dNnCC/MODBmv+NapMFbamWYAmIzx6XISsrNLQITv993FW+O+mSxJDG1Ls66Hh+sNtn/7qvH\nNXxEmlNMx4xrASJwmPPMV3+ERWZZ2KUiNFmapfDJpwkNp0rVcPw3EsUX7o5x8oLfvKw17M+ceXtI\n5c6+Lu3aG4yRviDwq4QmAbBWPc3CxTrrp+eZJDqOfzgOUa4DYsRkvGfaU6O0bWSeEI5GnD2jHdPx\nJOdwe4v+8ABpQg4vXjlDf/Me+7OQ/DG9D916REKJLSS2qTlkOxWc6n2/1pE1NcmnWMKjwCK39dit\nnuqyt3GAED6XTul5m44iICCPBdJUbGzLFFFJmRN11lfm6fd0PPrNXqjXiW+TFfqZP/f0z/D401eZ\nxiM8k/BTa9T41Gc+wplTl/kX/+T/AuD5V77Gu+G9RI/sAXvmzzMhxBvoL6//KvAJc9vvAV/irxDa\npTEFlJdgNWH10hUqdT0J/b279A7fpNU5g2ey3ZxWh+bcIogGSV/zqtu3v0o1i+k4dV761k1+/Xf+\nfQCeyUpG//cfszvIODAxmKWCUiSQl8hcd1OhsP2Hy4ikyHCMkGoFsN4SPHK+Tf3oayPCIpyMSKIx\nlZre0FcuzbF+5hSWe4bZaMT6ij6crtw5oDkXMNdp4hjOUSpQNgS1KoXJpLIUuJZFQsr8gt4Usygi\nHO0dN+vgUAtzKSWu72A7HqXh7j0sAtdlkg+PM84SZWG5AUUuCarmayOzGe1Om+XuPHe++zJD33w4\n1ZKUto1fgrekN4DMEohiVpotHFMyLckLPNtCWQLpPdhoOzCfXmpUAzzbwhL2/TKpyiWMcwoZMjFC\n6vVr13ly+WkqnoM4iu3HxfcURZFQSnk8dq7rsbyyTGthBXl0MOcpwrKpVH0K8xmvwHJIynf3xj8I\nZVEizXjaTQeHmGYgmU5Nmd6ZRHQaLK6eZ/Wxj+lxS2129++S9u7gpyFLi5qftMlYX+hQa/oITxuk\nF5aWcYRFI1PUqnqOW67HKb/K+vs/yN0dTRc5voMfeMfter+p9fzGYBMccKSkXTUZopWAqR0SJ1P6\nR/xvWfJI1aU/nIEpGGW1AkJVMq0FBJ5zTN91yxyEhUXBKDMljmWDvXs3aCMZotfN9+7u4BQlv/5I\ng09/UCfCnGm/XZSYs425+hlCuYXnxARNHXFVXXiMwt4mzDdQpj9V32dxBQ5Xxmy9vm8mIaCUBUUy\nTx7PuB6ZDxCnJVkyZTbdxzH+gHQuoFlRDPf2ETMdgRZNd0lxsVIfp9Trw3Lc48MYQJgErjxVbG8d\nkknJB545C8DHf+bD/NP/4V/QXF3i2V96FoA//3//nP7eiLJoUJam7HBp4berLNU6nF2sM57q8fzy\ny7fIipRmUKHw9Fo+/cQlvLpDNFXHWa+iTHAcwcWnlnnfJzQV/NILL/FueChOWwhxFngf8A1g6WHq\nj3TOmuwfWcOtwmR6QKWuw7z2917j+W98iyce/yAXzffdZOSQqQLPyZBGmC0vdhnd2+X8qS7D17aY\nGM7w6kc+RH/3Ft954QZFpE/RXj9CxTlO3SI1Hn8pJfZD1sL/xIef5vyj+uOfO9vbrK3OcfnSBZa7\nhiNTgul0RJpHx0KmXqtRrwfYXgVXZsShFoYfePwMZy+fJZc5ymjFhSxQtsB
2HfLEJGnkBZZjIQIB\nJqsuzXOct/CyVbPRRSmZRTPyUpGZdHnf8UjSDNvzcI60RcdF+AFWmVL19SafDqeE45DDUhHHJbkp\neZr5Nk61QiEVgREIWILDnT0IUy6e1puvu7xKlinC4QQ/vS9Y3ood4yRbtuZp1StMJxGZ8QeU0iJJ\nSybhmKnRnqfThKrvkEUzfJPursoU1wqQlCRpQWHKgY5nMaNwRpoL2qYsgHBtonhKXo5xzKewpmF0\nLJjeK1zboTRJGklp48uCTqOGVzdO8eUrXPnA08ytX+Cw0AfbeGeb+sEG9WjA6W6b2b6uC71y7jRr\ny2f1YWK04sVOm06zyeWV09RN4kXVt6l4LpXA59mf1mvOdQTuWwpAXyhMvYtKnXyuRSYLcnO47PUP\nKWxBUK+wVOi+n/I9ovGEzYrFEx97Wo+7BbuvXaOYhHT8AFItDG2hEJZApSlTY0kqaZHJlDgreGNf\nz+UsT3j6VIsPX15gLjBCM3v7oTjXPAtAmEYMD69xbr2DqJroJznHxvAV7FqMJbXTMJnBxctL7G+8\nwe03dYRNUQhUnmPNMlRZkByl+1MyG+0yONzGM58OGy0s4Sw4WL4gMo7GcPeQDA/HmcM2iSzKs5Dc\nT1aqV/VBtnd3yPdeeJW5hTq/8us6NLFAEOUlC6e7PPsLPwXAfjzkj/+PrzKLCmSp522WlkxkzmGa\nsNhukTsm9FdUUGWM5/ucv6r3zOqZZQqpPzEnjpKmhAK7IGfKT39Gz9EzH3s//+M//goPwnsW2oYa\n+ZfAf2I07neKv3cVh5/73Od4/vPak/7IeptnnqpRRDP6xoTa3HmRhQWXPBrx3a99SXfYrdJZOsXS\n6joVc0pV55fwKs+Szd/i9CTk3i3tWHn0Y7/DUx/doT+MKe7pU7rtdrCtGqVzP8U6iyc4D0mPPP3k\nIzz2fr2B4scvUGs1kYA6+qqJ7TJXW0ZZ9x0EUkqdnZjnpGnMBVOQp+LViMMxynKOa0UroZBKfxPw\nqH5zFseUsobliGPKY9qPuHdnk8/8phlso0ELz8OVdVR+39kmEFBI6n4H28SS2rZNUpYUCA5NyneU\nZ4R5yjSOKB2bzNR6FoWiUkgECsvUUC7znFEmKYSkZhabZfv4wtR2EQ929A1NSBajEWGaUGQFnqM1\n0DRTjEczBtMxd8yHAC6vtfFtizSKKU00S56XVH1BOMvYuLtDZsqjXrt5jzjLiHMoRqaUqGPRaLYY\n9Ac45hAtlaI3fJjvFYHlBihTuD6XNdrLlzn3+DPMXdWb1+ks4aJIpjMc8wHh842AR65e5Ozy08xV\nXb7zTZ0V+Ni5s5w5d46gUjl2EFLmNOp1avUaVRNZUKu49Ad92u0OzYX/j733DLYsu+77fnvvE29+\nud/rfh1muicHDAYASYABJE2DSbQpipRtUpTpkuWSHGRLRVnWJ9hVVqlcsl0ql21K5bLKpqpkBpsS\nKAgmSIAYDAYzCBMwM5jQOffL79148t7+sPc973VP90w3CVGE1evDTPftG/Y5Z4e1/mut/3+aiCwp\nJ/vVDl0XureGhkZzBh3EOH4kfN+gWwGm0Ji+45AvK/x0Qk9KCtfGvre7i1rfplFmhMqj6xSTZGUr\nYsp0QuoOgjBosLi6yvbFiwxcBcaphZCPn+iy3IsQLgoV/s2ltBvX7SY5u9hgoTdHFET0Dttr0loy\nCbpsDft4TksyGY1h3qe3GNM+ZL9zrjsHieDsa5sYAhbnHeXp4Dpec8Jyr4F0ykoTucHq0mGeOPED\nNFwJ5c7mLg2/RzDfxA9shK2NpqqlhiB0QgRXL77FxXNXefbDD3PqYeuQ/e5nv47WPisnZhGuQOJ7\nf/z7MV7M1158i41N+yy8KiTzDaOWvd7ta313TwKavmJmocef+Tlb3tdbajNJE6vJylRZSRP6kq9/\n6U1e+kMr6iDeJ914V5u2EMLDbti/boy
ZtqzfNf/Ipz/9af770a8DEIj4bn7yvt23+3bf/rWyj/7A\nozz9UctgKfH5B3/v/7nt++7W0/7fgbeMMX//wGv3xD+Sn7cQQekp5uN52kEXlPW4Thx7Gq8ac/H0\nJdYu2MREmpZUKmD+8FE6Hfu+MG7y+DMfpTvTpvHm2/SH04aJGY48+eM8eE3z8uv/CwCryz0SWTIa\nViQTe+qrYsLsyr1VOcbNJi1H0N9seOAptDmoHyjQRqML7ZpmbI1vibZqMELSchqTZaWptAItMLjS\nHimgElSeXyvHUOYIXRFqhe+ggGaqMOv7RDd6KsUhJEIpAhmhnfetqwqhDEru48dTzUIpvdqjb/td\nqqqy6iUNqMr9MsSyLNG6YjJyvBZaY5Si9D12HPOcNxgw60nCyMe7g6edFE41fpSwtbNHI4hpOpm5\nwXDM1vYuu5MBsZOrmum1MVXJ5uaAz3z2c/YSEZw6dZJ+f4+tnV1WVm2O4O2z58HvUJmAzHG5eJ5A\negrlh1y+YptBvCDmyvXrd/nErRkvYumIrf1+6uM/ycKJp0j8Lldd4ip5922iyZBZJVh1HuDKkQWW\n5o4y01A0PPieU38egG67Qdhu0Wg1mbi5uLm5xqMPH8Xzg9pb1VUKMy2anTaRS9yPh0P62xs0lyx2\nvHzc/tarb20wXFsnVBA4uOuwHyJTyIZ9ZrR9HiZPkCqnYSTF29aLa2UFrUlG0AgJygIcm+LQVJgq\nw2BItF0n3VaX+bkF1t9+hwXHOPnscshjsx6BzuqIzxygOwXIHLxSpAHdeJlsohkp+9rO3pto40O1\nSNywCb4nH38MPzCsHB0yd8SOsx1nnFh8mPULI/o7I2LH0DlJfZZWenz0B1cJYgcd6ZAZZlm/0Kd1\n2HZURgtt/DSEykO460Fo8nwfbqrc/EzGORLJRz58kmbDvnb+9DU82eTIsVl2ExtxRjNtPvWzn+TR\nZx/j2g2ba/OMR7vps7jYY3lpmc/8pm2Ie+31K5iy4OShDqtPHbe/408wWiMKge9yM4iS0uVQanIw\ncXOH6UG7G7mxTwC/CLwhhHgVC4P8bexm/ZtCiP8AuAT8wvt9zyB3dY5FyXwqGOxlzLqHcOTos5x+\n43kuXLrE9g2LA544cojBcI93v3aGKHTJuKQgJGOlrdgb9/G6jiP38jmWHniUj//Ej5Imlkf47Csv\nUOUZQnn4jWmNpUeWpNyLtbuzGAc7TLIck2VkWc7YbWZ5kZNlBWWpa1a4osiZTCZWrUVr2rNd9109\neu15oiCgcgsVUSIpabcjtjccH3cyQusZBAHaYb2ddsixo/vtzA0XZmdFWR8i0wdeCYlC4Htefbh4\nnusoM6KmKRVOrFcIgZKy3rSLoqSqKoo8Qzuc3GhNGIeEYViz2Q0GQ6qqoNmKqarbEzL1HbHVeGJo\nhD6hJyhcgnBjc9OKmwrNoUM2JG03FTeu32B9ssW5i/agz9OKt95ZJ4x8xskEr9l116kYDkcIGddq\nOu12g/5oQlWWNbyx3e9T3kbj8f3s0OoJnvk+q8dYzR3mKxcuszWY0M4szPJ0u+TRY6ucWD5Eq2Vx\n0Z3dTTr+Mg8dP0ysDG23yflKgO8hlCRycyn2Zxjv3rDPTE8lyBJK1eDdc+d5/jmLZ65fu8IzTzzM\nLz/2DAALR20FUXJ2jdzLaDQCisSOSY7AX8vppQm43oRMFQhRoT2vbvU3ZY6hIk01ujCUrgXdKIOS\nBi19UnfvgmZMMR6Tr9/g8TkbJT+zOkvXKxmXJTiOdy1uDud783Y+NNsRXnScfjrgRt/Cobvb1wnU\nHCpSbOzZip6Ny+coJpKjqzGHD9l1fe3qJsXsCbrzPTbW01r6bmFmBaFfJ/Jge3DV3c+AyU7Bu2++\ng/ZsbubU6hLtzhyFScgdbl+WAnWgGicOHeNkATNzDZ7+2CkabTu/Ar9JuzNmfmGO0OUiqkoj/YrV\nU3M
ce8Sux1YU4scBlYEwCFh80PZQTCjpj4e0VnrInt2DEiHw8cnzgqqGQEpEURJ5PtOOrqmI8e3s\nbqpHXoADyP3Ndtf8I+PAeptJknIiXGRtc52LTq6n1Wpz5cJlJsNRPdid7S2Wlw6h0xLtFnlWpfTX\nLlOtJ+zt7tCO7Cl99p1vsz1IeeDEIT70rAXyL337ZYpkgCmrGvA3ukKbnHuxf/qZz1H5zwOwu7vO\nqL+FNJC5aof19XUqbZhdWGRm3o4nVB7jnT1On3mbwWjEqhPSVb5Ppz3HiRNHOeJEEE48cJjZUNCO\nfLRr5UYpiqpEeRIV2oe4dHyeqHMAN6z3IEEQBAgh8H3nXWmN0AYpZN3623AJRau0vr+BKSlBCIzW\nTCZTr8UQBgFRw68FWrWu0JUmjuP6x/M8Ryms91Lc/jD0XFLGkxCFEa1Wm3OnLwK2pK7T6yBjaDbs\n2KXJ2d3tc3Fzk0bPYqCH2j1832Nzc4vtYcqGa9vOK5BhQJ6bulLEjCf4gSJJJjRcaztBhJY+nLsz\nc3KlcZoAACAASURBVNqt9sDDj9Gas4vyzOVLLMQ9ltqGx1ftglzxDE888yQijPjnv2Whv6+99BXm\nF2f5m7/6X3Dq2BESV37qozEOh5/S606yMZvbG8RRg7KwWHGaDNlNBb/7ha/gucX7wh9+gfUrZ/nl\n//y/AmDGqbk/utTm7FZGWkHmWnbTIgNTsleUbLo2dulpolKDLDF6SpNg1cdLI2gAzSlLoSnxtKSQ\nkpE7BQfJgLXrlyn31nnysJ1DK0tdqnSC5yuCnt1g1S1yc17TRhSHjsyxvp3w1ZfeIOra+9+OIq5v\nruF1cgrPerBnL2iuvlXy8e85QkPZe6QnAdqMWDpaceFMQOWaWfwg48hSg+TagCvnLE4/F0Vcv7hG\nM+zRTu1c+trnf49HPvIkC0tPETk2wLysiPz9lnvPjdtUgqMPzjF3dIZKuo7bwyvcuLxLqzOHcuV9\nhc5JKTHGELmD0VCSlTlahchKMjtr54j0Q0rl01ueIXPUsIW2uLqlobCHi9aaOArxvADpaKSr8o/h\naX+nTB61k6g5bjIY93nxS59xvJY2GTBKBhgK2q6Ursgrtje3UEaTO3L+Q4tzNCMfr8hpNTv4Lvkx\n3t2myEre/OofsLNuZXvb3Rl2N7fw8VBTXuMyp27xu0v7/T/8Kj3XkWmqEa9+9Q85duQI844I6drV\nNUpd0ZjtkTsF7fWrV/jRj30fH3rqcSZZinQ1zBcuX+L0mXO88ear9FyU8HN/7mf5xOMPERjJkWUb\n1uVO0VsbQzGFUbyKsLevw5dlU15ny8frHfCqjTEoIwiDoN7Iq6qy1TNqf3EJIax6uBDkZYFydaPK\nKKQSlu/BJUylsN8Rx3F9YGRJguf5YEzNIXyrTZynLdF0Gy2KsmLiEloPnDzFmYtniJohC055/dSh\nFsPRBO3FRL0pjWkJElpzLYLdiIlTBhFxiO8FaClRrn7Z9xXjyYAwboCDhnb29hind/ZcbmfHDh+l\ncDBQJxA02xKvlHTcIRiHHlpr3nj9NX7nM78FQDP22Ni5xv/6a3+fv/ZXfoXALbyW8tCmJHd8ywCb\nO+vs9HdQSjF24hlFXvHOtW3mj5zkP/1P/ioAf0dWfONrL9bjariKqR8+vsJRlfDylXWuOUFpTyn2\npOBdrZn2J3ZzwZIOKfZ1dACohEGbikNSohzpupGCVCiGlWZvGrXpnDwbE3k5R5ftZlcFhsI0iEIf\n4RJ5mJvv740tW3iwtALX1ja4fO4ivZajew0rhknJchxjnEN1+EiLtmlQ5B57TmhiYf4oR09ELC6O\nOPdWRZbbQu+ZlT1WHw1494unCTbcRh5X6GRAJ5rDuK7CzeI85uoO/bFgyXm6WkFWxMD3AjBxmrTj\ncUWz3WWc5WQ7NpHYaMYk+YSdnT7erP2dTOc04gbK82qx4CLXlOUEG
UKW+YS+dTb8KEYFQ9rdFqOR\nfcZpCVEgMcZBo0AQBpTasD0Y1XtaWdx5n/rAjsj7dt/u2327b3967E/O0z7kSG2SHv31iwRZTCew\n/kClcyIPMB6+84o9CWVZUpUlUu43x2xsbHJi5RCPPPkse47KdG/rBhlXufzut0kdB8XS/BzduImo\ncnAeMJ6qOxHv1n7+3/1lwsVTAEyGa5x541ssH1qtE3xx1CHXCQ89cYqZZYvLTuZn+Omf+DdotGPG\nWcr0J0ujScuUjY0dLjnGtEajw9rVbS5++wzSsVCdX9vgY//mRzh2fIXCSWbJKAB/v1xx+vsK65GX\nZVnDHrqq8KUHxlC5MLkqK6QS+FLVJzwYqkpjsGWHvsP3pPO8pBQEaj+RaYzG8zwiFw31ul18T6KU\nQHN7T7bXs0nkUX+HLE0xRcri4py7dp9mw2d+oUvLqbY3wpDJ3oC9JGXouFgWFuZJ0jHtbhvhKaaC\n6KEXQBBghKybOZQS+L4VLMgc1FYZQ6N5MwvdB5knNGli52yZ7HJ6/SzFOKF56iEADj9wmPFwgxe/\n/Dlm5y0G+h/+pV/hW699i5de+jLPP/8cC06KS6QZaZ6QFzmTxEYZo8mIrCwAQ+a0MCdJxplru/zK\nxz5ZM1Murx4meHOf12Mwthc/GoyhGNGNSm70ncJ7ZbthG56o4RWMZKAEEwT5dC5ojQEiIPAMQ4dH\nj43PQFfsGs3QRRTDtQ3SzXnmOzEtJz1n8hIhA4yGbGSvJ45vLvnznQTb1u46o9GAE8snOP2aLTIY\nFwNmj0X4ocBz5bxEFRvZkGwU4LsSxJXDC1w4fZXQUwhsPwTAsa5kZiFidfkwWrr6cL+iaEEc5Ewm\nlpSr2zXoNGFr7zSlK/VVcZNmuN9Z3B/YOXLm7DWKzJCmFdqpKElf0h8OuHzlBk0XBSdFSpqUCCGI\nHNdPO4qRfoVINaNEcemCnTd7owEyEFS6ZOxYJvNK4qsmjUarjoyFEGxubzMcp7Tbdr28n6d9N4nI\nEPgyELj3/7Yx5r8WQswAvwEcAy4Cv2CM6d/pe3zXqhuFHteuDdg8t0fo2Np6PY8TyzO0oshirNgN\nezSaEEYxhduMPC/m4ac+QpkkhEHAZNtCIflwB6lzumFJM5y2BGcszi0yHm4xzibut2OkuDdq1jCQ\nnH7nTQAG/TWMMRR5zsglIoUQRKFPMRnS37TjXL98hc/93ufYHQ7pj/q0HUdud2aWZifk6tXrLM7b\naoCos8jzn/0cO2dep3JdbGfX1rk6HnLq0VN0Ow332S5xI+IJ23xH4O5nVlakaYpA1Bu50ZqszCjy\nvIZHPM9DIB2vtduIpzikFGgM2iWkhASpFJ6UU4QBpSRK+W4rsNfZbDYJAx+ERt6ha2n5kH3Gcjli\n0B/T8kKI7XXuDi7z6KlFhNYIY6GIrRsb+J7PjfUtpOuWKzJQKiDJU7xQIzzX+RnGRJ0u/d1+PSat\nSwJPkmQZmVuorU4LfY9NVWfPv8Ukm3ItC8xkTL43ZLdj7+dZdlDXA7791iv87M/8GQB+8Rd/iRMn\nHuClF5/nD774PEcXLCYuK0OaTkjStJZ0G2cFWVEisJAOQJolXNsasLWzy/qGTcIORhOanX1anz0H\nN23tDNkbj9Bo5PTaywoqTSB0TfudYCg8yyXvublvysLOY61Zq3IydwpODEwUZF6A59JY29fXufiG\n5pFHZuvGNGkEKgoQ2tRYfXXLDfZd8cCZc5fZ3dAMdkLKid2MK+DEg6t0ehMGE+eIGIPWgtBrsLRi\n819BnFAkGcmgjSkqlk/Za189MkurFXD48DxvXbT6sd0jXR48fJydnYuYwsIovmiTVA26iwJf7bif\nmZDsl72z7QS6L12+zuLCPOMkQwWONzz0yMuKG+vbPJDZxqA0LxgOJnieh3ZnqWd8vNxKzF08v8Zn\n/8Uf2GeV7NLptShMuZ+CMjAZJ
xR5XueZwjBCCo8gCFEusTvVa72d3U0iMhNC/LAxZiJsRu8FIcTn\ngJ/jHrhHxA27eMWsz8Jsm1FrwMCVsA12x0Q64aEHj9dJw8kkYTzKiFszPPqY5eZtLaww0SGHVw+z\nceUcQ8e4dmhxif5gTLOsKAq7mZZVwaRI0CrcZ9TLxjS8e0OEhttrfPGffRaAK2tXkUXC668PakHA\nsixBaH7/n3+RwOFRH3rmw+RBm0E24fzlDba3LSVlnmqur13kwsW3+cgzNmH6n/3Hf52vv/QiZX+b\ngXtQCYbz37zC8y/foOnZDc4PFCoM+dlf/m8AkNPeJqORUiCFQKlpsb6asp/WG7MRmsqAkuImbNPg\nGrI8WVeFaGPQuiQrDaFrzvFVaLsEta7x6zQbI2VMEPh3bAZIJxbLa7Yjmq0GS51Z9hzBf7sT04g9\nRFHWFJtFLvAbLRYWFhnu2Puxub5Noxvixx5R5CEczj8YDCiEIstTRDWtiBEoJQgCD+U8IaUUk8m9\ntbHv7u2wsW5pA3bXN6mEoUwyts/YA3xurkXUarKzu8mhZeu5DQcD5ufmUCrgnTOXuHrFJtqVkBRF\nSZEX9eItKiu5F4URy8uuKsiLKRjx6utv8MAjNrp79fU3GY72xz52eG+WjKGs8CtB0z24TCoMisLo\nWmU8MSWmLBFFUXt22kVXlRBgdM0SWCmJj0RpiBxG7ac5c37FctvDOCoAFbXAD6Eq8V2+Rvo3l3yO\nR+7Zbe2Rjjy0zmnP2atPRhA0AtAlFI6FMtQ89thRFmdPMn/Y4c96jWR7nsGGotWa4dAxu8lNij6n\nL4zY/kZJfsOV8y74DOYmxMc7yK6dH17WoOevUAqP4dCW54VGUg33lc7Hju7YjzziliTJc2K3cVbG\n5lLOnrvIQ5vW0w7jAN+LUUphzPSwzemvZ7zz+rd57stfZ+Ka1H7ox36Q06ffAUk9/zyviVCSvd0+\nmcuzdHvQ6/aIy7wunRSN23cYw13CI8aY6awJ3WcM98g9ItfsKZuPoRH6PPXgKp5rbU+HA9Jkjxs3\n1vY5OSqI4yZKhYwcmdDG5hZaG4ZHDnH+3XP0Iht+5klGMh4g8CldF91gMKQsS4RWmMpO9Ga0y9LC\nw3dzybUtLy1z6rit1zVoPKlRQtTeqtGGIGqCH7GyYr3nT37qU7QbDbrRDG+9+S1On7WlTocOHyc1\nEhU3ePO05QJ/6/RpGscf5fr1GWZ6Nuu8GAQ0WjE7a5fYvmZDvc2tddJq35spXZJMKZ9Q+KRpWnd+\nNhoNV5onanhE6ym0sZ/MnCYnwWoYTmETJSTKC9FlhXYQw2g0xjBCCkngFqovDZ4SGKMJw9tPsmm7\nvRSGMLSeRLdrPalYlihT4fsGJ9pOMckpK02r1WHGERQJCYXJyXTKzNE2u3v2IIi7PWQU4ytJMrZT\nNM8y8ryi1WqRukWhEDSDe4uwJuOKpvMWx36fSZlhgMQlgNfXUoy3S1aVDPp2PDvb22xvb5MkGcNx\nztAluezB6GGMqWvxKzQGQVYUjK7dcNcp0FXFV154kSvrFj7b3tmuhakBjIPLAmnwMTSEYt5BDKKU\nBFoSlIaJq/dKtLF6lGY6EtCugkhik83SHeMSQ4ghkiXzoZ3fzxxd4AcfX6Elsnp+SN8H37fZaQet\nyFucoSlHuEBRlgnthSbdBbvpLqsVVCDQeUSsLKS4cKjNTPQQjWCOi9cs70Y62kDvtmiHczyw4nHh\ntN14c1/TnQ8ozITIt3BCJ5thcK5Pvj5i5lH73NqLbYKZEHZj/Mh5taQ05H4x3FQc48jRRdodGE9G\n9Ef2Hg9HQ1aPHWU4StlwncFHjq5gjKQoKgx2DWa65OUXL/H5f/YKKi75qT9vt8Uqj3jr9JtEjag+\nMH0voNVqoZREuHunK4NRmkAa280MlNWdq9zuyu0UQkhXo70G/L4x5hvcwj0CvC/3yH27b/ftvt2
3\nP77draetgWeEEB3gd4QQj/NerpH3RQ2b2tVejjQhkmbogwvhZhfa5FWTvf6wBuDnZ2cw0md9Y5tt\np57SbkYsLc5y6Z1XWL+2R9+3WN/Otocf2qKm1JWDFTmMRiWBGrO04OgbVyO6R+4cdtzOdjZ3+N7v\n+TgAH/+hHyIMFZ6SNX6sjUahKPKKxJHmbF+9wE5asLO1w/mz57i+YcPs1uIKhBEiaNQq1L//3Fc4\n9uCTrM4eJnIJ14YfkqVDzg++Tavt1FtMydruqB7XFA+bFCVlXrpyvn2eEWNMzSUCEEWRw7WpRR2m\n79O6oqpMTXg1fT0MA4QLe7XWVFWFgZrTA7PfwbWf3LzZgilE4YGuJGlaULhxebFPGIakwwFNdz1F\npsnyEo2g6VTb/TBgb5gRhQ1kntXSTaWANBmjNQRunGmS2M5PBLFTPq+qEnGPzTXXrtygcg0ZVZmT\nmRJdaYTr/CyNpioMlYFvv2Xhr0984uO88NUX2BsMMEKhpxiVkEjhE4ZB/YykL/B9hfI8PAerSRHg\nSYGnBGlmvfTDR47WuQagbuDyFESBhykklRO1KE2FMIJQSrIprW1lKCUUUtQLtKoq2zVrwJOK0EGS\nkayY8QxLTZ+TS3beffSxwyw3wGQV2n1n5fl4QQilqjl4CG7eSgpH8pb1DbHXoDffpHRQiBeFIHIG\nOxNWD1sipUOzhynGEdcuX2Z3za6j2WiZJJuQFWOuXOized5e++wDS4SdHitPxaRN9/u5h1QV0U6J\nPG3nUjbuMr42xJvkLM5aCGrxyArhlUE9zkluPe0HH10lamh2+30qV8s3tzjD9/7gR9jbW6shqr3+\nmE6nje8HbG9bnDwbjbhw6Soy8Pme7/8QJx+2XEMvfvltiiKjzCpk5dZRXlHmBa1mu/a+lZL4nvXe\npx2zyXh/rd9q91Q9YowZCCG+BPw498g98tIZe4HHFls83YspygLjbk420YxGY3wvpOM0BYX0GA4m\n6MJghMuOFwUbW1sIXeD5Ff3EhpXSn8EvJdKn3kTSfEDcMswvCWaadpMIworOzL0pmDQbIdsDGxK/\n+vrLLC7OsLQ4Xy+e3d09SFM8XXD4hE1WrM60uXb6BuNRxuLSIRpzjqIz6jBJUpaXj7J23XZybW33\nWV4ZI4xh5BYqXkihK8K4SegebL69CQdEiT/zW//3PV3HvyrbcAeuECVpoplrzrDn9Ppacx3mu22K\nJCNwjQrjPLMsbAamNfU7O1tkZYHJBLHQdQv/eDKhnyQI6THjlHza7TZSSoSB0t1PTwhUcG+FUlm6\nQ+pwyJoywBgc8y9KCISwIe4LL1piqCxPuXjhAp1ex0FBLh/gB4RhSKvZwnPQkhC2IkcpiVT22r2g\nga4cTOU+a8T+nAb4uZfvvkHo7ux2VT8FXHCI6Etrt/n3D7a/8wtv/tGH9IH21h/7G8xf/gcAeK6p\nKzYBSgnGo4RuZ9oRGdDswrHgERKXb/LCkKwwlKZkNHEH0wAaMwEPf88Cx55cYFLY9850eywtzLN+\nbZtDjqa30SqowgqtKqpq2rJuyJXia19+k1e+ah2AO/U9wN1Vj8wDhTGmL4SIgR8D/i73wD3y6U9/\nmr/xzu8A0O600cpHF1k9Gft7A3RWsLzUqdXUyzTB5CnCgHE4T15UeGFAqHx8vyBwm7kMAqKwh1IS\nbam/6S0UdGZKenOSjmsnjqTCi+5t8Ya+JkttqdFXv/oFTJHSacS1MEKaJHhIjh1f5YnvtWQvDx5d\nYe/KVdZ2twjikAedrNrm5ognH36Cx598mP/rH/+fAHgEFOOUPE8xUwbCqESFIcdPPMDGFcvDgFTE\nzXvDZf802G9/9rV/1UP4I9nMTAfddcxwWlvPVEqUwyFtJ52gEce0m3Z+ra2vs7C0RLPZQEp1IDEs\n8X0PY2w5pnsRJSWe79U0vcILbPRSVYgpji24qYP1vn1nLQi
nBHaSIAxpxG3CaWVWnpFlCRqfwJUh\nNqIGla6QUjLjcjNhN2ZmZoFJPqE920S6aOroyWWM9yzt2ZjUNVpVkxSjJFvbu2SuxLeqSsIoZun4\nEj92xDbmpGnCP//1r9x2zHezgy0D/4ewqLkEfsMY8y+EEC9xD9wjX3rOJuMeeXKV6IFFQlMxdHSZ\neZozO9smiD0Kp1cnA2h0Asyo3M+oSoOnK8qyoBl0UcKGQcl4TFmltFqKGQeFtDqSVjMiamaE0yRJ\n2mA0ujdPe5JM6s66T/3ET6PzMaqwoTJYPUblBUTNBmt79reHe6fZSUpEFPHua+fZftFWuTxw4mE+\nevIUeZIST/Uci4JJkiKVV9dzJ1rjVSXHjjxA6gjdH+s0+frLr97T2O/bH90eeODBmm9CKWW1PKXc\n7zqttFVY9z1i10kqpSDLc3SlbYLXPU+lPJDSVvlMPXWl8H0fIfe5YJASbQye2V+WxYGqj/v2nbfR\n0G6cURRRFbaMdXrYpmmGkB5pUpAMXdIxSGm1W3i+QjoB3zi20GOYhXjhPkeziiSHgwXa7RaRy7RX\nVUGlBZNJwe7eFH7TNFsCFYRETlNVvU+V292U/L0BfPg2r+9wD9wj9+2+3bf7dt/++Cb+ZYdeQghz\nP7y7b/ftvt23ezMhBOZWzlvuc4/ct/t23+7bd5X9iXGPTHG5v/3/XkSUlS09c/9mW8MLgjCoOw21\nLlBFwkzDo+l4DcpJThY0mEyZYp0Hbz15deAbcd+h35PEMcbcNrHz6Z84DsBf+8u/BEA6TvHCJkjJ\ngycfBOCBBx8EY7h29QpvfeMbAFw8f55KgvQ9yywH9NodOt0unW6XmdmZupmk0Zqh3e4Stxq19mIU\nN1FBjGa/U9FMj9LK1MIGUkk+9qHH6rF/z/fbjsq42aTb7aK1Zji0GJkUhijwSccTYpdACQJJ2PQI\n/YjUlWOlaU6aJQgpaDVbhK7xpiwL8jwnDGO2t2zeYX19E+WFCOXXZWtFYd+3u7vL2tVr77nPwJ96\nPPb+OL+zdqdx/pX/6Qt1tRiIugRTCjsXBxuXuHH+DbavnqYc2EK0dmQ5qvOiwhhT84eAptKWs35x\nwSb5P/Wpn+TJJ5/C87y6smu6/o2p6nUEIIzml375L952nN8Ndi8akRL4JnDVGPMz98o9MjUjbCOV\nQjB1/KWQGGEJoaYlUbZby1BUJalr3R1dv8H8Q09QICk1tQKLESC0wJj9bXufEml/ItWb9Xv27P0X\nTj7+IQCOHjnGzOw8ufBtVt99Pk0THj50nAcfeQqA86dP09/dYW9nh8uXLBfKlcsX8ATEgU+VT/Bd\n+VYUzeCFEVG7SexqjXtzC/RmV+j2Zmg5Pu12t0PcaqNCSwEJlnbzoClHBGGoGE+G+H5A3LCHW5Yl\nCE/Q6rYIXO03OieQmk4rIhnZxKg0FXEcosGKArsqo0Yjth1tRtNq28Nla0tQlCUKWd/HoijqTsv/\nv9k/fnuErvZL7YSwdc7TjrXpWn+PUuo92gdCh4ab6rS/W80ToOv6foGU4ImCc29+DYDzr3+Fyc46\nJs1ZmLFrY2lxGS+K8X3FcDigdJU3SkryIkFXVd0d+7uf/QxXr1/j6Sefptu15XWB71v1KKMpXV/E\n7l6fzfXNP8lL/47bvcAjf42bCyT/FpZ75GHgi1jukft23+7bfbtv/xLtboV9jwA/Cfy3wF93L98T\n98jUrl24jBIa31OIwKmHK0noB0hd4Weu09DziJSAsqJ0bG/hoePsTjLGQuKpoKav1EYjcF2KU7Yx\nY/0iw34n2PT/tvNvn7rSHPC0K2m91a3+kEa7Sxj7pKntTgqCAJ0njLMJC4tWp/D7Dh/n2uWLTPp7\nfN8nvh+AG+vXCPyQXqvNm69/g+e+8C/sd2+cR0qBEQLluDqCIEBpgR8EeK7ErNGM6c4t0Z49wsyM\nhVbmnOhCPU7HRaq0Txy
36HQ6NY9CXqaEjYjYD2tmtiypkMLQ39tBV44MyPcpxJTBT+G5iCDLU3RV\noCtRy3iFYUCZlDeFmVZHUn9XhpgfZJ4S6IM+zZRG88DfBfusvzf/4/uYOfhHAwfyTAfn4dSjt0pD\n3/2pJyVVHTEEKFSV8O5rX+bNlywjXjrapCzBw0dIC9PNLSzRnm0TeJL+oMnE8cs0W02qqiT2wylC\nyt54zCQZ8uqrXyd2PCOelMzPzRI3IjYcSdn1G1vs9u9NvepPm91tXPs/Ar8KdA+8dhP3iBDirrhH\nXrl8A4ytefWnjQoIPM/HFwbHUkkqYLHb4fhsh0OuIabVaJKkKUIrdgd9ktwVp5clyg8IgrCe+Mrz\nyNLMUpZOWdDynKos8Xyf2HFCS+G5T9h22gdOWna1q1cusbOzTqfdJXTvDZShGUiSNMdUU5Y/6HZn\nyLOkJnlZffBB4qhHq9FjfvUEEzezPv87v4EqDYHy8adirkmOrApSKdBunJtozNkzoBooR24zrRme\n2v5GqVGeRJsK4Q6xuBmRlzmBf0DEt9fFUyXXr60ROiFYqXyEqUAJlC8pnB7keDQikArfD+t60U63\nRV6OyHJ9E91rlmW0ndbnB9n/8D//GkeXunilhbtiVXDs8Apxc4FrA3s9f/CV1xjt9Gl3Zvjclj2o\n1GM/xOAb/4Qf9V7l3/+lv0DSsN2PWo/w8NjZ2OUf/to/AqC/u8ff/Ft/nRMnjvPNb1razpMPnSKO\nYlqtFjs7tjN3NBqxuHjnKetJgZ72B7zPoXSTNOJdbNrilr/dBI4Ywa3YnUF8AEHEd4dJ4dF0V1+l\n63z75S/zxjdfJHGMe0ZrhB/ihTGh0z898cAJZuZ7KKHQ2pA43vE8y7hy7SqTUcLhRdueHnoheTOz\njolTAnr7/DlOnnyU7sw8N9ZtJ+nm7ohWZ+FP9Nq/03Y3HZE/BawbY14TQnzyfd56V1NLNHtg7NY6\nbQnOgQoDpqThNpmiKmhOUkwrpDdrh7ncFqhei63+mHMbE85uO05rpYAJQhhCJ5zqS0WepQixv1Cy\nPKcoCqSUNZG/FApLrWLpX7OJ9apjT7K7vUGS5Cwesux9yIrCSPLSIKZ6e9rg+x4zMx1eeOEPAWjH\nIY89/jEy1SCvoOOSJYUXs7u7S8PTNNw4Q89DeCGGA0GCw+HIhzXmOZzcsphdJ06SpgyHA8QBPuxS\nlzSbMYaKuGG9FuV7VEja84tMH/twMMJIja8khSmpjMUM55fmCVCuScTuSkVeUlUlWu8LCHueR36A\nF/iD7JlHH8GXmo3r9rDtLR1G4wGSuZ7F83/6Uz/K+tXrXL2+xkmn2j7yd1g61qG6UfCVr79APG8P\n2IceXKU10+OFt7/Oc889Z+eC1vz+5z/Pn/25P8uTT9hnmiYJQlcEStJ2TQ6tOKQ5pRa8jSl5EK++\n89T+jvrAd9j09QHce7n73gPyYM7GGHPTISMAaW5OwB/E0W9NzNf0rbXKutn/+/R97j271d1j7aGE\nfNcemF/+w99muHmBRiiII7s2mq02cauJRNN2ea2lxSXiZkgcNInCiG7X+oxFWTB3/jyXz51nZsbO\nmyDz6Q8m5EXKrmMF3dkbcvKhx1g9epwXv2YP8N78UY6sHr3tGP/iP7mG0QYxLXQQ00O04uC5LZB1\nJLRvB+7F9M0HmBVvPaz1gfnlGYU0hkrmTGeU0JJ/+O+t3Hacd+NpfwL4GSHETwIx0BZC/DqwnKj0\n5AAAIABJREFUdi/cI1MbvvtNmkefQAhLvD+9CHuhmtJx+0amQuqStX6C1naTuLg3IdOKvXFBf1Iy\ncVSlg6JEIjHa4E1bziiQSITR9VzDBGjtYSoDLhNtTLU/GYGvfuF3AfBLzaETx8hLTaNlT/5GYxnj\nkqATx6QuFRR5xjvfeplXvvR5wHZVLS8ss7QaE/geTz72tL3Zf+Gvcu3KJfp7WwwHzuMb7
DEej0mS\npM56GwxCSAIvromQGo0GXNiqx9np2GRLoSf4niQvsv3KG1MRxQ2KNGPsvJNxKmi0WmjpMR7Z1+JO\nl8l4B7Sh3WmTOSKnPLe8vkEQ1q22URyidYVSfs2ZEARWnb14H+Xog7a6OE9VlZSJ/U4hG1QGhPBr\n8QohDN0HjnB0ZZGTvvWo39nOmDn6OK3NjBs3Npns2PtgjhwiDGNWj53g2FG7ELPxiCeffIo0ndRK\n2+1GRFlmXLlwlmar7cYeUKTjO47VJsedFqVLQb536dmkuqlfP5jovhnqMO/Z+N+7Q4uDX1Ove3Og\n6oI6mpp+vyXzdFCK27BvjQw0xoo314xR+5/X3ITQIIRdDje9Ztxm/seAwSY7F3jxi58BoL+zhfJi\nDq2uELioqRWGGHLSbMSSEw0pq5I8g531DZ555hl63UY9yCh+mMOHZpkUdh2eX79EKgYM+jvohn1t\nebXNQ6dWeebpj2IqF9UHAZ5/++uwqKkBcethdPOzE+j38Mfv35oD7xX7z2hKtuUJjaLC0xrhbnIu\nKiqluPHtb3LtTctjI9l/5u8Z5x3/ZToEY/62MeaoMeYB4N8BvmiM+QvA72K5R+AuuEem1jr+5Af9\n5H27b/ftvv1rZyuPf4Jnf/5Xefbnf5WP/Lm/ccf3/XFqtf4u98A9MrWqLAFLA1ontYxBSBtylO50\nasuKSMLWaEJaWG9T7kkmuSFSFv9tyin7X0VVhfhIjDuhtBJoozEHpJcw1ts4GG7a03D/71N4ZGtt\nnUQXtOcXa88ljiLmFlbwPJ8smWrjBZw5/TYvfuV5pCtJ2tva4vrVK4TtOYJGi55joPuBT/4IUgqS\ndMxkYr2B8bDP+tVLXLxwgTNnreBBs9nkyJFV5uaWrPo5MDs7yxe/+R/V49TGYeJFSmY0fhijHNFN\nq9VCoKgqDxzk4XmK/t4QUWWkI3uN7XaL2VYXoXOUFtPgg8kkZVyW9LoK6Tt4ROfErZDJqKgpXMuy\nRBu4W9nNMk+YTBJiVw/uSUsBK6RPnlivt7+zy9LiIlFDMedyGYdjn0iAbj/EkflV+s5T11lOmWse\nf+IpfuAHrA7b/EyHT/34pzh//izr1y0LZLsRkoyH7Ozu0nVCE1VV4nk+xx596vaDlcpGYYBHic0Z\n3uJdGVBGWAUYOBA5Tr2x6Z+tr67Zhy6EENapu/V3jS3FrIsNlUCKA17XLZ62QBxALd6LwRtzIMHp\nXrdUOtP4Qb8nGWrMflxQX8NN33nvIPvXvvp5kDbCO/X4kxR5ijYl5VRkI88o8xG6yugctnBks9Nh\na2ON02+/xcXrF2g5QQ1jPNbX1knzCdppUZ5ZP8vK8SWOrfZqSJAkYJxdxohnWJi3kWmS5zcl0w+a\n56kadpxevb3s/eduMAgjkbe+9j61n0JKpiKmerjBZOsSpw55LB2y2PpO0WV74lFoj8y30YQRd96a\n75Wa9TngOffnPxL3iJSiDrVq8p0pDieow47KSEKpGXkxA8ex3YwFXmAIfY9+UtB0WctW4HFxN2eC\nxFfTLL+0SaKDxdsu7JQcwAH1zWFIz1VrrJ+/SJRMGFy9zPq6zTy//MorPPbY0zSaHXKnYCIFvP7K\n1+kP9urif13ZBTttGhoZuyE1GhD6MXGzQ3fGJsGiwCeQPoP+hB/5EdvEs7S0RKvdwYsa9QQ7qDgD\n0Gw6AhrfIJWPFzVYczSok2xMs9Eh8iPKwi6UyPdAlwijiV14WBUprbhBnuTkaVZTiUZxTFWWaKgF\ncdMspdNpMR5tEUdNd+8klTb7vNEfYK+98QrJOAHHeBaHAZ12j9ne0yQOLrpy7jSiTGk2GsS+PVya\njRjlabzeHH5Lkl629fDXb1ylMXOY3VHOww9bRaIf/7FP0un2mJubZ/3qZQD2Nq/TacZIUzEZ2Gah\nuBGTJ3fW4RPG1HNRuOqNW69SCchHQ4Sbc0EcUxmDE
XKfZ5r9TVPeKbA9iBK6v9aQi5EYc5AD3j6j\ng1DK9BC1c/pmjFpK4xwVVf++kramqsRuUlMYSCqNcHXhwsmNIcR7NnKLodzhxt3BdrbWOXrEVlzt\n9ndp+YLJ9hqFy4902h2Wel18EVMVdm1duX4NjMbrddgWE86eOw3AhYtr7K1vEfk+fsPhz37K8uo8\n8/1tEgddJuMRl996DlUqhn37O93eDHl6e059T3lOes0diK5C6CZYyBhAuj3Fvc8cTICImrVRCpAm\npxjvkG5Yh0zvXiNIN2h3ZjjZtms6LVPe3b5G6nXZLe3+U/mzd7yX/wq6IoRL/L03IYI2tdeSVppy\ntIUR3VokdKkTECvJsfl5Tiw2aEZuEmp4/uwaXzqzxU5uP6+cV1OW5mZPxBgOtvPfKvgauHIh5XmU\nRY7xFGvXLVx/7sIVXnzxJbtJOoL9hdkeFCmehOHATpa5dosgDBBSUukKndtF4fsB3d4MutKkDis+\n/e7bvPClL3Lx4vlarmxrdxuDwIuaeA7TLm/BjaddiXGrhReEFFrUVR2msgyKnvEJXDTS7IQo4ZFk\nBYtOPTzVhrIq8HyfNEuJXeLPQyOFoCwL+q48Kk1TfD+w1SRugnq+RBlJoe+Mvx20y1cv4CFpONL/\nbFxZUVph8PypMAMkyRioML797W5kMMLHhDEq8Fg9dgyARqcDUZts8wYf/rDtEG13elR5ycryEung\nuB2n0YTKbp65O1h9T9VSbLcznxytp9zXCqiQZt+XllLS31znC7/z27Rbdn4+9MjDxDNdmgsLNFqz\nVG6eGWGLB+3mfxD3dj7cwT1BCCq5/z5pDOYA61/tTU8/dBMePfW696tQNArheYQGTGCfezlzAr30\nKO35IxyejRhv2M1w7VtfpRpuUakU4zZOTIE2tiNx2sVYL5n93qMPtMBU7Fw858YJjW6LldkWHcdd\nvbCwQByHpFnCxratKHnjjXXiVpMrg3XGyZDxmm2KubGzhaciRjtD5JYdTeBVfGXvTZqhpte18zgO\nFa21Nb796m8xGdu5dPjwEts7JX/xL/3Ke8boeb6Lrm7BsA/kGGw3p7ARvMO+pVYYoTAYpNx/az7c\nZLx2GkbXaSs3l+ebCL1EECtiVxEW+ZK5oCRsJlxbewOAVHbueC//xDftoiqQuETPTTCF21CdM1J5\n4DPiI72Qp5/9CACLHQ9tJIFUrC74SLdZlKXCe3iJQVLxe+esJ2VMhag0nlCYqUq5EzKlKmt1Crs8\n9seRFy4kVoo0mUDg4XsOHgk9RpOMsijQrktysLdFlY7p9nrk7gRIs4zRaISnPEZpRsepz+hCs7W2\nzng85N3Tluz8m9/4GufPv8t4NOLCJTupfd9DG4FU+0onB4nw7fW50kalMMYKIU/fGwUhFJoqnyB8\nO7GWuitcuH6d+V6HmRkLEQySiklSUJQFXuDX+e9KW2rRJEnqUkPf99GVxPNkLQqgpEdZ5geAgPe3\nZ5963EIKTilFGEMYNhFS0521i/fkww/h+T6+UrWSTxQ3ML6HkB6+kQiXTBTNWbaHkscf6rEwZzfO\nJC/JEk2ro3jw5El7PZMKJQqMqKimbdZGo8s77zpSD/CEHZNNQera4wRQwmdva43XX/oSJrUL8sLr\nq3QOL3H8yaf4vh/4FEI49XGhEcYgb4IVDqYQp/81GCEwQlI5RZX169dZWlwF9pNw9TfUm8kBzxqJ\nwN/vPhQCJUPKxhHS1U/Y9yw9StieIWoIwoWQlQ9/LwCLS6u8+4XfZDLcoHKzwZQaqurAMUAdRd6L\nzYSSGQdvrCwfotlqMT8/uw9VGoMXKIzw6ij23XfPgvK4tnuJhw43eGbFck0fWWhw5vqQzes74M4W\nz2g2NidoJIK+uxcFUlZ4yq8dmvidC3iqedsxKs8WM9za5irqg9DefqUlBlVDGEJ4lErhmYxquMVk\n6zwARf8qLZnT6
yrCcDqXPPJUE4QhwXRteTFR1CTwFF3H/d97Hwbp7/6q/ft23+7bffvXyP7EPW1T\naetNyP3TzBhji+tttTEAyotQ7eOIhiQbu7pLr0m7EXFmc8A33tljvG0VqxuHTiArQTEpaLkWtVQL\njPBcdZOFFipXa6rLosaKPXVzxaXw7BG33e+T7GYcOX60bs6xJPi2hlo7fcsyr2jGEYPhkOHYYqSx\nlLz8yitc3OjT7s7QbNiTPRA+p0+/w+7eJhcvngFgd2+bylTuhLdjqKoKo20t9tSjmWpSTm0wsA0E\nqtNASBvSTZOWxWTC/OwMyivxK6dSPhiSDBOahGxet2Hm3qRAhhF+FKCNpnJq7kmWEkhFq9Wi6Rod\nBoMhgR8zGaf0+xZrLssCPwgo87uLkx89cRLlqZuuSRsJKBqObEvOSzzfJ/A8pKsD1sJGYBKDMiW4\nKKeQDcrtPs1mm9CbYpsR2zsZ6SCj53B/LRKESUGCdk1RUkikvJnP5aBdv/IWy6sftZ832vXWHkg+\nVSVVmdENBdKVno43rrI9uMHm3iax1+GpDzvPNjQYKsQty03WGPa+t2m0QHmSq5esYtFLz/0eH/vY\n98PjP3HTZ/edP8G+72Wo8PBVgO887SJsk80+Qho8iihthLKoC1bVHu1yjL44oLnrPL70OjdiQT6q\nMDpz1166od2Mld8pmXcn+9iHHmJ1xdZka+UzHAxpNJp1xKsrjfQkoiiYTGzkcvHCNbpzSygv4Nkn\nuzyxaCPEP3h5l3anQ3s+pRi5uaQhNBNbtle6haRTckoy3aDhSmRPPbHKow/dvoLN8yXmILHUFIq6\nFRDARkPSPTdPF+hki+HmJfLtKzSFzSPNtj3ioIUS+zCsLRMVeL5XdyArt69IIQlcl3j8Pgpbd9vG\nfhHoY8s6C2PMx/6ohFEK4cRk9xn4bmoKmIav2ufKxOedfsFb21cA6M620ZVhr59QXH0Lb/ciAP/2\nL55g89p1Huw2kZGdmF+9tIsy0A082qHrKgwChFJkeUHiBDT7acVmtn8bllePA1DEIWVWkOWGPacR\nWRiBH0eISlOldlKX0seoEC/08TKneWkkb545w/bLr9GIWwSOUMkYQZJMHEboDiflAwrk/qKQyrNZ\nrpsWys0QxBQuyYscqbDt5k5xo9vpUqQZkVSY1G7aa5ev0Ostk4726Pfthj8qDJ0lj1Jq8rLEm7bW\nhwHpYEyn02HidBJ930MpSRj6dd28FBAEPtVdtlmf/fa7BHFAyzWIzC/MI2VIFDbwplOxBFfD4CqN\nsIk+bdnaFKDC/eqTbjMiUBrj3nt1c8y7VxNWD8d0WlMVkBwqW1s77TAVpnpPPuOgnTv9BiuHn3bX\n6TNtW9dqKoeXcfpbLyOLCYsO0764cQNEE90f8MXP/FOavj3wHnvmSUphE4bTdEqlDZWp8KTVmbS/\nI1EYymzAu6+9CMBbrz7PqH+Nv/pv2U37YPLe7vgCjLsmBMIRnGVtm9SenPop5ANPMb/xDvHbzwPQ\nu5AyL5ZZWVyi3TE0U4tpf+mdN9hev0qRTTCO6gCHZUsOtI8Yc8ts/GCbn23TcXXWSVaRBz5hFNa9\nAUhN4QSHcX0AggATeiR5wTNPHuKHH7dw1z/63d9hIGZotGaZVHYdC11Q0aaS3oFNW+OZCk+LOu9w\n8viDHFu9fdOK7wtbPXJL3uDgtRpjQEqEVJjUrqPhjXOM1y8SkXMolkin5er5PtpMQbC6hM1h37KG\nbHQFRmt0UVp9VLDJnTvY3XraGvikMeagquiUMOq/E0L8l1jCqA/kHlHGoDEEyqN0G1dWlnXmewpq\nCyoyLdhONYHLzrfTMVUJrXSL1Awoph2AuzdYu/IupSn5vh/+cQDm44jFls/qXJvYt6szCgM8z6PS\nmtIJdV5Y2+N/+8rFenyHV1YBaPVmSdYTdnb7jCdugy5LkMLxcrhNE8PuYEAQ+HU
WP8lyRllKVmSU\nZYViiqnb8h8p9ssdtbFUqgDVTR1m+wcavLevwXd4WFEYYl8RBR7KdZKZQjMcj9DKo+ta1idJyu6V\n63i6IHJUt40opDe/wPr2uvX2HJ4vBHi+YjIZ16yLcRQxGvbxlKw19PJck2U5YXDnzsKD9hu/+ds8\n/MgpnnnWbobNRkSzoSjTFOO8jtAlCKVSNy8Wqfj/2nuzGEuT687vF/Gtd8+8uVXWvvdSvbK7SUkk\nRUrCUIRm0WLZECxrMBLGL5YtG7Yxo3kwBgLmwQPDMObBBgyPPdbIMgSNZsQRTC2UuJPNXtlrdVXX\nvmRlVq733rzrt0SEHyLul1nVVd3VZDeb1XP/QHVlf5WZN7bvRMQ5//M/URDRWl2je9PGLWp7j7C9\ntcZffP2v6QztOG2aPZSmDrB3zyN4buzyNEGrFGMMKnMBOpVjVM7dwj2djTWUeyn90jxagxApxr2Q\nWxtrXHz9RWqhT8PNxebGOnmnTXOgmZ4VvP2SrfF36cxrVKemefypTxCU7IajXcQq04pkaA3XsNuj\n197k+tXTvPWSNbB62GXtxpWdcdjtSzYAmnEuvRA+AkMWNBlO2WJTw+Ao+5Tmp2Y7zC3aG9b25gq9\nC6/RX4ppNAO6kf35K9euo4cjm503Phkae47Xt/vk36dPOwqjgkLpCYXUyt543XukjLaSC0bhufch\nCkMS1cPIhDjIQNv5CAMBucb3Ijxhx06KHKUlAj2uDohWNoZhfMFQu1KA6YBE3zmpKgh8Z7RdF50j\nXxqz8wJ6HmbUp790lf7aVfss7TFrckxvm961LXLftn/h5DF7o0Pu/LywRAzf8/BdVnSmNEEYEPg+\nDMbSHHe/ydyr0d59BxvjBxKMCgMfIRWNUsQgt50bbncZs/OK7/OkZVAYzcG6fSkeXphiq9Wm0x2Q\nacXatr2mf+Ob3+SRp3+SKPKZrlojdWBhjrlqwFQ5QroobzkOkZ4kTTPaPXuCfPv6ckExAgpWyHR9\nmnw0BAMDl1UY+h7D0QidZfhjaqGwtKrRaIAc83iFIHUnCGMM2i1WIwRofUuuk3GbWEGFvO3fCs3w\n28ZRi3G1bskwSZlrVKjW7Mnuxo1VVCBQYUju3A5hqcHWmfPIPGNhnMrdrKJ8CMtlsiSleFPRVKol\nut0uvquSnuUJKksQyiuK22ZpRq5SAv/ejPb3z5ym0qzxhLHc6N52C3KNJ0aUXZs8z0flGbnJEG4T\nywysdtqsbbQYdIdUXZ/mZYk//IN/zbPffRZVtYySqWOf4cnyLMOtG2ROx3ywuU6a9dE6QTlXjkpT\njM6Yf+yTd2zr1uYSly+9DsADpz6LkCUCIYuN4PqVK7TbbQ4uzkLfZbIa6zYZ9jtMN6dJOpZ19OaL\nLxCGktaF14idu6lULYE2tNc3GXatEVm6do1etwuhQeV2fUqhyeWO+2n37dSuF8k45ogx5EGdYe1x\ntNuOTvSf5ej5y0TDS2h3bZ+u+TSoE9Pnkw/OcWbbtV9nBAJSI3Y5XBxT9nae9vvMjpRS7uhcKxvY\nVVlaiJdpbW9CWmsSd2tSeYoYKvAh8Wp0U2vkRoMEr2So1CO0e3el8gm0wEhl6a1AJQqRQUS7PyB0\nhrTfG9FzeQq3IwjkLSmiZux+8iS4wPBg4zpby1cQ25tUnBa4Xw5Jbm4yunmDhoYkHFNFx7/DIN1O\noNk5pI3dLhLLVotKZTx3C1bvkmV8r4FIA/y1EOJFIcQ/dM9uEYwC7kkwaoIJJphggh8c93rS/rQx\nZkUIMQd8RQjxNu88/N31vrQ7jT1bvcDMiUfZ6rQYpGO3gPMTjSl5gNQKpXM+sX+Knz5hT0w6yen4\ndgcedDtUHcfz8aee5umf+AzVckSajK9LOH8EBbUmyzKWrizxrZde46UVy6k+01Z00h0K0MULZwEo\nxSFT9RpJliHtbZy5mSZpmjIcDEjdTpimGb7
v4XmSLHPByVwVQU+DKeRNnYjDLaI+RuvCrXI7bklo\nuO06Kp3vS2uNVhn93sDqjwC51uB55MLQz+x4zE7PEUcVjBzuBHsDQ5L0yNIhRuX448CcMaTDEXEY\n4otxYokhT1LQXnFq8D0fpMfI3UTeC0NhyCQ0mjYo1KxXCL3Q9sYlePS2O4xGIwSWWgWQCZ8//+o3\n+Oq3vkcQVnnyQavEGEbP8frrbzC//xDxoZ+07WycYOPGBZ776sv4j1m/bne9TWWqTr0W4o0z01QO\n6u6nmXTYYvmGlY8/8cAT9HtD8s4a0qke9jZWSdKERCtaG/ZE3Rn0KJcr+L5AmBTlAuhzlQBPJ7Qu\nvlFk0uZZgjFQqlRpukITevMS+SDlxIOniEN7BuoNB1xd33pH+4QQRXB6rFMh/Qb+7KMEtYij8WsA\nHBPrZMMW3VGfxMVW5isVDi402D9bYv/xvfz7L79g5wcPE0pMrtDjW5fW75DieL/+bIBut8vAubBG\no5RslBF47SIOk+QZ2ghyoxkldl7ydMRcGHBz2/BHf3Ke56dsbEuEFRYOzTA7f5TLb9v3tbW+Sd4V\neH6AqNrb4eLJkzx44kGe/fq3WV+1pIUrV5bode8cegsDGxsa989zN+PtzgZb16zfX21eJ5aGOArw\njD35p0qh8pzqVB253UOObRj2vbmF6IBLvxHilpGUUvLmG2/y3W9/5z3H8p6MtjFmxf29LoT4EvBJ\nYPX9CEb93u/9nm3c7GE21jqkiII/HbpWmCKXCDxhOL5Q4dc/d4pO316BWp0205HPjV6Hxx55mE99\n5mcBmG5OU/IDIpMxXbc+wzj0CWXO5sY6p8/aSPy3v/cc3/32d2n5UzR/6u8AMMgD9K404e+//F0A\nAt/Q77XxnaQnwP7FRTpbbVrKVnwBaLXbSAm50i4pBDzCW4xscZN0maC3JBXtGqfdQaY7lUnbDeF8\nXkHJw/PHgTJrdKemmqxvbFGulQnd76zUSjSnmvTba+TuStnb3mRqYY520iPy/IJxoHNNvz9in0v2\nAdhYXyf0Q6KgzGhkF70wGSrXyODuQZPdiGoxs4szRdaqL32MkAhjGDuN+oMOSX/AqDfgxpoNoWR+\nwIsvPMe1i+fZGAx5621rkALhsbDvEIsLh1gd2fFpzBjOvv0SHdnliMs6/f5L32dr1GahWePU8aMA\nPPHYwxh194zIdNjl2uXTAFw6f4bIm+PCC9+g5ni0MsvJ8wHPv/4Kc1XLahgaher1mJ2fQ2Up/Z7d\n7WemplGpgFTD0G7sZWnw45DFw3vwcrtubsQZ20mGTlNqTqRs/+wMzdrUXdspRUjgufVbrVKtlXly\n5gbTWP/1EJ9w9jiNUg0/tJvDiUOH+cQnHmFh7wwvPPc8r1yz1WP8sIbOhwRC7rCVtAGt0Y7hBXYt\nSmMgu/fsGq01xn1/miSgDEZQuA4lBulJVG7fJbCupoVGGaMMzz9/Bf3ESQAWTi7QrXk8/blH2HvE\n/vzy1YD+1oDeKKPnjOENscLqlQ0G1Q20tBtBJ+3jRXfW0w4D4ypr2c8ftNusXb9Ktr6EcL73Ulyy\nDCajyV0ke319lWjYp1yPGK0PkY4BYilgtndjl4vN0tZkWVqQCYRLl3/6mac5cPBAMV7/5o/++I7t\nvBdp1jIgjTE9IUQF+ALwe8CfYQWj/jnvIRi1G6myzAnfF0X6r1GQIwmFwLiMtYVqyC9/8ij7p0IG\nzne9MFVjOvKYrfwkDz3wEHXns0zThMhTSJOxtWb1Jq5eucgLL32fF7//GhcuWrJ7t7eNwmP6U7/E\nUFnjLvKUwNvxEi1ft8GFo0cPEZViRmlO6nS7A18iUHhC0HXsEyM9orhE3u9iXIZdqnPHTHD92xnL\nO6qw3Qm3G+nbKX9jBkfgFkieK+JonIAhLTODnDC2BlXpFF8aZqanaPXt/tpuDag26kilqFZrqNQu\nbGGgEpT
ot7s7Ot65lb3tdtqkLpkkS1KUkXjevV3YZqYbzM1NY1JrLI0GfN+e3MdB88An9ARhqcS3\nrlnj/PLZt7l65TKBStF5n9WOnY/p0hSbrTbm2jLRPqvjEMohZ99+DX//PobC+nWn9x/jK3/6B5CN\nOHvWJjAdOHyMhfm7Z51JY2hv3QTg5vINPvvUwzz0+U9z8S3bpt6NDXypaJPTcOykxWOHuH7mIsko\nJ2hGBI7lYnxBmnuIsEyCo2qqjNgLqYYRHnY85qYarHc32Wi3EG5DMck2izM7+s87+iISz5P4QqCc\nvGm8+Dg//emf4tH4BunAbniiMUd1dpHa7F7KNfu+LE43qdRKvPDmG/zBl1+m52TyS806WqWw63Q4\nPtFLT+K5NRj4Npi++r2vvteUF5hu1EHbeYsCn9EoIw7jgvKntHJU4AThfMVKZ3gy49H9AVdXcra2\nrgDQ2tJUjjxIc7rMiRmbGj86nuHlGctbOX/6V9YGHDzyCLIkMfuPUPbsOj575hWOHbtzG6PII+m2\n2byxBED75jJmOCBAW/0QQKuUUZIwHCQMe+N0+Rbznoc2AVprvIJEoMDsaETahxKtclaWl+ketcqU\nU3W76cuxt4FdGa93wL28bQvAnwqriOIDf2iM+YoQ4iV+AMEo24UcYfyifmGjHJJgdZo9x2DYX5U8\nsDjNcJQWC7gSVzh05BDy6D6iMEKlLiK8cZOXL1zg9OnTvPKafakuXrpEt7ttNTTGteUMxDML1Ob2\nFRQxrXMMOyfF3KW7on1K5SnW1peoluxJu9trEYSC0WiEC/hTKtfpdFqYPKNccpzmoULnO1egsZND\nwJ2DjYCU3i1BpmK87iACBJAZ2/fhUCFlgFYwdPzWuF5ncd8ekmGHgau6U40j4hi6m9sF+0gon85m\nl3SQs513Kbmgoy8DBr0+nVG7yJ6MZES7tcXmVotyxT0LQkbZO9N+74ZyFJMNR4wp+lJx2HPgAAAg\nAElEQVRbrWukQLkrZbvXRYyG7GnOM7/HvpCv/7svEYmAvXsOsHXlUjEW1VKMSYfMT5WpzNixf/Hb\nf0O3vcFyxeOP//JPAPj8p57m2OIiVy5f5NqyLUB8+uwZ9uz5ybu2VaUeiQv2eoFProeEcUi97LTd\nq4Yjc2XiUomgZl++x59YRI8k6WiELyXGuaY22musbGxRLleJxpoeyZA4C+lsrSOygRvPEmmaMUj7\n4Nv+tFob9Nzm4WbNjp0wCOGRRnNEs1Z35fGHjrGn2mI9zSg3rSGXXonrN1scjOrMuupHg7TDsD1A\nG8Gv/PLfodlsuu/1bBq23HG9eJ6H5/uEYUjoAnyBJ5BS8PijD73nnI+xsrZJo+IobkKgBWihCw33\nXFlRLg9TVJ5RwufazQF9P6MW1xC+HacwFnSyDV46/RZmzbo91q5ex5QlWd2jl9i1pJY3MWVBbgyx\nmzdEyLmVa3dsYxQJ1s5eo3P1MmAr3/iexzBJxl4ckmGf1eVl0kwV2Zyx0HieT9ZPyAwQ2g3CBlcB\nzxSiasKJS1UqMdK5q8Ylva3bclzI++5j+Z5G2xhzGXjiDs9/IMGoCSaYYIIJfnD8yDMiIy8AD07u\nnefYor32HWrGtHt9Or0+YW6vULWsRTpSJElOzQVqylEZoaFSiWm11vj61y2X9dlnn+fM2YtsbLZI\nXdVlpbWjsJni+u6FZYKZg4iwjHTlvoTnF4kusCPbut1u45diyqUYd8AgGSVUyxVGoyHGBUsyk2Ly\nFGModC2sT04gbtdX4c6aDcYYPCmLz76TkNHtGWhxbZyMogkDHyVsGTSArVYLEQjKsaTj1PMW52c4\ncXIvb768xaDrskYzQ5b3iDyPbq9PHjofmxH0BwN7XXPBwCAIyNIMgSgSVMIA0vyWy9+7otftsr62\nTu7cML1ul++9+ipeFJC4UmeDXocnH3yI3GiaTXuiR0i6gyFzVUPoRcSOH
jhdqzEa9Ejba7SH37d9\nv34FoTO22qusrNu2d7cPEWHQeU7P3c5urK68q2CUIWTQd5TUUZe1jav4cUTsEnaefOgoKzdOs/76\nNQ4ctzeCQ4uzeI8d5aVnn6fb2cB3mbBquE1rdZkN/EJbIvYNlXJIu7/NsOtkenPopyn5YJsc+27E\ncYn+5o2iXePs3DyeQZQWibyYkrburpU3v8zVFzOU5/P4Ezbr78nHH6MhJSrpcvO6dQ1Vq2UaUw2e\neuQgoJDuGOn7nvv9phibLEsxRqPyPpubVkWylw6Lyk33ii//5deYrtkTaL05TbUSMVWvErss3iAI\niPwQtEDl45tpSHs4YC1S+IMeM1h3U5D1efwhzbBzlTNX7fpevdLBNGKiMGB6X+TGs8doe0iS9+il\nNg6TNwKuju487xVPEasRgVsjOgjIDIyGPSJ3Ks7TDIlhulGm7OY39iEfDUmShGBxjsBVctLaIDHo\nXTphQlrXUxyEReBfZhki33ETwTvdobvxIzfan3vsBFNlw7G5OhW3MBp+TuZ7DCsBuStOmwykFf4V\nhrLjPQbS0NtYpre8zVeff4X/50++DMDG2jpag0YW/GVpMgy27lzofL1hGODP7wM/tmRRQJPc4npI\nMutO2NpYZm5hD/v2zhO7TMGtzQ021jfRKqPsAhuh9Jjfu4ebGx1azve+Y7RvdW2I24KQYxhjUFoX\nEyWEsAkmuybudvfI9IztU2u5ZwNCCsaZ3GmaknS3KXkVy/gA+v0ejWpAXAoR23ZR5kmO9HMqjTLr\nK10aVevjHfaHZGlOEIV0+9aglCtlcgRaSMaic6HwyXs9yO6NOTrKUjq9Ll3HoFhaWuG1N98gKMcM\nXOamMIoThw+TqZyqM3B798zyyqunWTIeuTE0Hdd5bnqaVj5ke+0aKz17pU26I3xyyl5A6NwOl06/\nxtb6TXI0PSdGNEiSwiVzJ5w4dZJW2/78sLPKm69v8MLaGoFjyvz3v/Nf8Mv1ClMz36S/YX2olbXz\nnKyOuBjD0rWreOPs2tyQGElvu8uwP3btBEgvpjvI2WpbY9hPM9r9EWEOF69av+qBmQbBrkDvOIEq\nKtfAy/DSmwV7Z8UIhFdiYe8ePvnYwwCcOnnAulGyjO2ONVx7ZprEJUHSuUmSDMmdfIFSyh40oKjH\n2O/3CAKfwWDAxua6a4Vh4BQt7xWrKyuooZ23pRvLIK1PvtGwa65SqTDVmKJSrxRsqsg3HF2c4jM/\nM8fN66u0NuxaznOfT9Q163GfrePu5/fmDJOcLj3GFOeuzsiNwUdiHLMqExoR3VmNqex7hEaRdW0A\nWQUBMoqQgp1qTb7PzOwsEk009vOlGZ4f4IchndYmoVvLlamdmMnO+2vdpUrpQn+fPEW6998fu0ze\nJe71Izfa/8kzRwgjw9WVdZ79pj0pn5ovIYKQVBguvv0mAMdPnESS075xkX7LLrabK2ucv3iR6xub\n5OU9NPcdAcB4ESrNySUkzo+YD7qUAoE0itHAbgQqnqU0PY9RGbkz2gZ1ywDp1AaKUBJhcnxfs2fR\n+gfnZxf4i4t/zt7FvTh7wmCU0s8UuTZF9e6xhjHwjsDjOH1/55n1eOtdRtv+DhvBv5OfGyi0Nipx\nBZlq0KpgcdTKdYIwIPIks1PWX1mOywxGI/qDIb7TV/EzKJdDZuYatLe2MOykjadK27TxouRWZk8e\nUqDleOwEni/Q+d1PrLuRCUMvHbHRsUGyM2fPsry+yszCfGG0N9dXuXjtCpUgYo97oX/1F7/I0soN\nVJLjBTuZpypJ7DyLnJLzjap+C6lzmqJM2SUqdNKEoVIMdMrQrY8gvLOm8hgze5rML7gAoBZsd1qs\nb6/QvWHbfm1ljb2ze/nC536O66+9DMDW8mvIuSkWZ6e5cPHMWDacHEFvOET4ktQduTrDhOHqBp6Q\ndBO7vv1yhCgFtLa7RUm4ZNhn71y1a
Nc4AG26SzajEoN0B5UgiDGeIvKtxjxAs1Gn1pjhxo1lusat\n7VyRj8Y1VSUBdiyMZymp/f6AxFU+LwUxWZoy6o1Q46KuEsrxnZXy7oY9M9OcetAyd9rbHUa54O0L\nV7l82TK7wjAkLIWUp8rUqjYwemBvgyoJ6fUhv/Wrn+D//kNLh1u+mTIVZazJhC2nNT4QdmzyNKaS\nuhJmRpKqHKkqlJyp8/NtVHJniqpur9JrrRUyyOkwwa8Zu7El41iXlVeIUWinVS+RbGvDMBlhVM6+\n/TZN3qowjjOadzIitVL0h6MdmQaTodymMlbqfDcVxXvVHmkA/xJ4BOsp/y3gHD+A9sjQ+Gz1R5xd\n6fLdNy0PdqmsmamWaAQ5dVfZu1RrsLSywfmrm7z8qr36nl9apjvS4Ef87JMP8wsP2UUQSytHemNt\njaU1e2rZ7g05d/pN3n752SIQGS6eQHsRarAF4xN5EN5iVI8dsEZuanqBoFxnpFLWHQ/30L5jHNh3\nkLnZKXLH8V0+fYaNdpdUU0SYxW3VcN4ZWBxXxMB+za0nc7BG2/O8d0iyjjHadptTpil5vt293b+F\nYUi9XgOhmHa7fej5DLodtNHFbu4HNvV3u5MhZcTcvDVSvh+yvPUKQRjjud0pFTmVepVKpUzqTrCD\n7oAojhgN7k08qN3vce3mMpeX7Slyo7fN0upN/HLMsRNWV2JjY80GvyTEge3700+e4DOffYqlay1W\ntrp0WvYkFI0GqHRE7ikc4YBmPSZNUyKlid0Vfmt7jW7Jp5Nkxem6Uq28O4tH5BhcVqtQlKqChX3z\nlKS94WRa0eu0ECbgmb/1qwCcP71AkiWEL16lVC0VhRDanTa5zl2KnFsLxuBntgpQada5XD71GHPN\nWb7xlee5ed2eam9sGXqjHWrieC0nyhRsgzFlVeshMjdsbPq8es6yoJp75qhv97lw/kKhv6G1odVq\n4Xng+1GRX5CmCZ70aLc7tFwR3izL6fX6aK1pO5GyYZJY2tv7wFSjTmPayZP6iiRVPPLgCb7dsif2\nJFeYUYK37THsWz3th48uMNuosbSSko8yfv7zdo186ctnubkOq72EdNuOnekqTFUSRuC7W7ARNSId\nY6QhDR2dNQV/dOeT9utf/RLXr2/gBbadaZZgkpT21hZ5On7fcsgV05GPPyawex4mCMm0ohzHhC4Q\naYwuqhPdvtaaM80d9pwRBFF4C3tEvYsg171mRP4L4M+NMQ8BjwNn2dEeeQD4GlZ7ZIIJJphggg8R\n98LTrgOfNcb8AwBjTA50hBA/kPbIc8stklHCymoX569na9Dl8s019taq/Mov2Vp/Dz/6OGGpxszi\nAeYftJSmn0lz5psNpko+jVKZyJXgqsQxgZT0koQtR3tbaY/41twsQ21YdpUwjGcYbC2jBJTG9eak\nd8suuHePTcgIKlNcXd5gs7tdVL1YP7jFnn2LrK/f5NIVm5114+Y6CFe5Qt92FboNxliNEYNhnCZp\nD97CKf/tVnzYEa6x33jr7+pvuetUGFKKSyij2XbXPj/wXaZkylbXno6mpupIIWg2p0ldJmq6Db1R\nzraXUCqXabtSXMoYvFKILAUku8qT+DrH5HlRD69aLdHaHL2zcXeBUYYwiIpATTdPGRlNa2sb6arE\nLEzNEitFyUtYatvAmapq5uYkL7+0zTCXRK7K+ijpYVDoXLDVtb5qv1JjfnGOra1t1odOajbVSKkY\n5ppSaNdMvVxBqLtfQaXOSR3VNIgkg36P3Gi82F7Hv/Rn/44njy6wttZh/iG7ZkvTC7z07Ne4trFB\nuVYhcdSzSjkmJ2dmYQbprr+eHxB6Hvv27WH/Ket+m12sEwmfdrvLX61Z12GmPbrJLt2PXWpxthjF\nbbJAxiBD+Np3XgXgu88+h0m30XlK07mbPM9nOBwgpQApC6GyPM9BW/782KedZykqz1BKodz6znP9\nj
kpK74VDhw4UgfbZ2TmGwyH1WsBUw76Ha6021UqFRx96EH8sqjVcZ9++Ji++fJmLZ4c8csqpQ8Yl\nLp0L0XN1PnfgEQBeufIKV9evcPLxUzRie2Ncu7zKdtrBm42o+PZzRBKyLz50xzaeffNNvGga4bJm\nU6MROqdWqaBdm4zW+MLgGU3oXGxSSuaCkNmZaYLAKwTVCogx1RkkAs+TxJFfzOUoKBGUy3Y+ipyR\nH04w6giwIYT4V9hT9kvAf8Nt2iNCiHvSHmlttchzECojFE4tTkbsaRr2H3+Co49bDePaVA0pJfWq\nYGHGGu1QgCy0jUXB1kAlpLlGCo+ySzZZaPh86umniapT/H9fs0kA15avovSQPIiRTmHLJ7xFV9kr\n2eyzgZJoT+KLkJJLnuj2O/SzAZeuXGZryxpDmxVl5Rd33CDyVrnZMYTNuPKlKIrAGq3RjmmS7VI8\nkwIk/q5isbcamIbLwktMymA4JAiDQvtaerb0USmMmKvbFzUuRWxttfA8Qblsx31/vcbZK1eJyzFZ\n0mPooubKYNUMhSnWkBYabZT9f9cl6XlEcVD4X98LAgVZSuDYI+VcMxXH9Ec9NlrrRTsHwy79UYdz\nm9Zoy5FGeyOyvM/2Zg/htJ5rUWjLvPVHJM6BXKuUOHRgkWRuntffsCnOfq3O4t452m+fL7TNm/U6\nvAt7pDfo7gRHPej1h2B8lHPZ/OVff52VM3tZ6w3Rp207c3KSpEPYLJHebDHo2d8/NDlzB5v83V/7\nAiJ2PGuvRNrN2TM7zdBzboesR7lU5sRDx/juN1+089sNkPFuQa6d9WDAqWKODwuglcGTMQR23lsD\nwbDTBqVoOSaRGiUMh0MEAumC3gBZnmOUIs3SQi42DDx83ybWRK4dWS5B+MCd1fLuhMW9C9xwSStJ\nIqmUyqANM9N2PtrbHbQy5GnC8WOW976x3GV1bQkRClY3FY+6OM5MI6STHyYVs1QH9pnfz0i2NP1K\ngCxZQsD21lU6rU1OVg4ROufh8vlroO5Mgm7WZ1ltJ0g51nH3SHODzg3Ok0E5CvE9ULliNI7lmBw/\nV3i+RKnUpr/jJITFbYc4Zxd6gwGpc9VlWU6q9S3xr3fTer8Xo+0DnwB+2xjzkhDif8GeqO9Ze2Q3\nFhsVMqXIxBRRxRrIawmEjVk++9NP0azZHTHLrZHoaQid3kPNbWC+ES5Da+zcl6DVLUUDMDBVr/HA\nsSO89balZN24cZVcKzzp7dSJNNwifN7q2VP11ZV1lBCkw5yRy6Rp90eIwCfJdrSYfd9HKyu1uqMx\nspPuaw23+15P2grx+AhXJ9EojeeCE7kaBx0FwkgrtTn2m4lbDczY152olJKwcqbKFUBOVEa9XKFR\nrxG5yTdZTp4roigqigR3s4RMDxFhTr1cJnUStIPtHvVamSAO8SLb+FSl9Hod9s3voTewJ/J0NCpO\nG/cCvySIpyK2Hf0KL8ev+wxQrCr7O4VIWVbbzOoB57etUVi5tIJMahx9aB/ZGxdZcdKsOYJm1Z5i\np5y/9ODiHsrC8NmffIaqC7h+57nnKUf7KccxC7M2wWRxfg7vXdgjfuBjnK9eK5uBGMSyKDRx4pGT\nHG3uQ26v0ZZ2fSzMzFKeOUI2GNFa7tLdcu3Uhk6nR3fUx3PDlabbCBWw2snJQ3tLEB60+kOUbyg7\nmmtnrc+tir3jk5il5t1+HTPCWDnp2L5H5bhhy2FlQ0K35rRKYbCN9ENL8XPBWa0yjNIk3TYCu7HK\nwCeIIsK4yvhEL5ORXX+tjbuO3+04cGBfwcA4d+4cbd3G8yS1itNw9yXb3QFvnbtEyR0qZqfKZJli\nYbZCmiuq1f0APPRwl1425PLWOi232f/UE4rPNvbx9b/5PivO9/7Fv3eKqXgfFWmoT9tT+qUpw/Wr\n79RyATj96mlMWKO2YM1iGEfk2kr4jgU8jS8xno8Rpggyau0SbbR
7l8dyysXs7D64CbTRdLpdUnfQ\n0DplkCTMCFHcxN5NRfFejPYScN0Y85L7/3+LNdrvS3tkjMHlN3joqWdo+5pBw1WTmJ7m2FOPs2/f\nwUKIyfPchcLYwAmAMR6+J/GQjkGwi1lR6NuMB8IQ+R71cszxg3bnvnjpEktb2xg/tsL2jNN0dwZo\nyRXxXbq5bms+allUZilXKvi5RmWmcIXIQGK0Y4W43yHY0W7QesdoC2wgSim1I8YvJSES4+1sONa1\nodFpsiM+4926J+aOReB7HqEfkmbpjttD5QRC4k9PoVyQyfM9oihGyIxK1Rrt9maXA4fnkJ7lDI/F\nukZrA6r1BpHvFQJJceSRRylhFBJr+/PJqItSqghsvhdm9s6yFSQ8v24rU+clUEemkSrnem4DUmEg\nEFmbzYunOX/DGoVLF1aZ9kd87pmfZu/8fv74T/7C/rz00MAzn3iMIwftlXdhfg6GfY4vzFJ+5kkA\nnnv2WS5duAjGsDhnr86z01NFuvGdkCdJIQHr+z4jnaMyhXRZvNOz03SH2xx7/CCqbg15JD1agx5B\nuUFj7zzLV+zmdGB+Dyudm6wsbzLnilRrchqNsk1Fd646ZRRRWCaII/YfswbqxsVzoHeHnnZ//c7s\nWuNomUKOr+4hWkSYAGRsNzYpBMgyflxB5WDcDSsSIE2OThXJ0G44WWrwPY9Kea6g0+IPEFkKXLrr\n+N2OcqnECVezc7oxzdUrVxgN+xxxkrpGhrx19jJbnR4vvWo1Xx55+AgLs9OUZYnllTX+4A+tVO6R\nwz6/+Z8+ypkrggsX7Bg//qDh4SdK/PrnD5Pmdn3WpiO+9d1rrLeHHHPc7V/9wpP0HZXzdlxZukqn\nMyiE6Gbm5mhMT1GuNoqC31mm8QMP34dwfFv3PaLQw/NtkZDA3Qi0MYX8anHaNlZobWFunshtomma\ngzK8+PyLfOc7VvvI/DCBSOcCuS6EOOke/Rxwmh3tEXgP7ZHdRvuRZz79Xh85wQQTTPAfHJ565il+\n47f+Pr/xW3+fX//N37jr990rT/t3gD8UQgTY7fU3sRI/71t7ZLZWIkt9eoOc8iNPAXBgts4DR+cI\nkcixvoGAwLMJI2PxFF8YW1dPjHnMjjKjLeHN5JCNa9pJgYdHpRTxmNNISDB85TsvsdYZFZllVtB/\n58QyFmqXQqKyFND4zrHrGYNvIESgnZBSmo+DQTunfds+UbhLxu0UaDyUVTRT9krsSUHJt/XihDvJ\n5FnmEnQyxgEJz7v1VCXdST+OIkqlmO5mD+F42rEU6NGQPE/wgnHBghFTcZlWOqTvqnjU5qsESYjO\nIUkHGMe/nplv2qII2pANXTAuFgihCIKApOU65rJsPP/esuMeOHGSc+1rdD3782GjxvxUE5koBs4F\n5SEQ2YArF5dIOnYuGukMJR3gDUfsn55lz4wNn9xYW2Wu3uSRw/uZqduTUM0z+JUSdFvMObGsn/uJ\np/mL516km6TUnHsjHQxI5N1PMwYou4CpEJJer41A4zs6V7leoTkVU56Zpe2ogVmm8YKYbpIws3+e\noGYTfh5//AHS1xOyVBf6H8bLKIcVMmXQruyU79lYSByXOO7orKefv061vMOJfoeQkNC7tG08fBcL\nMe4mKYWPMdI+c8UqpPCJSh5Rtc4oS2HkNGfUgI2Vq2xtLJHlThDNwPa2T3/YZ99he26Lq9P46b1x\n88eIghK+UyM8dLDCgUMHSdK0KBbyxJMDDh94jRdfeYsbq/aGdf7yTQJfEvtdetsZ56/ZE/JKN+KT\nl1O6W2BcYteNtSGj7+X0uobu0L5bh4Mn+Jm/9SmM53Hh3BkA/od/9qeUShX+2TPvbKOWEpMP6Ky6\nQgSDDptrMY3mNCdOuriaB54MGPQzMjcVnueTZSFxFJAkSVHQI44ChDboTCD9HbeHwBB6HqF7X1Wu\nwBi629sFd7sow3YH3Ks062v
AHbr5/rVHTJ4wShJKgeTUceu22DsdU5IK6Ymi1NC4zI8wO6m7whiM\nAu3qKebKTphSikwZ+qkuOK3DRKOMzzBXKHeNWdx/iJnpK2xuXy8+RxiNMDsvQu6i5kJZtS6l8sKY\nmizBd9VYjVNwy01Cmue3qOaqsZRlQcl1HFFsRZCyrykH9lm9HFEux0jPK9wMNqlG27JfbsMIQsnZ\nazs0+CK/yhiyPCeMI8YlTEIEpTDA8yTGGe1ep0ugbEHiazftSzG9d9ZKBfQHCF9ZXXPsdU9oSZ7n\npC693EhBkgwYDnv4zu+W54ogjNHm3gJSemvIkcocFTeecR4TbUKU+0QlG1j1pUeebJOXA7RLq5ez\n08ShRCQdAuDBRRuj6G23+aknHuXhA4tIx5wp+SC8gFIgi7jB5z/9SV67cpXulSWmXMxk2OshuDvX\nOJeg3Hj6vkcYBST9PnHZzntzfoY4AS+IME7krBSX8HROluXsP7yHK4dnAWgsxJx6/CTlSomaCwwP\nRl3SdITSOULaZ0oZhv0O5bhEyWlC7z0yy8FDOxK5xbsw/u+4OjDgyRgpA+r1OvNTtp+hH5IPKvSG\nqatHCj6CKI4Io4ggCBGuMk57eZlRZw0pKcShjLFGKk8GGCfJ+8lPPIHwI1599S/fa8oLCN/HZ7yR\n2EC8H0REkf3sOC7z2c98lodPPcn5q5Zj/sJzz7Kx1qUUe1RrMQeOWB31pWur/PZ/95cM0gTt6mNK\nnaKUIskN0oltfe7nquxZHNHrbXPhvDXaLz53haeeunNh39LCcQapob9uM1y1UdTLAWlvCzNwuRpH\nZ9l/+CTf+PbLnDtjEwHrU00qtWnq1ao1xGMxulKIEApPeIROjM0LJCbt0e206fW23Rh75Cpj0O+T\nOr//D+vT/kDxuQdmftQfeW/4/X8EQLM+9lNpkhyMDgncYg/9gFB6KB3QcRMTBz55LEhTTT6uP6it\n4TbGslw8548OfUWjErPQbNAo2aGPQ+s3FkIUGim+H9hIsjRFhpTnSeDtormh282FMORaE8VlknE5\nJ2OIKhU0FMFFpRRaZGR5St3pM5vcJ1E5CYrpUokp9zt7nT6dLCFN7YYEEFVKNKebjEajwveepon1\n8b1LEdLdaA4UIjOFfEFZGEI8AiRVp0sRBhHZQBCH04TV8W2oivTA+D5ClHjJnVBCT7LQnGJ+qo6X\njcuVBShh9Yt9d4I9fniao4cOcXlphSNOr7herSDU3VkvXilkoOyGFfmCaqOOhyFzSVUikAy6XSo6\nJHbqtWQjpNHMNxvkZY9TT9mTqRfC0ekDXFu/SadlMyqDKCRLEnI1ohw5o50raqUKwmgqFftL9x2b\n4+CJHWLW1LSdOyFsaS6Eh3T+a88LCcKIT3/qCX7mZ3/ejlFY4vTbb7N0c4tqxRpyzxgapZCgVLWG\nKbBsi+99XfPdYZ/+qIcqTtoKZERcqnDqpNU0/Y+++HniqSb/6//+P93DrLuhyXdiO0Fg5XiFEEg3\nR4EfUY4FlWqDmXmb4DbfnOaV518m00NKlYArVy3N9uzpVVIhSUTGKLOxEE8HBGMFUbcc/82//fdg\nhGVMuVvXgQN7qVbvnM05/8AzlGf3cfOCVQqNVI+TJw8xXSkRu0PW3tkmwiiWlq5w+hUb5ouCmLje\npDlVY362ydzsAgD1mSZBOSTwfZSTdzYYkt4msadYWLBUz6OHD3NzeZUoDNnetIZc+B+x9sg3vvGN\nH8XHfCCYa9oXYG4mQGuFJMKTO8NkWSKauuODB1EFKQXJSOFkolFas97uM1MvIaVH6E67pTCjWo4o\nl8rOCIMnpb1hSL+o4mwl4Y2LOLjKMbcJ9Axc2ms5DKnUagzTvBB3UjpjkGQEUVgEIoUQRJWIIM+K\n04lQPoORDS4abYhj+/l9pfA8g+cJVDKmJWkqpYhBb4jR4yBrTpYpPBlxLzgURi4Ia1+AQPoEn
kfo\neeBqInqeIqr4lr42zjD1DEiB59eQwkM7t8YoTVEqp9ZogLt1+VEJJCgCnGQNXiiZatSolEvMO5aJ\nJ6Cn36UOXwDJaCyJkKGiAC/2EXJc9cfDL08xyjOun1vj2KkDCF/gKUEgA0RgOPmolVlAKcgFA9NH\npPbnG/Uym4MhWWoKDQpP5QSeDxjKFbsJVRohswuNol1PP/MTgDWmaZqCDJFOK6QT/asAAAiESURB\nVFrlOaVSzKlTD3HsqA3wlctVSjE8eUpQchLDoTTMT5XxggqD0ZDQaXwfbzYQQcAb586x3bfPNIIo\niDi8uJe//fNfAGBuzx4G6l7z8sZzEBYkAQWEnhWnGlMLLePAcpiFK8T9+COPcGDuAEsbF+n1uyht\nhbNOPBxZmqrQDNzNWo1GBF6ZLDMMnF6OAKampzl+/AQLc/bW05xqUHW3pdvhS496c5H4UesWS9o3\nGAo4efgQR/fZw+aNK1fwayParTaBM6xGZURRwPVrV7l+8Sw1p+FTn5unMlVjplFjYc5uvHEcMeys\nc3D/QiHJeuncGS6ef5t6o87G1qYb93fJIbj3Yf/BcT8Z7Q8Km5174y5PcP/j0lvXP+omTPAjwlun\nz37UTfjRu0d+3OH7svg7CGICL2LsNDSOrpemeXH6rtXLaJO6Ul9jGp9is9fj4ZNNewUcE+bBVRzb\nOWFYt0iAJ73ipC2Eb6uzS7FTgM3c7uNyPnlfogwY4ReSpYaUUTKAbh+cT7pejugOErQRjNzpJMC3\nyT3KQCAL7neuc2bnmlQSj2RpFbBc5TzPSdNhUX29XInxZES7dfeyXbsReF5xLQbAkxjfRwcBoUuK\nissVgjDEC3z0mAPuSQI/IIojPM/jyKqj0q238MMSjeYs2chRG8MYjWKUC8yuk7TWimqtWnDUtVY7\nwaE7wexkfmZ5TpIqPE8UcQclNJmQpFlGkqR0un2U0lQqZbIsw/c8otqOGD65Zv/RPcQlR8ULoFSJ\nCeKI4cCpQ+Y5vqwghS6Sv/bsnaFc3rnJLC/bDcIYQ5amIH2CyAUY/YDc1Eny3NYJBZJkhOf7LOxZ\nwHPrq9PaJJeSIAoQKkdh3QX79u/hH/zHv8Dpy49wdcUWXqhV6yw26izMNGksWBpiUK0QZPemNzPG\nf/Zb//n7+v4PEs+99Pw7nv3O774zeVtKz7rVXHzFCyO6/Q4vvL3Bypo9vbfaAzqtEaMkR7t3OC7H\nlCo14n6X/laPrqtBaaKQTq/DtTMdDh2xt66Z2Rma1ZB+v8uVK+cB2FpZYmt7RJoOi/N1PE4XvwMm\nRvs2jP3KYRgQxwG+FxQRe601SimM1pQDpwPsCXKlEFIX2YNSCgJPUgm9nRwIbFDHMku8wpAjJEjP\nPXNBFek5wShRJFOI2y5F48CGFoY0z1HaL7KojPCQYUQQ+EV/tNK02yNk4FFymW1CQmhAKOt7T5zy\noQh9SqWIzVanqMYTlSOUGuD7HjuVXm2ljXtLYodaswlCFkGuOC7hRxEyDAtjGMcxvuchPI98zJ+X\nAs/zCkGdqgvmzc/OMco0id7JjtUqQ6HIjV+IKwlPorHyn2NGiDEG/S7JNSrPMEVxW2W1yqVBirGS\noyRXit5wRKff49rqTTBQG1WplitUyuVCTXGUJURhQKZylNtIZA6lWomKCBkNbd+zLENKQRjGCPdq\nHjyy7xbd7/PnrMjajtxvgPCd9HC5SrUxojtMCJz8qFEa3wup12pF7EGlQ6QXEYQBQZ4QOvdKWq2w\nN95PaXqOB7at4Tly7ChpBqNei8Q4QyJ9onvPqbpvIGUIOqNwJ3tlAj+CbMSSUxq9enMbLqxi1M5r\nIETAcDSyuRRCEzgFxAMHDiMFXLt4jprLVs6SIaYaUDIK6cqVXXz7HMcePMWhw4cQbrOeau6UmLsd\n4t0kAD8IiPFxZYIJJphggvcFY95xxf7wjfYEE0wwwQQfH
H4kgcgJJphgggk+GEyM9gQTTDDBfYQP\n1WgLIb4ohDgrhDgnhPjHH+ZnfdQQQlwRQrwmhHhFCPGCezYthPiKEOJtIcRfuQpA9y2EEP+nEGJV\nCPH6rmd37aMQ4p8IIc4LIc4IIb7w0bT6h8dd+v1PhRBLQojvuz9f3PVv932/hRD7hRBfE0KcFkK8\nIYT4Hff8Yzvfd+jzf+We/3jN9Vg+9IP+g90QLmDLkQXAq8CDH9bnfdR/sJos07c9++fAP3Jf/2Pg\nf/yo2/lD9vEzwBPA6+/VR+Bh4BUsQ+mwWwvio+7DB9jvfwr8t3f43oc+Dv0G9gBPuK+r2HTcBz/O\n8/0uff6xmusP86T9SeC8MeaqMSYD/gj4xQ/x8z5q3FZCBLD9/X339e8Dv/QjbdEHDGPMd4DWbY/v\n1se/B/yRMSY3xlwBzmPXxH2Hu/Qb7lyy5xf5GPTbGHPTGPOq+7oHnAH28zGe77v0eSz88mMz1x+m\n0d4H7E4VW2JnAD6OMMBfCyFeFEL8Q/fsluo+wD1V97nPMH+XPt4+/zf4+M3/fymEeFUI8S93uQk+\ndv0WQhzG3jSe4+5r+mPV7119Hmfm/NjM9SQQ+cHh08aYTwC/APy2EOKz/IDVfe5z/IfQR4D/DThq\njHkCuAn8zx9xez4UCCGqwJ8A/7U7fX7s1/Qd+vxjNdcfptG+ARzc9f/73bOPJYwxK+7vdeBL2GvS\nqhBiAeC9qvvcx7hbH28AB3Z938dq/o0x68Y5NoH/g51r8cem30IIH2u8/sAYMy5y8rGe7zv1+cdt\nrj9Mo/0icFwIcUgIEQK/hq1287GDEKLsdmeEEBXgC8AbvI/qPvcRdkrfWdytj38G/JoQIhRCHAGO\nAy/8qBr5IeCWfjuDNcavAG+6rz9O/f6/gLeMMf9i17OP+3y/o88/dnP9IUdjv4iNwJ4Hfvejjg5/\niP08gmXHvII11r/rnjeBv3Fj8BVg6qNu6w/Zz/8XWAYS4Bq2gtH03foI/BNsRP0M8IWPuv0fcL//\nNfC6m/cvYX29H5t+A5/GqqiO1/X33ft81zV9v/f7Xfr8YzXXkzT2CSaYYIL7CJNA5AQTTDDBfYSJ\n0Z5gggkmuI8wMdoTTDDBBPcRJkZ7ggkmmOA+wsRoTzDBBBPcR5gY7QkmmGCC+wgToz3BBBNMcB9h\nYrQnmGCCCe4j/P/TCWL5jVFYFQAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "transformed_images = []\n", - "for i in range(20):\n", - " transformed_images += [trans(cifar[i][0])]\n", - " print(transformed_images[i].mean(),transformed_images[i].std(), \n", - " transformed_images[i].min(), transformed_images[i].max())\n", - "show(tutils.make_grid(transformed_images))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(-0.3257020916595745, 0.49030737107138356, -1.0, 1.0)\n", - "(-0.1148718173111168, 0.5943530690757043, -1.0, 0.9921568632125854)\n", - "(-0.1876123301917687, 0.6578509306606333, -1.0, 
1.0)\n", - "(-0.45916819203800213, 0.36674404239797703, -1.0, 0.8352941274642944)\n", - "(-0.3001455154347544, 0.5464976989913715, -1.0, 0.9921568632125854)\n", - "(-0.3879825306551841, 0.5142138738794487, -1.0, 0.9450980424880981)\n", - "(-0.16791767110892883, 0.4776867721654128, -1.0, 0.9529411792755127)\n", - "(-0.07867900658554088, 0.49251211342491164, -1.0, 0.9450980424880981)\n", - "(-0.012275311339180917, 0.6259931231081871, -1.0, 0.9764705896377563)\n", - "(-0.47579912012831, 0.44796901896179764, -1.0, 0.7098039388656616)\n", - "(-0.4709048134003145, 0.22142046144980368, -1.0, 0.019607901573181152)\n", - "(-0.07774712605169043, 0.6400356728895145, -1.0, 0.9921568632125854)\n", - "(-0.06678664839516084, 0.6134990363534119, -1.0, 0.9686274528503418)\n", - "(-0.5750025513892373, 0.5272717873515015, -1.0, 0.8745098114013672)\n", - "(-0.410664308796792, 0.43596309108907383, -1.0, 1.0)\n", - "(-0.06828531355131418, 0.5641918783797807, -1.0, 1.0)\n", - "(0.003199054510332644, 0.6288654684816006, -1.0, 1.0)\n", - "(-0.33659619160850224, 0.39841029565502767, -1.0, 0.7647058963775635)\n", - "(-0.2228324031845356, 0.5534736178810422, -1.0, 0.8509804010391235)\n", - "(-0.22320004721404985, 0.4582661803925075, -1.0, 0.8980392217636108)\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW0AAAB0CAYAAABOr2PFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvXucFNWZ///uk7Isy7Io2rZt2qYdh2EYh2EcERFREa8Y\n4yVe1hhzWzdrslmTzTfxm5hsNkuMm7i5mJtxE+MvicYkJhoVbxFRAREQRy4DjMMwDEPTNk3TNEVR\nlkVZ1tb8/jjFoAajJtnsL7/XfF4vvBTVVc855znP85znVqnh4WFGMYpRjGIUfxsQ/9sEjGIUoxjF\nKN4+RoX2KEYxilH8DWFUaI9iFKMYxd8QRoX2KEYxilH8DWFUaI9iFKMYxd8QRoX2KEYxilH8DeHP\nEtqpVOq8VCrVn0qlBlKp1PV/KaJGMYpRjGIUB0bqT83TTqVSAhgAzgKqwPPAlcPDw/1/OfJGMYpR\njGIUr8WfY2lPBzYNDw9vHR4efhX4DXDxX4asUYxiFKMYxYHw5wjto4AXX/P/leTaKEYxilGM4n8I\nyv/0C1Kp1Gid/ChGMYpR/AkYHh5OvfHan2NpbwOKr/n/QnLtD3D66aczd+5c5s6dy6JFixgeHv7/\n/Z+5c+f+r9MwOu7RMY+O+29nzIsWLRqRk3Pnzn1TwfvnBCLfBWxEBiK3A93A+4eHhze84b7huXPn\n8tWvfvVPes9fC3EcAyDEXyYLcnh4mFTqD5Tkn419dL4VgihGiSOUyAPgdz/8Dj/79tfpaM7T8AIA\nclNn8G8/+AmqmUER6l+EvlQqRSqVGqHzqfs+K+nxBWEA7W3HEEUOAL19G8kXijieT99aGb/uPK6T\nRr1BeWsZEWoATD9xKkObh4jimOiQiDXP9wCQNnXOeve59LwwwKrudQDEAjRNQ1EVsukMAENDJeI4\n4qHF5T+g9y+x7v9Ta/1a7KPz/B1PAPCrfofBaBqt2hDj9Ir8u/e8l7//5y8zf/4CfnL7bQBstatM\nmdrOFWedgmFmAVjas4UHn1jKI/Mfo7KlzPZKFYDQD0AXBESEYQiAqqqISOXitiI3n/x3ANxuFgkq\nA3z9pzdy6exzAWg9YhInXffxETpve/EfJd2hoLl5Ms0TslgZyWMTOzsoFFQe/tkgC2+X7/nGXbfQ\n+cGAXPw01UoEwFDd5r2XZskVPXp61tHb1wdANWqwxXseRIOTWt8NQCaczcJfGnx/7u/RNPnMC67u\nok6DbCbHWee3ADBYns/TT77A8Sv/C3jn6/7XWOvXvmv4AJb2n+weGR4e/u9UKvVJYAHSYv/pGwX2\nKP53EMcxxDEQM9jbC8C8X/yM2uaNREcWqO6RgtycMJErBwfonJYmjuO/mMJ6LbJZHQC7HjJQrtIT\n2OSLFgD5QgbXraOpGm2T5KFNU2Pa2o6mkE8jfMmeURQw/fgpuN4ePDw6298LQL1eJ50+lNajC+TG\nSoG0dkMfR09oYvcelymTWgHQdRXHdoA/FNp/a7C1qQBMmXkM1//XKkpazCJDCt1fTTuZUz94D9+5\n5/fct2ApAKdd93/45Y9vZmWxi5bOLgB++OiN9K9YyXmXXszKFct56WkpJF3Xx/ZtAiKsjFwjXdPR\nayGmiMmm5X0KVS5sTnP1YQcx9QhpAAix6XV0qqpUuI7rUqtvxbRj/Eg+8zl/NcdPy5Ox2lE6JgLw\nTMsdmOUB3m8FpNVxAETeZvrWucRCkMu04bfIcToDmzjYL4A4iQd/I9+vRX0ofivd7UU6J0teOnpW\nzLKhZXhKhaeX1gEYGLBZvizk+ITOD/7fDwNgHGZg73bw/AChSOWiKiq6rqOrOvaQDUBlyxBnXzCH\nkL2Myx8JQLG1lZxm0bOkm83btjHuuEnymc1pNg30M6mpmaZcHoDVq3vY7oVUt+6mWpbjUZSA2XNO\nJI5h+YOrAAgbIc88cfcBeeDP8mkPDw/PBya91X2zZ89m8X/Ix
JK0aeD7DvWddaq1miQwDAnxMUwD\nEovvxVKFw9M5VPZtOLAsC9M0iVSdLaUyh2pSyLQ0N0EYELg2XugDsLG0nb7BOks3NtjkyfvcSKGp\nqYVv/sf/t63+PxtCoCqCRqXKr38sLYrVy57Bq1d59N519N8h533msue468EH6Jg6FckKf3mh3btO\nKg3TyCEERHFMb5+0qltbmlFERK1SwkxLoVupljANkziKiRxpMRFDLjsWXROEQYSmS7ZNZ0wG+tfT\nUmijpXBEcmtES8dkGs4u3KrcqEccnmbckVlgxdume+Y//AYh5Hs0IUZmRlUlfyqKgkiu735xDUeO\nPw5FERBL6+11CjAGNVLRdAFC8mekyt/qsUAEUhgiBL4CoQoBYfLuCCV0Rx41MPZ0ANpbz+bcu8/i\n98sfxS3tAaAeKNQjlcfXbOKOb34TgNJOl5VzB+lZPUBfSQqex084i2u/PJf2DFz7kQ9xmCLfNSaf\nIRfqnHLaSZx93nkArOvt5affv5W60SBjSWV/RqnKvxRn8Ox5F3LJ1AkAPFV9vdDWlbSkP28RRAGO\n72Nk5DXPidm4UkNlMorRAcDnrluJIgS9d+pMz24BIF+IqdkuogxOWCZQnX2LQGfxBFauEAwsl4q4\no7WZmt1PepLH5AvkM2OtRpMykaGeLZTqm+Uc1XwCVwfJLnzrjmuT9RT4QUDVqeO8JMdpVxzU2CB0\nQrxBmXOhHzSJqz95JXvjgKOapCA2TRNTORS39hI122XtlhIAC5Z0c7gSM729meajpCLp/2k37uad\n6BxKzpTzEcV1Qs+jd8MOalU5RiN8c9H8Px6IBCm0Vz7xeQDi0CeOfHbdXx/RxoeZBrsDB900iZJ9\n2t8/wBrFIGNlcV3JtGEUoGkKTiDoCWPWqJL8TI9JxjIQsUdKlZtFuBGN9b1s7qmyeEBOxKADTW1t\n74j2af/weZ6b9xMA+hY9QBhqdJ5yMaeceykAzV2nYqU1Hrnn2zzz+K8A8Ks11FCj0FxEG5Ph2s9/\nCYDzLngvTnmQe37zM6LEbeH5DvfdezeVUj+uJ8fpeyqDAw2279xNkGzYyZMn0tyS209YLDd6LBRE\nDETSNQAQiRjigKXzfsO8H90KgF0e4LneAe55aj1+8oimadPwA58oClAUA17jeYmJpLWeCC6BOIBM\nj1/zb3nfG4+Ozk5pCTXlMyhoBHFMrMjfNWwXS1cwDZ0okAsfBSE+HnEUoSUCUtd0BkovIIRAUxUG\nBwYBKG+vkh+bxqnXsZFrrKkKQ6WNBGGIv6MhqYtjrMOtA6zumyMMQ5SEzlAIFMTrXFNRFCWCG8aM\nO5YgCuU9yVyJeL9LQ1FUQgSDpTJRLNczl86holBxXCxLbl41bRAjiKKYMBH6nufh16sj7/3Rd78G\nQGfnuXh2QEdxgE1r5dg/+Q//h96BPnZu24WzVwryh+fdwweu+ThnfvFz+IlB812hcurjj9FxtMaN\nl91IMEHOjWrkyBQnwPQZbDxW7pPfPb+Ez2ViWiPYpBgAfKGrmSfvvJv0MRMoqlLZ1rz9NALoirSW\nvahBJnM4iir2sRKaalDqc7j/mVtZ87sCAI8sK2Lkz+HpcolPXy7vK7YqeICv2Kwrr2RTVVqh40QO\n1auxbcjlqJntAFxwwTT0TJ66VyXo7Qbg7t88zJIoS1ixmN55HAClDU9j+vt54Yvfli4fIQRRHOPF\nPtGdct2iBqieTtQIif1EmFoaH/7nW/lM1uAqQ/Jn4E8kCkFEx2GYWS67eiUAv737aYgEZ51+OsUL\npID/7UX34Vbms3NnSJC4KbM5iynNbfSu2Y2abGJDf3PRPFrGPopRjGIUf0P4q1jaAOmsPI/Uqlt5\nFQU9kxs5fh6ZyzLWszgojvETqzqTacI0LHRN56C8tDBf9m0O1gXrBxrouomRuEeCOKLacPF0jbGm\n1KJj1ZhNk9vwY4Xk8EnU7
5DWBKV3QHdlaJCJTTKIsbG9k/jgozh+9qUokbRalKhB1AhwhgaIEzfO\niW0dnHryHGaeN4tHH3mAjo5OAHTdJGjKMHNGF0EgLW3HsTnhhDP41a9uRzMSC0CoNLeaWFmbckUe\n60xLI4qDEbr2GX0RMUokEGFMLBILQYeh7iXc8+2vYp8svXebGg7/tWw9WxvuiJWeyRc4dfZsEjt6\n/7OJieOQOI4QsZ7QlPzjNTeK11naB0boyTV2Gy6arqLpOk4gx+F7Lpaq0FxsJo7VZD4cGo0GumXi\nuIl1Y6gIP8LzPGpbbRgjaTLGWKioWJpOEEka/DimsqOOeqiGpiWnBCFeQ+vbxz4XhyIUBK8PAsdx\nPBIHiMTID5LZk+4TRZHvtx2H+QuW8NSi32MY8hl5Iw+Rgu27ZMZJ/lZ0DUPTETG4kdwHte1DhE4d\nkAHdxk55yvCUqXTlLCYXpzP/UJm0peYtVt//CGecfBbZgyVJx9Z38Ml1A8wMa1BIfLWagtK/A/XW\nMXw1MlGTeYpCgV+tEd4/D/Vr3wOg+PsVbDh6GqsKR9LZKa3aJT2rqcd1zi+cRgX5zHb1DScZX45T\nBWqDW1A3KWRPlFa1Yah0zqxhXOjRsis5jTTBgieGiHwF04qS+/qJojJh4NBccBjvyz23pW8JdlVB\ny3XRMWsaAP32fNYvXka16lAuy1Pb4FCIrgXkcjEDlcRXHPlsGwrgcElmtqiMrLGiKIjYxIzliUKz\nBXZvA9CIklOfMBXMQgYsBT1rymuqwPFCXA9ULKZOk/M0p34Ga8as5b4XfoVlyHt7V65gxwsvYDci\nRDLvmWPGIzxoNsdiNcuTi6qG8BQHxF9NaLs1KXyUKAJFEPk+BycCNgx9DFXFd12UZEPnrDSNeh2b\nmLQlN+lYU6Eln8EqNwgin3xWHjkCz6FSquNsha0NeSRWlYgoiujM62haMwB1tw9fBLwj+D6eK5ml\n0fA4vWs6dr2OlxyXWtqKaLrC+edfwF133wfA7x6dQbH4S3wtJGOZaMleF0GAXa/h+j6ZJLOhuamD\nc8++HF3Pjfg7XbdBsdCMbkC5shaAGI8o2i80AvZtCkEsYiJFBh4B3EqNX3/1myx5+DFqdTkf859Z\nw6bBBp4fcsZZ0i96/Zfm8vVvfhWEmhzr9yEmjqWQ1BPGUoUKyhvE+4jf9o8c2KJ9wtgjo+iEKCMC\nulhsRickDsFKy10UhjGWCYqqkEt8hvVGHU3VKbQUKceDmIkSr3suOc2gq3UKK1avkb8PQpqOKeIF\nAZaRCM2GPSJA3y72uT+S2divJV+DOI4JI4iVJEAXJ9kWiqDeaNDfL333Ty1ayCUr1zFUj8iSBGad\nCMcNseOAHU+tBuDgWGAJDTWGMHmmv3gnlgonnCbfecO3vwFA9v5f0aFrXJExcD8mMzjorWItXsxN\nn/oM6pCMW/xH7JFVPMwwQq2+AoBmvguRV9GrLxNHEWEyNp+AsF7HjCKMZIxpRZA3LRoixl8ts3bC\n3he44OSTUetlslU5Rxfns2+YQCk4DU2hXGsgNJXaFsmLHcflOf28NN5MgzVzJJ1dtNJ3cY7KAghi\nqZiqtQHQFXQ1TZNZwGyWxs9x8SrsfMDLrsozPQ8CUK4O8chKCJwchFKueDtC2k4oYmUF65LYiqKZ\nvCvaT6aeBB2jOCYOQqJSyGDvgLw2tJv+R5aTRsfIy2cWOpppmX0ihpImV5BjNvJp3FjQcAUDq8v0\nPiTdM/fd9Ct+t+JWNsztRUsM1NALcKserqsQa9ItVlIEgy9sJq2pnHPSMXKN8oKf3v5GjpN4S05O\npVI/BS4AdgwPD3cm18YCvwWOBkrAFcPDw3v+2HO2VurJCwWGrnGEZZLRxwPQCHaTzo4l9ByixAps\nuCFuFFCzKww4koXymQyq41PyfKIw4uBYTtormgaaIG3oI9a3iGNUVSWXNlh9kBSG7TkTG42
FbzXo\n1yBwbETibzWNNOX+flq7ZnDqFTJQ0zFzGrpuQODjB1KQz1s9QOPJPnzF4+Ff/4JPXCqddP/2yX8m\njmMqlTJLl0i/l6FbGEaBtvbpLF3+e3nNylCz61Qq/TJ4BRQKGWy7MULXYF0KhJZMES+O8YnIIGME\nT/7slzx0023Uyv08NigZ8NG+EqEDZtri1ttkOti8+fOI0IhiFRlDSyz1OML1PObdfz/pJEB4/nnn\no6FJ63KfkN4ntF8v8V8HFakkZ3RNwfN201dqEHiJcsLDyuWJ3AjHkUrddwNiJZZWtpDjsVSNUqnC\nqV3HcWJuPNsbcqOXGjFRAIdlBHumyiyEJ7s3U2sExKrC2q2S5+r1+jvOjImieERJBiKSfmohRoS3\nKhiZiygZvBcElPr62NA/wPLnVlCqyCDZgiBECVWsdJqaJ09YYeCwveHixq+O+PgnZI7AQMVCxUPe\np2tjUeJXRuhqbJRBOnPiRNw0DPRspt44FoDKvY/iTHkPxd4yuTZpqGhNGoZlgJbDWDIk51NAtSnC\nrT4uTyGqVCS+7xO/ItBjRmIErq4QiZhgYxV//UYA0odb1MR2onIN5RUpQgarr1dqmpEoMgHNxWZs\nx8Wz5Ykg8AWW0UlTc8zHp8tsGJuJNB/ZyhOPu8Sb5LorcR4UDcvMgm9QKUv+N0IdIy0QTh8L1y0G\nQGguVq4dNd1EdUD619Omi8IAfiMzEiur1etsr+6X2r6T0Kkq+A2P3/373TSWyXkKBr6NeM92phWO\nwUqChkaXAxcPkj+1i2xy8tW9kNAPWTpvMTd87mbePXyyXOPuCl3+GLqK0yCRIaWwzIAVsUNVKdvy\nWnmgiqpG5AoqIpDzee2nPwN8gQPh7ZgfPwduAX7xmmtfAJ4cHh7+ZtLd74tv+gb2TZZkwnwuy9j8\nJLS4yPoeeQRcuuEZ0kdn0OOAxTvlhvajkILpE8YRjX0OjsBHjR1iAXHo8bKTZJ8IHRHF6HrEWMNI\nBqagayrv8lwsS2rTE9rzVH39bQx5P9xGnVxaatmjJhzLJ//5Y8w8+3yqyYngoSeXU2k0qJVKbCpJ\nATlp7RCF8e2guJxz6VUs/c09APz7Z7+Arvt0dU2DWArd0lCV/+dnv+TmH3yd7IZlcphhjFcroSrQ\n3i6FURh6DAxugDmSrjvmyePr1MIMfEUl21QkWlcC4Huf+RzVeU+y0a5x83PSAq37AYbQuOHGL7Nl\np4yE5/J5bM9FBUJClNcEv77y9a9y87du5qijpGK959e/ZvbMWcRxTLQv2UHVEEI6HpQ3EYrpZO4b\n9W0EQUAunaWxU47d3mEztXUyrreLvCWPj4ZmomXGEKs+ji3pNM00gROweP5TWOefg1uVgkuNFSKh\nsbF/FU6isALXZ+v2Oj4Bg6Vq8u46LS0tb3fJ5XzH2sgpQQjpKlFQEGEyeAV0Xcf1PGoNycdPLHyS\n1WtW8uuajdsfEianlFfimPGagePFbPPCZI5fxnVdDENwVE6eMnzfwQsgrVtEYre8FnoIfOBQAJ6/\n534Apj74NBd25ciXuikN3gSAM28BTeefR/bEE1E7pJDBqxHf+xRKa5agVbonqqUabl8Z1T+YwDRx\nE6WRnlrA0nI0HnkMNpQA0AwddXya6sGCgSQorhUmUR7aQN+aHjRFzvumrMVBr5m/1uOkso99EJFB\nfUeDrtPktXHTTdrdqQR6iXReygXL2kb22AksX5Gjf6N8JkGZdC5Ha94iqAcoYZN8ptfAdeuErsqS\nQJ66ghjau1TK6/pxX5ZZMsWxGoGo0buqzqa6VEKqY6Ga+2VAuO/0qoe4/XVe/NVS3HlSuTS2VmjS\nVU5UdRr2DgC6OtrQGzGNJX04q6X1rlkGvUPbeOi2X/LD3grFZukeqRsxjmOjEvGyLQ2IobBOWzpH\ngMr8hD+fKnm0pFuYlNEpb5aKqbWzyM6tHBBvaX4MDw8
vBXa/4fLFwJ3Jf98JvPetnjOKUYxiFKP4\n8/Gn+rSzw8PDOwCGh4drqVQq+1Y/EMmrxpoaHWmdWj9onrRgP3LxR7j53h+xtVLFT1JexoiQjlaT\nwIfuxE/tKQ1yZsj0yW0UCwVEEgwcXyyyUyh4dpmXEqvn8HQGUBAK5HPS+p6u5Kh42jsaqGnq+Kr0\nre5O51hYsbnjP3/I5gFp5U9atQ5dFehKxLp9wcXKy0w1zqB37TMUbvoO27fKirV7717I1Klt6LrG\n1Jmy0GHazC6W9ixH3P0sHYdJC2Dxsn7wIyIvIkyquyzDxNT2WwjzFvwagIXxcjKZAs25PAu++VMA\nVv3oNwg75hdr1jBYl1ZHGId84CMfYtFzj3H5VVcCEBGxrn8dTU1NFIsZ/OT00DvYz9wbbuAnv7iD\n2++9C4APf/wf+ew/fZKu9g7iUFonxeYWWlpak8Dmged1+mmnAFApv4im67i+j3GYvDeXKdC7bg0a\nKk1FecQ3dRV0FSNj0LB3ARCEKoXiZLpXPM/Ous1xF8k0rbsfegzPCWgEEfm0fGY6baBur+IFHs05\nab1rsc+UlvzbXXK5hrGJmvjetThCU0BRNfbFDequzVBPD889v4KqI90g8wb7UBoqQihomkrIfqu8\njs9LfojtvyTH5LpoQmDqB7E3Cbg69nZMK0ekxCNh0+gNmZZpVf7NWOr8+/hWxAO/I0wqTDs/eBHN\n136CwbxJY9Fzcj6VAFavht4qXChtq0zTKQx96QYsT0GYObLnnSrJnJTBj1XK3/0O6vNyH+U9H9Nq\nxjzxeMxCcprJpakPrKW/dwA98esOvaGg9rIrJI9lzLGocQbjuzny/yp58cOag4inU3UepVL/HQDl\n2lJOPG8Dy2+/kJv+bQkALZ230zWjkzOUsygYXeiq3MdGehroIZNLvawVcux9Q89RGqwy0B/jOZLO\nyM8SvGRi9PtMTuIgGytlRLx/Rr3ExWEpkBMmY+p7aawdHJn7w8dl6TQUzMIYyQumYGhwI4iQKJLj\n0VQD1+1hhpWjoy1PLakvqIUOr3gNPHsbZuK2PW5yHiMC2w35xpB8D7GDEQXkjAJBQlt//wBvhr9U\nIPKP1sJ/5Stf4aVV0k906EEaNO0m8HfR2C6F7vaSyf1KzLtEzBGJf+2z757B3R87hxWra0z6zI8A\neM4OyWgBTr3Meae3cUK7XIjYdzhYVTi6WBzxXe61XQQamXyaw1wpTHeKBlr+nW3eTKaTF0pyYec/\nu5ww+C2KrjE/CU7aq+uoSoTtVthSlcK5uuObLLrxUxy5dCUXz7kIApmj/s2vT2HWmWdywUUX0Noq\nk+1NS6NYMHlXUGb7tiRwttLFLlUJw91YaTkftUqVQn78CF2O5BfqVAmDkPK8J1h024/l31VdVmzd\nzbOlShKgBDMvuPQTZ/OjH9zOD34iCy9y+SYemv8kumEwfUb7SD784sXLuf5zX8LKZlGycqP8/OHf\n0pzNM+moJmZNOwmAf/3XL1BsykIMSlKg8Uakk4KKbC5LGIVUqw5WWm4AXd1LJSgRRwEeL8sfKAqV\nymZyjMW2E9+3gEL+CPyXoa93A9UkiCtQUYVCGHpkc7LBZM/geg5TYk6bM5solOtmNxqEUcQ7wXbb\n4aB9eeIiQkQhe1yHWk0eaVeuWk5fXz8PLQxQzGTsIiIK96JxEIrQIBHaISGN4BUQgijx8RuaIGMY\nCFJ4ngwHaZpPpATY4tCRIrFdXlJQkrhHjIzk73+85jTa2g6iNKNA08kXAeB9GlZsuZNoXS/+T6RS\ndzuzGJZAK3t4q6ShEb8vR/pTn+GXT9xHRitgJf7W+PZ5pPkdjSfuJe0lrkdD53v1PlYuc7jyDOmr\nbc4XCF+KSetZorTcTyefMh1+2T0yf5fP/hwA9WAWpm5SyDYTGrJKc11lDZ49Btfux0vWJQgcLmhW\nWBPbRFsTH/vJDZY
sXMrDQyqXnNlJ3pBzEIYxvrcbU8tzYqsMwj4xqHD/okfZXomZlLjaBA6Rp1Ew\nLPwkp3pbJHB9Z4TOMEoqKmOT3uVreNb+KI4r50M3dCwrgwI056SsKfX10Xi4m3w+g5WVykE3M/gB\n4Apq9Tp1Rz7TNE3GCYvtwQ683XLDTsgchRK+yt6XD+WbiWsnDlw04bNw1XOsrEmfyN4dB95P8KcL\n7R2pVOrI4eHhHalUKgfU/9jNX/nKV1AelwLNMNKU3IjnKuuY3yevRUMKQoO8lqGzSVYzvbswhX8Z\n3EhH6HFkkgKkRhoxCpsqIcqzA4RdZybPFGwdqjBW10bS+wKhErgBY2PZiwJgrGGRbnnLAs7Xoaml\njcqzjwCw6o5FHKH3sbU+RO3F9QCIKKJUrVGyHbTEV9bWOYV0vsj02R9i5hiVhXd9F4C1q55l4pQT\n6Osf4KqrZHHOeeefzcyp7eQ+fTWnzZM+7Ueft3D1iIjCSJpfT8/zGKY5QtctN8lMFd9waIkUmvp+\nQrhB+oCHKlUeLtVpAImC5yNXX8LhAxruFz7KquekL86P4Lrjr8UwLUxDECab96STT6O7ezWXXnkV\nx56TpAz2b0ALYpa90M37L5SWZffKpTQaFXTDIAh8DoSVPYnFZ5pomobj+CN+0IiI1pYicRzRcCVT\nNxo1wjgkiGzixKeMolApb8NxPAY2VFilJVWJ5nh6N2zG9WqUNkujQB+T5szTptLWMZlySVoyWU2h\n3vijLPoHWLLoFzycpJi5njeSinhzImCDlT4g0DSFOLlP2gsOe8NdHIzKK4lPO4gjhBKhKypWQvsh\niooqIkBwSBK020vAntjFc7ezN7GebccmCAJAFqtcfaUMgF967cs4W+sUj5tC95pnAKg/tILmn7mI\n3TvxkiKzXNhEVLXxQ49gucxSWd27DmNqKz2axqpH5mMktvxZm7fS0lfGpAaK5OWh0OKy/kGUgaWs\nHicVcMe0NgxhYjQXaCQxpNZpx70uwO+7ckzVegm9XcUW3ZRKj8m5Xf1DFHEXJjUyGZkRUtRmcLoW\ncWnpNjrUJgDS8fXYvk9c6aS88nisZnkiUFWd0D8DodUgSDpn2CswGxZ2pUEUJQHLV9JogYVmmQyF\ncj6Pbi7gGPYInZqeZI8EEQsfXU7ve6pYSRGRHofU6jY5TUdLRGW9vJlq2UaNQxTkfgwjgVBNfM8j\nCAJ8XxqJQoGpXa1M0eHpJ2TPmMB7Ccd1qToZ9iS9dRRVY3JbEUPRGTsoFdO2PR9nq3vg9JG3K7RT\nyZ99eAiCP5l4AAAgAElEQVT4e+AbwEeAB9/qAbqSZHX4PsTg7Q2Ik6NeLg0ZI42uWbQl1Vl99Z18\nfmUPmhHw8+QINierYdcb7PYCdrh1HnhCNgm67D0XcmSzwaueQ+xLIRdrCpqqksuOo6kgI+lR5IOV\nw+bt44kFP2TegscBWLnqIcLqDvLFI7n4PWcA8L5L3seaDbtZ0lenvWsKALPOEYxbl+IT151PHC3k\nmaflpuorDfDMslV846avUa9JKqIQYs/jnu9/l/OHpIvgi9c28f1bv0bPugp+Mh5nl8fmzVU520BY\nk0wY2BuobrPRXY0gufeZ0gBVP8AXMe+/5GwA5t74L3R0XI+d2cXpF0qL+sVKhfLWBk7dob+7j3pZ\nBls2uxF+bNB05NFMu2o2AJmqxdj0GB73HueDH/wnAC44exb/9E//QN2pky4c2EMmEjUqiPBchygI\n0RJXRsYcixLv5aw5Z6GlpUCo7/Cplstsrw9SHpLukXrdobS5AqFP06QcIpbKpWfVKhqNkC3V8oil\nf9ONH0HEEWNNnbBDVvVtKQ3RXDjyHaw6/PZn1+/PyzZzoBqyJ4UiLe1IkTkjfshIRkkUhggBeqQi\nIgVf7M8dj6IQA4VMUgW8V8ArioqiKMQiTO5zcQMZfPeTDBtZibrf9zArGfvgj36M+
ROfxrVfpPHD\nnwMQbyoz5LgIVZBJgmG1/n7UtCA7ezplV+7Bheuf5uHubj67fCXOhs30JgqirApMxaUhAupJ5k5J\nZNDCiCB2GExyqkuBTazGGJqPEks+LoebXzd/i7tvljRZQ+QNh0jYOEm2hAhy6KaFEjfjBZIXz+06\nmaH4CSZo3eTVSwA4o2sW2ZY0aXU7U8N/JXYScaWqmHFE3S6hKucAkFVvIxu8iK5UMH35HrcvpObV\nyORi/CSdNrQDVPc1la2JUeS5glXd/fy65pJkGGPpChu3VbmkuZVSkv22emUf/LdGNjcO30vWV7ik\ns9IosW2bamU7AK1dEynkDESQ4agjJX/rQrBXKCx1qzREIgO0EARYxhgmZJI0101DsN9Gex3eMhCZ\nSqV+DSwHWlOpVDmVSl0N/CdwTiqV2tfl7z/f6jmjGMUoRjGKPx9vaWkPDw9f9SZ/dfY7eVGclI35\ngYOmBpiHuEzvlCrt4gunc+Yp17LgsaV4rswF1cIylTQUCu3MKSRNdkyLhx/vxqlXMS2LoX55JN55\nEkzqmoywy4xjX2e4iICIgqWTFG0hVJ1YeWcpf9/72vWoljwuzbnsKtJexKWXvZsLy/JYNnP65Rw3\nxeaJ9Weg6TKt7CC1CT0wqVcHKb4cEHxKauRn1m/GynVTLDRz9rmz5bygYJcaPPiDn3PhnTKQeOXN\n3+X9HziLW2+7kwXzFwGQOSJH8Zj9rp3Z02Ugs74uorLxWVwvoLci3U39rkekxEyb3sE3vnsjAGef\n+wieVyOMIJ/0NbjkzGmsnlRny+aI950zk+oW+fsn+srct+A5ahu38NTR0uJat6mftXqG3/7mIVau\nlO6Vj115OZVyhccWPUZxsjzNjGHi6+avo1XS7PsvoagHoygC15HBOOKYY5rG829XXEyhVVqG2o0W\nPStXctTNc+n4kGSx8NWYSrmO6/qceXyO/g2yReetGx+WaXiKPpK6dZgpOMyP8e06elb6vk1DHzmx\nvF2s/tU3QMhTQuv0c1COmYkwsyPVpCgRqh4ThxpeEoQVYYwqZLGTQjwSyLR0A9euM6G5SF6XvPz0\nwodI5wqMa25FJHsjtAMiAbs06BmUp0jvJUH6yM4Ruvp+JjNvSw/eS1bTaMQxmRaZytfwygjXIaMa\n+zJKqZYiSGtEzissNeXRu954CWd9D9smZsgdlSfQJH97m/qJmtLEmk7DkXR2ixzXn9BOFGgsSnKa\nNd+j7eAAQxG4+yqD49fHDFrb5ckm0jdQcR7BrW4jJ74MwPT2HKFawbQyOKrkuXX2b6k4QxjZDMVj\nZwLQfNH5EIQM9leJAm+kcVw6baAaJjVbww+kBXtIeBlbCg+ztPdHxJpca1UzELGCN+hSTX6rRDHY\nMciDMuE+V0Zs4EQq2yLBS8nJyQ0CalFAoKms6JH7ffNWB0UYTD5iN+xz0wU+YaygqBb1+g5eSuIQ\nFxcsNDUil7FobZFFM0Pl7ZR2B/TYPUycI13BYofGC4NDkI5oKci41d7q/pqMN+KvVhG5T1galoUq\nXNhbonOCZLYPf+gy2i5opu06kycflEzQlD2B5T2ryLceg1Z6AYDm4jhumPoRuh+5hYwV88TzSQAl\n2EbdUXh+xRoKyfHiyMwR/He0h1q4a6S8GzTi8E3OHG+C9cs28NEPy6ZL5mHttKhw/MzZ+EmO5bOP\nD+JFJtU4RNUk474auxwUaISuTRxGjDtauj0GttdRjCxRvImR0u8IclaB0084lmLiLlKocdWVZ/Kt\nm+ZyzcdlQKdn9Wamd0wboaue5Kiv7N/IzqrNmEDh+ZK8ZgsVI2vxizvu5mN/L7srLp6/lsFN68Bf\nh6HL9zy+8vdUh2Ie+slLLHv+JG78qGy8fm+uBUXLcf+tP2LqvZJ2tZImtBSOKRb4zvLFAJSrZcyM\nRcOx0V5MWGl/rBSAiQW5HmEQouk6QRzwsiU31
VHZJmYdW6DpH3I0FSUvDKwu41Z20J5/iBmdMlgb\nRTH2KRk816etTaV4lhQyTdc/S/qoNFtth7FJdezOWgU1eBnBoZTKMqizrtGgVn9nPu2BFYvJJdkn\nFaHQNKYJhI6SBK4UHIJaA13PIxIfaICKJwzibIFJ7VPIJBWVlhAoioKeMYnr0q/87o5lhJFAVQ2M\nJBZiodDb20ugRbQnsR1ig8PV/cHzJ5+TFbJ+qJI1DXTDZHAwafHpBBiRwPdc3KQfdqyGxFWBbZv4\nfycl1dnkmFRdwqbI5oQ5k5nkSTdUGoF+pIUnIqKDpSLZHFscc+XFLNM1bnhKFoTd2qug7SpRCkL6\nIrlG0+wjXjd/s2ZeIenk3Syv3kA93INuJsFNL6ZaW8q6ynz0ghS64ZDNmrrgDrWd7g0yFvHUSQux\nazaV7VXUg7SRDovZbJax2Qy7PXe/INeb8MbMofWFu2hJAoTHHnuKLNQqb2PNVqnFVN9HbezvmigS\n2RACaiZHQ6h4iQHgEUEmja+qrN0olYvQs4ShwuatDlESN8jmFGzXB8Vkx/Y6bpKFFYU+tWoJEWkU\nk+yVcm0P/bVBOt7TzvWP3gLAvfet5ltzbqAS1EjrkucOEXn2cmD89YR2Yo24gUAEMU3j8xyiS0to\n1ppB4lM8slaaU8+QfsiO9umcGp1JpPiUStOSa530D/RQfL6Fc09s4cTr5gHw2POL2RC207fNZmCT\nXMQ9Z53OCe0TCP0GXrCv0X2AMN48KnsgZHIT0WMZYilt7cWc0EQjiHCSAHT6mDxmJGBPSJxkvTkT\nG7ya1ugVHpGikWuV9BvxIGq6mdh4nkhITSrCLIqqoWcN0klqYnB1lYHudbRm2/npLdI3eNtdT1Oz\nvRG61vVJa/eFrXUaDR/hxtiJ8MDUaG5r5ctfehK7Iqtxzpr8NYyfwdAWm8LR0gpa3NhGaaPDS5FK\nXa9zww3SKjcLbXzthu+zbd06didtVH0FJp9+CoGm8I0bpXD/8N9fxdPLnmLF6uVY7oFZyUhYTE9b\nxMRUHBuSjVIYlyfTYqFrgv510rIc6qtgVyrMnNLF9FlSkIdhTKPh4LkBuQyESSXs8dMm83zPDlzP\nY86FMijtODvJKYJ3KYfwVNLc33FdnF0u7wRCUUachxo+WTVkYEsvdmJAFKwQz6/RlJ/C+HFNAGwQ\nObxcM4Vpc3iuolD6vez6qO4Y4qwLL2f7/T61W+YDMOdon611h0bNRyQZPld3dbJpa4mFA+uJj5et\nBkw1IP3SEFz2PgDurMtxFNtmctzkiViEuBulPznrhXh2naHqAOnEelZCFzMUtMw5g7NvkAZA5Rt3\nYloqZzW3YJa2Em6SQrJgmtRfrOPFIbVAKpKyqXDVJRdy9bOPM/iwLOz53nofJ/L4hQLfGJIC6tb6\n6wPRQSyFXN3tY6hcoq80iKPJ4PmqxWVKtT58I+SU2dcA0KQX6Nu+iO33FOlfJ4P8v/55wB2OR36g\nQGbXOLzEz6+JKs+NTzOoe3hJG9XVtRA/iGixOph8oowtXXnVvxPiE7o38unPyf0W1epE5SqL+2Xa\nqJmW86QKCzVvUgkCMqoUsBEKtppjoxfT05AGWbOlYRk6XuBgbJNKSKgZVA1cXAYaPnEseb6veysi\niDD0MVjJe7YFNfovbuK7D97Ev/z7hQBcN3cl73nfFTx/by+5pICo69hzefYPuFLirya09ya5vWOM\nw9C1gHfPupArJ8ojfvGnBbItPhlF0NyUuDdUj5ashco4CjPk0bmtazr93/40+lFNNF10FtOnS0e+\nsWY1q59aTajr2MkRbskzyyirnWQzh6HocsJyOQvtyByVd0D3CaediVCk28JxKqzbprHxlDb8QD7T\n1nXsHTX8QEE7WGrJQDU5vFBgV2uJeKyNd1vSKD5SSKfHoqg/HgmAhGGIoqvEqkItacEproswFYVK\n33rSGdmv+
Euf+QAPLVjCwJBUOrmidEeItduoBRWZ+pY0pYlVlXrd4VvfuoWmjLxmxSFxGBJaWexD\nZFl02prIbn0Lx0ycxKzZzViaFIbHjGvmp7efxLMrK/uy1jjMMti8Zi2NwB9JW7vvzjup1Crkm/MU\nm1775bn9yCUBRk1VcF2HrKkTJML0uedWcsLUdsRDzzD/cZkONm3KDAoatLWlUdV9se+YpqZx6JqO\nGu+mluTtn3lKF40v/5TqUInQk7ygCIN3aQovOXtYkfCcu9tDUd+ZW0xRBI4tBWRgVvC2rmXl8j7s\nIemaqadjVC0iytaIO2fJtThpJtMu+gAVT6e7bwB2JX3Lx1tsLK/GjTTqSa7mlqMt1KnNRIZOmPDC\nkBFx+jVXsPx2B61LGi9KHOJX9yvre5Pj/Hh0Pr9sDSccnyNK3Dj5rEbLhE4aG3SyWbmWZq3KOLtB\n0VJZ8oAUmt4jzxCpMdRs7KCK4SbBuFjFizwUoeAmDZ9cTaM914J58sn4BalE7dDnHt/g8z39PFZJ\nWgy/+Jq2wcDCPpmmu3Ddj9gW3EfdcfHrMuvonhVVyhsjLv+7cxFK8nGByKEWBdSqyygnrVNfVQwC\nRcPSPWLjaLKJkZfLZKnWyvQOvkBjvpxjTdfZVqthWwM0kkBkoSNDbIeETRq5UJ5c1EBHvDblL2Hw\nYjZDYVIR1xSIxNJ244iHu/txXAMj6VESOzZFEaNqKtV9rVW31Bh3dA6jcDixrjI4JJ//xDOD4IWo\nmkJsyntrJxjc9Pj3+fAXLhtpX3DeRW189+tz+cyHvou7XtKumIU/LGlM8HYCkYVUKrUwlUq9kEql\n1qdSqX9Jro9NpVILUqnUxlQq9XgqlRrzVs8axShGMYpR/Hl4O5Z2BHx2eHi4J5VKGcCqVCq1ALia\nd9B/5KAkHzJ2d9ExczKfvOV20t+RzWL8aIgm1aUxOIhiSIuo0NFKKBQM8/9SQGrYge6V5LQWfvm4\nh/KhItM/II/pk6+7nkXPDJA+pkBph9RyjVoZTZ0mYwVCWgN+w8HeGu5LeX1biIU6EsRqVLdjpidT\nrQzi7ZFWWKNSRa9CPmtybLMMRBZassxoSnOOVsQ2A+6edQIAbrgG/F2Ewc+J9gWflAihqzS1NBOF\n0oIM/YBiMY0hYkrVkqTDr/H3l3aN0FWpSAulXrdBV6W/dSQIGxNHYNseXlIR2d5UwDQyuKpBPSlq\nYrtHGIHYtYuBboHZLNfo9lt+Qd/aLYzJ5NmTpJjFImKs0JhYyBEnH3BwB0u0XTATY2KWMfqBKyLj\nfe1kI4EIXXIZnaNj6aO97/5V3H5HjUKxj2vmS//zhZuLXDo5Q7HrCFSRpAHWFLKF8aiKihXF8tMu\nwLSOJpoyaeZM76RgJW4Y9TD+WxUcXDDYrUk3UKM4Add/FXjkba35PijJRzaCRoO+5U/ilmzUfYE3\noaGrAsdu4MZjATj58k+zRkkzuHopSriT5onyTDexyeDpwRU0t59KRpefQDvp1E4iTcMfKqMluc6x\nH3L2KadRf/AB1icFYZqu4Fqv6ZWRNBDbrh1K4IWEy1YwNsnfH9BjHi0N4NkRTtLr4gzdwMsUWHf7\n3SM9fITrE1TLvJi1EJbGBEPur1goeIqCGsbsTKo0FwgTv+qQ1zJkEh//I07M9YvL1PyA314gLexH\nT8/ywdfM3cp1CwDYOFjGMQWWmaGeFABmM6fSNifHyedPYqcj8/iHynVW6hqIEi9u7UnoyRKqKpHv\nUC89zexZZwFgV3z6lj/HxqHlREnK4bsvmsXG5UtYu30He+syaKjnfoRDiCoi1KTnkKql0TL741qO\nLYPiFX88ijAIUPBVOXfTZrTTs7QXIUzec7Jct+qWBmDh24Io6djYFLnsONxlghjHySdM4s7+RwGY\n31+XfGKqeIF85k3XfI3/6r6Eql3GSAp+svksX/32dTz5u37+bs6HALj5nG8
y/bzPcyC8neyRGlBL\n/ttLpVIbkF9evxg4PbntTmAxf0Rov6ok+dOGwxlHwbQlF5HOyeyAgZ7F9Pf9nmLzLIyk2k0rNlNo\n6QCRxxmQftXuJ79OxrNp1nL84tb5/DLJf/7nG75F80c+xepNHr1JDuYrMYTCAT8k8qVAiYlRzXdW\nEUngoSVCqmjBzPGCiy9oIvfR5GsjQqFeKeE0ypxdlBv6ovNbmDlrBoo+i1qpxC03S+W0YOFcCi0W\nH/7w+WiJzzGKIVbBOiJDsCcJcl0CuqLg4NLaJjdFrdGgXuoZIWv9C7JpUhRF6KaGqhmEie/eQMHS\ndSr+0EjFmRMrKLpF4EdYGSnMarXtNE04huPbW3nq9rsYMqW/dpsSEaoqZgiHHSezQV59aTc0bKaO\nPxotaZnm+AGGqhArgsg4MCv1DpUAyGcsDFVBEer+NqmxTt1+mSCqU0mE1H3zHuQB5eOkDQ2xL7ef\nQzCNmCBwCKNoZO503eC4qcezu+0Eon2K2d+DUA5Gz5joyWe8LEXDCd88Gn8ghEFIlMynWtDQsClY\nEdVq0qa3FiGa83RMO5sT3/dFOW/uBaxevBi3exGm20dnR5P8PR4z25o5smAiDOlGOnfKcWhCIb9q\nPdlkPYq6wUlmG+Lqj/P0Kuku0kyNw8YY/OD90qX00aTX832Dy0CDQ6KIpkxSIXq4RVWtYztVBvb5\nf8OQHRkdd6gGScMopWhRj0MWZi0sQ2NtYpQcG/ogFBQCSl7S4jjK0/PMozQRMXTdpwC4c/FK3CDk\nrvfmmfKJ6QBc0PR6pZ3oNibkTmdn9CyHajZRQVZUZtquIFC72eM/Q5yMJ2OadBwPL9xcZsW9a5NF\nsAijgMCZhL+rxp6GbKkQuiHe7vnUqmvREheD22IxPh2zec06UjtkCXjjgdW46CiuiRZK/lA0fUQZ\nA4hItrny3Szdz/XhnR5xzbWzAfjSjdcxo+kyxk/r5Npb5di//KkvMbCmRBjkeSUxHpxQwTwmg55t\nZnZHjjOrMoR4w10L8AKXgpUmMCQvn3rV+Ri5n9OoxiNVryJ00DTBeR/u4iOXy+rWu267kzfDO/Jp\np1KpJqAL+bG9I99J/5FjTk+qf/47S8t4qFR7Seekr3pdzz18+3u3ctWVH+e886TvKWpoeHGAoXlE\nSVloV0c7pSURZ580maF7n6ViS3/Wpf/303z0Axdy+48fJtgptWj/QIPY9tFyCm4S8Y+iCPUd9sL/\n8meu4ezLpcZb2X01T33ySv7t/HPpak98ZLGgWi3h+o0RIZPLZsnlLFQjjR552HXpB/3HK2cx+4LZ\n+JFPnEx9EAXEqkDVNfw9SZGGH6BoCsISkFTVuf5eNPWQEboyR8jjgggjao0afhjjJb1PTM3AcT1U\nw0DbZy1qOsIcixLu4YgxcpNXh7ZR37qDF16Jse0QP2l56pkqWiZNEMWMTQQCiuCFlT1QdznvNLn5\n2rum4Xkx9S0Vth4uN9/Rb5i/VQMySHacMoliLk210sBL4gFhpODsCans2Mq2pINcdZtDxtTwGjsw\n04cBEIcvoSvjiQhx3IAgfBcA5doqtta3U/EFTa3yzUJXadjb2BxuxVelMKzWG3j+m8XiDwxd1QgT\n36gTqphRQHM+i5FLguJdF3PxNR+j5eQifT+TXx7fuqqbXO8ycjv7OG1yE9vXLQdg6pmnMr3rdISi\nMvYwyZ+3tXTQXCjgX/MJcln5+4ypcvkVOmnL5N7fyg/O6prgnv/an053TiAty2fSOfyWIl4U4CfK\nZc3GPgJVYOXSdAZyO55sGjTKFZanFa760sflvCuw+rcPEFTqNJsWuFIhqCJGKILYdak6cj3iSMGL\nXGwv4IF1MsV2wHe4ZkaRn1zYRouVCE3v9UqxpTAbgLrbwOt7gOkzmxGZJPspamHp0K84KGtzaCQ/\nA7atBnMu6ORbM+7jyUdlhk0QCGLfR6l
5xOFenCSVTiOkVlrFYF83hpD+79Kxx6G1aSimYFfywdwb\nVvfhYaBpLahJIUtsKETsL1bKZaQi61m8hZ/3foiWthy33SVTE7NNgoYf0nbaZD7ZIYX2FR/8DJ+6\n4mvUGgFRKA23mhtSiXxucR06mo7G08Ym85kmDm3+X/bePUCOqsz7/8zxUCmKomiKpmmaSTMZxmEI\nYQghhBBihKjcRQFFUNEVBVR00XVdb68rvPwQFVm8rqvoorKId0XuFwkghBhiGMI4hGGYDJOh6XSa\nTlMURVEcz7x/nNM9E0gwWV333feX5w9IKt1dp06dy3O+z/N8v7sGAW84baF5b08dTKqK5HIholU0\nJTQ4KQkV/unLJjB7/xNbp13Y5kXbQiM/By6wHvdL+Ua2yj9y4YUX8vRHTCR9t8X7ot+5F2m9yrA9\nQi1fdRV9fZDUx/july40D+zl6e5fSP/8RYR2l8r39uOH5xP33sbiRV/lno9+AoC3fPpu3vWJj/Da\n8z7MrfeYXbprl24ulwUyd1fS1PBaxI0J3HT7skfOfeebOf1sM4EaZxxDodyJAnRL1cTx6CnMRcup\nzlRKmerEJCGKGhxjCXlCv0CjNo6WbpsrWguN0kYTsMXfHB+xiUztj3RFG/KoDNe5Z9lP+LK9h7Ye\ntNh1VzwFOpkKtgkEpIpi0I1jc0kdx6GZvUCK4I+Pm4W0nsTUkohKo07mOsSW61mkmjBVCDTScihn\nScJYrEiFomAHm3QCAhFCFMMme4Sf2lcAGLUpWQ+NraMUNUnj5/Fdk7N6Z6wZH6vyWGWcO4fMRD1p\nQRfdjiSqN8jse0uSlHwgqFVj7r3rD8SWHvXXt97Jx1bGNBJIx8zJw3Ulpc59GRl+DFeaxT3Tf2Lt\n42Pb/tIB6eXQlrg+UQW65r6R1535QXpOMZPXbW7Cu+tSmm4V9ylz9H1DmuOUU4/ltQeXGc17XGkz\nf9569GtZsvTN5MKQT3/Q+jZZQqlYpFAskLeZBYXQY3hkmK6ubjr7WoHIlLQ+pb9Ytkf3YkWTL3Sj\n/BDLj4TnadTePvo5hR63HPJphtes0yUlyXKj3jL2+CjOmmHyaUTguMy0ikmFzGTEpM06TbsRBH6e\nOUccwfDddzPRNAvzCbMD/vF1ZeZ15RD2FCq8zVNpB1eZRbJnTp4DunqZ7+foWmCeSalXUfe/x9rK\nOK40J8lGtQZ9Hl0HhpQuNr/ZW+6FhuCWq4bQ+Mw54EAAausfwC3UmdeVR1plpY3yYY7sP5wzln6G\nPWwK5QlDo+S9Lvy+Ap5vTthKKzI1FcoLrBDBirtWM5au4LxzTuaJF8x9zjv/m5xyhMf8pT2s+4MZ\nCxd85RN89ZNX8rUrfsbDfzTvws0CnvE01aJ53uGVT9g+8Sl4Dt0HdvHta75m2vSRRdSbDaPJSktZ\nSRF4ksG7NjCwzNAkiM2Ibje3bVq0Ozo6JGbBvnpycrJVsr7N/CMXXngh39lgvA7P4pQ7bIftsB22\nw6Zs9qLdKc823OoSj+v+dctMf9vqaf87MDQ5OfnVade2i38kvsNABKnr0PdMH6VflMESwyxdchZu\nVuOuG/7AwDLDFNZspmSOT99hi+nsNEffICxw+tkfpNxdIr93mXHZ2ie6Wfj2r3DMez/E+FEGtlh0\naBebZEpWyWjUza7vJHXeMH/7qFnDQoGiZQ0r5F1wHZSerh8oUFqhEoWylVRCSlKUUYMRkqLVmEwz\nRaYcUAKNTe2RAjJB5npt5RjSGKEyAuXgZeYVFZoOek3DkAYAqiXFISTCmYEvJcp63yrLEI7GkVP4\nsZQ7Wd3CndsefclzyLLMqJfkIUun0hDTNEWpjHrV9LFWCu04pJ7LY89aMvyJCXpcSZDzcMWWU+oa\niVWNrzZYOzJG3g8pBOb4OPFkjUeGRxmtTxBauar9ukr8aSxlaGiCc843Xq1AcPwJlzI+PsajR36V\nQxe
ZwO6vbridutdJpn0iy+XiugLpzsDxAu697wFzbdeQ+1at2vaXDmg3R/9Ck/t91se/weyj+2j8\n+gpWPGfgosb1vyRXr9DjCBa5BuY79PDZ9Pcu5kMnO+RdePhmk2L33f/9SYJSkXyxQN2OxaGhAU49\neTGu57e9VZU1obtIobNEzgbua5UK48ODFPoNdjzvaHOv8Z8NUhlYQ+CAH5g2LfAC5CaIKuPMUuZ9\n6LiBdGKe15LkF78BoBglFOsRfj5glzTBs2yKFZ2hswiNpqHMPCnvvS99vbNZ02gy2zJOnjMv4J97\nXHwVtU98Wm+eiBZZeCVp+pTDQ4nqiqpjro2M/cTMlewgwrwJ8J15+lvo9jXzF1foXWjauU8YsfTA\nN7HmLVXGR6qElqGzvsmj/7Auzv/MkfihhRFUwKxre3h42TjFw0xFZe6oEl4zgMxF2OdBKOJ4CnrI\n7PjcVIs5KJC8/33Hc8VPDFvmHTf8HlcWOPw1PZz6z2YBzXWXuPyHn+PeewdZOWCrt7VLqeAxx9+X\nDYUcBJkAACAASURBVOvHOefDJh/+qgXL0WnC8XNnsuiQo8y48epopRCJwLOxGURKamMoqZ2DrWrc\nLdm2yI0dCbwDeLijo+NBDAzyacxi/dOOjo6zgSeA01/pd56MbfZIknJ881VMjEX02JewcPG53PCj\nz3Pn7+5heKVhulu68BAmKuu4/us3sltgRE03NBIu//cvML/kMLbxENyZ5mg1fO/t9L/+VP7xa5/n\nf73TrGqnnfhFsjhCOC5J3tx7sumyX6PJ9lip3IO2sEM9itFRRBTF1OxiFicxUZSQporEsr0lSUy9\nXqdeq5AqRamnbH+ri65SHznfJ7MTFZEiSSmVdmNY/QqAZqOKUjECH2Wx3s5SwJLFU+XMe9o83GeS\ntL2JpM/bRVdIHF6F57qIV5nJ5Lq2okyLNk2psGK9QghmSMkLLcHd51KyLCOJn0FZnFwrRRAGBEGA\nMKgDExMVsiyhUAzJsi2HNJ54yvR3baMmv5tH4AoSGyB8eGjIiJsKxdy5ZrMt7eVw3R9Ws6a+ln0/\nZjb6uJnx3l+tIch5fL9R548F05+rhEOlUkXIsK2mUyrlGa9uJEvTNrzx6BPjpFvQeHwlm7toKe/9\n2GWmP3tXcumye1k7UacUmaPvu4dSTl1yBEcfMpdi0QrWRkN0evM46agFhI6mZBc5zxHguQhHkrNj\nKfS6qY2uNpNUtSTIGqROnt/cdgefv/gS00fPbOLT//gR7njL2QDMXmwyiBo3DxC7Efm8T9IYA0BW\nwRuI6Wo2wNYmPOskCJGRuG671F+nMZqMZlPxTKJxbQm6djSO1Cjp8RXbd34hJKnViNes5q295pT8\nhSN7KLsptTQFy/GuxOZLSVefGQ+FUg43dxTjzQlWjxs4dN3wA/hOL05uBvHY9QAM3nsrSV0iFu3B\n4Qebef373w+RpEspH9DF4Jome1jpu9n7Hcar1NXkXBieuN/2p0/98YTf/PhXPHCfic187uMxpc5e\nEt0gtrh9mgqcadk4YdDi3obu+XnOOv94jjnD0lG8uBelzhoi7CWwGTpZppBexqITelnyZlOgU8wF\neKFPppcQ+D4HHmNqKHqbKeO1CsX5XTz2tFmDGkLg4RHHCVl7+U0RSUrO9WhlYafJllkzYduyR+4D\nXrWVf95m/pGabzqi0WiSBnMYGLqau6xcT7FYYvmd91Gv/Kjd2JHhR5g36xDUgynKTvKdsybnDtxH\ntqbB2Dm/oBSZXfqWX/+U4Ykmr186l7871wD59wTzSRoT6DRrA/5/UhlKx2yP/d37/p7M+zwA69at\noTr+KDvP2J3ddzeY0/r1a8hUBz2zD6G7z7QncGZQG3mCG276JeufeoojDjGqrI7n0Vl6NUcffSQL\nrQjC0tcvoCcQlHIeaqaZ/PgOSfYCjitxAvMS+4/an9yZFfilbX97D
RL4vo8QAs+z3pVSCKWRQrZL\nf/e0AUWjtD61gDlSghBopdhYb3ktmmBXn9yeXlugVakM9YJijz3C9s2fjWMcB+O9JFveDHd2DV7q\n7gS5IEexuA+3XW/4VKQjmdnViQyhkDdtlzpmdHScu4f+yL1dBgOdu08Xnucy9Me1PFppMmjLtuMM\nZOATx5r0ebvw1Op4vkOjUSe/l80U2jWHkh7cNvqK73q6veFNb6XYayrrbrz3j8wO9+Xgkub0RQaP\nP9TVLHvPh+hfkuMDbzNVp1+94lL27XS55Y6fcsKShTSsxJWHQlscvkWvW49qDA0PEubypInBipuN\nCqNNwXmf+SKnrDDZIxd99lN89IIPttvVbdXcT+kvcevaiGYGkaUlaCYR6JSxJOV6W8a+3FXkUgUy\nRasWTYJRH0+1IA8UWiyFOiVRkqaUPGV3wYnGBAOr7iMd+wRvP9yMofn9ZbJmHddz8LvMAuu8RG7O\nLZgTxdyFvawZbnD5V64hVzb9r3I5Vg0N4HbGJK7xYG9epljx85SPf3ghecf0kar7KF3l4MUZd97o\nk9lill38iIX9eRoPTLD8NhML6c3l+NWyAUi6KDXNWPr6xz/Gm857O7P730FOmrkVpxk5W6QD4Np2\n60zwmmN72fSqGpk0m8Phh8znD797nNR5NQ5mw0pUTJMUrTU5uzFqUqI0RjkBMpP0WJ4ROXQMqePR\ndUg3N97UtN83uLqhoTCbi1KKMBfguj7S0khn6V8hEPmXmlxsBlGhVmCiNs6/XHSO5bU0wYDqWd9C\nVxNKOds5ccbwI4/gJIrYkvPPnbM/hZyHm8QUCzPxbPCjNjpMEqX8+MufYuThGQCUdIN1Q2vZGRen\npdWXxuzH1jtjS/aJz11O10KThnPHiirfu+yzvPaII+jrNQv07+9/kFRl5Hu6iK2C9poV93PJ+W8D\nqalHTW65zZQuL7vvd9xw022879z3MmtfM9ivufYH/NPpJ+FrycJ5JisjdhyE3BWlNUkLRnEzgq5c\nu13P2DzaVBs+3p1dtw3ZaK1xtCDw/fZCnmWZyZ5xpiaXEMKohwtBnCbMsHmjjnaQjmCG4+LYgKkU\n5jfCMGxvGFGjget6oHWbQ/ilttFSyEoU5XyRJH2BjTag9YbjT+DGZTeRKwQcOMsssCfM3ZuPVuso\nNyQ3yyqliBQkFPcvsutojrpVBhFhgOf6KClxbP6y5znU6hMEYR4sNPTYujFqza17LluyJQsWk8Qm\nQ2emLyiUJG4q6bSbYBi4KKX40X98n+W3fReAQugyOLKSC85/N/fc+3N8O/GKjovSKbHlWwYYGnmY\nkfERHMeh9pTJ507ijOtWDtPT08Ojj5hS7s9eehHf/OrlnGjblbcZU//76Plc7DS4cvkaVlpBaddx\nWCcF1ylFqz6xHAsOVgHJlI4OAJnQKJ0xV0ocS7qupaApHCqZYqwFlalniZ+tkftOzOJ5ZrHLfE2i\n8+QCDzHDYLDozfv3wbUmPa9/PqwcGOTe2++iq2jpXoOMSiNl3rEh2jpUhx9eZJ/78yRnu4xZoYkD\nxWIWL83R/ECVW5dmPBObRO/95o+x6BSf33z2evyHrbjuHhmqMUHn7r3oQTM+hpLb+dWKxzi0di39\nXYbTWDkQJSHwEQDqddPu2saMwm1lHn0sJhoxlZunLA1pfKhOpbEet8fcJ1Ix+TCP47ptseAkVqRp\nHRlAFHkEnnE2RC7E8SvsXSjyvqp5x80Ucr5EawuNAn7gkyrN8ES1vaalydbXqe2TqN5hO2yH7bAd\n9t9qfztP+2BzNCps6mL9mmXsEoV0+sYfyNSz5FxAu3jWK3YlpGlKlqZIOVUcMzg4xNL5c3nzmecy\n1m12o7G1DxKRce/1l9NcaY5l/X37MzMsUMhisB7wLq5DXm3fPvWT624neNxAIfXKAHNLr2be3EXt\nAF+Ym0msGpz0tuOZNc/gsvWBY
VZ8/6fkSyG1qEnrlo89+gf22udVbNw0gxtvNEoWPd0LGFgxzF0/\nuxlpWah+OzDI+Ze9nyVHHUpiJbNkzgdvKl1xJ3t/jUBpzfNp2oY9VJbhSRe0JrPH5CzNkI7Ak057\nhwdNlik0Ju3Qs/ietJ6XlALfaQUyJVorXNclZ09DXeUynitxHIFiy57srFldADz1xAhRs4lOmhw0\nx5xS8vldKOQ9DjiwTNGqtu8RBDC2nrFGk4rlYpk9u49Gs0apXEK4Di1B9MD1wffRQraLORxH4HlG\nsCCyUFumNfnC1JF4W8wVimbDSmE11nL9mm+RbGhQOPEkAPzXHUZt9iBXXHIBt9/6MwBWrriHq39w\nNVdccTG3/ezbzLZSXKIZ0YwbxElMvWFOGRvqXyJKnwM00fetak8j4srfj3LP+Z/j33/0HQAOOWIB\nH/37KV6PiZppU3WiBkmVci5l9bhVeM9MNWzeFbgtf0xLJhzBRgRxaywohQZygO9qKhaPrmmP9Spj\nVCsq9kRReXCQ5tAB9HWGFK30nI5ThPTRCqINGwEIw902A1F3thJsa0fXsKG6nhWHLOX6H5gkg1oy\nwauX5PACgWvTecllDEYVoqqPZ1MQ5x92IHduup+RGQ5iqamHAHhtWTJr9u48MG8BStr8cC8jKULo\nx9TrRiGnXNaoZoNHx24gTY0KjBMWKAQHt9s5PmHGyE23rCSJNM1mhrIqStKTjFcm+P3q1ZxiT8GN\n5BmajZ0RQpCzXD+lXIj0MkRTUW043HOnWeu6nDzSF2RZSs2yTMaZxHMK5PPF9slYCMHQ8DCVWpNS\nqcuMuVfwtLclEDkDuAfDSi2Bn09OTl7U0dGxO/ATTD3FGHD65OTkM1v7Hc+W6u62r8sDP5vg2lvH\nCCxbW9csl6PndVPM5XCkWSDT9Hmq1TpBLiSxi5Hrhpz8zvNIGw0C36c+bLDR+KkRpIopj6YUglZJ\ncMSc3oOoVR6hFpkXmwtCpNg+atbAl9z46x8DMLF+gIvOOZMkjqnaQKQQglzgkdSfYv2QKeZYc9/9\nfPiNZzJaqbB+w3r23sfoF5a7eyh0BqxYsYo5fSYbINc5h8+ffwGPf+NSsmfNAPrGwBrOeveTHH/q\nNynvk7ffLRPmc5zxadMu3/bnn9KMZrOJQLQ3Eq0UURqRxHEbHtnZdRFIkiRuV4TJFg4pBQqNsgEp\nIUE6Dq6ULYQBx5E4jmeXAvM+CoUCge+BUMitVC3Nm2vesZyXY2K8RtENIDTPOTrxO047cQ5CKYQ2\nENijqx/Gcz1++tBapK2WSyJwHJ9G3MQNFMK1lZ9BSK5zX8ZHn2i3SakU35U0oojI5uTvPbOI2s6i\nqlvu+DkboxbXskBvrBGPPcnoTNOft/72Wpycz1vPOourv2cWhE2bNvHlL38ZtOCTn7mE1xxoAscy\n0zQ31Wk0m21Jt3+NEp5JUgQG0gFoRg1Wrp1g7br1rPmeSdR6csMB/PCHU2RcYxZuWvtYhbFaFYVC\ntp49zSBT+EK1ab8baBLXcMm7duzrNEFrTaIUD2Yxkd0F6xrqDkSuj2sLUB5dtYa7rlG8+ZSedmGa\n1AIn5yOUbmP12Us62AvMRnPzbb9jYFAx8XhA+muzGGfA0jcsonNJnYm6dUS05kUl2M3N03+oiX/5\n+adJNkU0niyhL8o49ETDoLTo8FdTXOpz2GF9/OIyQ0xVXljmDYcdzeOPL0MnBkbZWZTY9GKema8T\neM6IvU2dRuWhdjuHrUB3NP4Acw7so9aIcKz8mxu4xGnG6oeHiS6zhVZxQmViPa7roopmvrvaw40F\nDg533THABz9sKmT/9dIr6ewq8nw1nQpBaajXGiRxTN7GmYIghxQuvh/g2MBuS691S7YtgcgXOjo6\njp6cnEw6OjpeBdzX0dFxM3Aa28E9Ih40k1fs5zG7p0R17wkmHrIMX6M1HlQNTjzmKIQwmHS93mB
j\nNWL/4n6ceppJTCnOPpR6vsyCRQsYXH4blXUmu2DuAf2MT9QoPJmRJGYxTbPnqCdPo5xgilEvqpF3\nt+9wURke4DNnnw/A8oE8cuxRrr56fSv1wmCUQvHx938G3/sYAO/JcsR+iSejOnfcO8hnLza8uXFT\n8cCDy9BqJYsWmAk9MjTEN6+4nF+PD7PevqgGmtu/tZzF315NYb5Z4DzfwQkCwODjssURrhVSCqQQ\nOE4rWd9psZ+yUwvnFopMgyPFZtimxhZkubL1SCitUSolSjWBLc7xnMBUCSrVxq+bUQ0pQ3zfQ2xl\nKDXrBssrlHIUinn6O1/N2IiZNKXOkHzoIpKU2C6QSSzw8kVmHziHymOmP4YeGia/b4AXuuRyLsLi\n/BMTEyTCIYqbiKyVESNwHIHvuzjWE3Ich3p9+8rYH183wuCaBwEYXTNEJjRpI2L4JrOB9/YWyRUL\njIwOcfAhJqj8ox9eTV9vL6ujJr++6R5WLDeBdkdIkiQliZP25E0yiNOUXJBj3jybFeSGJFS56j9+\nxDW/Me/5mRde5Mmnptpee8qcPqJGDdIMLxPca19cJB00DolWbZXxhk7RaYpIkrZnp+zpKhMCtGqz\nBGaOxEPiKMhZjNprxvR6GfNKLtpSATi5Ini7QZbieaaaSu68ecrnxqp5d8vXjtGsuigVs0+vefpG\nFfy8DyqFxLJQBoq3nraYOT3Hs/8/Wt1GNUBjuI8nj3YoDnVz8KDh7H6n9wQ33lll+FspjbttOu9s\nj4nmRnZ/bSeybONnUZ5DvMPQeZdKxVBhBFqSVYYxCXFQa5gCru4DXfYoShrxc4SueaZMm1jKLbct\nY2jIZA0FIXhuiOM4aN3abGPG10Rc9x8/I7nk6ww+YoqYPnfZZ7n++l9BHep1w1nuunsiHMnY6Hoi\nG2cpd0FXuYswjdupkyLvszXbphVscnKydfadYb8zyXZyj8gBs8s+dz/skXg4xyzCPcp0TrOynvc1\nxuhYPTDFyZHB7mGBTiegOmECA4OPXINSmsrhc7nj+tvois1xPm5ENDZOIDKPNDUPPTFRIU1ThHLQ\nmRnohdwo/Qe8iaFteWhr8/rnceLRJl9Xo3B3UswQou2taqXxcwXwcsyfb7znz12+C4uWnkE5103U\niLj+ZqMxOffwo2lqiRPm+fENhgt8bKjK266+kQce2I/9ZplUoYN8n3wxZGTgHh79vTnqDa1dQzOb\n8mbS2EyKGY5HIDyazWZbXDefz7NbECCEaMMjSrWgjalgZis4CUbDsAWbOELiuAEqzVAWYqhWa2iq\nSCHxLTmUJzWuI9BaEQRbHmT5vPG4pNAEwW44rke5bPhMQpni6AzP01jRdpL6s6SZolicSbclKBIS\nEh0TqU10v6bE6JjZCMJyFzK3B56zE42aOabHUUQcZxSLRZp2UjgICv72nbDqtRcoWG+x5o1TTyM0\n0LAB4DUDTbQ7SpSlbFhv2vO9b3+b4eFhGo2ISi2mYoNcZmN00Vq3c/EzFBpBlCRUV662zylQWcal\nX7ycJccZutcbb1/WFqYG0BYu86XGQ7OHcOizEINIJb6S+Klmo62QbihNKsAMHeuR2wwiicBxXKTd\nxiWaAE1OphwQmPH9nsWz+c7b5lMUUXt8SG8X2NmDFwV02FObJ5mOkLU4wpfjkKYNSgcUKFvit3nO\nfJx5AhXnCB0DKR4wt0R37o0c7fdy10qjzvNM9WHUaJHdg/0J5rs8fKMhghr2FOX9fRJdJ+d1AdAZ\ndfPk7ePExSrdp5j3tvecEn53AKMhXs56tTTJy6ky9pY4xhHnfZdSJ9TqVcar5j6V6loWvXYxT1ab\n/HHQZLkcvng+WkuSJENjyKYilXLlFfdwVvOLOI+mfOuXhsgu+0KOn517Lbvlc+0N03N9isUijiMR\nFpZSmUY7Cl9qU80MpNnWs9y2CeDt6OgQNke7Ctw+OTn5AC/
hHgFekXtkh+2wHbbDdthfbtvqaWvg\nkI6OjgD4VUdHx4G8nGtkq9wjAHsq4+nsVFUESArBLqANiU/P7H2IswJj45U2AN/Xsx9aeqwZfJTh\nX5idrzSQo3/Oq7nnuit5eMUY457B+kZudvEmTFJT06aDJTFsqKbs4tTon22OUMcdkaO8yOfHy7fl\nqY2NDI1wwYcN7CFzHkHg4DqyjR8rrXBwSOKMq79taBXT5p2MNBNGHh2h+GGfVYOGna845zAIcgg/\n31ah/qeLv4jX28+RPQvI2YBr3tuVqPkUt6//CXuXrHqLThkYrbbb1cLD6klKGqc2nc94EDMcB611\nm0sEIJfL4bomPCVs2x37OaUysky3Ca9a14PAR1hMXClFlmVowG0FMvVUBddUcHNz831Ds+64oDJJ\ns5mQ2Ha5oUcQBDQr6ynkzdG3Hr1IFKcoBAWr2u4FPmOViFywJzJ+pi3dlApoNjagFPi2nc1Gw1R+\nIgit8nmWpYjtLK5ZuXw1mS3IyNKYSKeoTCFs5WeqFVmiyTT89OdG0WXtyDCXXX4Zhx9+JFo4qBZG\nJSRSeASB335H0hN4noPjurg2zUsKH1cKXEfQjIzbuuCIxe1YA9Au4HIdyPkuOpHMcs2Yf0RnCC0I\npKRkUzVrmSaVkEjRhmZeyDJTNathZ+kQ2LS7nMzodjX9BY9j+824+7e3LmBeHnSUoexvZq6H6weQ\nOm0OHnx3M087sSRv94xrQjdPV1+B1EIhbm43EM8yMVLniAUmwLegZwFJLcfKe+9l3YA5Ne2eO5RN\n0UaeTTYwuGycge+YG/QMHkxwbBfz37EHzdvt/WMX6WTkRlJedaNlCj1sX2orK7j1mDk9BoKas3A+\nwfIpGZSnY+Npj5+yiFxesc4dJ7O5fO6cbj76mQ8w9sQAlc8YaGhsvEZnZwnP8xkeNjj5s9Uq/3LP\n/Ujf44JPvYdj1ZEAXHHJr0g+G5FGGTIzqIKKc6RxQrFQanvfjiPxXOO9typmG7Wpuf5S2y6Ad3Jy\nMuro6LgLOI7t5B5x6uYBdzqoiN8VkqTPoW3nRHUT2PPcgE6rKSikS2ViI39KNELY6HiSMLj2EYRK\nmOFlrG+YY+UsrxsvlUiP9iLyTDzBzkVNf7+gu2AWCT/I6Oz2jLb8NlohHzA8YY7EV11zJXPmzOKg\nA/vak2fdujFoNpmhnmfB0aa8elF3iZU3rGZjNeL7Bx9Cfn+TbD8j10m90WTevMUMrFoBwNpHn+DQ\n+TWE1lQj8zy4AYkaJwj3IrAvNh4egmmixO9965nb/hD/jTY4ZDZcIVKaDUVvoZuxdQZfLO7fSV95\nH5JGROIZRr+xODIsbBpasjkjI48QpQk6EoRCtUv4a/U6440GQrp0WyWfUqmElBKhIY1a8lQCx98+\n+oKoOULT4uBtygCtscy/OEIghDnifvFyUzn5gfPO4bwPfIDOrk6CIGiTdXmeTxAE7F0o4u5iRYCF\nychxnJ2QjoGWXD+PyixMZb+rxdSYBvjRd7ZeIDRzq//ySralrJ8E7jTPvvIrA1v49z//M7v8/Meb\nX9gai4CB/Vn+71OXZk3757ayykHTv/TzqT++YetNSqf9v8Vi0Pr/V+wWNsMWdT2tfRxHUKs2KHca\nZ3AXz0eXwT//zWxaYJysXYKAKNGkOmWDXdOqE5Dv3pU3XXAgS86cTb3LwCbd5RH6v9THtT8e5qw5\nRi0qX0zIggzlZGRZq2RdEzsOD9+3gZEHDPb9XPJx4PNbfK5tyR7JAy9OTk4+09HRsbPtpi+wHdwj\nF154IeddZwZbqbOEcnZBJc+0B+P6sQlUlHBIfyeeZbdKm0+j4yZCm7Q2gJ2TDDfwCRwPz0vw7WIu\nfZ9cMAvHkShM8Gjf2QnzulO6Xi3ptOXEuU4HN7d9kzfwFFFzDIDLLvsUOjmOk5aEbWGEs97YjYvk\ne0d/h9v7uwA4ZvF8xpa
vYGD0UfxwhGN7TaBqaKjKO950Bqef+UbedJwpJnXZlaTWJI6b6BYDYS7F\nCQKOft3rGVxueBiQDmFh+3DZ/xvsbR/4/n93E/5T1t3diSpbZjiljGcqJY7FIU0lnSAfhpQKZnw9\n+NBDzO7vZ6+99kRKZ1pgWOJ5LlqbdEx7EUdKXM9t0/QK1zenlyxDtHBswWYVrDvsr2t+YIIpeyDx\ng4AsLBHYzCziiGejBjvjsYtNQ8zn9iBTLyKlZFbZrAFBeXcmPvwkT8d19u4p4NrT1JHHHsLvPruM\nUk9I0xZaZXWBdvZg7fAoUdMk22VZSpAL0XIGO895EYAXmv8I9/0nF22MzssPOjo6zCiFn0xOTt7U\n0dGxgu3gHvncRYZ34KZ3LOLM188h0BkVS5AfN2P26ynRHe5MEhnIRPqQ7/TR1ZQ9bETVlxpXZaRp\nQsEvs7ew5D21GmnWpFh06LZQSHEfSbGQI9cTEbQyRpp5qtXt0wqsN+rtyrorvv5vqHgjTmKOymD0\nGB3XJ1fYg8uONmIHlbEbGGmkiFyO6666neHLTejz9UvfxPnHHU+8KUd4jk2/Sp6j3mgiHbedz91Q\nCjdLWbLwdTSrhgz9rZ0Fvv7t721X23fYf95e/4Zj2S2Yyj7Jssx48O2yZ2UU1j2X0FaSSimI4hiV\nKRPgte/TcVyQ0mT5tDx1x8HzPISc4oJBSpTWuHrKsUimZX3ssL++baiYU/RuuRxZYtJYHccknDeb\nTyKkyzONBF0xnnbgP85epQzX60BaAd8wzOG6+zEr2kgumOJodnIzOMyfTam0F3kbac+yhEy51OvP\nMzo2Zq6likJR4PjPkwstF7e7ubr9dNuWlL+HgXlbuN5gO7hHdtgO22E7bIf95dYxOfmK8cO//AYd\nHZP/1ffYYTtsh+2w/9eso6ODycnJjpde33Hu2mE7bIftsP9B9jfjHmnhcs995C5EmtECyMEEWpI4\nwQ/8dqWhUglO0mBW3qUQWuarekzk56m3NN5sgMYEahxeugcppV4WxNFaM7mFwM7k144CYN0fTCFM\ns9bEDQogJW841oSoX3/MMaA1K1cs5xffMuWzd91+O5kE6bkEexiMvas0k85ymc5ymVn7dVMu7wdA\nvthNqVQmLObJ2ZS9XFjA8UMUU5WKuvVWMt0WNpCO5EN/95b28/z9J88FINyrwL7lfVFK8WTlSQB2\nEpqcvwvN2kZCG0Dx/Z0ICi6Bl6Np07GazZhm9DRCCoqFvQls4U2aJsRxTBCEDK81Ml4PPTSE4wYI\nx2unrSWJ+dzo6CgDK1a+rJ+B/+vx2B3t/Ova1tr5cO+n29liINopmFKYsTjxx3tYfce1PHr/DaQT\nhuGwlPPJNMRJhtaatC0VqMiU4ayfM9sE+a+44puceeY7cF23ndnVmv9aZ+15BCC04pY77txiO/8n\n2PZoRApM4s7E5OTkydvLPdIyLQzNp4OgJXYhhSQVhhBqhq22m0Swk6PJZSlNS9VYXbWavpPOIEGS\nKtoKLFqAUAKtp5btKUok2ot0ewF/2Zo9deHYtxoKx8ULl9Dd00csPN71yS+3v99sNjh57lF8+1Yj\n1nDHDXcyvu5xnhgZ4d57zEBY/rvf4goIfY+Lzq3juUbUM7d7N26wO7lSgdDmGnf1zmbf/Q6l3NVN\nsWxyY0vlTsJiCScwFJBAOyLdMscSQWgyavUn8bxd2SNvEqSi6GmEKyiW98a3ud+oGF8qOos5EdA5\nMQAAIABJREFUGlUTGJX6BcJwNxQYUWCb1p3Ph6aiTSuKJRMQeeQRQZKmOMh2PyZJ0q60/H/Njvtl\nFZVNpdoJYfKcWxVrrbku/sLEjj+bGaLZLE/7f6q5AlQ7v18gJbgi4bafGIqH2z/7BeojF6ObMbO7\nzdw4+KBDcXMhnudQqUyQ2swbR0ripIHKXuDxdSbn+pwPvJcLPnYBZ515FuVyF2By96UAr
RWprYsY\nHRvnkYcf+Rs++V/ftmebuQA2qwD/JIZ7ZH/gTgz3yA7bYTtsh+2w/0LbVmHfTuAE4BLgH+zl7eIe\nadnKO+/FEYrVrsMa36TfrXEkVc9HqoynI1tp6LrkHMEuaUZq2d6CuUcxWo/YKCQzHB9t3Zw/aUUH\ntkqxxTamjV80yVQlWOv/pvJvirpSo9uskpk0UMza8Qr5Upkg9Gg2TXWS7/uouEEtqjN7jkmo+YcF\nR7HyvAs4Z3wM36YWrl6zEt8L6CqW+PE13+LiTxnCqRcG72AnKfiTEMywXB1f832ctyzB831cm2KW\n3ytHuffVlHoWMmuWgVZ6rehCyzLLReoojzDcm87OzjaPQpxuIsjnCL2gzcwWNTKk0IyPjaAyQ5Lj\nebuQiBaDn4NrCzqiuInKElQm2jJeQeCTNtLNjplGR1L9jzxi/jlzHYGaPj1aNJrT/i5os/6y+T++\ngunpf9QwTVtRT/vHlkdvlIb+Zijmf5k50mmfGHwcnKzBb35wCddeYq41qzeTpuDiIaSB6Xpn91Pq\nKeG7kvGJAvWaKfgpFAtkWUroBS2ElLFajXqjwlVXfZPQ8oy4UtLX28Pue+zGHx83kMuq1Wv59x//\nhgvOO+tv+fh/VdvW0XAF8HGmFSjxEu6Rjo6ObeIeufK+1aBNzqvXKlRA4M7w2EVoLEslmwQcVO5k\nfU8nB9uCmGK+QKPZZC/lsGlinEZsciyzNMXxfHw/aA98x3WJmhECQb3FghbHZGmK63mElhNaChcN\n7IqBMF5/3AkArFh+DyMja+gslQnsZ31HU/AljWaMzlosf1AudxNHjTbJy6JjjiHMdVHMd9H3zZ/x\nwUvMEfBjZ52Ck2p8x0O3xFwbMTJLaEqBsu38Iwp9803g5Pnw6ccAEAQB9w4PtvtxaqFUOK5E6Qxh\nN7GwsDtxGuN77pSIb1cZ10lZtfJBgsBAHtLxEDoDR+B4ksTqQdaqT+FLB8/bra36M3PfInFaJYpV\nm+7VdV2iKKJktT7/nHXOnsvi/jJuauCu0ElYsmA+YWE2KyfM83zyC99nw+Pj7L1PN3//qNmonNP+\nmYlvncyl7lUsu/lWGnlT/ahUFReXkcFRDjvE+A/jj4+xsbmepUuP4t/+zQi0HnfSCYS5kGKxyMiI\nrWKrVpkzZ85W2+pKgWoxrr3CprSZNOI2LNriJX/bDBzRgpdidxqxBTjvf55J4VKwT5811/DTKy/h\nmks/TqNiiJi0UggvwA1CgoKhPD36dUfT3deFIxyU0jQapgYjjiLuf+D31KsNDptjytM3ugFxITKO\nScVAJr+4/VaWH38a5cefZNUaUwo5NFql2Dn7b/rsf23blorIE4ENk5OTAx0dHUe9wke3Ka9P7NkF\n2iytrZLgGMgcDTolbxeZ5MWEvepNdDGgq8cs2vNKAqeryNrxGiODdW4ZtpzWjgPUEUITWOFUTzrE\nURMhpiZKFMckSYKUsk3kL4WD1opzbG1QVDdedehKRocHaTRi5sw17H3IjERL4lQjWnp7SuN5Lt3d\nnXzpS4bdqxTuyltO/xCRkyfOYJ8DDOn6yYvnMzo6St5V5G07A9dFuAGaaYcEi8MRV9qY55P1l0xm\nW4nT2NSkUlmPmMaHnaqUQiFEkxHmjdfieC4ZklLfQYDpz8pEFS0VniNJdEqmDWbY138APo4tEjFD\nJIlTsixFKdGuZHVdl3gaL/Cfs7NPfTOeVAyuMpttV/8CFC4g6e2yXBf/8nku+tQq5s4e4PjDTNur\nfSMc/JpOsgcTvvCNLxH2mQ32pGMWUezu4ku//AY3/eYuMxaU4u77bufqa67m7WeYd9psNBAqw3ck\nJVvkUAwDCi1qwS2YI6fj1VtfNf+qPvBWFn01DfeeV375BqmnxWy01pttMgKQeurfpn9++nfan28n\nAqjNPquUagf+W6eO0WzbsfZAQjxqNsyLP3cmlV98l
XwgCHMmkFgo7kO4dwGJYh8b1+qf009YCAj9\nArkgR7lsysuTNOHdZ53Dfbf/lu5Zhqt+VrSB9U/WiZMmo5YVdGSswrEnnsaixUdxxde/DUBX32IO\nP+JI3v2mQV5qy05eiVYa0Up0EK1NNGP6vi2Q7ZPQlE3ri9aHpzErvnSzVtPGl6sdpNZkMqY1ooSS\nzLv+0C325baMuSOBkzs6Ok4AdgZ27ejouBqobg/3SCtX+6nr17LnYqPaoKYAC/ugitRy++Z0htwj\nZWC8wc+UWSQKY3Ui5TBWSxipp2y0VKUTSYpEopXGbZWckSCRCK3aYw3to5SLflGDjUTryQy05hz7\nkcs/cx4AXqo4+OglxOlM8kVziMjn56FtELTeMN6idCCJI379w+/wvYtMsKNQ+F98+5qb6F8U4nsu\nb3/LuwD45Q9/ygP3/47xsbVUJqzHt34dG2o1Go1GO+qt0Qgh8d2wTYS0Zz4Pd65t9+nMmV0APKfq\neK4kTqKpzBv9ArkwT9KMqFnvpNYU5ItFlHSpVc21sLNMvTYCSlPqLBFZIqc4Nry+vh8QWTWdXBig\nVIbjeG3OBN836uzJKyhHT7dFc/rIspS0YX5TyDyZBiG8tniFEJry6xeyeP4c7vaMR/3r4Yjuxaez\n7FOfwkNQHzH9oBfOJQhCFi1Zim9d3qhW5cwz38GixfPbStulfI40jVh+5y0UiiXbdp+kudUhixQS\nbWkSWiHIl089E1TX7evTA92bQx36ZQv/y1doMf1n2vNeT8u6oH2aav2+IfO0UIpdsF96MlBoI97c\n+mo29X3FZggNQpi1ebNr2i7mfwEMVh+5k8s/a2bZ+CNDOG7I3CMPY1d7aioGAZqYZlSlv9Ns4Ok3\nU+IIRtYMcvbZZ9NVzrcbmQtP5qMfP48FJxiY8o41u9Cc+SgT60dQeZNFdeiRJdCLOPtdH+SC8wxE\n6fo+Jy1dtMU2GtRUg3jpZrT5uxOol/HHT3XNtM+KqXfUIttyhcIhw1UKYTs5FhmZ47C69igrNzwA\nwKsmX9xqX/7ZtzA5OfnpycnJ8uTkZDdwBnDn5OTkWcD1GO4R2AbukY6ODjo6OtjrqG07Su+wHbbD\ndtj/n2x+4TDOPeCDnHvAB/lA3zlb/dxfcrr7AtvBPdKyLE0BQwPaDmppjZDmyJFa77lERk7C2mqd\nZmK8TTkmqceanGPw371ki/0vI3sxwEOiX7Ck/zsJlFboadJLTBpvY/px0+yGU39vwSNrBx5i2e0J\npb77OeRQQ7UY5nL0zp6P63pEVusvDH1uuuGXXPHFS6k/anbJsUfWsmrF/QSld+Pni3RZBrpPX/j/\ncf1vf0ejWaNeN556rTLOmhX3cPeyZdx0ixE8KBT2YuHCRby6t59wd8NF0NPTs1k/Km0x8aRJpBVe\nEOL4himvWNwbgUOWuWAhD9d1GB+rILKIZvUp08elIj3FMkLFOEq0Dh/U65uopSldZceQ2wOJigmL\nAfVq0qZwTdMUpWFbZTfTuEG93iC0+eCuNBSwQnrEDUuROTJK/5w55PIOvTaWsSD0yAlQpZNY2LeI\nceupqygmjRWnn/FODnm1Cdju393Jyof+wB133MKaVYY1sZQPaNQqjIyOUu4y7yLLUlzXY8mp79xy\nY6WDbvUdKSZm+BLvSoOjhVGAgWknx5Y31vqz8dUVU9CFEMI4dS+9rwapszZDHY5AiilPm5d42gIx\nDbV4OQav9bQAp71uqHRa5wf1smCo1lPngvYzbPab2w+yf/Xyj0NsSONOfNvbSeImSqekLZGNOCKN\nq7yYRXQuMHBkobOTtYMD3PirX5AvhxStoIbWLmsG1nDwoQehrBZlcU03y4+6iSUHdxE+aeYMjV25\n+cb/QIuzmd3XZS7F8WbB9Onmuk4bdmw9vXnsqfeu0QgtkS+99gq5n0JKWiKmqjJIfe09HD/XpX+u\nwdZHkjKP1V0S5
RJ55jShxdbTaLeXmvVu4G775/8U94iUon3UapPvtHA4QfvYkWlJIBVVN2TCcmwX\nQoHrawLPZbyRULBRy6LvsmxdTB2Jt1Mryi9NkGh68vafADGF8QFTR09DLkhXt1kc19y+jFyjzsSK\ne/nBVd+ybXI47bSzyBc6ia2CiRRwzfe+yfDq+9rJ/yozE7ZVNFTVZkHK5yHwQsJCJ+VuEwTL+R6+\n9Nivp5/CxaaIp7+/n+tvvJt35fKoU88wn8vluGP16nY/Fgp7mDZ5Gul4uLk9edDSoG6MNlDId5Lz\ncqSJgUJyngsqRWhF6NmAUNKkGOaJGzFxM2pTiebCPcjSFAVtQdxm1GTmzCK16lrCXMH2nSRTeoo3\n+s/Y93/0XRq1BljGszDw6Sx10dP1LhoWLlp+2w2ItEkhnyf0zAZayIc4rsLt6sUrSpr3mnz4VatX\nkO9ewGg15p//4SMA3PSlC+lctJje3j7WrLgXgLGhVXQWQqTOqE+MmXvnQ+LG1nX4hNbtsSh0S0R5\nc3MExNUKwrGEZmFIpjVayCmeaaYWTbm16TYdJbR/bUMuWqL1dFUg846mQymtTdSM6c0xaim1dVSc\n9v0dqRFoUswi1YKBpKMQNi9cWLkxhHjZQm4wlK103FZs5JE1vOYIA2U8Pj7K3p5g4/AAyfNme5q5\nTyf9XWU8EZIlZm4tX7UStMLt6mRY1Pn+bTcA8O5lDzK2Zi05z8PLW/zZazJv0QHsv36YTXUDjzRq\nVe79xcU4J76fyri5T7mrm7i5ZZUl13Gt9NpUgZDYrLOxuL60a4r9nJ4eABFt1kYpQOqYpDZCc9DI\nx6l1K/Gbg5Te0M1xJeO8NNMmhw6vpOmWWZea9SfzNnfSptt/Qy6RkaeClwdEULrttTQzRVpdixbl\ntkhof6dP6Ehe09fH0jl5Cjk7CBV8/6wBLrppLY+1iMWtV5O+oDf3RLRu67ABLxN89W260M93dkmT\nGO06DKwy6UK3/nY5ixbNM4ukJdg/sKeLu269HlfCkxPGe96/VMQPdkHI08lUhorNpPA8n3JXNypT\nNC1WfMNvfsllF32WI//tG8y/8XrAqI9rBG6uwIydDXn6C88/zxe+MkVD0KpKDIt74/oBiRLtrA6d\nGQZFV3v49jRS6AxwhEsjeo45ffuaPlaaNEtwPY9m1CQMrAeMQgpBmiaMj1s9yOYmPM832SR2gLqe\nxNGSZBrm+kp274pluEjyljs7qr2I5AAQGtdrCTNAo1EDMrRn7l3OabTw0EGI47ssWrIEgHxnJ+RK\nREOred/7TIVoqbOLLE6ZP6+f5sRRpp1aEThm8Yztxuq5TluKbUvmEaNUi/vaATKknvKlpZSMD63h\nM+8+g9JeZnye+OaTCbvLFGbPJl/sIbPjTAuTPGgW/80LvKzT2zYtBJmc+pzUGj2N9a/tTbe+tBke\n3fK6p7JQFA7CdQk0aN8E8tLu16EOPpVS3+Es6MlRGzRVwAM/uIysspbMaaLtwolOUNpUJLaqGNtT\nZqr26M+arzMeW3arbSfsue/eHNZTpNNyV8+ePZvdw4BnogaDwyaj5NprryIsFvj16tvY0KhQe/CP\nAKweWYvr5KiOVJBrTWt8N+PSsWv5YKD4btmM43A3h1sHHiQ67XTqNTOWFiw4mOGRLTfcdT17unoJ\nhj0txmCqOYU5wVvsWyoHLRw0GimnPhpXhqgN3ADVVZQcO5b3LyBUP36PQ2hTfHOepNdPCQoNDh74\nEQBN2bnVvvybL9pJliCxgZ7NYAq7oNoWZS54VHl/V8BZ574fgA2dLkpL3i8dFs32kHaxSFMH9+R+\n3tvI+OhtYwBonSFeVLgdDnon6yntJGAnBS+mbXUKMz2m2hEn5vpdjkOzsRH8nfFc8xrCwKVaj0iT\nBGW1LNePrSVr1ih3dRHbHaAZRVQ3bMB978epNiM6rfqMShRrB9ZQq1X4zQ1G7eT
b3/o6l331EmrV\nKr+92wxqz3NRWiCdMzn7XWZxfs+7U+Cn7Xa2+s6ozBgh5Bl2Ic/5ASSKLK4jPDOwDirPZ9mqVfR1\nzaS720AEE42MeiMhSRNc32vHvzNlqEUbjacJgt1smzxUthOuK9uiAI50SdN4GhDwynbuO99qIAVh\n0iqFfiNBsAghFeUeM3mPO/kkXM/Dc5y2kk8uzKM9FyFdPC0RNpgoCj0MVySnn9TF7F6zcDbilKih\nKHY6HHPcceZ56hmOSNAiI2uVWWuFSre+6kg1gStMm0wIUrU9TgBHeIytHeDqr1yEfr+ZkB97zSI6\nF/Rz1Jnv4KOf/heEsOrjQiG0Rm4GK0wPIbb+q9FCoIUks4oqa1aton/OImAqCNf+hfZiMs2zRiLw\npqoPhcCRAWl+Ic1FnzCfOfgUgtJD5PInsNsBAYfNvR2Ag676Jdd96i3UK4NkdjToVEGWTdsGaJ8i\nt8e6A8l+e+4NwPx5cykUi+y/f88UVKk14a47Eez2DLf91mwY1113MzguK0fv4aTD87z3sD4Ajjgw\nz40PVBhaNQJ2b3G1YnCojkIiGLd9kSBlhut4bYcm/NWduE6BU87/xMva6LgmmeGlZa6ivRGa7neU\nROO0IQwhXFLHwdURWWUt9bV3AJCMr6AoY7rKDrsGrbHk8lxT4QcBvl20PTcklyvguw5ly/3f5cHv\nttKX//lw8A7bYTtsh+2wv7n9zT1tnSnjTcip3UxrbZLrTbYxAI6bw9nnKEReEtVs3qVboJTPcdPQ\nBP/66zFqwyZhPj93KTITnFlPKNoStaYSaFyb3WTwuexPJtdUpUk7GOE6m2dcCtfsdMNPjPP04xFH\nHP0aZDuAI+2BQKG08dLSOKMQ5pioVKjUDEYaSsmV3/0uvR/9MKVyN3vuYYpZfOFx443XMTo2hMRg\nzd/65qVkOrM7vGlDlmVoZXKxWx5NS5OyZRMT600/deYR0hzpwtDg3El9I309++G4KV5mAqbPTVS4\npdKgQMDQKsNGMFZPkEEOL+ejtCKzau6NqIkvHYrFvSkUCvZ+FXwvpF7bxBNPGKw5TRM83yeNt+2c\nfOrS43BcZ7NnUloCDvnQeJKyT+J6Hr7rIm0esBLmBCbRODoFe8pJZJ50eJxCoUTgtrDNHMMjEc2J\niK5CaL/fQOgmSFC2KEoKiZSb87lMt1XLf868RR8039cKwRTGDaCzlCyN2DfYBfmCeZ7a4AoenVjN\n0KWf4fsX/QvvfN8/mXsFGk2GYPPgkmxj2FPeplYCx5WsuMcoFn314o9x/vmfgNO/ttl3p5w/wdQ0\n1mS4eI6PZz3tJCgR9ZxC0z8VkRpPd47q4UhnjFJaQ909QaFsPb6xP/CH8IvE1QytIvvsqW3a5lj5\n1oJ5W7MPv+ckjjjU1Csox6Py5FN05fdsn3hVppCuTyNxqNfNyeWuO1dS7u3HcX3Oe3uZtx1ogs2f\nuvJxSjM7KfU1Sap2LCkIdN2k7aV2IqkmMSmRypPv7ALghDOP5LST3r7FNrqeRE8nlmpBUS8FBDCn\nIWnf2/9h79zj5KjKvP+tw6FSVCqVSqfTGYZhmAxDMgwhhBDC/X6Tq1xURFTEGyqiq+4ui7zIgvq6\nLuu6rOu6kcUFXS/gDbkoBIUQYghJCCEkIZdhGDpDp9PpdJpKUSmK4sz7xzndM7lBRBHlzfP5TGam\nptN9rs95zvP8nt/jZDFZbSWlFXOIVs+jIPTe7mp1GOO1YIthEHdgDyE40XWaGci20StSSDyTJZ57\njQpbu5rG3g+8iF4rrwwODs54o4RRNsIUkx1i4NsqKaBxfc1c5o10+eXamMvG6qKO7V2tZKniwGKN\n+PGf4fQ9AsD/3PcoKxYs4rT2Ag8H+up805w+bAX7eQ6tvh4c3/MQh9mEUUzNFNB8vp6yYrMDI3T7\nDj1KZ9Z94F0nk4QxYaToX6vvYLESuLkAkWa
kdb2oE+mibB/Hd3FCU/NSSX58332snvk/5HMtnHPW\nyaafgquv/zvjIzSHk+0CNsihTSFth8ZMD22UrS9FW8zVPoojpI1ONzflrdrb9iOuhwTSRtV1kdQl\nc39PR8dh1Mv9FIta4ZdjRdsUh0RmREmCY7DSnu9RH1hPW9u+VE2dRNd1sG2J77tkBjcvBXijXNJd\nTLO+/8678XIeLSZBpLunGyl9Aj+P01BoCRgMg0EaoQN9mWZrswHbH0KftBcCPDtDmdfOX1Hh7vk1\njp6Ro61FK23biSDV2FrbKGqh0u3iGcPlwXt/xDUzPmj66dJIW8/MGCf1kPu+/13uvuFKJu+tXTMP\nP/0EiAJZcYDrPnY5M394GwDv+vD7SIQOGDbCKWmmSFWKI3WdSf05EhtFEg5wz23/CsDXJyRcd/VF\nzXYND95rjS9AmT4hEMJFOB5h6xkAVM/+NvLUbrqXHU7u5z8AoON3dbrFNKZPPpi92xSF+n0A3PDL\nr7J66XzisIpKTSDS+LIlw9JHlPqDr+iT9m9lyhRTaSpMiUbuhd/R0cwNQGbEacqrr6T85Edm3vFQ\nvkMtirn8kkP5l69od9fdCz/AgOgk39JFNdX7WGQxKa2k0hmmtDMcleJkgtYWPUdnnng6xx11GGG5\nul0bXVdo9Mg2cYPhfVVKgZQIaaPqOvOytHgWlaWzCYiYmpNIU8vVcV1eVQ0cUZPDk0GUqXpkiman\noLKMLE50fVTQwZ2dyK5a2go4cXBwcNOwZw3CqH+2LOtqNGHU63KP2EqRofBsh8QorjBJmpHvhlNb\nkPJiJlhdz/BCPWyt9QppAi31ldTVC8RKJ1kkfYt56rG7SVTCJ2/4NwAeygVMbnE5emIrm1y9OwPf\nw3Ec0iwjCbXSXbSknyOd2c32zZiugfctEw5g41M1ep99nvVVo6CTBKQwvBxGaaLoGxjA89xmFL8W\nRpTDOmEckiQpthlmJTT8R4ohuGOmQJqjPN0qw2zoQIPt8xpGGn9YHCty7ggCz8E2mWQqzihV1pHZ\ne9FuUtartU30zVuIk8UEhuo2H4ymo7uHpauXamvP+POFAMe1qVbX47g6EJoLAsqlIo69Z7OGXhRl\nhC9G+KN2nlk4XC569yWce/5ZfPhjmvehkA8o5G2Seh1lrA7fBAilbW+9WaSN7/r0LV1GaUk/AK3T\nT2agdxmf+dLfU9ykx2nNhKmc+amjuXXqe7HN2CVRSJZGKKVIYxOgSxNUmrCzcE9x5XLS+mMAOLnJ\nZBkIEaHMhuxduYxZP/wOreVe2s1crF65gqTYT9eGjP0PFNwz858AcAvfp2W/Tj7w8Y/i5vSBk5mI\nVZylhDWtuGqlMuX+1Tw2506+P/Nr+nWrH+fpBQ9z8SRtTGzlS1YAGY1ceiEcBIrY7aLW8VH9nu4q\nZqQZf9ddpOcwfcMaWPUE5d/cTuWxC5EHuKzz/weAS+bOI6vVdXZewzJUWklk2/rk/0Cftu/5KKXX\noi1SZPYKWbJPcx+lKtOUCyrFNvvB9zzCtIySIYEbQ6aV5ChXQJLh2D620GMnRUKaSQRZozogWapj\nGMoR1DKt3EtRlc3Zhh220XUdo7RNF40jXyo1tAFtG1WvUJk/h8qyOfpZVKZbJaj1A5T7ekkc3f4p\nZ5/OHsLAA5u0kBqI8U7bxjFZ0XGa4XouruNAtUHNsfObzK4qbYvt/d9viDDKcx2ETNlvjM+GRHeu\n9kKJBjqv+bo9NWzKURnHtelNcdGUDp7t66dYqhJnKU8P6Gv6DV/+MgceEeH7Du+6UJ/mR0/poafF\npSPvI02UNx94SFsSRTH95qS9e94iDTHSgIYmKqSzrZOkvhEUbDBZhZ5jU6vXyeIYxx4KTEipqNer\nyAaOVwgiY0EopcgM3lcJAVnGcMyCModYEwq5zd+anOHbjGMmGtW6JbVwMz3tBVpatStjwYKlpK4g\n9TwS43b
wcu2s+fl9yCTmkEYqd1cLqQNePk8cRjR3KhmFlrGUSi/gGKRHnISk8YuI1G4Wt42jmCSN\ncJ1dU9q3/PwOHu9q5TumT+WBPkgybFFv1tCzbYc0iUlUjDCHWKxgabGfZSv7qJZqtJg+TZY5zj7j\nNFYseQ/ptXop3nnG1Xw46qbWu4C4XcOmqqtXEMUVsiwkNa6cNIpQWczk9396h23tXTOfh679XwDO\ne88XETKHK2TzIJg3ezbT95jEsdO6oWIyWZV2m9QqRfY/YH/CokY7/Pj62YwaJbnhnBMIjLspN34M\nZIr+Z9ZQK+mg4/y5G1iX+nz9ox8mfVRj/qXISOSQ+2n47VSvF0kj5ohSJG4btdbLyNDp3WdV1nHa\nrx/C3xiSCV30unMfh3baCKjw6fN7mDCg2z8ji3EFREoMc7jor+1w2n9gdqSUewzxXKc6sJvGEVna\n4L5+FcEeZFlGaG5NaRIhaik4ENqtlCKt5DZVQ+ycotA2msygXGTq4GYCJVMNbwUKvod0R9Nf2YBn\nFGmlXKe8vowufbu17OXKrVJEVcP9ZEswgeGNK+fRu2g2YmA1BcMF7uQ9wqdWU1+ykPYMQk+PnqUu\nMu+hkEZ9ZsAeQnEOQ24XieZK8nN57KI+mNLXyDLe1ZEfBB60LGuhZVkfNc+2IowCdokwarfslt2y\nW3bLG5ddtbSPGRwcXGdZ1jhglmVZq9ieIGqnhFHDuUe25BIO2KuVNev7qEYNt4DxEwlBI31RvpqS\nZgkfO7CDa8/TFlNbmGA7kL4cUX2oyN4G4/mBj13BZz5/Iy15nyhsXJcw/gia0Jo4jpk/ez5f+a/v\nM3OxxlRPez5lRVhoWtqz7r8LgCuvuIiOtn0I4xipi7dw0KQuNm+OqFWrROYkjKIYx7GxbUkcm+Bk\nkuoK20r7ZpuxDU3isBWpj8qypltlW9kqoWGb66g0vq8sy8jSmEq5qvlHgCR7BewRJEKBMSA3AAAg\nAElEQVRRifV4dHf2EPgFlKwNBXtdRRiWiaMaKk1wGoE5pYhqdQJvFI5oJJYokjCCzG5aDY7tgLSp\nm5vI60lNKGIJ7V0dAHS1FfBsT/fGJHiUB4rU63UEGloFEAuHT197A9d++V/Z98AWPny+hgyO9B2+\n+q3/4uCjTiA4/nO6ne3ns3Lh/fzbxO/iXKoZEksr+il0tNHW6mE3MtPSBNKdWzNRrY8nFvwMgOvO\n+xCVco2kuAxpWA/Xr1zK1TP2JsxS+p7ROP5itUw+X8BxBEJFpCaAftB4FzsL6Zv1I8KN+mqe/M37\nUQpyd/83Xa365pCtepCkGnH2BRcTeAfr8ahVeWT5mu3aJ4RoBqelubtJpx2n+1LcVp/Txmj/9eni\n34k3PkupXiE0xSomjy9wzJT9OLI7x/lnTOcjV2oWys9iozyJSlKyxq0ry7aj4ngjkLPSunVUR+j3\nfPHFiLi+Gtd+ohmHCZNlZOppKpUy9VDPSxLV6fFclgwoLrjqPg7u0IVHFvyqwJQTJnLg5FP53d2/\nBKBv+WqSksB2XESLdt8ddu65nH/mO7np+q+x4il9c3n44cdYVypy45V/t10bPVfHhhr9s83N+IXi\nSnrn6sSe/tXzCKQi8F1spS3/KE1Jk4SWjn2QA+uRDR2GyZbcZuwEyoAbhv4ipWT9uvUUe/sBsNiu\nNOTQa19/uGFwcHCd+b7Bsqy7gBnA+j+EMOrGG28EYI8yrIyLRKFAmUXnmRiUauYSgS0UZ04qcN+N\n76Gnoq9AfcV+JvgOd5SLvP/ii7jqH74CwOe/+CVyjouvYjrbtM8w8Bw8mbB65QruvEtH4r/2zZt5\naNE/03LspfzgVzrLsRq7ZMPShG+95SYA4qhEpfwcTjCWvU2g6chph1HsfY6+NCVn/MJ9/f1ICUma\nmaQQsPG2UrLNm6TJBN0qqWjYOA0PMu2oTNpwEcbn5eZsbGekCZRppdvRc
QArVq4h35rHM+9ZaM1x\nwIQDqDz3NIm5UpYHVtEx5SD6w3X4ttNEHGTJK1QqdWZMP6L5eStXLMdzPHx3HPX68wBUVEyaZEh3\n50GT4eK3Bkw6dCLf/4pR+tJBCYlQiobTqFItElaq1MtVFizrAyB2XL4zegw/nnUfK6s1zvzV7brv\nwiaccQLTphzP0roen/aJil/ePZOZssQd874JwK0zb6W33s+UrlbefcYpAHzo/e9CpTvPiIxqLzD3\n4Z8C8Ntrf4Fv9/DAf/wjexscrYwTkmQDN//gVnpaNO69plLScpnuyT2kcUSl3A/AxI5O0khAlMFG\nfbDnpcIZ4zHtxKnYiV43C4KYtWFMFkW0GpKyI7snsqy1Y6ftlMLDtc36bWmhpTXPRyYtoBPtmqnh\n4HWfSXuuFcfTh8OZx53A5z/6XqasH8F/uBcx/1GdrOR4rWRJDVfIIbRSpiDLyAzCC/RalEpBvOvZ\nNVmWoUz2Y1QNIVUoQdN1KFFIW5Imei+BdjVNac+jUsXNN88mu/wcAA45dwql1hFccd3FPDFXH06L\nHh1JpXcD6+oxZaMMHxeLeeqRr1NdsZxM6oOgGFWw/WiHbfRcZSpr6c+v9vezfN4celfMRxjfey7I\naQSTykhMJHvFM0vxKxXybT71FTWkQYBoCJjuXcPlorO0M+I4arJlCpMuP+nASbz8ysvN8Vr25NId\ntnNXqFldQAwODkaWZY0ETgduAO5GE0Z9ndchjBouUaqRE44jmum/KoUEiScEymSsHTLe4/ZPn8o5\nHR5V47ue0tFKp29z/TWf58LvXshZxmcZRSG+nSJVTO8yneo9Z/Ysvj3zVn72wANN//UJJ51AyoN0\nfvZ2aumP9YAlEa49NAyLfq8RKaeedgJ+LqAebWHzZq3kXEciSLGF4AWDPlHSxg9yJJUSymTYRVli\nkAmmf+a9Gwxsu1I0YFslvS3kr4HgcL2RgLbuA7+RgCHp7jkQSYIXaIWaZhGOVEzcfwJ9FW0Z9vdV\naWnfF5mmtLTsQxrphS0UFNyxVPpL+OaWQqLw7ZGUis+xeZNe9HEYkSqJbe9aubFJ+7fT09OJirSy\nVBngONpybwTNXQfPFni5HF/5vd6Qt9x1Nw/f9yBuGpElFZYW9Xx05jpY1fccr85dxOgZhnlQ1rjr\nmNvZ68gjqAkdZpxw5Glc+IHTIa5z6Sc0DG/1yiVMmfwaWWdK0d+7BIAlixbwxY+/i5n/+Pcc/bPv\n63YuXIVzw1X0k9A+Wjd+2unHM+8XswjrCe4BPq5BuShHECU2wssTYqCaaUxge4z3fGz0eNw3oZ3l\npdWsfO5ZRKqBWP3hAAdM7GnGQYb4RSS2LXGEIDX0psG0y7ju6i9wyeiFRBv1gSf2PZCW7mm0dk8n\n36r3y9/+03cptOb4j5/8iDOuvIXfPaADarmufcnSzQxLom9a9NKW2FJzPbiODqYv/dddL1bVue8+\nkOk+TQgd6i/GBF7QhPylWYoSgo0yRIgt5lnMCBlz6VEujzyRsGbNbAD61mQUTjmfrs48103UqfH1\nr8bYScyi3oQPfuEJAI476b3InERVTyZv63X8y19+jzNO33Ebfd8mLPWzZsF8PfZLFpHUqrhkmj8E\nyNKIehhSq4bU1uvbeq3vOSbbushDlmXYTRBBCmqII1I/lLyaJixetIjSqccC0NGmD33Z8DYwLON1\nB7IrlvZ44JeWZQ2a1/9wcHBwlmVZi3gDhFG6CwlCOc36he1jPUIEycsJtkEwHNki2WNaJ7V6hDAW\nUSEocPzJxyNPnYHv+aSRiQivXMJ377+fn/70Tpas0hu9kjocc8Y7NYdGo7acgmDiFFp7Dkclmjw9\nyxIUQ5biFpPuSuaQy0/g6eWPsfdYbWmXys/ieoJN9Tom4E8u30ax2IdKYvI5g2mupWTJ0BWo4eQQ\nsONgIyClvVWQqTleOyABAnhJ6b7Xa
ilSumQp1KqbAQja9mXajKmEtSJVU3WnJfAJAiitXguG9VGk\nDsXVLxBVEwaSF8i5+lrpyJFUy+sp1p+js1NjY305mv6+Naxa08e4gl5kvutRj7dP+92Z5P2AuFan\nAdGXmea6RgpSc6XsL5cQ9RpTuyYz+ZBDAfjB5Cn4wuXwqUfTO/u3zbFoyQWoqMbBHXkKE/XYf/v/\nXkPp2x9jYcHm3Z+7BIBlwWzOuP4wHn7oAR5dpAsQz/vtfUyd+rmdtjWNRhCaYK/tOiRZDS/waMvr\nNXvoeMX1384TjM0xsvU4AC770DSyuiSq13GkRBnX1Mr+ZTzxzBrG5VvwG5weYY0g9ljbuwIRa6Ni\nwM0RRTEbogo4uhBA37PP8OiaJ+GIk0zLGjhzhRA2kd+D3/1O/fkXncYh4z14NSY/VmOipdfCvCV9\nHOu30W2qH1WjIrX+KpkS/OD27zTJyKRtI6WPkEOuF9u2sR0Hz/PwTIDPtQVyJy69ncmTy9ewb8GQ\nnAnBqwIykZEaI+3lVJNy2ShygUaZpMLh0SVV1jsx+wStCEePkzdGUIxXMvPOn6MMp8eyR3+PyktO\nabN58m69ltInVqPygkQpAjNvzPG4Z/GjHLcDqmrfFyy7ay7FOQ8BuvKNY9vUwrDhxSGsVVj6xCKi\n+BWykkGliQTbtogrIbECPH1AvJpl2slhqyapmkBhCUWhECCNu6pR0lu7LRuFvHc+lq+rtAcHB58D\npu7g+RsijNotu2W37Jbd8sblz54R6dsu2HDuYZM5fZqmJjzubwP6yxXWnl3BS/TV906nj3+pp4Rh\nQqsJ1OT9PCKDQiGgr28ZX/rS/wXgG9/4Fm0rXuDMc12iRAcm0iwzEDbVvL7bXh534nEI77dIU+5L\n2E4z0QWGaFsH+vtxcgHjxgYYA4OwHtKSH0+9XkOZYEmsIlQSoRRNXgvtkxOIbflV2DFng1IKW8rm\nZ++IyGjbDLQxrY1klAzPdUjFntTq2tJe0/cswhXkA0lxQAexpk2eyJnnTOeO766h+qJ+r02x4qUt\nZUaPsCmtq5CMMj42VaOyoaqvayYY6Lou8eYYgWgmqHguRMlWl7/XlHKpxIrlK0iui5u//+ttt2H7\nLqEpdVYtr+XD519AojK6uoyvWEhK1RoHtSg82ycw8MDO1lbq1TJR/zL6a7cC0Pv7exFZTG//Up5Y\nrtu+buAZ/FtuI0sSyuZ2tuDpxaQ/3jlhlMJjQ0XPx6Z6iQtWzsEJfAKTsPPhC0/h/BlXsPwHj3L0\nO/QV/fhDJ2H/eCbfuelmSs8/gzNOW8vpxrX0LV3ESpwmt0TgKAp5j/7KWmolQ9O7BSpRRFIdIEHv\njWBMjsqqBc12NbJzk2AiIjcN3x5DLtPurid+/Ck+/r6Y9MpruOyB3wFw+QcupV1K0rDEknmzAGhp\nydPe0c7H33sskCKNGek4tnl/1VyDcRyhVEaaVFi9WudFlKNas3LTrsonP3stn2nVFui+/7k/Lff7\ndMxvISjq8XRdF9/xyDJBmjRuph79tSrL/BSnWmYi2t3kxhUuuzCjVpzDzx/V63vp7CKqPcD3XDpn\n6M+pJGXqAzVeTNZRjnRQOGl3mVPf8bwX7JQgreOaNZK5LrGCeq2Mb6ziJIqRKDrbx5Ife5CZS0he\nLBCGIe60A3HHmsByexsSRTaMJ0xI7XoKXK8Z+JdxjEiG3ESwvTt0uPzZlfb1l55FR14x8PE2fm4W\nxjuchNix+WTB5d2mOG1YlZr4VyjyBvfoSkV55SLKiwb44r9/j3e8V1ejuOGmmWQZZMgmflmqGIWu\nO+cZX6/nuTgHzwAn0GBRICPcyvUQxtqd0PvMQnoOOZQZ0w8mMJmCvatXsnL5KrI0Jm8CG560OXj6\nVJ5cWaTP+N6HlPbWrg2xTRCyIUop0ixrTpQQQieYDJu4bd0jnRN1n/oWlXVAKE1oZHJvjiLC0lpy\n9
niN+AAqlTKixSXIeYi1elEmYYJ0Egr75Vn+RIn2vbWPt7a+xktRwkjfo1TRNJf5wjgSBJmQKJ1v\ngyccknIZ4l1bRvWXItaWS9xhuMjnz1/MD+74MW4+oFrXz4T6Ld/6co04TWgxCm7/7m5uve1OHlM2\niVJ0GazzQZ3782xSY+DpR3mirJVUWKrjkJC3XTzjdvjtnd+nd8WTJGSUDaXuxjDkUbUNLGKYnPXu\nc+h7Xv//2lWf4o7/takuX4Zb026KdWue5vYf/JLv3vhlvnKpjqMUln+TSqXEZwJ4bO4c7KNPBCBO\nFKGSlAdK1CoN146LtANK1YQ1/VoZVqKY/kodL4EHHtF+1WMmtuMOC/Q2Eqj8fCvYMXa0pIneWawE\nws4xZfpUrrxUZ1F+6LyTtRsljhkoasU1dWIXQU4QFpcQhjUSQ1+Qpqk2NKBZj7FSKeO6DtVqlZWr\nnzGtUFQHSsBdrzflTVm6eDFpTc/b/AWLQGqffHu7XnPjCwU69ptAoa3QRFP5juLUaR1c8+Uelsxb\nSt9KvZaTLQ4fact4JqjQe8a+euw3JtTChBJlGhDnF7KYRCkc9kQZZFUsMoTv7rCNecfGUylxqV+P\nh+sifR8pGKrW5DhMnNSNJMNv+PmiGHuEiyM9is+twXtRb5DCIUMxk6H9q92laZohG8ZZEiHN/nca\nLpPXiHv92ZX2Tz91Mt/xFXMWr+AbN2pL+d2TcwjXIxKKB++5A4AzzjwbSUL/gllU+vRiW7J4Gb95\n8EHmrVzNzJ9ewdMDGoiubJ80SkgkhMaPmFRfIOcKpEqpV/VBkAYHkus8CJV2kRilrUi3GqAs0u9J\nKhFqC46Tccg07R3K4il85oErOXzadHI6/kd1U8T6OCXJVLN6d4PDGNgu8NhI3x96pj3e2TClrd9D\nR/B35OcG8B2dd18ICsgogyxtojha8/viei6+LenuOACAfJBnY73OvdUajuFXcWIYN85jYs9+9K/p\nRRmuTTFCEKWZThtvltx6iVdGusRSkMnG2AlsR5AlO7dYh8tLQlGO6qws6iDZL371K86+/zdMnHJw\nU2mvXvEUs+Y+QsE9l6lmQ9/4vZvpmbqANEyw3aHM0zR8kaRaYqRIyBkUQlrpQ2YJXSLPOJOoUIxC\namlKNYuomfXhejvmVG7IAVO7sM81JOsTBS8Un2PFwGJKC54F4OX1FaZ3T+dfvvRV5p1xCwC9T3wf\n2dPBYd2d3D/rFw3acBIE5VoN4UgiY3IVayG1pSuxhaQU6vXt5H1EzqVvoNQsCRfWKhze09JsVyMA\nrUrzdUYlCmkMFdcNUHbK6L00xzxAV3sbre0TWbBgESVl1naSktQbNVUlLnoslK0hqZVKldBUPs+5\nAXEUUS/XSRtFXSXkgwLFXUN6AjB1YicXX6D54vvXPk89Efzq/jk89JBGdnmeh5fzyHfkaW3RUN6j\np7ezNyHRvBqP/vhjnHS2zhJd9GREx6gY1fEiawzXeFXosUmigEKk4zB7K0mUJsi0QM7QJDjJAGm4\n44Zn/Usp9y0j2WLgvEmIs7ci3FxrGj9kml4hICWLN5nhkAxkilpYR/UWmXGkGU8FSihGMEwJC8Gr\nacqEWn2IpkHFpOZQaVAuvxaL4q5yj4wG/huYjDb0Pwys5g1wj9SUQ2+lzl2LS7Tc8XMAvpDPmNiS\no91NGDdKK6Pj3/Nh5i9eya/nrOaW2/XV9775i7j23sXg+Hz18ov41oXapR5ITUe6YtkyHl+mrZaB\nco17fnoHd8+8qRmI9A47i8y+mbTaCw2L3PW2UqpnHKODMh2dU3Dz+1JPI55ZqYuAHjf+dI656Th6\nujtIDMZ34R0/Z2V/iSijGWEW21TD2T6w2KiIgf6ZrS1z0Erbtu0mLGhbqQ9oV0gSZ+RsR5/e5m+e\n59HW1goipdMUzPVsh2qpSKYynL3MAnZ16u9AMUZKn57J+rrnOB4
L19yK6wXYxtqNRML4thbGF/JE\nxoLd8EIVP/CpV3eNPKi/UmbukoU8tOhxAFaVF/GfTy3ByY/htLM0r8StK5/WwS8Jgav7/okPn8mh\nZ17B/Eef5YneEsW+fgD8epU0qpPYKSY5ja62gCiK8NOMwFzh1wwso5RzKIZxM+A5rqXAj14LxSOS\nJp5diZQxLSmrDp9M7kl9w3n0pz+nXOxDKJdP/bNGIv36ipsI4xDvzPeTa8k1CyH0F/tJskTDcoZR\nkTqxrgKU69Yugo989v30dB3I9V+4mSXzdMr5gl5FuR7SCJk21nKYqibaoAFZzbIaMlE8s8rh9vs0\nseeXr72CtoEK9//6/ib/RpYp+vr6sG1wHL+ZXxBFIba06e8v0meK8Mbxy5TXV8iyjOcHNEFbLQyx\nVcLYHded3aF0tLfR3qmVsXBSwuhlLjn/LL7ap11DYZKi6iH2gE2tovm033XqFLr3a2X+ExFJPeYb\n1+s1cvmVd7HkGViaC4kGNEma+m2KapF4PjjmFqxEK34WoKQi8gycNQKnvmNL+3+/+CEee2wlmdTt\njCohanNEf++zJH3GGIwTSFI6fQfHpNBj2yjXI85S8kGAN1K7Z1Qha1Yn2tZy7prYNYSeUwLX97ZC\nj6SvQci1qyHgm4FfDw4OHggcAqxkiHtkEvAQmntkt+yW3bJbdsubKLuC0/aB4wYHBz8EMDg4mAEv\nWpb1hrhH/m1RH5vrIR9ZWuK3n9TPeqslfrdkGYe3tnDseR8C4F3v+yBerpWJ047m5l9qCHg9Spjc\n1U5H7mUW/uJf+CdTa7AQBLhSUg5DHjC0jov763z1+uupXXc9i0wlDGUrqr1PkArI5XWQSEl7q1Nw\n+lRdBswtTGDOomdY/cIAGwwMcObMXrq7p7FixRIefNhkZy1ZAcJUrsga5E47z3CU0kAAzUmqDW9h\nmP+GMz6IrY/Ubd6y0muuU55HLhhLqjIGwo0AOK5jMiUjekv6StzR0YYUgq6u/YlMJmokYd2mhAH7\nRXLj8vQP6NTPVCnsnIcc6xIOK0/iZAkqSZr18PbeO8ezq+rbN24nolKF5/q4eW2tlpKITSqjr3ct\n0lSJmdJxIEGakrND5vdrcv60JaOn5xfM/M4AtUTimyrr9bCMIiVLBL0l7at2Cq1MntZDb+8AK2qG\najbKkDKllmTkPL1m2vIFxNWvcQXNEiIDNXV9ycZKmYHBDDvQV9/LP/YBLj9lCsuWF5l8gbZXcp1T\nmPmN67j+C9eSby0QhqZMXj4gIWHilIlIc/21HRfPtjniiEM58j3a/dZ9aBujhMO3vlric8u+CkCc\n2ZTCYbwfw9jidDEKzT8y9AKF9ODar90GgB90U64PMHnaNA4w7qY9RjiMcAtIKXhVSizjLpO4qAza\nOnK4hrQ/iY+j+4SYNE252KzvJMmatR13VU444ZhmoL27u4darUZbq0tHu7b+l/X101IocOmF5+OM\n1TUes43LmTHjAL4983fM+lWN975b86l0Bzl+d69H3LMv1x+ty/F9b/b3eGTFw5z7wYtpD3QQd9lD\nS1kbPY/dPZrxjv4cEXrMCE5gR/KrO+6gXu9EhBMAiOIMkSW0xuPIcvqzVZbhCIWtMjzjYpNS0rOX\nRzcZrmvjm8S7pogG1Bkkgj1tyfm+05zLupvDzec1jLKZM/LHEUZNAKqWZf0P2speBPwN23CPWJa1\nS9wjz/X2MZCASGM8YdjipM+hXYojz7iMdSZ5obWjFSklbS2CKRPPA8ATIJvcxqKJ1iANiZIMKWzy\nJhtpSrvDVZ/4BLffcjtXXHctAHMXzSHNaiRugLS1e8Fh7614le2cnrBqKslsibOHx1hD7VqqPE8l\n3sCDDz9Eb69WhjorStcgGXKDyK3pZhsidMaVI0WzCKzKMjKDNImHMZ5JARJnWLHYrRVMe4v224Vq\nM9XaRlxvJIWCPoikrUsf5Ty
fnja9UYOcT29vH7YtyOf1uB/Z1sovH36EID+GOFxHzUTNU4VmMxSK\nEWaFvCIypEr1mjJdkraNH7hN/+vriSCFOMI1STz5JGNCEFDZVGZVn3YHqJxPdVOJygVF7l2t0Q6y\nnvFZu06cVBhYXUYYrudW38ORUKrUCY0DubWQ44RjphEeNJkf/FAHypzWNqZN76H/7l9TyOtl6iT7\nQHreTtu6vloaCo7asLpSA+WQVvUcffbvruOqK6ez7Loa2SU6OLmFLVz739/GOyBHtKSPalm7LWoq\noefYLm656xuIwOCs7RxRKeGQ2Z3UvqnX0iZVxs7lOeuC0/n61G/r+S25yGA4IdfQelBgWDEbxgJk\nqcKWAbh63vuqglqxH9KUPlOHM62H1Go1BAJpgt4AcZKg0pQojpp0sZ5r4zgSW0p80444kSAcXiMJ\nejuZNn0KC0zSShhKCrk8ZIqJnXo++geKZKliSxTyjjM07n3lohdYuuwxhCd4evXLfM1E2ie2exST\nk4hENy1V/cxZ/xJhb8b6govMaUDA2t5HKD67mnMLx+MZ5+HC+x6F1IYjPrtdG7vaJrG0fzNSNnjc\nbaJEkSUK48kg73s4NqRJSr0Ry1EJzqYU25GkMsIeo19s+4FW2NvUmFRKUa5WiYyrLo4ToizbKv71\nWlzvu6K0JTANuHJwcHCRZVnfRFvUu8w9MlwObS8Qpymx6GB5QdcqnBs+j9fezTX/5+O8r1WfiHGS\nkamUcgae4XtoNbEjRwmTodVw7kvI0q2KBqCgo62Vc08/mdNm6CSNBQtuJslSbGkP1YlUbEV83lfW\nVvWcJ5aT7iHYvDGhbjJpnqvUEa5DGA9xMTuOQ5ZqqtUhjpGhdF+tuM1rbakrxOMgXOP3SjNsIcjS\ntFmJXimBUFJTbTaIH8TWwb6GrztMI3LCJk1fJo1fMc9i2vIF2tta8c3kqzhhRZLi+z6BuaGU4pA4\nqyG8LbTlxxFVdUiiOlBm333yuIGH7evGR2nEunVFjjj4UMpVbZFH9TreqNcO6A0XJycIOnwGDPwK\nO8HZ12EDKUvTft1NEbEoHaA728hvBr4HwOLfPcGyaiunXTSD+IezWGyoWRMEXS3aiu0w/tLjpk0l\nLxTXfP5KbnZ0tPhr37yZcf6R5IOAKd06wcQLJPZroEcc16FofPVZqjMQ3dGSnNCK6+w7zuHUriOQ\nA8vol9oAmDLxQPIHnEK8sU7fwhIlwyORZIpisUzpxQq2NtiIogFE6vJ0MSHx9C2hbsNzlRqho8gb\nmGtxWYWtGXsbW1ZD87a9jimhNJ30GL2P8mPadTmseCOeO9r0ZzNUB5COpyF+JjibpTEqzQhL/Qj0\nwSpdB9f38YIWGupChnW9/vpW7nT8tpWjjjqcRx7RfvZ7772H/uw5bFvSWjAc7o5koFTlZ/f+lrHj\n9LPujjxxnHLIgQWiJKWl5UgALrjoBcrxRh5+dgV95rD/28tTrmmfwblX38ITBqBw863vYb/RR9B/\nm6KtU3O4/+6RV5k3p3eHbbzztp+ivKW0TtFZq14wiiTTFL4NAk/lSJTtoIRq0vRlWapvEZnZyw06\nZfO11/A5EoJXVcZ+pRKRMTSyLKIahkwUonkTey0WxV1R2gPA2sHBwUXm95+jlfYfxD3SIIyaPFgi\n19NDv5PR0t4BwFkTTuK0j3+Q9593fJOIybbNhULpwAmAUjaOLbGRBkEwDFnR5LdpDITCd2za8gHv\nOEaniz5ww2Tm9w6gnAAp9jJjE2yV3TV/oca8PrZkha75mMlmZZZ8oYCTZKSxarpCpCtRmUGFmPcQ\nDHE3ZNmQ0hboQFSapkNk/FLiIVH20IGjXRsZWRQOkc/YW1vaSaZTfR3bxnNGEcWbh9weaYIrJE5n\nB6kJMtmOje8HCBlTaNFKe8HqEsec1IO0NWYYpZX+pqer7L1vO75jNwmSAt9my+gIz/cIMv3/X
6yX\nSF9OmzCl15NJh3fT64Z865nfADA7hfTk/dkz3cLvEw0tHOUKHoj76Zx1J7ct1BCzB7+/lE6nzvWf\nuo7pk4/ioouv0mMgbTLgyo+9n5OP1Vfegyf3QK3COw6exJef1hhe76abePD+WaAUhx2kg62Wq5rp\nxjuSJAwRhgJ2hOPwYpaQxinSZPF2du9PqTbA6R84lrRNK/JR0ua51jLupHbar1Fin+wAABQSSURB\nVJrMotn6cDp68lQWF5eweNFqekITDCShvT2vU9Hz+lmqUnwvjxv4HHmGVlALZt0L2fBtOvzn7bNr\nlYFlCtm4untkwke5IAN9sEkhQI7DCQqkCShzw/IFSJWQRSlhrR+AOFI4tk0hf1ATTouzARFHwG93\nOn7bSj6X48wzdGGGlc+s4dHZs6nXKpzSYnjCpcfP7nqI3mKZ7/yProV6ybtOZsqB+5OXORYtfpoz\nztbZzv90k8Ocey7lxzcJ7r9fj/EHz1dkH8oxyzmJgwz/0PLbfY5um8uK52ucbrDbP1n4ESrrty+A\nAPDwY49QLFZpMUR0Ew+ayX6dHeRb2psFv+N4NI5r4zjgGT5sx7HxPRt7hC4S4pqbeaY0imwkw6xt\nBSNsh9N7JuMbwy2KEkgVK5auYOF8jcl/9dVXdzqWr+uMNC6QtZZlTTSPTgGWM8Q9Aq/DPfKP//iP\nWJaFZVm0HDjj9T5yt+yW3bJb/r+TiT0TOerUYznq1GOZcdJRO33druK0PwP80LKsPYE+4HJgD94A\n90h3a444cihXE8Ze/HEAjv4/bXzo1B48JLLBbyDAtcGxh8hTHKF0XT3RwDEbyEymAW8qgbhR004K\nbGwKOZ9L33chAN+5/jq+8PWZLCvWm5llmtB/6OxqELVLIUnjCMhwTHDAVgpHgYcgM0RKUdIIBg1Z\n+7p9oukuabRTkGGTakazVF+JbSnIObpenDCWTBLHJkEnphGQsO2tz1dpLP3AH00uF1BavQ5hcNqB\nFGT1jSRJiO02ChbU2S/I81xUo2KqeKSTW3BDjyyBMKqiDP560sFduihCpohrJhgXCPYQKa7rEvaZ\njimTaersWnbceWedy739j1Ky9f/32vdh8n5dyDClakiobAQirjL7wccIl+q5aN88kbGZi12rc0Rn\nN+dM1MHiBcuW0tPWxcUnHsXENm0JddsKp5CD0nP0jNZj9skvf4KrvvmflMKI1py2itcsrBLKnbtH\nFDDWBEw9IUnK/QgyHMMrkW8rsH9HwPhTulHotm+JM2puQCkMmXTkwbitmsPissveSfSDkDjKmvwf\nyo4Z6xXYkioyU3bKsXUsZHSQ454LNab5zpvn0ZIfChdtRyQksmHcNjaOiYUoYYKLwkEpqZ+ZYhVS\nOPg5G3/vNupxBJsM50xaZeXiR+hdOZ84MYRoCgYGHCq1CjNO1Cx7Qcv+ONGuYfMb4rs5HMNGePwx\nx3C1gDCKmsVCqtWN3Hrr9/n2rT9lwVLtdrnvoSW4jiTY6wXKa2Pue1RbyN8o+dy5MKLUC8okdi1Y\nVqP+rwnlkqJU03vrRPcybvznY1G2zf33/g0Ar+z1AfY/pgD8x3ZtzKREJVWKS00hgmqR1U8HtHft\nz9nnmria/SK2dKlWYmIzFbbtEMcjCV50CUVIfqwe58B3EZkiiwXSGXJ7CBSebeOZ/ZomKShFaWCg\nid1ulmHbgewqNetTwOE7+NMfzD2ikpB6GJJzJe95h3ZbTO8MyMkUaYtmqaFGmR+hhlJ3hVKoFDJT\nTzFJdfPTNCVOFZUoo2xqN9bCjFQ51JKU1FxjDj3yeD7ROYnVA481P0eoDKGGNsIWkwkmUs3WlaZJ\nU5mqOMQx1ViVYXBLVEiUJFux5qYNKssmJNdgRNEVQfJORt7Vz9ryPuPyAdK2m24GnVST6bJf5sBw\nR0numltsfkYzv0op4iTBC3waJUw8BDlvJLYtUUZpl4slt
qQZAyph7hLtduicfiBRPSWsbEA4qeY1\nR1/3RCZJkoTIpJcrKXgxrFKrrcMxfrdkS8rIUQGZ2rWAVNZb4+TCQRTMeAZJgL8GRiUOvtQp6460\nSSoDJHmXzDO1KLs7CTzJ7WGRLuCCaTptvDzQz99dfinFo6YhN+t5yzkg9nTJebIZN7j+769i+sNz\nKM2eT4eJmTxeLiOGIWO2lURCasbTcWw83yWsVBid1/P+0kETCfYG2/VRhuRsTJAjyhKeixOOPGEq\nD3+mG4D2KQEXX3Yu+UKOvU1geGO9xEtRnSRLEFI/S1NFrVJkfZCj13BCT/9kN8edMESR29wLjX8b\n1YEBWwZI6dLWti8Hd+h+eo5HsqFAuRZhjzCHAwJ/jM8o38d1PYSpjNO/cCH14jKkpEkOpRTY0iUJ\nq6i6Xn9XfexyhONz223bB/N2JsJxcGgcJDoQ77g+vq8/Owgmcc3VEaufW89v5swB4FvfvIlnlpXI\nBTZ7twYcfcrZAMx/9CmeL32WqgqR3Xot9cURK1ekhIlCOvqQu+SkBzhg2pGUywOMfk4XRj7pstlc\nccX7+Nwh27cxd8iZVCNFZYXOcM1USlveJSqvQVV1rsbxp0ziiBPP4Yav3cI9P/8JAG0Tuhjf+iRt\nLS1aEb+gCa+yMR5CpKwVNp4hY7NdiYrKnFbsp1w2yYHKJkljqpUKkQEj/LE+7T+pfOm8ids9+9Gf\n8fNv/A/QxIUN0adyw0o+oE0vrL2cjBcTUJmHa5T+KMfFkzZp5vK8ORED1yEJBFGUkTTqD2ZacSul\nUS628Ud7Tkp7IeCQrnbax+pNEXjabyyEaHKkOI6rI8lSNTOkbFuiPVJavLxOKhBCkWQZfjCOsFHO\nSSn8QoEMw12MPtgyERMnEW2tGiGjEocwTQhJ6cyNpcNwepSLFYpxSBSFbDb99As5Dujsol6vN33v\nURQSx1lTib+edFVTRKwYZ9AKeaHwsOlB0mJ4KbyST1wWBF4nXkvjNtSCtEE5DkLk6BhpuE9sycFd\nHXy6ow17f1OubD+XVGj+4kZ9y3eckGe/M9/HQ/MXc9KxugboV1oKiHTnqBc757Ex1QeW7wjGt7ex\nCcWWRuEEV7KxVEJkHqMNwsuN60iVcVBXO0ne5pdXnKvfy4NTO49m7oolrO3T2aD4HnEYkqR18r5R\n2klKa66AUBmFcfpNjzijh2PPmtxsV0dnB6ADo1mWgbCRxn9t2x6u53P1Zz/EDV/+hh4jL8fdd9/N\n40/10jJOK/I9BxXtYzzcXItWTK5GW3zz+q/y9VqFSr1M2rS0U5A+Qa7Axedqn/QPv3k9QUfX6873\ncImTodiO62o6XiEE0twyXMcnHwgKRx3OZ03G4u8efITv3TyTl7IaufEuDz+iYbZ33bmUSEhCEVOP\ndXKOnbm4DQZRsxzffcnloIRGTBmK4qOPnk5Ly/D9PySTz/sk+e4nWPIbjWDz03Wce+4JdBZyBMbI\nOqy7C1ulPDZ/Nnd+T3Py+25A0NZFV0crkw88gIO6HwagbVUXbt7DdRxSqfWMQhGW17Dg0ElMmaKh\nnqeeeCJLFi3F9zwGVmtFLpy3mHtk9uzZf46P+ZNIzwF6A/RMcsmyFImPLYcCbRolktFm8OCur/Gu\nYT0lMvmgaZbxTP8GDmgbg5Q23kg9zDkvpiXvk8/ljRIGW0p9w5AOUprceKS+8kr9M9AMfDakGusP\ny3ujKLS2UouSJrlTmsVUwxjX95qBSCEEowo+a5KYrFG9O3Wo1iM8fxQqUwSB3kCV9GVsW2HbgjRs\nwJIyCjmfarmGyhpB1oQ4TrHlEC51cHAQy9px1Y3jPJ80TZloLFhXOri2jWfbkOirr22n+AVHw9ca\nGaa2AimwR7QihU32ZYNvjSLSNKG1fV8wty7Hz4GElCMwlDXYnmR8+z4U8jkKEzRXhX3qsZSznWON\npQth3fC2VGMC30UGD
pZsVP2xGZ3voJ7EjHjpafYcdwzCEdipwJUuwlWcc8nJ+s3SFBLB/McriEj/\n/33b8qyp1ogj1eSgsNOEvWwHUGwp6EOo0D6K7oPbdTob8MkrTYUelWrXgvSQhis6TRJyuYB3v/sC\nPv/5L+r1kW8hF8DMrwpyN2ql/V2pmNyRx3YLVOs1PLSieOCHM7nmC5/jh/fcw0BFP8sQ+K7PSYcd\nzre/oQuEnP++91NN5WvO9bZie14TJJACnq3JqRrQQo04ENi2RJhC3B+4+GJ6lxZ5fNWDlCsl0kwH\n6c56l8+YfMBLe2RUNxmjpF7HtfPEsaJa0YpcAB2dnZx55tlM6dG3nv072pkzdh6XfGT7XMC9pM2+\nXYcSXKpvFGH/QmoCzjnhOE6ZoY3N/JhRzF+2mIlTTsI1ilWlMb7vMm/uHObNuovWFn0It/VMptCx\nD5P2+z5TevTBGwQ+teIybNtuUrL+9t5fMOvXd9PW3sbK3jVm3BU7U8+7mhH5R8lfk9L+U8mqtX8A\nMcNu+auWbMO8t7oJu+XPJBvXb3qrm/Dnd4/8pYvjyOZ31w1wbZ/G2aYMXC+Kkqb13dqWJ1ORKfXV\ngPGlrCqv513ndukrYAMwD6bi2JCFIYRgT3sktrQZJV3zzGGcFAgphgqwqW3PV+OTdySpAiWcJmWp\nIqIeVqFUAeOTbsv7rKuG1JWgbvz+Lg4qe0XXA3RlE/udZAndPQdQCG3C+U8BGqucJAlRVGtWX8+P\nDxghfZ57dudlu4aLa9vNazEAtkQ5Dpnr4pmkqGBsAdfzsF2HrEHqZO+B67j4x/vYts1J52uuj79Z\n3ke/l6O9q5u4bqCNXkBGSj0RqGGWdJaltLS24Ius+XszOLQjUUOZn3GSEEYpoS2GsbBlCCGJ4piv\nbX6Ja0sV0jRjXCFPHMc4ts0o4/LJsgySjCNPmcrobt0n14UxhYCXA59a1bBDJgmvyAJSZEjjkpt6\n2ETGHu03eWUWLdIuAvWqIo4ikA7uaO0qk45LotoIk4TEWLVhWMd2HKZMnYJt1lexbzWJlLi+i0gT\nUlOTe8aRU3n4jm8x96G5PPqknve9x+/DtPY2nqr0cvLpOhDpthRw413jm2nI/XMXvP6LdiLjJuzA\nAf0HyL/9178P+82ijx2vVylt7VbL6fiK7Z3CC5W1fOueVdzUqefyrIPaWLtpCx+4aguZ2cNBPiBX\naCWolKj0limVtKWufI9iucjcXxQ5/mR965p04CS6Wjw2VNZx+OxJAPQufpxnX7iO/SZ0NUnbgvxY\nNu3kfNittLeRhl/Z81yCwMWxXSyjtF/NMtI0RWUZedfwANuCJE0RMmtmoEopuMneg3/x7KEcCGBQ\ngSX04pBN8LbElrZ5ZkispG0Io0QzmUIgGW67e66uRJwJRZQkpJnTzKJSwkZ6Pq7rNPuTpRn9/XWk\na5MzmW1CgqdApNr3HhrmQ+E55HI+q/ueb1bj8fOjSdMNOI7NUKVXXWljV69rrV1dIGQzyBWMzuH4\nPtIb2VSGwfEBjm0jbJtEHmQGVGDbdpNQp8UE8w4+8CDqcUaYDWXHZmlMSkqinCa5krAlGVAoFPAa\nLHlKkb1Gck2axKhmcduUWj0CqZCiweQoSdKUcq1OsV5m7qYloODJegst+QKFfJ4RJj38xThklOcS\npwnSHCQygTGtOfYVHvWa7vuWOGYPKRjlBYw1rHTpyTOaZewA7rtHFxseovt1EY5BueRbaGmvU6qF\nuIZ+VKUZju3R1trajD2kUQ1p+7iei5uEeMa9ErUUmB4cSa6zh/OuuRqAk39yG1EM9XIfoTLl7KSD\nv+s5VX81IqUHWcxezUK1eUY6B0FcZ36fTrg58YKPcNv9V6DSHze3gRAutXpd51KIDDfQe+boo09C\nCpg76x5ax2s/+kthDdXiMkaldJfXAfDgPfey/77v4eRTj0OYw7qj64N856aZO2yn1Uh
6ebPElCnb\nLbtlt+yW3fIHyuDg4HZBgzddae+W3bJbdstu+dPJnyUQuVt2y27ZLbvlTyO7lfZu2S27Zbf8Fcmb\nqrQty3qHZVkrLctabVnW1W/mZ73VYllWv2VZT1mW9aRlWQvMszGWZc2yLGuVZVkPmApAf7ViWdat\nlmWttyxr6bBnO+2jZVnXWJa1xrKsZyzLOv2tafUfLzvp9/WWZQ1YlrXYfL1j2N/+6vttWVabZVkP\nWZa13LKspy3L+ox5/rad7x30+Srz/C9rrgcHB9+UL/SB0IsuR7YnsATofrM+763+QnOyjNnm2deB\nvzc/Xw3801vdzj+yj8cCU4Glr9dHoAd4Eo1Q6jBrwXqr+/An7Pf1wOd38NoD3w79BlqAqeZnD1gF\ndL+d5/s1+vwXNddvpqU9A1gzODj4/ODg4CvAT4B3vomf91aLxfY3l3eiq/pgvp//Z23Rn1gGBwfn\nAtuiR3fWx/OAnwwODmaDg4P9wBr0mvirk530G/Scbyvv5G3Q78HBwfLg4OAS83MEPAO08Tae7530\n2TCg/+XM9ZuptPcB1g77fYChAXg7yiDwoGVZCy3L+qh5tlV1H2CXqvv8lUlhJ33cdv5f4O03/5+2\nLGuJZVn/PcxN8Lbrt2VZHeibxnx2vqbfVv0e1ufHzaO/mLneHYj808kxg4OD04CzgCstyzqON1jd\n569c/n/oI8B/Ap2Dg4NTgTLwjbe4PW+KWJblAT8DPmusz7f9mt5Bn/+i5vrNVNovAO3Dfm8zz96W\nMjg4uM583wDchb4mrbcsazzA61X3+SuWnfXxBWDfYa97W83/4ODghkHj2ARuYeha/Lbpt2VZEq28\nfjA4ONgocvK2nu8d9fkvba7fTKW9EOiyLGs/y7Js4L0M5xZ9G4llWa45nbEsayRwOvA0f0B1n78i\nsdjav7ezPt4NvNeyLNuyrAlAF/DGCSjeetmq30ZhNeRCYJn5+e3U7+8BKwYHB28e9uztPt/b9fkv\nbq7f5GjsO9AR2DXAP7zV0eE3sZ8T0OiYJ9HK+h/M8xy6kN4qYBYQvNVt/SP7+SOgBLwMFNEVjMbs\nrI/ANeiI+v9r345tGIShIIDeXukZlAmYhIKGaZBShIIGKqRw6L0JfPr2Fba8Jhn+vf6bc49Jln3u\nU353va/JneSTZDvs63k/z6d7uj33ReZHzdo3doAiHiIBiihtgCJKG6CI0gYoorQBiihtgCJKG6CI\n0gYo8gUxLtEnTiMbfAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for i in range(20):\n", - " transformed_images[i] = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(transformed_images[i])\n", - " print(transformed_images[i].mean(),transformed_images[i].std(), \n", - " transformed_images[i].min(), transformed_images[i].max())\n", - "show(tutils.make_grid(transformed_images))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Random Affine transform" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - 
"metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "img = scipy.misc.ascent()\n", - "pil_img = Image.fromarray(img.astype(np.uint8))\n", - "pil_img" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "transformed_images = [None]*5\n", - "to_tensor = transforms.ToTensor()\n", - "for i in range(5):\n", - " t = transforms.RandomAffine(degrees=(-45, 45), fillcolor=128)\n", - " transformed_images[i] = to_tensor(t(pil_img))\n", - "plt.figure(figsize=(16, 16))\n", - "show(tutils.make_grid(transformed_images))" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "transformed_images = [None]*5\n", - "to_tensor = transforms.ToTensor()\n", - "for i in range(5):\n", - " t = transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), fillcolor=255)\n", - " transformed_images[i] = to_tensor(t(pil_img))\n", - "plt.figure(figsize=(16, 16))\n", - "show(tutils.make_grid(transformed_images))" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "transformed_images = [None]*5\n", - "to_tensor = transforms.ToTensor()\n", - "for i in range(5):\n", - " t = transforms.RandomAffine(degrees=0, scale=(0.5, 1.5), fillcolor=255)\n", - " transformed_images[i] = 
to_tensor(t(pil_img))\n", - "plt.figure(figsize=(16, 16))\n", - "show(tutils.make_grid(transformed_images))" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "transformed_images = [None]*5\n", - "to_tensor = transforms.ToTensor()\n", - "for i in range(5):\n", - " t = transforms.RandomAffine(degrees=0, shear=10, fillcolor=255)\n", - " transformed_images[i] = to_tensor(t(pil_img))\n", - "plt.figure(figsize=(16, 16))\n", - "show(tutils.make_grid(transformed_images))" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "transformed_images = [None]*5\n", - "to_tensor = transforms.ToTensor()\n", - "for i in range(5):\n", - " t = transforms.RandomAffine(degrees=45, translate=(0.2, 0.2), scale=(0.7, 1.2), shear=10, fillcolor=255)\n", - " transformed_images[i] = to_tensor(t(pil_img))\n", - "plt.figure(figsize=(16, 16))\n", - "show(tutils.make_grid(transformed_images))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/test/smoke_test.py b/test/smoke_test.py index c3a4bdd19d6..38f0054e6b6 100644 --- a/test/smoke_test.py +++ b/test/smoke_test.py @@ -1,4 +1,146 @@ +"""Run smoke tests""" + +import os +import sys +from 
pathlib import Path + import torch import torchvision -import torchvision.datasets as dset -import torchvision.transforms +from torchvision.io import decode_avif, decode_heic, decode_image, decode_jpeg, read_file +from torchvision.models import resnet50, ResNet50_Weights + + +SCRIPT_DIR = Path(__file__).parent + + +def smoke_test_torchvision() -> None: + print( + "Is torchvision usable?", + all(x is not None for x in [torch.ops.image.decode_png, torch.ops.torchvision.roi_align]), + ) + + +def smoke_test_torchvision_read_decode() -> None: + img_jpg = decode_image(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")) + if img_jpg.shape != (3, 606, 517): + raise RuntimeError(f"Unexpected shape of img_jpg: {img_jpg.shape}") + + img_png = decode_image(str(SCRIPT_DIR / "assets" / "interlaced_png" / "wizard_low.png")) + if img_png.shape != (4, 471, 354): + raise RuntimeError(f"Unexpected shape of img_png: {img_png.shape}") + + img_webp = decode_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp")) + if img_webp.shape != (3, 100, 100): + raise RuntimeError(f"Unexpected shape of img_webp: {img_webp.shape}") + + if sys.platform == "linux": + pass + # TODO: Fix/uncomment below (the TODO below is mostly accurate but we're + # still observing some failures on some CUDA jobs. Most are working.) + # if torch.cuda.is_available(): + # # TODO: For whatever reason this only passes on the runners that + # # support CUDA. + # # Strangely, on the CPU runners where this fails, the AVIF/HEIC + # # tests (ran with pytest) are passing. This is likely related to a + # # libcxx symbol thing, and the proper libstdc++.so get loaded only + # # with pytest? Ugh. 
+ # img_avif = decode_avif(read_file(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif"))) + # if img_avif.shape != (3, 100, 100): + # raise RuntimeError(f"Unexpected shape of img_avif: {img_avif.shape}") + + # img_heic = decode_heic( + # read_file(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic")) + # ) + # if img_heic.shape != (3, 100, 100): + # raise RuntimeError(f"Unexpected shape of img_heic: {img_heic.shape}") + else: + try: + decode_avif(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif")) + except RuntimeError as e: + assert "torchvision-extra-decoders" in str(e) + + try: + decode_heic(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic")) + except RuntimeError as e: + assert "torchvision-extra-decoders" in str(e) + + +def smoke_test_torchvision_decode_jpeg(device: str = "cpu"): + img_jpg_data = read_file(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")) + img_jpg = decode_jpeg(img_jpg_data, device=device) + if img_jpg.shape != (3, 606, 517): + raise RuntimeError(f"Unexpected shape of img_jpg: {img_jpg.shape}") + + +def smoke_test_compile() -> None: + try: + model = resnet50().cuda() + model = torch.compile(model) + x = torch.randn(1, 3, 224, 224, device="cuda") + out = model(x) + print(f"torch.compile model output: {out.shape}") + except RuntimeError: + if sys.platform == "win32": + print("Successfully caught torch.compile RuntimeError on win") + else: + raise + + +def smoke_test_torchvision_resnet50_classify(device: str = "cpu") -> None: + img = decode_image(str(SCRIPT_DIR / ".." 
/ "gallery" / "assets" / "dog2.jpg")).to(device) + + # Step 1: Initialize model with the best available weights + weights = ResNet50_Weights.DEFAULT + model = resnet50(weights=weights, progress=False).to(device) + model.eval() + + # Step 2: Initialize the inference transforms + preprocess = weights.transforms(antialias=(device != "mps")) # antialias not supported on MPS + + # Step 3: Apply inference preprocessing transforms + batch = preprocess(img).unsqueeze(0) + + # Step 4: Use the model and print the predicted category + prediction = model(batch).squeeze(0).softmax(0) + class_id = prediction.argmax().item() + score = prediction[class_id].item() + category_name = weights.meta["categories"][class_id] + expected_category = "German shepherd" + print(f"{category_name} ({device}): {100 * score:.1f}%") + if category_name != expected_category: + raise RuntimeError(f"Failed ResNet50 classify {category_name} Expected: {expected_category}") + + +def main() -> None: + print(f"torchvision: {torchvision.__version__}") + print(f"torch.cuda.is_available: {torch.cuda.is_available()}") + + print(f"{torch.ops.image._jpeg_version() = }") + if not torch.ops.image._is_compiled_against_turbo(): + msg = "Torchvision wasn't compiled against libjpeg-turbo" + if os.getenv("IS_M1_CONDA_BUILD_JOB") == "1": + # When building the conda package on M1, it's difficult to enforce + # that we build against turbo due to interactions with the libwebp + # package. So we just accept it, instead of raising an error. 
+ print(msg) + else: + raise ValueError(msg) + + smoke_test_torchvision() + smoke_test_torchvision_read_decode() + smoke_test_torchvision_resnet50_classify() + smoke_test_torchvision_decode_jpeg() + if torch.cuda.is_available(): + smoke_test_torchvision_decode_jpeg("cuda") + smoke_test_torchvision_resnet50_classify("cuda") + + # TODO: remove once pytorch/pytorch#110436 is resolved + if sys.version_info < (3, 12, 0): + smoke_test_compile() + + if torch.backends.mps.is_available(): + smoke_test_torchvision_resnet50_classify("mps") + + +if __name__ == "__main__": + main() diff --git a/test/test_architecture_ops.py b/test/test_architecture_ops.py new file mode 100644 index 00000000000..32ad1a32f89 --- /dev/null +++ b/test/test_architecture_ops.py @@ -0,0 +1,46 @@ +import unittest + +import pytest +import torch + +from torchvision.models.maxvit import SwapAxes, WindowDepartition, WindowPartition + + +class MaxvitTester(unittest.TestCase): + def test_maxvit_window_partition(self): + input_shape = (1, 3, 224, 224) + partition_size = 7 + n_partitions = input_shape[3] // partition_size + + x = torch.randn(input_shape) + + partition = WindowPartition() + departition = WindowDepartition() + + x_hat = partition(x, partition_size) + x_hat = departition(x_hat, partition_size, n_partitions, n_partitions) + + torch.testing.assert_close(x, x_hat) + + def test_maxvit_grid_partition(self): + input_shape = (1, 3, 224, 224) + partition_size = 7 + n_partitions = input_shape[3] // partition_size + + x = torch.randn(input_shape) + pre_swap = SwapAxes(-2, -3) + post_swap = SwapAxes(-2, -3) + + partition = WindowPartition() + departition = WindowDepartition() + + x_hat = partition(x, n_partitions) + x_hat = pre_swap(x_hat) + x_hat = post_swap(x_hat) + x_hat = departition(x_hat, n_partitions, partition_size, partition_size) + + torch.testing.assert_close(x, x_hat) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_backbone_utils.py b/test/test_backbone_utils.py 
index 41d54514568..c64e27f14ac 100644 --- a/test/test_backbone_utils.py +++ b/test/test_backbone_utils.py @@ -1,25 +1,336 @@ -import unittest - +import random +from copy import deepcopy +from itertools import chain +from typing import Mapping, Sequence +import pytest import torch -from torchvision.models.detection.backbone_utils import resnet_fpn_backbone - - -class ResnetFPNBackboneTester(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.dtype = torch.float32 - - def test_resnet18_fpn_backbone(self): - device = torch.device('cpu') - x = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=device) - resnet18_fpn = resnet_fpn_backbone(backbone_name='resnet18', pretrained=False) - y = resnet18_fpn(x) - self.assertEqual(list(y.keys()), [0, 1, 2, 3, 'pool']) - - def test_resnet50_fpn_backbone(self): - device = torch.device('cpu') - x = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=device) - resnet50_fpn = resnet_fpn_backbone(backbone_name='resnet50', pretrained=False) - y = resnet50_fpn(x) - self.assertEqual(list(y.keys()), [0, 1, 2, 3, 'pool']) +from common_utils import set_rng_seed +from torchvision import models +from torchvision.models._utils import IntermediateLayerGetter +from torchvision.models.detection.backbone_utils import BackboneWithFPN, mobilenet_backbone, resnet_fpn_backbone +from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names + + +@pytest.mark.parametrize("backbone_name", ("resnet18", "resnet50")) +def test_resnet_fpn_backbone(backbone_name): + x = torch.rand(1, 3, 300, 300, dtype=torch.float32, device="cpu") + model = resnet_fpn_backbone(backbone_name=backbone_name, weights=None) + assert isinstance(model, BackboneWithFPN) + y = model(x) + assert list(y.keys()) == ["0", "1", "2", "3", "pool"] + + with pytest.raises(ValueError, match=r"Trainable layers should be in the range"): + resnet_fpn_backbone(backbone_name=backbone_name, weights=None, trainable_layers=6) + with 
pytest.raises(ValueError, match=r"Each returned layer should be in the range"): + resnet_fpn_backbone(backbone_name=backbone_name, weights=None, returned_layers=[0, 1, 2, 3]) + with pytest.raises(ValueError, match=r"Each returned layer should be in the range"): + resnet_fpn_backbone(backbone_name=backbone_name, weights=None, returned_layers=[2, 3, 4, 5]) + + +@pytest.mark.parametrize("backbone_name", ("mobilenet_v2", "mobilenet_v3_large", "mobilenet_v3_small")) +def test_mobilenet_backbone(backbone_name): + with pytest.raises(ValueError, match=r"Trainable layers should be in the range"): + mobilenet_backbone(backbone_name=backbone_name, weights=None, fpn=False, trainable_layers=-1) + with pytest.raises(ValueError, match=r"Each returned layer should be in the range"): + mobilenet_backbone(backbone_name=backbone_name, weights=None, fpn=True, returned_layers=[-1, 0, 1, 2]) + with pytest.raises(ValueError, match=r"Each returned layer should be in the range"): + mobilenet_backbone(backbone_name=backbone_name, weights=None, fpn=True, returned_layers=[3, 4, 5, 6]) + model_fpn = mobilenet_backbone(backbone_name=backbone_name, weights=None, fpn=True) + assert isinstance(model_fpn, BackboneWithFPN) + model = mobilenet_backbone(backbone_name=backbone_name, weights=None, fpn=False) + assert isinstance(model, torch.nn.Sequential) + + +# Needed by TestFxFeatureExtraction.test_leaf_module_and_function +def leaf_function(x): + return int(x) + + +# Needed by TestFXFeatureExtraction. Checking that node naming conventions +# are respected. 
Particularly the index postfix of repeated node names +class TestSubModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.relu = torch.nn.ReLU() + + def forward(self, x): + x = x + 1 + x = x + 1 + x = self.relu(x) + x = self.relu(x) + return x + + +class TestModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.submodule = TestSubModule() + self.relu = torch.nn.ReLU() + + def forward(self, x): + x = self.submodule(x) + x = x + 1 + x = x + 1 + x = self.relu(x) + x = self.relu(x) + return x + + +test_module_nodes = [ + "x", + "submodule.add", + "submodule.add_1", + "submodule.relu", + "submodule.relu_1", + "add", + "add_1", + "relu", + "relu_1", +] + + +class TestFxFeatureExtraction: + inp = torch.rand(1, 3, 224, 224, dtype=torch.float32, device="cpu") + model_defaults = {"num_classes": 1} + leaf_modules = [] + + def _create_feature_extractor(self, *args, **kwargs): + """ + Apply leaf modules + """ + tracer_kwargs = {} + if "tracer_kwargs" not in kwargs: + tracer_kwargs = {"leaf_modules": self.leaf_modules} + else: + tracer_kwargs = kwargs.pop("tracer_kwargs") + return create_feature_extractor(*args, **kwargs, tracer_kwargs=tracer_kwargs, suppress_diff_warning=True) + + def _get_return_nodes(self, model): + set_rng_seed(0) + exclude_nodes_filter = [ + "getitem", + "floordiv", + "size", + "chunk", + "_assert", + "eq", + "dim", + "getattr", + ] + train_nodes, eval_nodes = get_graph_node_names( + model, tracer_kwargs={"leaf_modules": self.leaf_modules}, suppress_diff_warning=True + ) + # Get rid of any nodes that don't return tensors as they cause issues + # when testing backward pass. 
+ train_nodes = [n for n in train_nodes if not any(x in n for x in exclude_nodes_filter)] + eval_nodes = [n for n in eval_nodes if not any(x in n for x in exclude_nodes_filter)] + return random.sample(train_nodes, 10), random.sample(eval_nodes, 10) + + @pytest.mark.parametrize("model_name", models.list_models(models)) + def test_build_fx_feature_extractor(self, model_name): + set_rng_seed(0) + model = models.get_model(model_name, **self.model_defaults).eval() + train_return_nodes, eval_return_nodes = self._get_return_nodes(model) + # Check that it works with both a list and dict for return nodes + self._create_feature_extractor( + model, train_return_nodes={v: v for v in train_return_nodes}, eval_return_nodes=eval_return_nodes + ) + self._create_feature_extractor( + model, train_return_nodes=train_return_nodes, eval_return_nodes=eval_return_nodes + ) + # Check must specify return nodes + with pytest.raises(ValueError): + self._create_feature_extractor(model) + # Check return_nodes and train_return_nodes / eval_return nodes + # mutual exclusivity + with pytest.raises(ValueError): + self._create_feature_extractor( + model, return_nodes=train_return_nodes, train_return_nodes=train_return_nodes + ) + # Check train_return_nodes / eval_return nodes must both be specified + with pytest.raises(ValueError): + self._create_feature_extractor(model, train_return_nodes=train_return_nodes) + # Check invalid node name raises ValueError + with pytest.raises(ValueError): + # First just double check that this node really doesn't exist + if not any(n.startswith("l") or n.startswith("l.") for n in chain(train_return_nodes, eval_return_nodes)): + self._create_feature_extractor(model, train_return_nodes=["l"], eval_return_nodes=["l"]) + else: # otherwise skip this check + raise ValueError + + def test_node_name_conventions(self): + model = TestModule() + train_nodes, _ = get_graph_node_names(model) + assert all(a == b for a, b in zip(train_nodes, test_module_nodes)) + + 
@pytest.mark.parametrize("model_name", models.list_models(models)) + def test_forward_backward(self, model_name): + model = models.get_model(model_name, **self.model_defaults).train() + train_return_nodes, eval_return_nodes = self._get_return_nodes(model) + model = self._create_feature_extractor( + model, train_return_nodes=train_return_nodes, eval_return_nodes=eval_return_nodes + ) + out = model(self.inp) + out_agg = 0 + for node_out in out.values(): + if isinstance(node_out, Sequence): + out_agg += sum(o.float().mean() for o in node_out if o is not None) + elif isinstance(node_out, Mapping): + out_agg += sum(o.float().mean() for o in node_out.values() if o is not None) + else: + # Assume that the only other alternative at this point is a Tensor + out_agg += node_out.float().mean() + out_agg.backward() + + def test_feature_extraction_methods_equivalence(self): + model = models.resnet18(**self.model_defaults).eval() + return_layers = {"layer1": "layer1", "layer2": "layer2", "layer3": "layer3", "layer4": "layer4"} + + ilg_model = IntermediateLayerGetter(model, return_layers).eval() + fx_model = self._create_feature_extractor(model, return_layers) + + # Check that we have same parameters + for (n1, p1), (n2, p2) in zip(ilg_model.named_parameters(), fx_model.named_parameters()): + assert n1 == n2 + assert p1.equal(p2) + + # And that outputs match + with torch.no_grad(): + ilg_out = ilg_model(self.inp) + fgn_out = fx_model(self.inp) + assert all(k1 == k2 for k1, k2 in zip(ilg_out.keys(), fgn_out.keys())) + for k in ilg_out.keys(): + assert ilg_out[k].equal(fgn_out[k]) + + @pytest.mark.parametrize("model_name", models.list_models(models)) + def test_jit_forward_backward(self, model_name): + set_rng_seed(0) + model = models.get_model(model_name, **self.model_defaults).train() + train_return_nodes, eval_return_nodes = self._get_return_nodes(model) + model = self._create_feature_extractor( + model, train_return_nodes=train_return_nodes, eval_return_nodes=eval_return_nodes 
+ ) + model = torch.jit.script(model) + fgn_out = model(self.inp) + out_agg = 0 + for node_out in fgn_out.values(): + if isinstance(node_out, Sequence): + out_agg += sum(o.float().mean() for o in node_out if o is not None) + elif isinstance(node_out, Mapping): + out_agg += sum(o.float().mean() for o in node_out.values() if o is not None) + else: + # Assume that the only other alternative at this point is a Tensor + out_agg += node_out.float().mean() + out_agg.backward() + + def test_train_eval(self): + class TestModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.dropout = torch.nn.Dropout(p=1.0) + + def forward(self, x): + x = x.float().mean() + x = self.dropout(x) # dropout + if self.training: + x += 100 # add + else: + x *= 0 # mul + x -= 0 # sub + return x + + model = TestModel() + + train_return_nodes = ["dropout", "add", "sub"] + eval_return_nodes = ["dropout", "mul", "sub"] + + def checks(model, mode): + with torch.no_grad(): + out = model(torch.ones(10, 10)) + if mode == "train": + # Check that dropout is respected + assert out["dropout"].item() == 0 + # Check that control flow dependent on training_mode is respected + assert out["sub"].item() == 100 + assert "add" in out + assert "mul" not in out + elif mode == "eval": + # Check that dropout is respected + assert out["dropout"].item() == 1 + # Check that control flow dependent on training_mode is respected + assert out["sub"].item() == 0 + assert "mul" in out + assert "add" not in out + + # Starting from train mode + model.train() + fx_model = self._create_feature_extractor( + model, train_return_nodes=train_return_nodes, eval_return_nodes=eval_return_nodes + ) + # Check that the models stay in their original training state + assert model.training + assert fx_model.training + # Check outputs + checks(fx_model, "train") + # Check outputs after switching to eval mode + fx_model.eval() + checks(fx_model, "eval") + + # Starting from eval mode + model.eval() + fx_model = 
self._create_feature_extractor( + model, train_return_nodes=train_return_nodes, eval_return_nodes=eval_return_nodes + ) + # Check that the models stay in their original training state + assert not model.training + assert not fx_model.training + # Check outputs + checks(fx_model, "eval") + # Check outputs after switching to train mode + fx_model.train() + checks(fx_model, "train") + + def test_leaf_module_and_function(self): + class LeafModule(torch.nn.Module): + def forward(self, x): + # This would raise a TypeError if it were not in a leaf module + int(x.shape[0]) + return torch.nn.functional.relu(x + 4) + + class TestModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 1, 3) + self.leaf_module = LeafModule() + + def forward(self, x): + leaf_function(x.shape[0]) + x = self.conv(x) + return self.leaf_module(x) + + model = self._create_feature_extractor( + TestModule(), + return_nodes=["leaf_module"], + tracer_kwargs={"leaf_modules": [LeafModule], "autowrap_functions": [leaf_function]}, + ).train() + + # Check that LeafModule is not in the list of nodes + assert "relu" not in [str(n) for n in model.graph.nodes] + assert "leaf_module" in [str(n) for n in model.graph.nodes] + + # Check forward + out = model(self.inp) + # And backward + out["leaf_module"].float().mean().backward() + + def test_deepcopy(self): + # Non-regression test for https://github.com/pytorch/vision/issues/8634 + model = models.efficientnet_b3(weights=None) + extractor = create_feature_extractor(model=model, return_nodes={"classifier.0": "out"}) + + extractor.eval() + extractor.train() + extractor = deepcopy(extractor) + extractor.eval() + extractor.train() diff --git a/test/test_cpp_models.py b/test/test_cpp_models.py deleted file mode 100644 index b6654a0278d..00000000000 --- a/test/test_cpp_models.py +++ /dev/null @@ -1,150 +0,0 @@ -import torch -import os -import unittest -from torchvision import models, transforms -import sys - -from PIL import 
Image -import torchvision.transforms.functional as F - -try: - from torchvision import _C_tests -except ImportError: - _C_tests = None - - -def process_model(model, tensor, func, name): - model.eval() - traced_script_module = torch.jit.trace(model, tensor) - traced_script_module.save("model.pt") - - py_output = model.forward(tensor) - cpp_output = func("model.pt", tensor) - - assert torch.allclose(py_output, cpp_output), 'Output mismatch of ' + name + ' models' - - -def read_image1(): - image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') - image = Image.open(image_path) - image = image.resize((224, 224)) - x = F.to_tensor(image) - return x.view(1, 3, 224, 224) - - -def read_image2(): - image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') - image = Image.open(image_path) - image = image.resize((299, 299)) - x = F.to_tensor(image) - x = x.view(1, 3, 299, 299) - return torch.cat([x, x], 0) - - -@unittest.skipIf( - sys.platform == "darwin" or True, - "C++ models are broken on OS X at the moment, and there's a BC breakage on master; " - "see https://github.com/pytorch/vision/issues/1191") -class Tester(unittest.TestCase): - pretrained = False - image = read_image1() - - def test_alexnet(self): - process_model(models.alexnet(self.pretrained), self.image, _C_tests.forward_alexnet, 'Alexnet') - - def test_vgg11(self): - process_model(models.vgg11(self.pretrained), self.image, _C_tests.forward_vgg11, 'VGG11') - - def test_vgg13(self): - process_model(models.vgg13(self.pretrained), self.image, _C_tests.forward_vgg13, 'VGG13') - - def test_vgg16(self): - process_model(models.vgg16(self.pretrained), self.image, _C_tests.forward_vgg16, 'VGG16') - - def test_vgg19(self): - process_model(models.vgg19(self.pretrained), self.image, _C_tests.forward_vgg19, 'VGG19') - - def test_vgg11_bn(self): - process_model(models.vgg11_bn(self.pretrained), self.image, 
_C_tests.forward_vgg11bn, 'VGG11BN') - - def test_vgg13_bn(self): - process_model(models.vgg13_bn(self.pretrained), self.image, _C_tests.forward_vgg13bn, 'VGG13BN') - - def test_vgg16_bn(self): - process_model(models.vgg16_bn(self.pretrained), self.image, _C_tests.forward_vgg16bn, 'VGG16BN') - - def test_vgg19_bn(self): - process_model(models.vgg19_bn(self.pretrained), self.image, _C_tests.forward_vgg19bn, 'VGG19BN') - - def test_resnet18(self): - process_model(models.resnet18(self.pretrained), self.image, _C_tests.forward_resnet18, 'Resnet18') - - def test_resnet34(self): - process_model(models.resnet34(self.pretrained), self.image, _C_tests.forward_resnet34, 'Resnet34') - - def test_resnet50(self): - process_model(models.resnet50(self.pretrained), self.image, _C_tests.forward_resnet50, 'Resnet50') - - def test_resnet101(self): - process_model(models.resnet101(self.pretrained), self.image, _C_tests.forward_resnet101, 'Resnet101') - - def test_resnet152(self): - process_model(models.resnet152(self.pretrained), self.image, _C_tests.forward_resnet152, 'Resnet152') - - def test_resnext50_32x4d(self): - process_model(models.resnext50_32x4d(), self.image, _C_tests.forward_resnext50_32x4d, 'ResNext50_32x4d') - - def test_resnext101_32x8d(self): - process_model(models.resnext101_32x8d(), self.image, _C_tests.forward_resnext101_32x8d, 'ResNext101_32x8d') - - def test_wide_resnet50_2(self): - process_model(models.wide_resnet50_2(), self.image, _C_tests.forward_wide_resnet50_2, 'WideResNet50_2') - - def test_wide_resnet101_2(self): - process_model(models.wide_resnet101_2(), self.image, _C_tests.forward_wide_resnet101_2, 'WideResNet101_2') - - def test_squeezenet1_0(self): - process_model(models.squeezenet1_0(self.pretrained), self.image, - _C_tests.forward_squeezenet1_0, 'Squeezenet1.0') - - def test_squeezenet1_1(self): - process_model(models.squeezenet1_1(self.pretrained), self.image, - _C_tests.forward_squeezenet1_1, 'Squeezenet1.1') - - def test_densenet121(self): - 
process_model(models.densenet121(self.pretrained), self.image, _C_tests.forward_densenet121, 'Densenet121') - - def test_densenet169(self): - process_model(models.densenet169(self.pretrained), self.image, _C_tests.forward_densenet169, 'Densenet169') - - def test_densenet201(self): - process_model(models.densenet201(self.pretrained), self.image, _C_tests.forward_densenet201, 'Densenet201') - - def test_densenet161(self): - process_model(models.densenet161(self.pretrained), self.image, _C_tests.forward_densenet161, 'Densenet161') - - def test_mobilenet_v2(self): - process_model(models.mobilenet_v2(self.pretrained), self.image, _C_tests.forward_mobilenetv2, 'MobileNet') - - def test_googlenet(self): - process_model(models.googlenet(self.pretrained), self.image, _C_tests.forward_googlenet, 'GoogLeNet') - - def test_mnasnet0_5(self): - process_model(models.mnasnet0_5(self.pretrained), self.image, _C_tests.forward_mnasnet0_5, 'MNASNet0_5') - - def test_mnasnet0_75(self): - process_model(models.mnasnet0_75(self.pretrained), self.image, _C_tests.forward_mnasnet0_75, 'MNASNet0_75') - - def test_mnasnet1_0(self): - process_model(models.mnasnet1_0(self.pretrained), self.image, _C_tests.forward_mnasnet1_0, 'MNASNet1_0') - - def test_mnasnet1_3(self): - process_model(models.mnasnet1_3(self.pretrained), self.image, _C_tests.forward_mnasnet1_3, 'MNASNet1_3') - - def test_inception_v3(self): - self.image = read_image2() - process_model(models.inception_v3(self.pretrained), self.image, _C_tests.forward_inceptionv3, 'Inceptionv3') - - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_datasets.py b/test/test_datasets.py index 2410f18de09..7e91571744a 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1,215 +1,3549 @@ -import sys +import bz2 +import contextlib +import csv +import io +import itertools +import json import os +import pathlib +import pickle +import random +import re +import shutil +import string import unittest -import mock +import 
xml.etree.ElementTree as ET +import zipfile +from typing import Callable, Tuple, Union + +import datasets_utils import numpy as np import PIL -from PIL import Image -from torch._utils_internal import get_file_path_2 -import torchvision -from common_utils import get_tmp_dir -from fakedata_generation import mnist_root, cifar_root, imagenet_root, \ - cityscapes_root, svhn_root - - -try: - import scipy - HAS_SCIPY = True -except ImportError: - HAS_SCIPY = False - - -class Tester(unittest.TestCase): - def generic_classification_dataset_test(self, dataset, num_images=1): - self.assertEqual(len(dataset), num_images) - img, target = dataset[0] - self.assertTrue(isinstance(img, PIL.Image.Image)) - self.assertTrue(isinstance(target, int)) - - def generic_segmentation_dataset_test(self, dataset, num_images=1): - self.assertEqual(len(dataset), num_images) - img, target = dataset[0] - self.assertTrue(isinstance(img, PIL.Image.Image)) - self.assertTrue(isinstance(target, PIL.Image.Image)) - - def test_imagefolder(self): - # TODO: create the fake data on-the-fly - FAKEDATA_DIR = get_file_path_2( - os.path.dirname(os.path.abspath(__file__)), 'assets', 'fakedata') - - with get_tmp_dir(src=os.path.join(FAKEDATA_DIR, 'imagefolder')) as root: - classes = sorted(['a', 'b']) - class_a_image_files = [os.path.join(root, 'a', file) - for file in ('a1.png', 'a2.png', 'a3.png')] - class_b_image_files = [os.path.join(root, 'b', file) - for file in ('b1.png', 'b2.png', 'b3.png', 'b4.png')] - dataset = torchvision.datasets.ImageFolder(root, loader=lambda x: x) - - # test if all classes are present - self.assertEqual(classes, sorted(dataset.classes)) - - # test if combination of classes and class_to_index functions correctly - for cls in classes: - self.assertEqual(cls, dataset.classes[dataset.class_to_idx[cls]]) - - # test if all images were detected correctly - class_a_idx = dataset.class_to_idx['a'] - class_b_idx = dataset.class_to_idx['b'] - imgs_a = [(img_file, class_a_idx) for img_file in 
class_a_image_files] - imgs_b = [(img_file, class_b_idx) for img_file in class_b_image_files] - imgs = sorted(imgs_a + imgs_b) - self.assertEqual(imgs, dataset.imgs) - - # test if the datasets outputs all images correctly - outputs = sorted([dataset[i] for i in range(len(dataset))]) - self.assertEqual(imgs, outputs) - - # redo all tests with specified valid image files - dataset = torchvision.datasets.ImageFolder(root, loader=lambda x: x, - is_valid_file=lambda x: '3' in x) - self.assertEqual(classes, sorted(dataset.classes)) - - class_a_idx = dataset.class_to_idx['a'] - class_b_idx = dataset.class_to_idx['b'] - imgs_a = [(img_file, class_a_idx) for img_file in class_a_image_files - if '3' in img_file] - imgs_b = [(img_file, class_b_idx) for img_file in class_b_image_files - if '3' in img_file] - imgs = sorted(imgs_a + imgs_b) - self.assertEqual(imgs, dataset.imgs) - - outputs = sorted([dataset[i] for i in range(len(dataset))]) - self.assertEqual(imgs, outputs) - - @mock.patch('torchvision.datasets.mnist.download_and_extract_archive') - def test_mnist(self, mock_download_extract): - num_examples = 30 - with mnist_root(num_examples, "MNIST") as root: - dataset = torchvision.datasets.MNIST(root, download=True) - self.generic_classification_dataset_test(dataset, num_images=num_examples) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.mnist.download_and_extract_archive') - def test_kmnist(self, mock_download_extract): - num_examples = 30 - with mnist_root(num_examples, "KMNIST") as root: - dataset = torchvision.datasets.KMNIST(root, download=True) - self.generic_classification_dataset_test(dataset, num_images=num_examples) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.mnist.download_and_extract_archive') - def test_fashionmnist(self, mock_download_extract): - num_examples = 30 - with 
mnist_root(num_examples, "FashionMNIST") as root: - dataset = torchvision.datasets.FashionMNIST(root, download=True) - self.generic_classification_dataset_test(dataset, num_images=num_examples) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.imagenet._verify_archive') - @unittest.skipIf(not HAS_SCIPY, "scipy unavailable") - def test_imagenet(self, mock_verify): - with imagenet_root() as root: - dataset = torchvision.datasets.ImageNet(root, split='train') - self.generic_classification_dataset_test(dataset) - - dataset = torchvision.datasets.ImageNet(root, split='val') - self.generic_classification_dataset_test(dataset) - - @mock.patch('torchvision.datasets.cifar.check_integrity') - @mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity') - def test_cifar10(self, mock_ext_check, mock_int_check): - mock_ext_check.return_value = True - mock_int_check.return_value = True - with cifar_root('CIFAR10') as root: - dataset = torchvision.datasets.CIFAR10(root, train=True, download=True) - self.generic_classification_dataset_test(dataset, num_images=5) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - dataset = torchvision.datasets.CIFAR10(root, train=False, download=True) - self.generic_classification_dataset_test(dataset) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.cifar.check_integrity') - @mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity') - def test_cifar100(self, mock_ext_check, mock_int_check): - mock_ext_check.return_value = True - mock_int_check.return_value = True - with cifar_root('CIFAR100') as root: - dataset = torchvision.datasets.CIFAR100(root, train=True, download=True) - self.generic_classification_dataset_test(dataset) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) 
- - dataset = torchvision.datasets.CIFAR100(root, train=False, download=True) - self.generic_classification_dataset_test(dataset) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_cityscapes(self): - with cityscapes_root() as root: - - for mode in ['coarse', 'fine']: - - if mode == 'coarse': - splits = ['train', 'train_extra', 'val'] - else: - splits = ['train', 'val', 'test'] - - for split in splits: - for target_type in ['semantic', 'instance']: - dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type=target_type, mode=mode) - self.generic_segmentation_dataset_test(dataset, num_images=2) - - color_dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type='color', mode=mode) - color_img, color_target = color_dataset[0] - self.assertTrue(isinstance(color_img, PIL.Image.Image)) - self.assertTrue(np.array(color_target).shape[2] == 4) - - polygon_dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type='polygon', mode=mode) - polygon_img, polygon_target = polygon_dataset[0] - self.assertTrue(isinstance(polygon_img, PIL.Image.Image)) - self.assertTrue(isinstance(polygon_target, dict)) - self.assertTrue(isinstance(polygon_target['imgHeight'], int)) - self.assertTrue(isinstance(polygon_target['objects'], list)) - - # Test multiple target types - targets_combo = ['semantic', 'polygon', 'color'] - multiple_types_dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type=targets_combo, - mode=mode) - output = multiple_types_dataset[0] - self.assertTrue(isinstance(output, tuple)) - self.assertTrue(len(output) == 2) - self.assertTrue(isinstance(output[0], PIL.Image.Image)) - self.assertTrue(isinstance(output[1], tuple)) - self.assertTrue(len(output[1]) == 3) - self.assertTrue(isinstance(output[1][0], PIL.Image.Image)) # semantic - self.assertTrue(isinstance(output[1][1], 
dict)) # polygon - self.assertTrue(isinstance(output[1][2], PIL.Image.Image)) # color - - @mock.patch('torchvision.datasets.SVHN._check_integrity') - @unittest.skipIf(not HAS_SCIPY, "scipy unavailable") - def test_svhn(self, mock_check): - mock_check.return_value = True - with svhn_root() as root: - dataset = torchvision.datasets.SVHN(root, split="train") - self.generic_classification_dataset_test(dataset, num_images=2) - - dataset = torchvision.datasets.SVHN(root, split="test") - self.generic_classification_dataset_test(dataset, num_images=2) - - dataset = torchvision.datasets.SVHN(root, split="extra") - self.generic_classification_dataset_test(dataset, num_images=2) - - -if __name__ == '__main__': +import pytest +import torch +import torch.nn.functional as F +from common_utils import combinations_grid +from torchvision import datasets +from torchvision.transforms import v2 + + +class STL10TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.STL10 + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) + + @staticmethod + def _make_binary_file(num_elements, root, name): + file_name = os.path.join(root, name) + np.zeros(num_elements, dtype=np.uint8).tofile(file_name) + + @staticmethod + def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): + STL10TestCase._make_binary_file(num_images * num_channels * height * width, root, name) + + @staticmethod + def _make_label_file(num_images, root, name): + STL10TestCase._make_binary_file(num_images, root, name) + + @staticmethod + def _make_class_names_file(root, name="class_names.txt"): + with open(os.path.join(root, name), "w") as fh: + for cname in ("airplane", "bird"): + fh.write(f"{cname}\n") + + @staticmethod + def _make_fold_indices_file(root): + num_folds = 10 + offset = 0 + with open(os.path.join(root, "fold_indices.txt"), "w") as fh: + for fold in range(num_folds): + line = " ".join([str(idx) for idx in range(offset, offset + 
fold + 1)]) + fh.write(f"{line}\n") + offset += fold + 1 + + return tuple(range(1, num_folds + 1)) + + @staticmethod + def _make_train_files(root, num_unlabeled_images=1): + num_images_in_fold = STL10TestCase._make_fold_indices_file(root) + num_train_images = sum(num_images_in_fold) + + STL10TestCase._make_image_file(num_train_images, root, "train_X.bin") + STL10TestCase._make_label_file(num_train_images, root, "train_y.bin") + STL10TestCase._make_image_file(1, root, "unlabeled_X.bin") + + return dict(train=num_train_images, unlabeled=num_unlabeled_images) + + @staticmethod + def _make_test_files(root, num_images=2): + STL10TestCase._make_image_file(num_images, root, "test_X.bin") + STL10TestCase._make_label_file(num_images, root, "test_y.bin") + + return dict(test=num_images) + + def inject_fake_data(self, tmpdir, config): + root_folder = os.path.join(tmpdir, "stl10_binary") + os.mkdir(root_folder) + + num_images_in_split = self._make_train_files(root_folder) + num_images_in_split.update(self._make_test_files(root_folder)) + self._make_class_names_file(root_folder) + + return sum(num_images_in_split[part] for part in config["split"].split("+")) + + def test_folds(self): + for fold in range(10): + with self.create_dataset(split="train", folds=fold) as (dataset, _): + assert len(dataset) == fold + 1 + + def test_unlabeled(self): + with self.create_dataset(split="unlabeled") as (dataset, _): + labels = [dataset[idx][1] for idx in range(len(dataset))] + assert all(label == -1 for label in labels) + + def test_invalid_folds1(self): + with pytest.raises(ValueError): + with self.create_dataset(folds=10): + pass + + def test_invalid_folds2(self): + with pytest.raises(ValueError): + with self.create_dataset(folds="0"): + pass + + +class Caltech101TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech101 + FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple)) + + ADDITIONAL_CONFIGS = combinations_grid(target_type=("category", "annotation", 
["category", "annotation"])) + REQUIRED_PACKAGES = ("scipy",) + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) / "caltech101" + images = root / "101_ObjectCategories" + annotations = root / "Annotations" + + categories = (("Faces", "Faces_2"), ("helicopter", "helicopter"), ("ying_yang", "ying_yang")) + num_images_per_category = 2 + + for image_category, annotation_category in categories: + datasets_utils.create_image_folder( + root=images, + name=image_category, + file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + self._create_annotation_folder( + root=annotations, + name=annotation_category, + file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat", + num_examples=num_images_per_category, + ) + + # This is included in the original archive, but is removed by the dataset. Thus, an empty directory suffices. + os.makedirs(images / "BACKGROUND_Google") + + return num_images_per_category * len(categories) + + def _create_annotation_folder(self, root, name, file_name_fn, num_examples): + root = pathlib.Path(root) / name + os.makedirs(root) + + for idx in range(num_examples): + self._create_annotation_file(root, file_name_fn(idx)) + + def _create_annotation_file(self, root, name): + mdict = dict(obj_contour=torch.rand((2, torch.randint(3, 6, size=())), dtype=torch.float64).numpy()) + datasets_utils.lazy_importer.scipy.io.savemat(str(pathlib.Path(root) / name), mdict) + + def test_combined_targets(self): + target_types = ["category", "annotation"] + + individual_targets = [] + for target_type in target_types: + with self.create_dataset(target_type=target_type) as (dataset, _): + _, target = dataset[0] + individual_targets.append(target) + + with self.create_dataset(target_type=target_types) as (dataset, _): + _, combined_targets = dataset[0] + + actual = len(individual_targets) + expected = len(combined_targets) + assert ( + actual == expected + ), "The number of the returned combined targets 
does not match the the number targets if requested " + f"individually: {actual} != {expected}", + + for target_type, combined_target, individual_target in zip(target_types, combined_targets, individual_targets): + with self.subTest(target_type=target_type): + actual = type(combined_target) + expected = type(individual_target) + assert ( + actual is expected + ), "Type of the combined target does not match the type of the corresponding individual target: " + f"{actual} is not {expected}", + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(target_type="category", transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class Caltech256TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech256 + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) / "caltech256" / "256_ObjectCategories" + + categories = ((1, "ak47"), (2, "american-flag"), (3, "backpack")) + num_images_per_category = 2 + + for idx, category in categories: + datasets_utils.create_image_folder( + tmpdir, + name=f"{idx:03d}.{category}", + file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + + return num_images_per_category * len(categories) + + +class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.WIDERFace + FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) + + def inject_fake_data(self, tmpdir, config): + widerface_dir = pathlib.Path(tmpdir) / "widerface" + annotations_dir = widerface_dir / "wider_face_split" + os.makedirs(annotations_dir) + + split_to_idx = split_to_num_examples = { + "train": 1, + "val": 2, + "test": 3, + } + + # We need to create all folders regardless of the split in config + for 
split in ("train", "val", "test"): + split_idx = split_to_idx[split] + num_examples = split_to_num_examples[split] + + datasets_utils.create_image_folder( + root=tmpdir, + name=widerface_dir / f"WIDER_{split}" / "images" / "0--Parade", + file_name_fn=lambda image_idx: f"0_Parade_marchingband_1_{split_idx + image_idx}.jpg", + num_examples=num_examples, + ) + + annotation_file_name = { + "train": annotations_dir / "wider_face_train_bbx_gt.txt", + "val": annotations_dir / "wider_face_val_bbx_gt.txt", + "test": annotations_dir / "wider_face_test_filelist.txt", + }[split] + + annotation_content = { + "train": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "val": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "test": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n" + for image_idx in range(num_examples) + ), + }[split] + + with open(annotation_file_name, "w") as annotation_file: + annotation_file.write(annotation_content) + + return split_to_num_examples[config["split"]] + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class CityScapesTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Cityscapes + TARGET_TYPES = ( + "instance", + "semantic", + "polygon", + "color", + ) + ADDITIONAL_CONFIGS = ( + *combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES), + *combinations_grid( + mode=("coarse",), + split=("train", "train_extra", "val"), + target_type=TARGET_TYPES, + ), + ) + FEATURE_TYPES = (PIL.Image.Image, (dict, PIL.Image.Image)) + + def inject_fake_data(self, tmpdir, 
config): + + tmpdir = pathlib.Path(tmpdir) + + mode_to_splits = { + "Coarse": ["train", "train_extra", "val"], + "Fine": ["train", "test", "val"], + } + + if config["split"] == "train": # just for coverage of the number of samples + cities = ["bochum", "bremen"] + else: + cities = ["bochum"] + + polygon_target = { + "imgHeight": 1024, + "imgWidth": 2048, + "objects": [ + { + "label": "sky", + "polygon": [ + [1241, 0], + [1234, 156], + [1478, 197], + [1611, 172], + [1606, 0], + ], + }, + { + "label": "road", + "polygon": [ + [0, 448], + [1331, 274], + [1473, 265], + [2047, 605], + [2047, 1023], + [0, 1023], + ], + }, + ], + } + + for mode in ["Coarse", "Fine"]: + gt_dir = tmpdir / f"gt{mode}" + for split in mode_to_splits[mode]: + for city in cities: + + def make_image(name, size=10): + datasets_utils.create_image_folder( + root=gt_dir / split, + name=city, + file_name_fn=lambda _: name, + size=size, + num_examples=1, + ) + + make_image(f"{city}_000000_000000_gt{mode}_instanceIds.png") + make_image(f"{city}_000000_000000_gt{mode}_labelIds.png") + make_image(f"{city}_000000_000000_gt{mode}_color.png", size=(4, 10, 10)) + + polygon_target_name = gt_dir / split / city / f"{city}_000000_000000_gt{mode}_polygons.json" + with open(polygon_target_name, "w") as outfile: + json.dump(polygon_target, outfile) + + # Create leftImg8bit folder + for split in ["test", "train_extra", "train", "val"]: + for city in cities: + datasets_utils.create_image_folder( + root=tmpdir / "leftImg8bit" / split, + name=city, + file_name_fn=lambda _: f"{city}_000000_000000_leftImg8bit.png", + num_examples=1, + ) + + info = {"num_examples": len(cities)} + if config["target_type"] == "polygon": + info["expected_polygon_target"] = polygon_target + return info + + def test_combined_targets(self): + target_types = ["semantic", "polygon", "color"] + + with self.create_dataset(target_type=target_types) as (dataset, _): + output = dataset[0] + assert isinstance(output, tuple) + assert len(output) == 2 + 
assert isinstance(output[0], PIL.Image.Image) + assert isinstance(output[1], tuple) + assert len(output[1]) == 3 + assert isinstance(output[1][0], PIL.Image.Image) # semantic + assert isinstance(output[1][1], dict) # polygon + assert isinstance(output[1][2], PIL.Image.Image) # color + + def test_feature_types_target_color(self): + with self.create_dataset(target_type="color") as (dataset, _): + color_img, color_target = dataset[0] + assert isinstance(color_img, PIL.Image.Image) + assert np.array(color_target).shape[2] == 4 + + def test_feature_types_target_polygon(self): + with self.create_dataset(target_type="polygon") as (dataset, info): + polygon_img, polygon_target = dataset[0] + assert isinstance(polygon_img, PIL.Image.Image) + (polygon_target, info["expected_polygon_target"]) + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + for target_type in ["instance", "semantic", ["instance", "semantic"]]: + with self.create_dataset(target_type=target_type, transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.ImageNet + REQUIRED_PACKAGES = ("scipy",) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val")) + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + wnid = "n01234567" + if config["split"] == "train": + num_examples = 3 + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / "train" / wnid / wnid, + file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", + num_examples=num_examples, + ) + else: + num_examples = 1 + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / "val" / wnid, + file_name_fn=lambda image_ifx: "ILSVRC2012_val_0000000{image_idx}.JPEG", + num_examples=num_examples, + ) + + wnid_to_classes = {wnid: [1]} + torch.save((wnid_to_classes, None), tmpdir / "meta.bin") + 
return num_examples + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CIFAR10 + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) + + _VERSION_CONFIG = dict( + base_folder="cifar-10-batches-py", + train_files=tuple(f"data_batch_{idx}" for idx in range(1, 6)), + test_files=("test_batch",), + labels_key="labels", + meta_file="batches.meta", + num_categories=10, + categories_key="label_names", + ) + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) / self._VERSION_CONFIG["base_folder"] + os.makedirs(tmpdir) + + num_images_per_file = 1 + for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]): + self._create_batch_file(tmpdir, name, num_images_per_file) + + categories = self._create_meta_file(tmpdir) + + return dict( + num_examples=num_images_per_file + * len(self._VERSION_CONFIG["train_files"] if config["train"] else self._VERSION_CONFIG["test_files"]), + categories=categories, + ) + + def _create_batch_file(self, root, name, num_images): + np_rng = np.random.RandomState(0) + data = datasets_utils.create_image_or_video_tensor((num_images, 32 * 32 * 3)) + labels = np_rng.randint(0, self._VERSION_CONFIG["num_categories"], size=num_images).tolist() + self._create_binary_file(root, name, {"data": data, self._VERSION_CONFIG["labels_key"]: labels}) + + def _create_meta_file(self, root): + categories = [ + f"{idx:0{len(str(self._VERSION_CONFIG['num_categories'] - 1))}d}" + for idx in range(self._VERSION_CONFIG["num_categories"]) + ] + self._create_binary_file( + root, self._VERSION_CONFIG["meta_file"], {self._VERSION_CONFIG["categories_key"]: categories} + ) + return categories + + def 
_create_binary_file(self, root, name, content): + with open(pathlib.Path(root) / name, "wb") as fh: + pickle.dump(content, fh) + + def test_class_to_idx(self): + with self.create_dataset() as (dataset, info): + expected = {category: label for label, category in enumerate(info["categories"])} + actual = dataset.class_to_idx + assert actual == expected + + +class CIFAR100(CIFAR10TestCase): + DATASET_CLASS = datasets.CIFAR100 + + _VERSION_CONFIG = dict( + base_folder="cifar-100-python", + train_files=("train",), + test_files=("test",), + labels_key="fine_labels", + meta_file="meta", + num_categories=100, + categories_key="fine_label_names", + ) + + +class CelebATestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CelebA + FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None))) + + ADDITIONAL_CONFIGS = combinations_grid( + split=("train", "valid", "test", "all"), + target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]), + ) + + _SPLIT_TO_IDX = dict(train=0, valid=1, test=2) + + def inject_fake_data(self, tmpdir, config): + base_folder = pathlib.Path(tmpdir) / "celeba" + os.makedirs(base_folder) + + num_images, num_images_per_split = self._create_split_txt(base_folder) + + datasets_utils.create_image_folder( + base_folder, "img_align_celeba", lambda idx: f"{idx + 1:06d}.jpg", num_images + ) + attr_names = self._create_attr_txt(base_folder, num_images) + self._create_identity_txt(base_folder, num_images) + self._create_bbox_txt(base_folder, num_images) + self._create_landmarks_txt(base_folder, num_images) + + return dict(num_examples=num_images_per_split[config["split"]], attr_names=attr_names) + + def _create_split_txt(self, root): + num_images_per_split = dict(train=4, valid=3, test=2) + + data = [ + [self._SPLIT_TO_IDX[split]] for split, num_images in num_images_per_split.items() for _ in range(num_images) + ] + self._create_txt(root, "list_eval_partition.txt", data) + + num_images_per_split["all"] = 
num_images = sum(num_images_per_split.values()) + return num_images, num_images_per_split + + def _create_attr_txt(self, root, num_images): + header = ("5_o_Clock_Shadow", "Young") + data = torch.rand((num_images, len(header))).ge(0.5).int().mul(2).sub(1).tolist() + self._create_txt(root, "list_attr_celeba.txt", data, header=header, add_num_examples=True) + return header + + def _create_identity_txt(self, root, num_images): + data = torch.randint(1, 4, size=(num_images, 1)).tolist() + self._create_txt(root, "identity_CelebA.txt", data) + + def _create_bbox_txt(self, root, num_images): + header = ("x_1", "y_1", "width", "height") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt( + root, "list_bbox_celeba.txt", data, header=header, add_num_examples=True, add_image_id_to_header=True + ) + + def _create_landmarks_txt(self, root, num_images): + header = ("lefteye_x", "rightmouth_y") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt(root, "list_landmarks_align_celeba.txt", data, header=header, add_num_examples=True) + + def _create_txt(self, root, name, data, header=None, add_num_examples=False, add_image_id_to_header=False): + with open(pathlib.Path(root) / name, "w") as fh: + if add_num_examples: + fh.write(f"{len(data)}\n") + + if header: + if add_image_id_to_header: + header = ("image_id", *header) + fh.write(f"{' '.join(header)}\n") + + for idx, line in enumerate(data, 1): + fh.write(f"{' '.join((f'{idx:06d}.jpg', *[str(value) for value in line]))}\n") + + def test_combined_targets(self): + target_types = ["attr", "identity", "bbox", "landmarks"] + + individual_targets = [] + for target_type in target_types: + with self.create_dataset(target_type=target_type) as (dataset, _): + _, target = dataset[0] + individual_targets.append(target) + + with self.create_dataset(target_type=target_types) as (dataset, _): + _, combined_targets = dataset[0] + + actual = len(individual_targets) + expected = 
len(combined_targets) + assert ( + actual == expected + ), "The number of the returned combined targets does not match the the number targets if requested " + f"individually: {actual} != {expected}", + + for target_type, combined_target, individual_target in zip(target_types, combined_targets, individual_targets): + with self.subTest(target_type=target_type): + actual = type(combined_target) + expected = type(individual_target) + assert ( + actual is expected + ), "Type of the combined target does not match the type of the corresponding individual target: " + f"{actual} is not {expected}", + + def test_no_target(self): + with self.create_dataset(target_type=[]) as (dataset, _): + _, target = dataset[0] + + assert target is None + + def test_attr_names(self): + with self.create_dataset() as (dataset, info): + assert tuple(dataset.attr_names) == info["attr_names"] + + def test_images_names_split(self): + with self.create_dataset(split="all") as (dataset, _): + all_imgs_names = set(dataset.filename) + + merged_imgs_names = set() + for split in ["train", "valid", "test"]: + with self.create_dataset(split=split) as (dataset, _): + merged_imgs_names.update(dataset.filename) + + assert merged_imgs_names == all_imgs_names + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + for target_type in ["identity", "bbox", ["identity", "bbox"]]: + with self.create_dataset(target_type=target_type, transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.VOCSegmentation + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image) + + ADDITIONAL_CONFIGS = ( + *combinations_grid(year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval")), + dict(year="2007", image_set="test"), + ) + + def inject_fake_data(self, tmpdir, config): + year, is_test_set = 
config["year"], config["image_set"] == "test" + image_set = config["image_set"] + + base_dir = pathlib.Path(tmpdir) + if year == "2011": + base_dir /= "TrainVal" + base_dir = base_dir / "VOCdevkit" / f"VOC{year}" + os.makedirs(base_dir) + + num_images, num_images_per_image_set = self._create_image_set_files(base_dir, "ImageSets", is_test_set) + datasets_utils.create_image_folder(base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images) + + datasets_utils.create_image_folder(base_dir, "SegmentationClass", lambda idx: f"{idx:06d}.png", num_images) + annotation = self._create_annotation_files(base_dir, "Annotations", num_images) + + return dict(num_examples=num_images_per_image_set[image_set], annotation=annotation) + + def _create_image_set_files(self, root, name, is_test_set): + root = pathlib.Path(root) / name + src = pathlib.Path(root) / "Main" + os.makedirs(src, exist_ok=True) + + idcs = dict(train=(0, 1, 2), val=(3, 4), test=(5,)) + idcs["trainval"] = (*idcs["train"], *idcs["val"]) + + for image_set in ("test",) if is_test_set else ("train", "val", "trainval"): + self._create_image_set_file(src, image_set, idcs[image_set]) + + shutil.copytree(src, root / "Segmentation") + + num_images = max(itertools.chain(*idcs.values())) + 1 + num_images_per_image_set = {image_set: len(idcs_) for image_set, idcs_ in idcs.items()} + return num_images, num_images_per_image_set + + def _create_image_set_file(self, root, image_set, idcs): + with open(pathlib.Path(root) / f"{image_set}.txt", "w") as fh: + fh.writelines([f"{idx:06d}\n" for idx in idcs]) + + def _create_annotation_files(self, root, name, num_images): + root = pathlib.Path(root) / name + os.makedirs(root) + + for idx in range(num_images): + annotation = self._create_annotation_file(root, f"{idx:06d}.xml") + + return annotation + + def _create_annotation_file(self, root, name): + def add_child(parent, name, text=None): + child = ET.SubElement(parent, name) + child.text = text + return child + + def 
add_name(obj, name="dog"): + add_child(obj, "name", name) + return name + + def add_bndbox(obj, bndbox=None): + if bndbox is None: + bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"} + + obj = add_child(obj, "bndbox") + for name, text in bndbox.items(): + add_child(obj, name, text) + + return bndbox + + annotation = ET.Element("annotation") + obj = add_child(annotation, "object") + data = dict(name=add_name(obj), bndbox=add_bndbox(obj)) + + with open(pathlib.Path(root) / name, "wb") as fh: + fh.write(ET.tostring(annotation)) + + return data + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class VOCDetectionTestCase(VOCSegmentationTestCase): + DATASET_CLASS = datasets.VOCDetection + FEATURE_TYPES = (PIL.Image.Image, dict) + + def test_annotations(self): + with self.create_dataset() as (dataset, info): + _, target = dataset[0] + + assert "annotation" in target + annotation = target["annotation"] + + assert "object" in annotation + objects = annotation["object"] + + assert len(objects) == 1 + object = objects[0] + + assert object == info["annotation"] + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CocoDetection + FEATURE_TYPES = (PIL.Image.Image, list) + + REQUIRED_PACKAGES = ("pycocotools",) + + _IMAGE_FOLDER = "images" + _ANNOTATIONS_FOLDER = "annotations" + _ANNOTATIONS_FILE = "annotations.json" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._IMAGE_FOLDER + annotation_file = tmpdir / 
self._ANNOTATIONS_FOLDER / self._ANNOTATIONS_FILE + return root, annotation_file + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + num_images = 3 + num_annotations_per_image = 2 + + files = datasets_utils.create_image_folder( + tmpdir, name=self._IMAGE_FOLDER, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images + ) + file_names = [file.relative_to(tmpdir / self._IMAGE_FOLDER) for file in files] + + annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotation_folder) + + segmentation_kind = config.pop("segmentation_kind", "list") + info = self._create_annotation_file( + annotation_folder, + self._ANNOTATIONS_FILE, + file_names, + num_annotations_per_image, + segmentation_kind=segmentation_kind, + ) + + info["num_examples"] = num_images + return info + + def _create_annotation_file(self, root, name, file_names, num_annotations_per_image, segmentation_kind="list"): + image_ids = [int(file_name.stem) for file_name in file_names] + images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)] + + annotations, info = self._create_annotations(image_ids, num_annotations_per_image, segmentation_kind) + self._create_json(root, name, dict(images=images, annotations=annotations)) + + return info + + def _create_annotations(self, image_ids, num_annotations_per_image, segmentation_kind="list"): + annotations = [] + annotion_id = 0 + + for image_id in itertools.islice(itertools.cycle(image_ids), len(image_ids) * num_annotations_per_image): + segmentation = { + "list": [torch.rand(8).tolist()], + "rle": {"size": [10, 10], "counts": [1]}, + "rle_encoded": {"size": [2400, 2400], "counts": "PQRQ2[1\\Y2f0gNVNRhMg2"}, + "bad": 123, + }[segmentation_kind] + + annotations.append( + dict( + image_id=image_id, + id=annotion_id, + bbox=torch.rand(4).tolist(), + segmentation=segmentation, + category_id=int(torch.randint(91, ())), + area=float(torch.rand(1)), + iscrowd=int(torch.randint(2, 
size=(1,))), + ) + ) + annotion_id += 1 + return annotations, dict() + + def _create_json(self, root, name, content): + file = pathlib.Path(root) / name + with open(file, "w") as fh: + json.dump(content, fh) + return file + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + def test_slice_error(self): + with self.create_dataset() as (dataset, _): + with pytest.raises(ValueError, match="Index must be of type integer"): + dataset[:2] + + def test_segmentation_kind(self): + if isinstance(self, CocoCaptionsTestCase): + return + + for segmentation_kind in ("list", "rle", "rle_encoded"): + config = {"segmentation_kind": segmentation_kind} + with self.create_dataset(config) as (dataset, _): + dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys="all") + list(dataset) + + config = {"segmentation_kind": "bad"} + with self.create_dataset(config) as (dataset, _): + dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys="all") + with pytest.raises(ValueError, match="COCO segmentation expected to be a dict or a list"): + list(dataset) + + +class CocoCaptionsTestCase(CocoDetectionTestCase): + DATASET_CLASS = datasets.CocoCaptions + + def _create_annotations(self, image_ids, num_annotations_per_image, segmentation_kind="list"): + captions = [str(idx) for idx in range(num_annotations_per_image)] + annotations = combinations_grid(image_id=image_ids, caption=captions) + for id, annotation in enumerate(annotations): + annotation["id"] = id + return annotations, dict(captions=captions) + + def test_captions(self): + with self.create_dataset() as (dataset, info): + _, captions = dataset[0] + assert tuple(captions) == tuple(info["captions"]) + + def test_transforms_v2_wrapper_spawn(self): + # We need to define this method, because otherwise the test from 
the super class will + # be run + pytest.skip("CocoCaptions is currently not supported by the v2 wrapper.") + + +class UCF101TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.UCF101 + + ADDITIONAL_CONFIGS = combinations_grid(fold=(1, 2, 3), train=(True, False)) + + _VIDEO_FOLDER = "videos" + _ANNOTATIONS_FOLDER = "annotations" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._ANNOTATIONS_FOLDER + return root, annotation_path + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) + + annotations_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotations_folder) + num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"]) + + return num_examples + + def _create_videos(self, root, num_examples_per_class=3): + def file_name_fn(cls, idx, clips_per_group=2): + return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi" + + video_files = [ + datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class) + for cls in ("ApplyEyeMakeup", "YoYo") + ] + return [path.relative_to(root) for path in itertools.chain(*video_files)] + + def _create_annotation_files(self, root, video_files, fold, train): + current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1)) + current_annotation = self._annotation_file_name(fold, train) + self._create_annotation_file(root, current_annotation, current_videos) + + other_videos = set(video_files) - set(current_videos) + other_annotations = [ + self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False)) + ] + other_annotations.remove(current_annotation) + for name in 
other_annotations: + self._create_annotation_file(root, name, other_videos) + + return len(current_videos) + + def _annotation_file_name(self, fold, train): + return f"{'train' if train else 'test'}list{fold:02d}.txt" + + def _create_annotation_file(self, root, name, video_files): + with open(pathlib.Path(root) / name, "w") as fh: + fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files)) + + +class LSUNTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.LSUN + + REQUIRED_PACKAGES = ("lmdb",) + ADDITIONAL_CONFIGS = combinations_grid(classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"])) + + _CATEGORIES = ( + "bedroom", + "bridge", + "church_outdoor", + "classroom", + "conference_room", + "dining_room", + "kitchen", + "living_room", + "restaurant", + "tower", + ) + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) + + num_images = 0 + for cls in self._parse_classes(config["classes"]): + num_images += self._create_lmdb(root, cls) + + return num_images + + @contextlib.contextmanager + def create_dataset(self, *args, **kwargs): + with super().create_dataset(*args, **kwargs) as output: + yield output + # Currently datasets.LSUN caches the keys in the current directory rather than in the root directory. Thus, + # this creates a number of _cache_* files in the current directory that will not be removed together + # with the temporary directory + for file in os.listdir(os.getcwd()): + if file.startswith("_cache_"): + try: + os.remove(file) + except FileNotFoundError: + # When the same test is run in parallel (in fb internal tests), a thread may remove another + # thread's file. We should be able to remove the try/except when + # https://github.com/pytorch/vision/issues/825 is fixed. 
+ pass + + def _parse_classes(self, classes): + if not isinstance(classes, str): + return classes + + split = classes + if split == "test": + return [split] + + return [f"{category}_{split}" for category in self._CATEGORIES] + + def _create_lmdb(self, root, cls): + lmdb = datasets_utils.lazy_importer.lmdb + hexdigits_lowercase = string.digits + string.ascii_lowercase[:6] + + folder = f"{cls}_lmdb" + + num_images = torch.randint(1, 4, size=()).item() + format = "png" + files = datasets_utils.create_image_folder(root, folder, lambda idx: f"{idx}.{format}", num_images) + + with lmdb.open(str(root / folder)) as env, env.begin(write=True) as txn: + for file in files: + key = "".join(random.choice(hexdigits_lowercase) for _ in range(40)).encode() + + buffer = io.BytesIO() + PIL.Image.open(file).save(buffer, format) + buffer.seek(0) + value = buffer.read() + + txn.put(key, value) + + os.remove(file) + + return num_images + + def test_not_found_or_corrupted(self): + # LSUN does not raise built-in exception, but a custom one. It is expressive enough to not 'cast' it to + # RuntimeError or FileNotFoundError that are normally checked by this test. 
+ with pytest.raises(datasets_utils.lazy_importer.lmdb.Error): + super().test_not_found_or_corrupted() + + +class KineticsTestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.Kinetics + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val"), num_classes=("400", "600", "700")) + + def inject_fake_data(self, tmpdir, config): + classes = ("Abseiling", "Zumba") + num_videos_per_class = 2 + tmpdir = pathlib.Path(tmpdir) / config["split"] + digits = string.ascii_letters + string.digits + "-_" + for cls in classes: + datasets_utils.create_video_folder( + tmpdir, + cls, + lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4", + num_videos_per_class, + ) + return num_videos_per_class * len(classes) + + @pytest.mark.xfail(reason="FIXME") + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(output_format="TCHW", transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class HMDB51TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.HMDB51 + + ADDITIONAL_CONFIGS = combinations_grid(fold=(1, 2, 3), train=(True, False)) + + _VIDEO_FOLDER = "videos" + _SPLITS_FOLDER = "splits" + _CLASSES = ("brush_hair", "wave") + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._SPLITS_FOLDER + return root, annotation_path + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) + + splits_folder = tmpdir / self._SPLITS_FOLDER + os.makedirs(splits_folder) + num_examples = self._create_split_files(splits_folder, video_files, config["fold"], config["train"]) + + return num_examples + + def _create_videos(self, root, num_examples_per_class=3): + def 
file_name_fn(cls, idx, clips_per_group=2): + return f"{cls}_{(idx // clips_per_group) + 1:d}_{(idx % clips_per_group) + 1:d}.avi" + + return [ + ( + cls, + datasets_utils.create_video_folder( + root, + cls, + lambda idx: file_name_fn(cls, idx), + num_examples_per_class, + ), + ) + for cls in self._CLASSES + ] + + def _create_split_files(self, root, video_files, fold, train): + num_videos = num_train_videos = 0 + + for cls, videos in video_files: + num_videos += len(videos) + + train_videos = set(random.sample(videos, random.randrange(1, len(videos) - 1))) + num_train_videos += len(train_videos) + + with open(pathlib.Path(root) / f"{cls}_test_split{fold}.txt", "w") as fh: + fh.writelines(f"{file.name} {1 if file in train_videos else 2}\n" for file in videos) + + return num_train_videos if train else (num_videos - num_train_videos) + + +class OmniglotTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Omniglot + + ADDITIONAL_CONFIGS = combinations_grid(background=(True, False)) + + def inject_fake_data(self, tmpdir, config): + target_folder = ( + pathlib.Path(tmpdir) / "omniglot-py" / f"images_{'background' if config['background'] else 'evaluation'}" + ) + os.makedirs(target_folder) + + num_images = 0 + for name in ("Alphabet_of_the_Magi", "Tifinagh"): + num_images += self._create_alphabet_folder(target_folder, name) + + return num_images + + def _create_alphabet_folder(self, root, name): + num_images_total = 0 + for idx in range(torch.randint(1, 4, size=()).item()): + num_images = torch.randint(1, 4, size=()).item() + num_images_total += num_images + + datasets_utils.create_image_folder( + root / name, f"character{idx:02d}", lambda image_idx: f"{image_idx:02d}.png", num_images + ) + + return num_images_total + + +class SBUTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBU + FEATURE_TYPES = (PIL.Image.Image, str) + + def inject_fake_data(self, tmpdir, config): + num_images = 3 + + dataset_folder = pathlib.Path(tmpdir) 
/ "dataset" + images = datasets_utils.create_image_folder(tmpdir, "dataset", self._create_file_name, num_images) + + self._create_urls_txt(dataset_folder, images) + self._create_captions_txt(dataset_folder, num_images) + + return num_images + + def _create_file_name(self, idx): + part1 = datasets_utils.create_random_string(10, string.digits) + part2 = datasets_utils.create_random_string(10, string.ascii_lowercase, string.digits[:6]) + return f"{part1}_{part2}.jpg" + + def _create_urls_txt(self, root, images): + with open(root / "SBU_captioned_photo_dataset_urls.txt", "w") as fh: + for image in images: + fh.write( + f"http://static.flickr.com/{datasets_utils.create_random_string(4, string.digits)}/{image.name}\n" + ) + + def _create_captions_txt(self, root, num_images): + with open(root / "SBU_captioned_photo_dataset_captions.txt", "w") as fh: + for _ in range(num_images): + fh.write(f"{datasets_utils.create_random_string(10)}\n") + + +class SEMEIONTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SEMEION + + def inject_fake_data(self, tmpdir, config): + num_images = 3 + + images = torch.rand(num_images, 256) + labels = F.one_hot(torch.randint(10, size=(num_images,))) + with open(pathlib.Path(tmpdir) / "semeion.data", "w") as fh: + for image, one_hot_labels in zip(images, labels): + image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image]) + labels_columns = " ".join([str(label.item()) for label in one_hot_labels]) + fh.write(f"{image_columns} {labels_columns}\n") + + return num_images + + +class USPSTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.USPS + + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) + + def inject_fake_data(self, tmpdir, config): + num_images = 2 if config["train"] else 1 + + images = torch.rand(num_images, 256) * 2 - 1 + labels = torch.randint(1, 11, size=(num_images,)) + + with bz2.open(pathlib.Path(tmpdir) / f"usps{'.t' if not config['train'] else ''}.bz2", "w") as fh: + 
for image, label in zip(images, labels): + line = " ".join((str(label.item()), *[f"{idx}:{pixel:.6f}" for idx, pixel in enumerate(image, 1)])) + fh.write(f"{line}\n".encode()) + + return num_images + + +class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBDataset + FEATURE_TYPES = (PIL.Image.Image, (np.ndarray, PIL.Image.Image)) + + REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse") + + ADDITIONAL_CONFIGS = combinations_grid( + image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation") + ) + + _NUM_CLASSES = 20 + + def inject_fake_data(self, tmpdir, config): + num_images, num_images_per_image_set = self._create_split_files(tmpdir) + + sizes = self._create_target_folder(tmpdir, "cls", num_images) + + datasets_utils.create_image_folder( + tmpdir, "img", lambda idx: f"{self._file_stem(idx)}.jpg", num_images, size=lambda idx: sizes[idx] + ) + + return num_images_per_image_set[config["image_set"]] + + def _create_split_files(self, root): + root = pathlib.Path(root) + + splits = dict(train=(0, 1, 2), train_noval=(0, 2), val=(3,)) + + for split, idcs in splits.items(): + self._create_split_file(root, split, idcs) + + num_images = max(itertools.chain(*splits.values())) + 1 + num_images_per_split = {split: len(idcs) for split, idcs in splits.items()} + return num_images, num_images_per_split + + def _create_split_file(self, root, name, idcs): + with open(root / f"{name}.txt", "w") as fh: + fh.writelines(f"{self._file_stem(idx)}\n" for idx in idcs) + + def _create_target_folder(self, root, name, num_images): + io = datasets_utils.lazy_importer.scipy.io + + target_folder = pathlib.Path(root) / name + os.makedirs(target_folder) + + sizes = [torch.randint(1, 4, size=(2,)).tolist() for _ in range(num_images)] + for idx, size in enumerate(sizes): + content = dict( + GTcls=dict(Boundaries=self._create_boundaries(size), Segmentation=self._create_segmentation(size)) + ) + io.savemat(target_folder / 
f"{self._file_stem(idx)}.mat", content) + + return sizes + + def _create_boundaries(self, size): + sparse = datasets_utils.lazy_importer.scipy.sparse + return [ + [sparse.csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] + for _ in range(self._NUM_CLASSES) + ] + + def _create_segmentation(self, size): + return torch.randint(0, self._NUM_CLASSES + 1, size=size, dtype=torch.uint8).numpy() + + def _file_stem(self, idx): + return f"2008_{idx:06d}" + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(mode="segmentation", transforms=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class FakeDataTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FakeData + FEATURE_TYPES = (PIL.Image.Image, int) + + def dataset_args(self, tmpdir, config): + return () + + def inject_fake_data(self, tmpdir, config): + return config["size"] + + def test_not_found_or_corrupted(self): + self.skipTest("The data is generated at creation and thus cannot be non-existent or corrupted.") + + +class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.PhotoTour + + # The PhotoTour dataset returns examples with different features with respect to the 'train' parameter. Thus, + # we overwrite 'FEATURE_TYPES' with a dummy value to satisfy the initial checks of the base class. Furthermore, we + # overwrite the 'test_feature_types()' method to select the correct feature types before the test is run. 
+ FEATURE_TYPES = () + _TRAIN_FEATURE_TYPES = (torch.Tensor,) + _TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor) + + combinations_grid(train=(True, False)) + + _NAME = "liberty" + + def dataset_args(self, tmpdir, config): + return tmpdir, self._NAME + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + # In contrast to the original data, the fake images injected here comprise only a single patch. Thus, + # num_images == num_patches. + num_patches = 5 + + image_files = self._create_images(tmpdir, self._NAME, num_patches) + point_ids, info_file = self._create_info_file(tmpdir / self._NAME, num_patches) + num_matches, matches_file = self._create_matches_file(tmpdir / self._NAME, num_patches, point_ids) + + self._create_archive(tmpdir, self._NAME, *image_files, info_file, matches_file) + + return num_patches if config["train"] else num_matches + + def _create_images(self, root, name, num_images): + # The images in the PhotoTour dataset comprises of multiple grayscale patches of 64 x 64 pixels. Thus, the + # smallest fake image is 64 x 64 pixels and comprises a single patch. 
+ return datasets_utils.create_image_folder( + root, name, lambda idx: f"patches{idx:04d}.bmp", num_images, size=(1, 64, 64) + ) + + def _create_info_file(self, root, num_images): + point_ids = torch.randint(num_images, size=(num_images,)).tolist() + + file = root / "info.txt" + with open(file, "w") as fh: + fh.writelines([f"{point_id} 0\n" for point_id in point_ids]) + + return point_ids, file + + def _create_matches_file(self, root, num_patches, point_ids): + lines = [ + f"{patch_id1} {point_ids[patch_id1]} 0 {patch_id2} {point_ids[patch_id2]} 0\n" + for patch_id1, patch_id2 in itertools.combinations(range(num_patches), 2) + ] + + file = root / "m50_100000_100000_0.txt" + with open(file, "w") as fh: + fh.writelines(lines) + + return len(lines), file + + def _create_archive(self, root, name, *files): + archive = root / f"{name}.zip" + with zipfile.ZipFile(archive, "w") as zip: + for file in files: + zip.write(file, arcname=file.relative_to(root)) + + return archive + + @datasets_utils.test_all_configs + def test_feature_types(self, config): + feature_types = self.FEATURE_TYPES + self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES + try: + super().test_feature_types.__wrapped__(self, config) + finally: + self.FEATURE_TYPES = feature_types + + +class Flickr8kTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Flickr8k + + FEATURE_TYPES = (PIL.Image.Image, list) + + _IMAGES_FOLDER = "images" + _ANNOTATIONS_FILE = "captions.html" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._IMAGES_FOLDER + ann_file = tmpdir / self._ANNOTATIONS_FILE + return str(root), str(ann_file) + + def inject_fake_data(self, tmpdir, config): + num_images = 3 + num_captions_per_image = 3 + + tmpdir = pathlib.Path(tmpdir) + + images = self._create_images(tmpdir, self._IMAGES_FOLDER, num_images) + self._create_annotations_file(tmpdir, self._ANNOTATIONS_FILE, images, 
num_captions_per_image) + + return dict(num_examples=num_images, captions=self._create_captions(num_captions_per_image)) + + def _create_images(self, root, name, num_images): + return datasets_utils.create_image_folder(root, name, self._image_file_name, num_images) + + def _image_file_name(self, idx): + id = datasets_utils.create_random_string(10, string.digits) + checksum = datasets_utils.create_random_string(10, string.digits, string.ascii_lowercase[:6]) + size = datasets_utils.create_random_string(1, "qwcko") + return f"{id}_{checksum}_{size}.jpg" + + def _create_annotations_file(self, root, name, images, num_captions_per_image): + with open(root / name, "w") as fh: + fh.write("") + for image in (None, *images): + self._add_image(fh, image, num_captions_per_image) + fh.write("
") + + def _add_image(self, fh, image, num_captions_per_image): + fh.write("") + self._add_image_header(fh, image) + fh.write("
    ") + self._add_image_captions(fh, num_captions_per_image) + fh.write("
") + + def _add_image_header(self, fh, image=None): + if image: + url = f"http://www.flickr.com/photos/user/{image.name.split('_')[0]}/" + data = f'{url}' + else: + data = "Image Not Found" + fh.write(f"{data}") + + def _add_image_captions(self, fh, num_captions_per_image): + for caption in self._create_captions(num_captions_per_image): + fh.write(f"
  • {caption}") + + def _create_captions(self, num_captions_per_image): + return [str(idx) for idx in range(num_captions_per_image)] + + def test_captions(self): + with self.create_dataset() as (dataset, info): + _, captions = dataset[0] + assert len(captions) == len(info["captions"]) + assert all([a == b for a, b in zip(captions, info["captions"])]) + + +class Flickr30kTestCase(Flickr8kTestCase): + DATASET_CLASS = datasets.Flickr30k + + FEATURE_TYPES = (PIL.Image.Image, list) + + _ANNOTATIONS_FILE = "captions.token" + + def _image_file_name(self, idx): + return f"{idx}.jpg" + + def _create_annotations_file(self, root, name, images, num_captions_per_image): + with open(root / name, "w") as fh: + for image, (idx, caption) in itertools.product( + images, enumerate(self._create_captions(num_captions_per_image)) + ): + fh.write(f"{image.name}#{idx}\t{caption}\n") + + +class MNISTTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.MNIST + + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) + + _MAGIC_DTYPES = { + torch.uint8: 8, + torch.int8: 9, + torch.int16: 11, + torch.int32: 12, + torch.float32: 13, + torch.float64: 14, + } + + _IMAGES_SIZE = (28, 28) + _IMAGES_DTYPE = torch.uint8 + + _LABELS_SIZE = () + _LABELS_DTYPE = torch.uint8 + + def inject_fake_data(self, tmpdir, config): + raw_dir = pathlib.Path(tmpdir) / self.DATASET_CLASS.__name__ / "raw" + os.makedirs(raw_dir, exist_ok=True) + + num_images = self._num_images(config) + self._create_binary_file( + raw_dir, self._images_file(config), (num_images, *self._IMAGES_SIZE), self._IMAGES_DTYPE + ) + self._create_binary_file( + raw_dir, self._labels_file(config), (num_images, *self._LABELS_SIZE), self._LABELS_DTYPE + ) + return num_images + + def _num_images(self, config): + return 2 if config["train"] else 1 + + def _images_file(self, config): + return f"{self._prefix(config)}-images-idx3-ubyte" + + def _labels_file(self, config): + return f"{self._prefix(config)}-labels-idx1-ubyte" + 
+ def _prefix(self, config): + return "train" if config["train"] else "t10k" + + def _create_binary_file(self, root, filename, size, dtype): + with open(pathlib.Path(root) / filename, "wb") as fh: + for meta in (self._magic(dtype, len(size)), *size): + fh.write(self._encode(meta)) + + # If ever an MNIST variant is added that uses floating point data, this should be adapted. + data = torch.randint(0, torch.iinfo(dtype).max + 1, size, dtype=dtype) + fh.write(data.numpy().tobytes()) + + def _magic(self, dtype, dims): + return self._MAGIC_DTYPES[dtype] * 256 + dims + + def _encode(self, v): + return torch.tensor(v, dtype=torch.int32).numpy().tobytes()[::-1] + + +class FashionMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.FashionMNIST + + +class KMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.KMNIST + + +class EMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.EMNIST + + DEFAULT_CONFIG = dict(split="byclass") + ADDITIONAL_CONFIGS = combinations_grid( + split=("byclass", "bymerge", "balanced", "letters", "digits", "mnist"), train=(True, False) + ) + + def _prefix(self, config): + return f"emnist-{config['split']}-{'train' if config['train'] else 'test'}" + + +class QMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.QMNIST + + ADDITIONAL_CONFIGS = combinations_grid(what=("train", "test", "test10k", "nist")) + + _LABELS_SIZE = (8,) + _LABELS_DTYPE = torch.int32 + + def _num_images(self, config): + if config["what"] == "nist": + return 3 + elif config["what"] == "train": + return 2 + elif config["what"] == "test50k": + # The split 'test50k' is defined as the last 50k images beginning at index 10000. Thus, we need to create + # more than 10000 images for the dataset to not be empty. Since this takes significantly longer than the + # creation of all other splits, this is excluded from the 'ADDITIONAL_CONFIGS' and is tested only once in + # 'test_num_examples_test50k'. 
+ return 10001 + else: + return 1 + + def _labels_file(self, config): + return f"{self._prefix(config)}-labels-idx2-int" + + def _prefix(self, config): + if config["what"] == "nist": + return "xnist" + + if config["what"] is None: + what = "train" if config["train"] else "test" + elif config["what"].startswith("test"): + what = "test" + else: + what = config["what"] + + return f"qmnist-{what}" + + def test_num_examples_test50k(self): + with self.create_dataset(what="test50k") as (dataset, info): + # Since the split 'test50k' selects all images beginning from the index 10000, we subtract the number of + # created examples by this. + assert len(dataset) == info["num_examples"] - 10000 + + +class MovingMNISTTestCase(datasets_utils.DatasetTestCase): + DATASET_CLASS = datasets.MovingMNIST + FEATURE_TYPES = (torch.Tensor,) + + ADDITIONAL_CONFIGS = combinations_grid(split=(None, "train", "test"), split_ratio=(10, 1, 19)) + + _NUM_FRAMES = 20 + + def inject_fake_data(self, tmpdir, config): + base_folder = os.path.join(tmpdir, self.DATASET_CLASS.__name__) + os.makedirs(base_folder, exist_ok=True) + num_samples = 5 + data = np.concatenate( + [ + np.zeros((config["split_ratio"], num_samples, 64, 64)), + np.ones((self._NUM_FRAMES - config["split_ratio"], num_samples, 64, 64)), + ] + ) + np.save(os.path.join(base_folder, "mnist_test_seq.npy"), data) + return num_samples + + @datasets_utils.test_all_configs + def test_split(self, config): + with self.create_dataset(config) as (dataset, _): + if config["split"] == "train": + assert (dataset.data == 0).all() + elif config["split"] == "test": + assert (dataset.data == 1).all() + else: + assert dataset.data.size()[1] == self._NUM_FRAMES + + +class DatasetFolderTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.DatasetFolder + + _EXTENSIONS = ("jpg", "png") + + # DatasetFolder has two mutually exclusive parameters: 'extensions' and 'is_valid_file'. One of both is required. 
+ # We only iterate over different 'extensions' here and handle the tests for 'is_valid_file' in the + # 'test_is_valid_file()' method. + DEFAULT_CONFIG = dict(extensions=_EXTENSIONS) + ADDITIONAL_CONFIGS = combinations_grid(extensions=[(ext,) for ext in _EXTENSIONS]) + + def dataset_args(self, tmpdir, config): + return tmpdir, datasets.folder.pil_loader + + def inject_fake_data(self, tmpdir, config): + extensions = config["extensions"] or self._is_valid_file_to_extensions(config["is_valid_file"]) + + num_examples_total = 0 + classes = [] + for ext, cls in zip(self._EXTENSIONS, string.ascii_letters): + if ext not in extensions: + continue + + num_examples = torch.randint(1, 3, size=()).item() + datasets_utils.create_image_folder(tmpdir, cls, lambda idx: self._file_name_fn(cls, ext, idx), num_examples) + + num_examples_total += num_examples + classes.append(cls) + + if config.pop("make_empty_class", False): + os.makedirs(pathlib.Path(tmpdir) / "empty_class") + classes.append("empty_class") + + return dict(num_examples=num_examples_total, classes=classes) + + def _file_name_fn(self, cls, ext, idx): + return f"{cls}_{idx}.{ext}" + + def _is_valid_file_to_extensions(self, is_valid_file): + return {ext for ext in self._EXTENSIONS if is_valid_file(f"foo.{ext}")} + + @datasets_utils.test_all_configs + def test_is_valid_file(self, config): + extensions = config.pop("extensions") + # We need to explicitly pass extensions=None here or otherwise it would be filled by the value from the + # DEFAULT_CONFIG. 
+ with self.create_dataset( + config, extensions=None, is_valid_file=lambda file: pathlib.Path(file).suffix[1:] in extensions + ) as (dataset, info): + assert len(dataset) == info["num_examples"] + + @datasets_utils.test_all_configs + def test_classes(self, config): + with self.create_dataset(config) as (dataset, info): + assert len(dataset.classes) == len(info["classes"]) + assert all([a == b for a, b in zip(dataset.classes, info["classes"])]) + + def test_allow_empty(self): + config = { + "extensions": self._EXTENSIONS, + "make_empty_class": True, + } + + config["allow_empty"] = True + with self.create_dataset(config) as (dataset, info): + assert "empty_class" in dataset.classes + assert len(dataset.classes) == len(info["classes"]) + assert all([a == b for a, b in zip(dataset.classes, info["classes"])]) + + config["allow_empty"] = False + with pytest.raises(FileNotFoundError, match="Found no valid file"): + with self.create_dataset(config) as (dataset, info): + pass + + +class ImageFolderTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.ImageFolder + + def inject_fake_data(self, tmpdir, config): + num_examples_total = 0 + classes = ("a", "b") + for cls in classes: + num_examples = torch.randint(1, 3, size=()).item() + num_examples_total += num_examples + + datasets_utils.create_image_folder(tmpdir, cls, lambda idx: f"{cls}_{idx}.png", num_examples) + + return dict(num_examples=num_examples_total, classes=classes) + + @datasets_utils.test_all_configs + def test_classes(self, config): + with self.create_dataset(config) as (dataset, info): + assert len(dataset.classes) == len(info["classes"]) + assert all([a == b for a, b in zip(dataset.classes, info["classes"])]) + + +class KittiTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Kitti + FEATURE_TYPES = (PIL.Image.Image, (list, type(None))) # test split returns None as target + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) + + def inject_fake_data(self, 
tmpdir, config): + kitti_dir = os.path.join(tmpdir, "Kitti", "raw") + os.makedirs(kitti_dir) + + split_to_num_examples = { + True: 1, + False: 2, + } + + # We need to create all folders(training and testing). + for is_training in (True, False): + num_examples = split_to_num_examples[is_training] + + datasets_utils.create_image_folder( + root=kitti_dir, + name=os.path.join("training" if is_training else "testing", "image_2"), + file_name_fn=lambda image_idx: f"{image_idx:06d}.png", + num_examples=num_examples, + ) + if is_training: + for image_idx in range(num_examples): + target_file_dir = os.path.join(kitti_dir, "training", "label_2") + os.makedirs(target_file_dir) + target_file_name = os.path.join(target_file_dir, f"{image_idx:06d}.txt") + target_contents = "Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01\n" # noqa + with open(target_file_name, "w") as target_file: + target_file.write(target_contents) + + return split_to_num_examples[config["train"]] + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class SvhnTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SVHN + REQUIRED_PACKAGES = ("scipy",) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test", "extra")) + + def inject_fake_data(self, tmpdir, config): + import scipy.io as sio + + split = config["split"] + num_examples = { + "train": 2, + "test": 3, + "extra": 4, + }.get(split) + + file = f"{split}_32x32.mat" + images = np.zeros((32, 32, 3, num_examples), dtype=np.uint8) + targets = np.zeros((num_examples,), dtype=np.uint8) + sio.savemat(os.path.join(tmpdir, file), {"X": images, "y": targets}) + return num_examples + + +class Places365TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Places365 + 
ADDITIONAL_CONFIGS = combinations_grid( + split=("train-standard", "train-challenge", "val"), + small=(False, True), + ) + _CATEGORIES = "categories_places365.txt" + # {split: file} + _FILE_LISTS = { + "train-standard": "places365_train_standard.txt", + "train-challenge": "places365_train_challenge.txt", + "val": "places365_val.txt", + } + # {(split, small): folder_name} + _IMAGES = { + ("train-standard", False): "data_large_standard", + ("train-challenge", False): "data_large_challenge", + ("val", False): "val_large", + ("train-standard", True): "data_256_standard", + ("train-challenge", True): "data_256_challenge", + ("val", True): "val_256", + } + # (class, idx) + _CATEGORIES_CONTENT = ( + ("/a/airfield", 0), + ("/a/apartment_building/outdoor", 8), + ("/b/badlands", 30), + ) + # (file, idx) + _FILE_LIST_CONTENT = ( + ("Places365_val_00000001.png", 0), + *((f"{category}/Places365_train_00000001.png", idx) for category, idx in _CATEGORIES_CONTENT), + ) + + @staticmethod + def _make_txt(root, name, seq): + file = os.path.join(root, name) + with open(file, "w") as fh: + for text, idx in seq: + fh.write(f"{text} {idx}\n") + + @staticmethod + def _make_categories_txt(root, name): + Places365TestCase._make_txt(root, name, Places365TestCase._CATEGORIES_CONTENT) + + @staticmethod + def _make_file_list_txt(root, name): + Places365TestCase._make_txt(root, name, Places365TestCase._FILE_LIST_CONTENT) + + @staticmethod + def _make_image(file_name, size): + os.makedirs(os.path.dirname(file_name), exist_ok=True) + PIL.Image.fromarray(np.zeros((*size, 3), dtype=np.uint8)).save(file_name) + + @staticmethod + def _make_devkit_archive(root, split): + Places365TestCase._make_categories_txt(root, Places365TestCase._CATEGORIES) + Places365TestCase._make_file_list_txt(root, Places365TestCase._FILE_LISTS[split]) + + @staticmethod + def _make_images_archive(root, split, small): + folder_name = Places365TestCase._IMAGES[(split, small)] + image_size = (256, 256) if small else (512, 
random.randint(512, 1024)) + files, idcs = zip(*Places365TestCase._FILE_LIST_CONTENT) + images = [f.lstrip("/").replace("/", os.sep) for f in files] + for image in images: + Places365TestCase._make_image(os.path.join(root, folder_name, image), image_size) + + return [(os.path.join(root, folder_name, image), idx) for image, idx in zip(images, idcs)] + + def inject_fake_data(self, tmpdir, config): + self._make_devkit_archive(tmpdir, config["split"]) + return len(self._make_images_archive(tmpdir, config["split"], config["small"])) + + def test_classes(self): + classes = list(map(lambda x: x[0], self._CATEGORIES_CONTENT)) + with self.create_dataset() as (dataset, _): + assert dataset.classes == classes + + def test_class_to_idx(self): + class_to_idx = dict(self._CATEGORIES_CONTENT) + with self.create_dataset() as (dataset, _): + assert dataset.class_to_idx == class_to_idx + + +class INaturalistTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.INaturalist + FEATURE_TYPES = (PIL.Image.Image, (int, tuple)) + + ADDITIONAL_CONFIGS = combinations_grid( + target_type=("kingdom", "full", "genus", ["kingdom", "phylum", "class", "order", "family", "genus", "full"]), + version=("2021_train",), + ) + + def inject_fake_data(self, tmpdir, config): + categories = [ + "00000_Akingdom_0phylum_Aclass_Aorder_Afamily_Agenus_Aspecies", + "00001_Akingdom_1phylum_Aclass_Border_Afamily_Bgenus_Aspecies", + "00002_Akingdom_2phylum_Cclass_Corder_Cfamily_Cgenus_Cspecies", + ] + + num_images_per_category = 3 + for category in categories: + datasets_utils.create_image_folder( + root=os.path.join(tmpdir, config["version"]), + name=category, + file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + + return num_images_per_category * len(categories) + + def test_targets(self): + target_types = ["kingdom", "phylum", "class", "order", "family", "genus", "full"] + + with self.create_dataset(target_type=target_types, version="2021_valid") 
as (dataset, _): + items = [d[1] for d in dataset] + for i, item in enumerate(items): + assert dataset.category_name("kingdom", item[0]) == "Akingdom" + assert dataset.category_name("phylum", item[1]) == f"{i // 3}phylum" + assert item[6] == i // 3 + + +class LFWPeopleTestCase(datasets_utils.DatasetTestCase): + DATASET_CLASS = datasets.LFWPeople + FEATURE_TYPES = (PIL.Image.Image, int) + ADDITIONAL_CONFIGS = combinations_grid( + split=("10fold", "train", "test"), image_set=("original", "funneled", "deepfunneled") + ) + _IMAGES_DIR = {"original": "lfw", "funneled": "lfw_funneled", "deepfunneled": "lfw-deepfunneled"} + _file_id = {"10fold": "", "train": "DevTrain", "test": "DevTest"} + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) / "lfw-py" + os.makedirs(tmpdir, exist_ok=True) + return dict( + num_examples=self._create_images_dir(tmpdir, self._IMAGES_DIR[config["image_set"]], config["split"]), + split=config["split"], + ) + + def _create_images_dir(self, root, idir, split): + idir = os.path.join(root, idir) + os.makedirs(idir, exist_ok=True) + n, flines = (10, ["10\n"]) if split == "10fold" else (1, []) + num_examples = 0 + names = [] + for _ in range(n): + num_people = random.randint(2, 5) + flines.append(f"{num_people}\n") + for i in range(num_people): + name = self._create_random_id() + no = random.randint(1, 10) + flines.append(f"{name}\t{no}\n") + names.append(f"{name}\t{no}\n") + datasets_utils.create_image_folder(idir, name, lambda n: f"{name}_{n+1:04d}.jpg", no, 250) + num_examples += no + with open(pathlib.Path(root) / f"people{self._file_id[split]}.txt", "w") as f: + f.writelines(flines) + with open(pathlib.Path(root) / "lfw-names.txt", "w") as f: + f.writelines(sorted(names)) + + return num_examples + + def _create_random_id(self): + part1 = datasets_utils.create_random_string(random.randint(5, 7)) + part2 = datasets_utils.create_random_string(random.randint(4, 7)) + return f"{part1}_{part2}" + + +class 
LFWPairsTestCase(LFWPeopleTestCase): + DATASET_CLASS = datasets.LFWPairs + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, int) + + def _create_images_dir(self, root, idir, split): + idir = os.path.join(root, idir) + os.makedirs(idir, exist_ok=True) + num_pairs = 7 # effectively 7*2*n = 14*n + n, self.flines = (10, [f"10\t{num_pairs}"]) if split == "10fold" else (1, [str(num_pairs)]) + for _ in range(n): + self._inject_pairs(idir, num_pairs, True) + self._inject_pairs(idir, num_pairs, False) + with open(pathlib.Path(root) / f"pairs{self._file_id[split]}.txt", "w") as f: + f.writelines(self.flines) + + return num_pairs * 2 * n + + def _inject_pairs(self, root, num_pairs, same): + for i in range(num_pairs): + name1 = self._create_random_id() + name2 = name1 if same else self._create_random_id() + no1, no2 = random.randint(1, 100), random.randint(1, 100) + if same: + self.flines.append(f"\n{name1}\t{no1}\t{no2}") + else: + self.flines.append(f"\n{name1}\t{no1}\t{name2}\t{no2}") + + datasets_utils.create_image_folder(root, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) + datasets_utils.create_image_folder(root, name2, lambda _: f"{name2}_{no2:04d}.jpg", 1, 250) + + +class SintelTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Sintel + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"), pass_name=("clean", "final", "both")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) + + FLOW_H, FLOW_W = 3, 4 + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) / "Sintel" + + num_images_per_scene = 3 if config["split"] == "train" else 4 + num_scenes = 2 + + for split_dir in ("training", "test"): + for pass_name in ("clean", "final"): + image_root = root / split_dir / pass_name + + for scene_id in range(num_scenes): + scene_dir = image_root / f"scene_{scene_id}" + datasets_utils.create_image_folder( + image_root, + name=str(scene_dir), + file_name_fn=lambda image_idx: 
f"frame_000{image_idx}.png", + num_examples=num_images_per_scene, + ) + + flow_root = root / "training" / "flow" + for scene_id in range(num_scenes): + scene_dir = flow_root / f"scene_{scene_id}" + os.makedirs(scene_dir) + for i in range(num_images_per_scene - 1): + file_name = str(scene_dir / f"frame_000{i}.flo") + datasets_utils.make_fake_flo_file(h=self.FLOW_H, w=self.FLOW_W, file_name=file_name) + + # with e.g. num_images_per_scene = 3, for a single scene with have 3 images + # which are frame_0000, frame_0001 and frame_0002 + # They will be consecutively paired as (frame_0000, frame_0001), (frame_0001, frame_0002), + # that is 3 - 1 = 2 examples. Hence the formula below + num_passes = 2 if config["pass_name"] == "both" else 1 + num_examples = (num_images_per_scene - 1) * num_scenes * num_passes + return num_examples + + def test_flow(self): + # Make sure flow exists for train split, and make sure there are as many flow values as (pairs of) images + h, w = self.FLOW_H, self.FLOW_W + expected_flow = np.arange(2 * h * w).reshape(h, w, 2).transpose(2, 0, 1) + with self.create_dataset(split="train") as (dataset, _): + assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list) + for _, _, flow in dataset: + assert flow.shape == (2, h, w) + np.testing.assert_allclose(flow, expected_flow) + + # Make sure flow is always None for test split + with self.create_dataset(split="test") as (dataset, _): + assert dataset._image_list and not dataset._flow_list + for _, _, flow in dataset: + assert flow is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + with pytest.raises(ValueError, match="Unknown value 'bad' for argument pass_name"): + with self.create_dataset(pass_name="bad"): + pass + + +class KittiFlowTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.KittiFlow + ADDITIONAL_CONFIGS = 
combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) / "KittiFlow" + + num_examples = 2 if config["split"] == "train" else 3 + for split_dir in ("training", "testing"): + + datasets_utils.create_image_folder( + root / split_dir, + name="image_2", + file_name_fn=lambda image_idx: f"{image_idx}_10.png", + num_examples=num_examples, + ) + datasets_utils.create_image_folder( + root / split_dir, + name="image_2", + file_name_fn=lambda image_idx: f"{image_idx}_11.png", + num_examples=num_examples, + ) + + # For kitti the ground truth flows are encoded as 16-bits pngs. + # create_image_folder() will actually create 8-bits pngs, but it doesn't + # matter much: the flow reader will still be able to read the files, it + # will just be garbage flow value - but we don't care about that here. + datasets_utils.create_image_folder( + root / "training", + name="flow_occ", + file_name_fn=lambda image_idx: f"{image_idx}_10.png", + num_examples=num_examples, + ) + + return num_examples + + def test_flow_and_valid(self): + # Make sure flow exists for train split, and make sure there are as many flow values as (pairs of) images + # Also assert flow and valid are of the expected shape + with self.create_dataset(split="train") as (dataset, _): + assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list) + for _, _, flow, valid in dataset: + two, h, w = flow.shape + assert two == 2 + assert valid.shape == (h, w) + + # Make sure flow and valid are always None for test split + with self.create_dataset(split="test") as (dataset, _): + assert dataset._image_list and not dataset._flow_list + for _, _, flow, valid in dataset: + assert flow is None + assert valid is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with 
self.create_dataset(split="bad"): + pass + + +class FlyingChairsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FlyingChairs + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) + + FLOW_H, FLOW_W = 3, 4 + + def _make_split_file(self, root, num_examples): + # We create a fake split file here, but users are asked to download the real one from the authors website + split_ids = [1] * num_examples["train"] + [2] * num_examples["val"] + random.shuffle(split_ids) + with open(str(root / "FlyingChairs_train_val.txt"), "w+") as split_file: + for split_id in split_ids: + split_file.write(f"{split_id}\n") + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) / "FlyingChairs" + + num_examples = {"train": 5, "val": 3} + num_examples_total = sum(num_examples.values()) + + datasets_utils.create_image_folder( # img1 + root, + name="data", + file_name_fn=lambda image_idx: f"00{image_idx}_img1.ppm", + num_examples=num_examples_total, + ) + datasets_utils.create_image_folder( # img2 + root, + name="data", + file_name_fn=lambda image_idx: f"00{image_idx}_img2.ppm", + num_examples=num_examples_total, + ) + for i in range(num_examples_total): + file_name = str(root / "data" / f"00{i}_flow.flo") + datasets_utils.make_fake_flo_file(h=self.FLOW_H, w=self.FLOW_W, file_name=file_name) + + self._make_split_file(root, num_examples) + + return num_examples[config["split"]] + + @datasets_utils.test_all_configs + def test_flow(self, config): + # Make sure flow always exists, and make sure there are as many flow values as (pairs of) images + # Also make sure the flow is properly decoded + + h, w = self.FLOW_H, self.FLOW_W + expected_flow = np.arange(2 * h * w).reshape(h, w, 2).transpose(2, 0, 1) + with self.create_dataset(config=config) as (dataset, _): + assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list) + for _, _, flow in 
dataset: + assert flow.shape == (2, h, w) + np.testing.assert_allclose(flow, expected_flow) + + +class FlyingThings3DTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FlyingThings3D + ADDITIONAL_CONFIGS = combinations_grid( + split=("train", "test"), pass_name=("clean", "final", "both"), camera=("left", "right", "both") + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) + + FLOW_H, FLOW_W = 3, 4 + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) / "FlyingThings3D" + + num_images_per_camera = 3 if config["split"] == "train" else 4 + passes = ("frames_cleanpass", "frames_finalpass") + splits = ("TRAIN", "TEST") + letters = ("A", "B", "C") + subfolders = ("0000", "0001") + cameras = ("left", "right") + for pass_name, split, letter, subfolder, camera in itertools.product( + passes, splits, letters, subfolders, cameras + ): + current_folder = root / pass_name / split / letter / subfolder + datasets_utils.create_image_folder( + current_folder, + name=camera, + file_name_fn=lambda image_idx: f"00{image_idx}.png", + num_examples=num_images_per_camera, + ) + + directions = ("into_future", "into_past") + for split, letter, subfolder, direction, camera in itertools.product( + splits, letters, subfolders, directions, cameras + ): + current_folder = root / "optical_flow" / split / letter / subfolder / direction / camera + os.makedirs(str(current_folder), exist_ok=True) + for i in range(num_images_per_camera): + datasets_utils.make_fake_pfm_file(self.FLOW_H, self.FLOW_W, file_name=str(current_folder / f"{i}.pfm")) + + num_cameras = 2 if config["camera"] == "both" else 1 + num_passes = 2 if config["pass_name"] == "both" else 1 + num_examples = ( + (num_images_per_camera - 1) * num_cameras * len(subfolders) * len(letters) * len(splits) * num_passes + ) + return num_examples + + @datasets_utils.test_all_configs + def test_flow(self, config): + h, w = self.FLOW_H, self.FLOW_W + expected_flow = 
np.arange(3 * h * w).reshape(h, w, 3).transpose(2, 0, 1) + expected_flow = np.flip(expected_flow, axis=1) + expected_flow = expected_flow[:2, :, :] + + with self.create_dataset(config=config) as (dataset, _): + assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list) + for _, _, flow in dataset: + assert flow.shape == (2, self.FLOW_H, self.FLOW_W) + np.testing.assert_allclose(flow, expected_flow) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + with pytest.raises(ValueError, match="Unknown value 'bad' for argument pass_name"): + with self.create_dataset(pass_name="bad"): + pass + + with pytest.raises(ValueError, match="Unknown value 'bad' for argument camera"): + with self.create_dataset(camera="bad"): + pass + + +class HD1KTestCase(KittiFlowTestCase): + DATASET_CLASS = datasets.HD1K + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) / "hd1k" + + num_sequences = 4 if config["split"] == "train" else 3 + num_examples_per_train_sequence = 3 + + for seq_idx in range(num_sequences): + # Training data + datasets_utils.create_image_folder( + root / "hd1k_input", + name="image_2", + file_name_fn=lambda image_idx: f"{seq_idx:06d}_{image_idx}.png", + num_examples=num_examples_per_train_sequence, + ) + datasets_utils.create_image_folder( + root / "hd1k_flow_gt", + name="flow_occ", + file_name_fn=lambda image_idx: f"{seq_idx:06d}_{image_idx}.png", + num_examples=num_examples_per_train_sequence, + ) + + # Test data + datasets_utils.create_image_folder( + root / "hd1k_challenge", + name="image_2", + file_name_fn=lambda _: f"{seq_idx:06d}_10.png", + num_examples=1, + ) + datasets_utils.create_image_folder( + root / "hd1k_challenge", + name="image_2", + file_name_fn=lambda _: f"{seq_idx:06d}_11.png", + num_examples=1, + ) + + num_examples_per_sequence = num_examples_per_train_sequence if config["split"] == "train" 
else 2 + return num_sequences * (num_examples_per_sequence - 1) + + +class EuroSATTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.EuroSAT + FEATURE_TYPES = (PIL.Image.Image, int) + + def inject_fake_data(self, tmpdir, config): + data_folder = os.path.join(tmpdir, "eurosat", "2750") + os.makedirs(data_folder) + + num_examples_per_class = 3 + classes = ("AnnualCrop", "Forest") + for cls in classes: + datasets_utils.create_image_folder( + root=data_folder, + name=cls, + file_name_fn=lambda idx: f"{cls}_{idx}.jpg", + num_examples=num_examples_per_class, + ) + + return len(classes) * num_examples_per_class + + +class Food101TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Food101 + FEATURE_TYPES = (PIL.Image.Image, int) + + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + + def inject_fake_data(self, tmpdir: str, config): + root_folder = pathlib.Path(tmpdir) / "food-101" + image_folder = root_folder / "images" + meta_folder = root_folder / "meta" + + image_folder.mkdir(parents=True) + meta_folder.mkdir() + + num_images_per_class = 5 + + metadata = {} + n_samples_per_class = 3 if config["split"] == "train" else 2 + sampled_classes = ("apple_pie", "crab_cakes", "gyoza") + for cls in sampled_classes: + im_fnames = datasets_utils.create_image_folder( + image_folder, + cls, + file_name_fn=lambda idx: f"{idx}.jpg", + num_examples=num_images_per_class, + ) + metadata[cls] = [ + "/".join(fname.relative_to(image_folder).with_suffix("").parts) + for fname in random.choices(im_fnames, k=n_samples_per_class) + ] + + with open(meta_folder / f"{config['split']}.json", "w") as file: + file.write(json.dumps(metadata)) + + return len(sampled_classes * n_samples_per_class) + + +class FGVCAircraftTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FGVCAircraft + ADDITIONAL_CONFIGS = combinations_grid( + split=("train", "val", "trainval", "test"), annotation_level=("variant", "family", "manufacturer") 
+ ) + + def inject_fake_data(self, tmpdir: str, config): + split = config["split"] + annotation_level = config["annotation_level"] + annotation_level_to_file = { + "variant": "variants.txt", + "family": "families.txt", + "manufacturer": "manufacturers.txt", + } + + root_folder = pathlib.Path(tmpdir) / "fgvc-aircraft-2013b" + data_folder = root_folder / "data" + + classes = ["707-320", "Hawk T1", "Tornado"] + num_images_per_class = 5 + + datasets_utils.create_image_folder( + data_folder, + "images", + file_name_fn=lambda idx: f"{idx}.jpg", + num_examples=num_images_per_class * len(classes), + ) + + annotation_file = data_folder / annotation_level_to_file[annotation_level] + with open(annotation_file, "w") as file: + file.write("\n".join(classes)) + + num_samples_per_class = 4 if split == "trainval" else 2 + images_classes = [] + for i in range(len(classes)): + images_classes.extend( + [ + f"{idx} {classes[i]}" + for idx in random.sample( + range(i * num_images_per_class, (i + 1) * num_images_per_class), num_samples_per_class + ) + ] + ) + + images_annotation_file = data_folder / f"images_{annotation_level}_{split}.txt" + with open(images_annotation_file, "w") as file: + file.write("\n".join(images_classes)) + + return len(classes * num_samples_per_class) + + +class SUN397TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SUN397 + + def inject_fake_data(self, tmpdir: str, config): + data_dir = pathlib.Path(tmpdir) / "SUN397" + data_dir.mkdir() + + num_images_per_class = 5 + sampled_classes = ("abbey", "airplane_cabin", "airport_terminal") + im_paths = [] + + for cls in sampled_classes: + image_folder = data_dir / cls[0] + im_paths.extend( + datasets_utils.create_image_folder( + image_folder, + image_folder / cls, + file_name_fn=lambda idx: f"sun_{idx}.jpg", + num_examples=num_images_per_class, + ) + ) + + with open(data_dir / "ClassName.txt", "w") as file: + file.writelines("\n".join(f"/{cls[0]}/{cls}" for cls in sampled_classes)) + + 
num_samples = len(im_paths) + + return num_samples + + +class DTDTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.DTD + FEATURE_TYPES = (PIL.Image.Image, int) + + ADDITIONAL_CONFIGS = combinations_grid( + split=("train", "test", "val"), + # There is no need to test the whole matrix here, since each fold is treated exactly the same + partition=(1, 5, 10), + ) + + def inject_fake_data(self, tmpdir: str, config): + data_folder = pathlib.Path(tmpdir) / "dtd" / "dtd" + + num_images_per_class = 3 + image_folder = data_folder / "images" + image_files = [] + for cls in ("banded", "marbled", "zigzagged"): + image_files.extend( + datasets_utils.create_image_folder( + image_folder, + cls, + file_name_fn=lambda idx: f"{cls}_{idx:04d}.jpg", + num_examples=num_images_per_class, + ) + ) + + meta_folder = data_folder / "labels" + meta_folder.mkdir() + image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files] + image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2) + with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file: + file.write("\n".join(image_ids_in_config) + "\n") + + return len(image_ids_in_config) + + +class FER2013TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FER2013 + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + + FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + + def inject_fake_data(self, tmpdir, config): + base_folder = os.path.join(tmpdir, "fer2013") + os.makedirs(base_folder) + + use_icml = config.pop("use_icml", False) + use_fer = config.pop("use_fer", False) + + num_samples = 5 + + if use_icml or use_fer: + pixels_key, usage_key = (" pixels", " Usage") if use_icml else ("pixels", "Usage") + fieldnames = ("emotion", usage_key, pixels_key) if use_icml else ("emotion", pixels_key, usage_key) + filename = "icml_face_data.csv" if use_icml else "fer2013.csv" + with open(os.path.join(base_folder, 
filename), "w", newline="") as file: + writer = csv.DictWriter( + file, + fieldnames=fieldnames, + quoting=csv.QUOTE_NONNUMERIC, + quotechar='"', + ) + writer.writeheader() + for i in range(num_samples): + row = { + "emotion": str(int(torch.randint(0, 7, ()))), + usage_key: "Training" if i % 2 else "PublicTest", + pixels_key: " ".join( + str(pixel) + for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist() + ), + } + + writer.writerow(row) + else: + with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file: + writer = csv.DictWriter( + file, + fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",), + quoting=csv.QUOTE_NONNUMERIC, + quotechar='"', + ) + writer.writeheader() + for _ in range(num_samples): + row = dict( + pixels=" ".join( + str(pixel) + for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist() + ) + ) + if config["split"] == "train": + row["emotion"] = str(int(torch.randint(0, 7, ()))) + + writer.writerow(row) + + return num_samples + + def test_icml_file(self): + config = {"split": "test"} + with self.create_dataset(config=config) as (dataset, _): + assert all(s[1] is None for s in dataset) + + for split in ("train", "test"): + for d in ({"use_icml": True}, {"use_fer": True}): + config = {"split": split, **d} + with self.create_dataset(config=config) as (dataset, _): + assert all(s[1] is not None for s in dataset) + + +class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.GTSRB + FEATURE_TYPES = (PIL.Image.Image, int) + + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + + def inject_fake_data(self, tmpdir: str, config): + root_folder = os.path.join(tmpdir, "gtsrb") + os.makedirs(root_folder, exist_ok=True) + + # Train data + train_folder = os.path.join(root_folder, "GTSRB", "Training") + os.makedirs(train_folder, exist_ok=True) + + num_examples = 3 if config["split"] == "train" else 4 + classes 
= ("00000", "00042", "00012") + for class_idx in classes: + datasets_utils.create_image_folder( + train_folder, + name=class_idx, + file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm", + num_examples=num_examples, + ) + + total_number_of_examples = num_examples * len(classes) + # Test data + test_folder = os.path.join(root_folder, "GTSRB", "Final_Test", "Images") + os.makedirs(test_folder, exist_ok=True) + + with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file: + csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n") + + for _ in range(total_number_of_examples): + image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm" + datasets_utils.create_image_file(test_folder, image_file) + row = [ + image_file, + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(0, 43, size=()).item(), + ] + csv_file.write(";".join(map(str, row)) + "\n") + + return total_number_of_examples + + +class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CLEVRClassification + FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) + + def inject_fake_data(self, tmpdir, config): + data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0" + + images_folder = data_folder / "images" + image_files = datasets_utils.create_image_folder( + images_folder, config["split"], lambda idx: f"CLEVR_{config['split']}_{idx:06d}.png", num_examples=5 + ) + + scenes_folder = data_folder / "scenes" + scenes_folder.mkdir() + if config["split"] != "test": + with open(scenes_folder / f"CLEVR_{config['split']}_scenes.json", "w") as file: + json.dump( + dict( + info=dict(), + scenes=[ + dict(image_filename=image_file.name, 
objects=[dict()] * int(torch.randint(10, ()))) + for image_file in image_files + ], + ), + file, + ) + + return len(image_files) + + +class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.OxfordIIITPet + FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None))) + + ADDITIONAL_CONFIGS = combinations_grid( + split=("trainval", "test"), + target_types=("category", "binary-category", "segmentation", ["category", "segmentation"], []), + ) + + def inject_fake_data(self, tmpdir, config): + base_folder = os.path.join(tmpdir, "oxford-iiit-pet") + + classification_anns_meta = ( + dict(cls="Abyssinian", label=0, species="cat"), + dict(cls="Keeshond", label=18, species="dog"), + dict(cls="Yorkshire Terrier", label=37, species="dog"), + ) + split_and_classification_anns = [ + self._meta_to_split_and_classification_ann(meta, idx) + for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10)) + ] + image_ids, *_ = zip(*split_and_classification_anns) + + image_files = datasets_utils.create_image_folder( + base_folder, "images", file_name_fn=lambda idx: f"{image_ids[idx]}.jpg", num_examples=len(image_ids) + ) + + anns_folder = os.path.join(base_folder, "annotations") + os.makedirs(anns_folder) + split_and_classification_anns_in_split = random.choices(split_and_classification_anns, k=len(image_ids) // 2) + with open(os.path.join(anns_folder, f"{config['split']}.txt"), "w", newline="") as file: + writer = csv.writer(file, delimiter=" ") + for split_and_classification_ann in split_and_classification_anns_in_split: + writer.writerow(split_and_classification_ann) + + segmentation_files = datasets_utils.create_image_folder( + anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids) + ) + + # The dataset has some rogue files + for path in image_files[:2]: + path.with_suffix(".mat").touch() + for path in segmentation_files: + path.with_name(f".{path.name}").touch() + + return 
len(split_and_classification_anns_in_split) + + def _meta_to_split_and_classification_ann(self, meta, idx): + image_id = "_".join( + [ + *[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()], + str(idx), + ] + ) + class_id = str(meta["label"] + 1) + species = "1" if meta["species"] == "cat" else "2" + breed_id = "-1" + return (image_id, class_id, species, breed_id) + + def test_transforms_v2_wrapper_spawn(self): + expected_size = (123, 321) + with self.create_dataset(transform=v2.Resize(size=expected_size)) as (dataset, _): + datasets_utils.check_transforms_v2_wrapper_spawn(dataset, expected_size=expected_size) + + +class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StanfordCars + REQUIRED_PACKAGES = ("scipy",) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + + def inject_fake_data(self, tmpdir, config): + import scipy.io as io + from numpy.core.records import fromarrays + + num_examples = {"train": 5, "test": 7}[config["split"]] + num_classes = 3 + base_folder = pathlib.Path(tmpdir) / "stanford_cars" + + devkit = base_folder / "devkit" + devkit.mkdir(parents=True) + + if config["split"] == "train": + images_folder_name = "cars_train" + annotations_mat_path = devkit / "cars_train_annos.mat" + else: + images_folder_name = "cars_test" + annotations_mat_path = base_folder / "cars_test_annos_withlabels.mat" + + datasets_utils.create_image_folder( + root=base_folder, + name=images_folder_name, + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_examples, + ) + + classes = np.random.randint(1, num_classes + 1, num_examples, dtype=np.uint8) + fnames = [f"{i:5d}.jpg" for i in range(num_examples)] + rec_array = fromarrays( + [classes, fnames], + names=["class", "fname"], + ) + io.savemat(annotations_mat_path, {"annotations": rec_array}) + + random_class_names = ["random_name"] * num_classes + io.savemat(devkit / "cars_meta.mat", {"class_names": 
random_class_names}) + + return num_examples + + +class Country211TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Country211 + + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "valid", "test")) + + def inject_fake_data(self, tmpdir: str, config): + split_folder = pathlib.Path(tmpdir) / "country211" / config["split"] + split_folder.mkdir(parents=True, exist_ok=True) + + num_examples = { + "train": 3, + "valid": 4, + "test": 5, + }[config["split"]] + + classes = ("AD", "BS", "GR") + for cls in classes: + datasets_utils.create_image_folder( + split_folder, + name=cls, + file_name_fn=lambda idx: f"{idx}.jpg", + num_examples=num_examples, + ) + + return num_examples * len(classes) + + +class Flowers102TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Flowers102 + + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) + REQUIRED_PACKAGES = ("scipy",) + + def inject_fake_data(self, tmpdir: str, config): + base_folder = pathlib.Path(tmpdir) / "flowers-102" + + num_classes = 3 + num_images_per_split = dict(train=5, val=4, test=3) + num_images_total = sum(num_images_per_split.values()) + datasets_utils.create_image_folder( + base_folder, + "jpg", + file_name_fn=lambda idx: f"image_{idx + 1:05d}.jpg", + num_examples=num_images_total, + ) + + label_dict = dict( + labels=np.random.randint(1, num_classes + 1, size=(1, num_images_total), dtype=np.uint8), + ) + datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "imagelabels.mat"), label_dict) + + setid_mat = np.arange(1, num_images_total + 1, dtype=np.uint16) + np.random.shuffle(setid_mat) + setid_dict = dict( + trnid=setid_mat[: num_images_per_split["train"]].reshape(1, -1), + valid=setid_mat[num_images_per_split["train"] : -num_images_per_split["test"]].reshape(1, -1), + tstid=setid_mat[-num_images_per_split["test"] :].reshape(1, -1), + ) + datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "setid.mat"), setid_dict) + + return 
num_images_per_split[config["split"]] + + +class PCAMTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.PCAM + + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) + REQUIRED_PACKAGES = ("h5py",) + + def inject_fake_data(self, tmpdir: str, config): + base_folder = pathlib.Path(tmpdir) / "pcam" + base_folder.mkdir() + + num_images = {"train": 2, "test": 3, "val": 4}[config["split"]] + + images_file = datasets.PCAM._FILES[config["split"]]["images"][0] + with datasets_utils.lazy_importer.h5py.File(str(base_folder / images_file), "w") as f: + f["x"] = np.random.randint(0, 256, size=(num_images, 10, 10, 3), dtype=np.uint8) + + targets_file = datasets.PCAM._FILES[config["split"]]["targets"][0] + with datasets_utils.lazy_importer.h5py.File(str(base_folder / targets_file), "w") as f: + f["y"] = np.random.randint(0, 2, size=(num_images, 1, 1, 1), dtype=np.uint8) + + return num_images + + +class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.RenderedSST2 + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) + SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"} + + def inject_fake_data(self, tmpdir: str, config): + root_folder = pathlib.Path(tmpdir) / "rendered-sst2" + image_folder = root_folder / self.SPLIT_TO_FOLDER[config["split"]] + + num_images_per_class = {"train": 5, "test": 6, "val": 7} + sampled_classes = ["positive", "negative"] + for cls in sampled_classes: + datasets_utils.create_image_folder( + image_folder, + cls, + file_name_fn=lambda idx: f"{idx}.png", + num_examples=num_images_per_class[config["split"]], + ) + + return len(sampled_classes) * num_images_per_class[config["split"]] + + +class Kitti2012StereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Kitti2012Stereo + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), 
(np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = pathlib.Path(tmpdir) / "Kitti2012" + os.makedirs(kitti_dir, exist_ok=True) + + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"train": 4, "test": 3}.get(config["split"], 0) + + datasets_utils.create_image_folder( + root=split_dir, + name="colored_0", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="colored_1", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_noc", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2012 uses a single channel image for disparities + size=(1, 100, 200), + ) + + return num_examples + + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, mask in dataset: + assert mask is None + datasets_utils.shape_test_for_stereo(left, right, disparity) + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, mask in dataset: + assert mask is None + assert disparity is None + datasets_utils.shape_test_for_stereo(left, right) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class Kitti2015StereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Kitti2015Stereo + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = 
pathlib.Path(tmpdir) / "Kitti2015" + os.makedirs(kitti_dir, exist_ok=True) + + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"train": 4, "test": 6}.get(config["split"], 0) + + datasets_utils.create_image_folder( + root=split_dir, + name="image_2", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="image_3", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_0", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) + + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) + + return num_examples + + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, mask in dataset: + assert mask is None + datasets_utils.shape_test_for_stereo(left, right, disparity) + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, mask in dataset: + assert mask is None + assert disparity is None + datasets_utils.shape_test_for_stereo(left, right) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class CarlaStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CarlaStereo + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, None)) + + @staticmethod + def 
_create_scene_folders(num_examples: int, root_dir: Union[str, pathlib.Path]): + # make the root_dir if it does not exits + os.makedirs(root_dir, exist_ok=True) + + for i in range(num_examples): + scene_dir = pathlib.Path(root_dir) / f"scene_{i}" + os.makedirs(scene_dir, exist_ok=True) + # populate with left right images + datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100)) + datasets_utils.make_fake_pfm_file(100, 100, file_name=str(scene_dir / "disp0GT.pfm")) + datasets_utils.make_fake_pfm_file(100, 100, file_name=str(scene_dir / "disp1GT.pfm")) + + def inject_fake_data(self, tmpdir, config): + carla_dir = pathlib.Path(tmpdir) / "carla-highres" + os.makedirs(carla_dir, exist_ok=True) + + split_dir = pathlib.Path(carla_dir) / "trainingF" + os.makedirs(split_dir, exist_ok=True) + + num_examples = 6 + self._create_scene_folders(num_examples=num_examples, root_dir=split_dir) + + return num_examples + + def test_train_splits(self): + with self.create_dataset() as (dataset, _): + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity) + + +class CREStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CREStereo + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, np.ndarray, type(None)) + + def inject_fake_data(self, tmpdir, config): + crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" + os.makedirs(crestereo_dir, exist_ok=True) + + examples = {"tree": 2, "shapenet": 3, "reflective": 6, "hole": 5} + + for category_name in ["shapenet", "reflective", "tree", "hole"]: + split_dir = crestereo_dir / category_name + os.makedirs(split_dir, exist_ok=True) + num_examples = examples[category_name] + + for idx in range(num_examples): + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", 
size=(100, 100)) + # these are going to end up being gray scale images + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.png", size=(1, 100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.png", size=(1, 100, 100)) + + return sum(examples.values()) + + def test_splits(self): + with self.create_dataset() as (dataset, _): + for left, right, disparity, mask in dataset: + assert mask is None + datasets_utils.shape_test_for_stereo(left, right, disparity) + + +class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FallingThingsStereo + ADDITIONAL_CONFIGS = combinations_grid(variant=("single", "mixed", "both")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) + + @staticmethod + def _make_dummy_depth_map(root: str, name: str, size: Tuple[int, int]): + file = pathlib.Path(root) / name + image = np.ones((size[0], size[1]), dtype=np.uint8) + PIL.Image.fromarray(image).save(file) + + @staticmethod + def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> None: + root = pathlib.Path(root) / scene_name + os.makedirs(root, exist_ok=True) + # jpg images + datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[1], size[0])) + datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0])) + # single channel depth maps + FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1])) + FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1])) + # camera settings json. 
Minimal example for _read_disparity function testing + settings_json = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]} + with open(root / "_camera_settings.json", "w") as f: + json.dump(settings_json, f) + + def inject_fake_data(self, tmpdir, config): + fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings" + os.makedirs(fallingthings_dir, exist_ok=True) + + num_examples = {"single": 2, "mixed": 3, "both": 4}.get(config["variant"], 0) + + variants = { + "single": ["single"], + "mixed": ["mixed"], + "both": ["single", "mixed"], + }.get(config["variant"], []) + + variant_dir_prefixes = { + "single": 1, + "mixed": 0, + } + + for variant_name in variants: + variant_dir = pathlib.Path(fallingthings_dir) / variant_name + os.makedirs(variant_dir, exist_ok=True) + + for i in range(variant_dir_prefixes[variant_name]): + variant_dir = variant_dir / f"{i:02d}" + os.makedirs(variant_dir, exist_ok=True) + + for i in range(num_examples): + self._make_scene_folder( + root=variant_dir, + scene_name=f"scene_{i:06d}", + size=(100, 200), + ) + + if config["variant"] == "both": + num_examples *= 2 + return num_examples + + def test_splits(self): + for variant_name in ["single", "mixed"]: + with self.create_dataset(variant=variant_name) as (dataset, _): + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument variant"): + with self.create_dataset(variant="bad"): + pass + + +class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SceneFlowStereo + ADDITIONAL_CONFIGS = combinations_grid( + variant=("FlyingThings3D", "Driving", "Monkaa"), pass_name=("clean", "final", "both") + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) + + @staticmethod + def _create_pfm_folder( + root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: 
Tuple[int, int] + ) -> None: + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + for i in range(num_examples): + datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) + + def inject_fake_data(self, tmpdir, config): + scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" + os.makedirs(scene_flow_dir, exist_ok=True) + + variant_dir = scene_flow_dir / config["variant"] + variant_dir_prefixes = { + "Monkaa": 0, + "Driving": 2, + "FlyingThings3D": 2, + } + os.makedirs(variant_dir, exist_ok=True) + + num_examples = {"FlyingThings3D": 4, "Driving": 6, "Monkaa": 5}.get(config["variant"], 0) + + passes = { + "clean": ["frames_cleanpass"], + "final": ["frames_finalpass"], + "both": ["frames_cleanpass", "frames_finalpass"], + }.get(config["pass_name"], []) + + for pass_dir_name in passes: + # create pass directories + pass_dir = variant_dir / pass_dir_name + disp_dir = variant_dir / "disparity" + os.makedirs(pass_dir, exist_ok=True) + os.makedirs(disp_dir, exist_ok=True) + + for i in range(variant_dir_prefixes.get(config["variant"], 0)): + pass_dir = pass_dir / str(i) + disp_dir = disp_dir / str(i) + os.makedirs(pass_dir, exist_ok=True) + os.makedirs(disp_dir, exist_ok=True) + + for direction in ["left", "right"]: + for scene_idx in range(num_examples): + os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) + datasets_utils.create_image_folder( + root=pass_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=1, + size=(3, 200, 100), + ) + + os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) + self._create_pfm_folder( + root=disp_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.pfm", + num_examples=1, + size=(100, 200), + ) + + if config["pass_name"] == "both": + num_examples *= 2 + return num_examples + + def test_splits(self): + for variant_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], 
["clean", "final"]): + with self.create_dataset(variant=variant_name, pass_name=pass_name) as (dataset, _): + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument variant"): + with self.create_dataset(variant="bad"): + pass + + +class InStereo2k(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.InStereo2k + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + + @staticmethod + def _make_scene_folder(root: str, name: str, size: Tuple[int, int]): + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + datasets_utils.create_image_file(root=root, name="left.png", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="right.png", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="left_disp.png", size=(1, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="right_disp.png", size=(1, size[0], size[1])) + + def inject_fake_data(self, tmpdir, config): + in_stereo_dir = pathlib.Path(tmpdir) / "InStereo2k" + os.makedirs(in_stereo_dir, exist_ok=True) + + split_dir = pathlib.Path(in_stereo_dir) / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"train": 4, "test": 5}.get(config["split"], 0) + + for i in range(num_examples): + self._make_scene_folder(split_dir, f"scene_{i:06d}", (100, 200)) + + return num_examples + + def test_splits(self): + for split_name in ["train", "test"]: + with self.create_dataset(split=split_name) as (dataset, _): + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity) + + def test_bad_input(self): + with pytest.raises( + ValueError, match="Unknown value 'bad' for argument split. Valid values are {'train', 'test'}." 
+ ): + with self.create_dataset(split="bad"): + pass + + +class SintelStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SintelStereo + ADDITIONAL_CONFIGS = combinations_grid(pass_name=("final", "clean", "both")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + sintel_dir = pathlib.Path(tmpdir) / "Sintel" + os.makedirs(sintel_dir, exist_ok=True) + + split_dir = pathlib.Path(sintel_dir) / "training" + os.makedirs(split_dir, exist_ok=True) + + # a single setting, since there are no splits + num_examples = {"final": 2, "clean": 3} + pass_names = { + "final": ["final"], + "clean": ["clean"], + "both": ["final", "clean"], + }.get(config["pass_name"], []) + + for p in pass_names: + for view in [f"{p}_left", f"{p}_right"]: + root = split_dir / view + os.makedirs(root, exist_ok=True) + + datasets_utils.create_image_folder( + root=root, + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples[p], + size=(3, 100, 200), + ) + + datasets_utils.create_image_folder( + root=split_dir / "occlusions", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=max(num_examples.values()), + size=(1, 100, 200), + ) + + datasets_utils.create_image_folder( + root=split_dir / "outofframe", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=max(num_examples.values()), + size=(1, 100, 200), + ) + + datasets_utils.create_image_folder( + root=split_dir / "disparities", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=max(num_examples.values()), + size=(3, 100, 200), + ) + + if config["pass_name"] == "both": + num_examples = sum(num_examples.values()) + else: + num_examples = num_examples.get(config["pass_name"], 0) + + return num_examples + + def test_splits(self): + for pass_name in ["final", "clean", "both"]: + with self.create_dataset(pass_name=pass_name) as (dataset, 
_): + for left, right, disparity, valid_mask in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity, valid_mask) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument pass_name"): + with self.create_dataset(pass_name="bad"): + pass + + +class ETH3DStereoestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.ETH3DStereo + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _create_scene_folder(num_examples: int, root_dir: str): + # make the root_dir if it does not exits + root_dir = pathlib.Path(root_dir) + os.makedirs(root_dir, exist_ok=True) + + for i in range(num_examples): + scene_dir = root_dir / f"scene_{i}" + os.makedirs(scene_dir, exist_ok=True) + # populate with left right images + datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100)) + + @staticmethod + def _create_annotation_folder(num_examples: int, root_dir: str): + # make the root_dir if it does not exits + root_dir = pathlib.Path(root_dir) + os.makedirs(root_dir, exist_ok=True) + + # create scene directories + for i in range(num_examples): + scene_dir = root_dir / f"scene_{i}" + os.makedirs(scene_dir, exist_ok=True) + # populate with a random png file for occlusion mask, and a pfm file for disparity + datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100)) + + pfm_path = scene_dir / "disp0GT.pfm" + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path) + + def inject_fake_data(self, tmpdir, config): + eth3d_dir = pathlib.Path(tmpdir) / "ETH3D" + + num_examples = 2 if config["split"] == "train" else 3 + + split_name = "two_view_training" if config["split"] == "train" else "two_view_test" + split_dir = eth3d_dir / 
split_name + self._create_scene_folder(num_examples, split_dir) + + if config["split"] == "train": + annot_dir = eth3d_dir / "two_view_training_gt" + self._create_annotation_folder(num_examples, annot_dir) + + return num_examples + + def test_training_splits(self): + with self.create_dataset(split="train") as (dataset, _): + for left, right, disparity, valid_mask in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity, valid_mask) + + def test_testing_splits(self): + with self.create_dataset(split="test") as (dataset, _): + assert all(d == (None, None) for d in dataset._disparities) + for left, right, disparity, valid_mask in dataset: + assert valid_mask is None + datasets_utils.shape_test_for_stereo(left, right, disparity) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Middlebury2014Stereo + ADDITIONAL_CONFIGS = combinations_grid( + split=("train", "additional"), + calibration=("perfect", "imperfect", "both"), + use_ambient_views=(True, False), + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> None: + calibrations = [None] if split == "test" else ["-perfect", "-imperfect"] + root_dir = pathlib.Path(root_dir) + + for c in calibrations: + scene_dir = root_dir / f"{scene_name}{c}" + os.makedirs(scene_dir, exist_ok=True) + # make normal images first + datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1E.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1L.png", size=(3, 100, 
100)) + # these are going to end up being gray scale images + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=scene_dir / "disp0.pfm") + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=scene_dir / "disp1.pfm") + + def inject_fake_data(self, tmpdir, config): + split_scene_map = { + "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano"], + "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], + "test": ["Plants", "Classroom2E", "Classroom2", "Australia"], + } + + middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") + os.makedirs(middlebury_dir, exist_ok=True) + + split_dir = middlebury_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"train": 2, "additional": 3, "test": 4}.get(config["split"], 0) + for idx in range(num_examples): + scene_name = split_scene_map[config["split"]][idx] + self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) + + if config["calibration"] == "both": + num_examples *= 2 + return num_examples + + def test_train_splits(self): + for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): + with self.create_dataset(split=split, calibration=calibration) as (dataset, _): + for left, right, disparity, mask in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity, mask) + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split, calibration=None) as (dataset, _): + for left, right, disparity, mask in dataset: + datasets_utils.shape_test_for_stereo(left, right) + + def test_augmented_view_usage(self): + with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): + for left, right, disparity, mask in dataset: + datasets_utils.shape_test_for_stereo(left, right, disparity, mask) + + def test_value_err_train(self): + # train set invalid + split = "train" + calibration = None + with pytest.raises( + ValueError, + match=f"Split 
'{split}' has calibration settings, however None was provided as an argument." + f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", + ): + with self.create_dataset(split=split, calibration=calibration): + pass + + def test_value_err_test(self): + # test set invalid + split = "test" + calibration = "perfect" + with pytest.raises( + ValueError, match="Split 'test' has only no calibration settings, please set `calibration=None`." + ): + with self.create_dataset(split=split, calibration=calibration): + pass + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class ImagenetteTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Imagenette + ADDITIONAL_CONFIGS = combinations_grid(split=["train", "val"], size=["full", "320px", "160px"]) + + _WNIDS = [ + "n01440764", + "n02102040", + "n02979186", + "n03000684", + "n03028079", + "n03394916", + "n03417042", + "n03425413", + "n03445777", + "n03888257", + ] + + def inject_fake_data(self, tmpdir, config): + archive_root = "imagenette2" + if config["size"] != "full": + archive_root += f"-{config['size'].replace('px', '')}" + image_root = pathlib.Path(tmpdir) / archive_root / config["split"] + + num_images_per_class = 3 + for wnid in self._WNIDS: + datasets_utils.create_image_folder( + root=image_root, + name=wnid, + file_name_fn=lambda idx: f"{wnid}_{idx}.JPEG", + num_examples=num_images_per_class, + ) + + return num_images_per_class * len(self._WNIDS) + + +class TestDatasetWrapper: + def test_unknown_type(self): + unknown_object = object() + with pytest.raises( + TypeError, match=re.escape("is meant for subclasses of `torchvision.datasets.VisionDataset`") + ): + datasets.wrap_dataset_for_transforms_v2(unknown_object) + + def test_unknown_dataset(self): + class MyVisionDataset(datasets.VisionDataset): + pass + + dataset = 
MyVisionDataset("root") + + with pytest.raises(TypeError, match="No wrapper exist"): + datasets.wrap_dataset_for_transforms_v2(dataset) + + def test_missing_wrapper(self): + dataset = datasets.FakeData() + + with pytest.raises(TypeError, match="please open an issue"): + datasets.wrap_dataset_for_transforms_v2(dataset) + + def test_subclass(self, mocker): + from torchvision import tv_tensors + + sentinel = object() + mocker.patch.dict( + tv_tensors._dataset_wrapper.WRAPPER_FACTORIES, + clear=False, + values={datasets.FakeData: lambda dataset, target_keys: lambda idx, sample: sentinel}, + ) + + class MyFakeData(datasets.FakeData): + pass + + dataset = MyFakeData() + wrapped_dataset = datasets.wrap_dataset_for_transforms_v2(dataset) + + assert wrapped_dataset[0] is sentinel + + +if __name__ == "__main__": unittest.main() diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py new file mode 100644 index 00000000000..856a02b9d44 --- /dev/null +++ b/test/test_datasets_download.py @@ -0,0 +1,388 @@ +import contextlib +import itertools +import shutil +import tempfile +import time +import traceback +import unittest.mock +import warnings +from datetime import datetime +from os import path +from urllib.error import HTTPError, URLError +from urllib.parse import urlparse +from urllib.request import Request, urlopen + +import pytest +from torchvision import datasets +from torchvision.datasets.utils import _get_redirect_url, USER_AGENT + + +def limit_requests_per_time(min_secs_between_requests=2.0): + last_requests = {} + + def outer_wrapper(fn): + def inner_wrapper(request, *args, **kwargs): + url = request.full_url if isinstance(request, Request) else request + + netloc = urlparse(url).netloc + last_request = last_requests.get(netloc) + if last_request is not None: + elapsed_secs = (datetime.now() - last_request).total_seconds() + delta = min_secs_between_requests - elapsed_secs + if delta > 0: + time.sleep(delta) + + response = fn(request, *args, **kwargs) 
+ last_requests[netloc] = datetime.now() + + return response + + return inner_wrapper + + return outer_wrapper + + +urlopen = limit_requests_per_time()(urlopen) + + +def resolve_redirects(max_hops=3): + def outer_wrapper(fn): + def inner_wrapper(request, *args, **kwargs): + initial_url = request.full_url if isinstance(request, Request) else request + url = _get_redirect_url(initial_url, max_hops=max_hops) + + if url == initial_url: + return fn(request, *args, **kwargs) + + warnings.warn(f"The URL {initial_url} ultimately redirects to {url}.") + + if not isinstance(request, Request): + return fn(url, *args, **kwargs) + + request_attrs = { + attr: getattr(request, attr) for attr in ("data", "headers", "origin_req_host", "unverifiable") + } + # the 'method' attribute does only exist if the request was created with it + if hasattr(request, "method"): + request_attrs["method"] = request.method + + return fn(Request(url, **request_attrs), *args, **kwargs) + + return inner_wrapper + + return outer_wrapper + + +urlopen = resolve_redirects()(urlopen) + + +@contextlib.contextmanager +def log_download_attempts( + urls, + *, + dataset_module, +): + def maybe_add_mock(*, module, name, stack, lst=None): + patcher = unittest.mock.patch(f"torchvision.datasets.{module}.{name}") + + try: + mock = stack.enter_context(patcher) + except AttributeError: + return + + if lst is not None: + lst.append(mock) + + with contextlib.ExitStack() as stack: + download_url_mocks = [] + download_file_from_google_drive_mocks = [] + for module in [dataset_module, "utils"]: + maybe_add_mock(module=module, name="download_url", stack=stack, lst=download_url_mocks) + maybe_add_mock( + module=module, + name="download_file_from_google_drive", + stack=stack, + lst=download_file_from_google_drive_mocks, + ) + maybe_add_mock(module=module, name="extract_archive", stack=stack) + + try: + yield + finally: + for download_url_mock in download_url_mocks: + for args, kwargs in download_url_mock.call_args_list: + 
urls.append(args[0] if args else kwargs["url"]) + + for download_file_from_google_drive_mock in download_file_from_google_drive_mocks: + for args, kwargs in download_file_from_google_drive_mock.call_args_list: + file_id = args[0] if args else kwargs["file_id"] + urls.append(f"https://drive.google.com/file/d/{file_id}") + + +def retry(fn, times=1, wait=5.0): + tbs = [] + for _ in range(times + 1): + try: + return fn() + except AssertionError as error: + tbs.append("".join(traceback.format_exception(type(error), error, error.__traceback__))) + time.sleep(wait) + else: + raise AssertionError( + "\n".join( + ( + "\n", + *[f"{'_' * 40} {idx:2d} {'_' * 40}\n\n{tb}" for idx, tb in enumerate(tbs, 1)], + ( + f"Assertion failed {times + 1} times with {wait:.1f} seconds intermediate wait time. " + f"You can find the the full tracebacks above." + ), + ) + ) + ) + + +@contextlib.contextmanager +def assert_server_response_ok(): + try: + yield + except HTTPError as error: + raise AssertionError(f"The server returned {error.code}: {error.reason}.") from error + except URLError as error: + raise AssertionError( + "Connection not possible due to SSL." if "SSL" in str(error) else "The request timed out." + ) from error + except RecursionError as error: + raise AssertionError(str(error)) from error + + +def assert_url_is_accessible(url, timeout=5.0): + request = Request(url, headers={"User-Agent": USER_AGENT}, method="HEAD") + with assert_server_response_ok(): + urlopen(request, timeout=timeout) + + +def collect_urls(dataset_cls, *args, **kwargs): + urls = [] + with contextlib.suppress(Exception), log_download_attempts( + urls, dataset_module=dataset_cls.__module__.split(".")[-1] + ): + dataset_cls(*args, **kwargs) + + return [(url, f"{dataset_cls.__name__}, {url}") for url in urls] + + +# This is a workaround since fixtures, such as the built-in tmp_dir, can only be used within a test but not within a +# parametrization. 
Thus, we use a single root directory for all datasets and remove it when all download tests are run. +ROOT = tempfile.mkdtemp() + + +@pytest.fixture(scope="module", autouse=True) +def root(): + yield ROOT + shutil.rmtree(ROOT) + + +def places365(): + return itertools.chain.from_iterable( + [ + collect_urls( + datasets.Places365, + ROOT, + split=split, + small=small, + download=True, + ) + for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)) + ] + ) + + +def caltech101(): + return collect_urls(datasets.Caltech101, ROOT, download=True) + + +def caltech256(): + return collect_urls(datasets.Caltech256, ROOT, download=True) + + +def cifar10(): + return collect_urls(datasets.CIFAR10, ROOT, download=True) + + +def cifar100(): + return collect_urls(datasets.CIFAR100, ROOT, download=True) + + +def voc(): + # TODO: Also test the "2007-test" key + return itertools.chain.from_iterable( + [ + collect_urls(datasets.VOCSegmentation, ROOT, year=year, download=True) + for year in ("2007", "2008", "2009", "2010", "2011", "2012") + ] + ) + + +def mnist(): + with unittest.mock.patch.object(datasets.MNIST, "mirrors", datasets.MNIST.mirrors[-1:]): + return collect_urls(datasets.MNIST, ROOT, download=True) + + +def fashion_mnist(): + return collect_urls(datasets.FashionMNIST, ROOT, download=True) + + +def kmnist(): + return collect_urls(datasets.KMNIST, ROOT, download=True) + + +def emnist(): + # the 'split' argument can be any valid one, since everything is downloaded anyway + return collect_urls(datasets.EMNIST, ROOT, split="byclass", download=True) + + +def qmnist(): + return itertools.chain.from_iterable( + [collect_urls(datasets.QMNIST, ROOT, what=what, download=True) for what in ("train", "test", "nist")] + ) + + +def moving_mnist(): + return collect_urls(datasets.MovingMNIST, ROOT, download=True) + + +def omniglot(): + return itertools.chain.from_iterable( + [collect_urls(datasets.Omniglot, ROOT, background=background, download=True) 
for background in (True, False)] + ) + + +def phototour(): + return itertools.chain.from_iterable( + [ + collect_urls(datasets.PhotoTour, ROOT, name=name, download=True) + # The names postfixed with '_harris' point to the domain 'matthewalunbrown.com'. For some reason all + # requests timeout from within CI. They are disabled until this is resolved. + for name in ("notredame", "yosemite", "liberty") # "notredame_harris", "yosemite_harris", "liberty_harris" + ] + ) + + +def sbdataset(): + return collect_urls(datasets.SBDataset, ROOT, download=True) + + +def sbu(): + return collect_urls(datasets.SBU, ROOT, download=True) + + +def semeion(): + return collect_urls(datasets.SEMEION, ROOT, download=True) + + +def stl10(): + return collect_urls(datasets.STL10, ROOT, download=True) + + +def svhn(): + return itertools.chain.from_iterable( + [collect_urls(datasets.SVHN, ROOT, split=split, download=True) for split in ("train", "test", "extra")] + ) + + +def usps(): + return itertools.chain.from_iterable( + [collect_urls(datasets.USPS, ROOT, train=train, download=True) for train in (True, False)] + ) + + +def celeba(): + return collect_urls(datasets.CelebA, ROOT, download=True) + + +def widerface(): + return collect_urls(datasets.WIDERFace, ROOT, download=True) + + +def kinetics(): + return itertools.chain.from_iterable( + [ + collect_urls( + datasets.Kinetics, + path.join(ROOT, f"Kinetics{num_classes}"), + frames_per_clip=1, + num_classes=num_classes, + split=split, + download=True, + ) + for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val")) + ] + ) + + +def kitti(): + return itertools.chain.from_iterable( + [collect_urls(datasets.Kitti, ROOT, train=train, download=True) for train in (True, False)] + ) + + +def url_parametrization(*dataset_urls_and_ids_fns): + return pytest.mark.parametrize( + "url", + [ + pytest.param(url, id=id) + for dataset_urls_and_ids_fn in dataset_urls_and_ids_fns + for url, id in sorted(set(dataset_urls_and_ids_fn())) + 
], + ) + + +@url_parametrization( + caltech101, + caltech256, + cifar10, + cifar100, + # The VOC download server is unstable. See https://github.com/pytorch/vision/issues/2953 for details. + # voc, + mnist, + fashion_mnist, + kmnist, + emnist, + qmnist, + omniglot, + phototour, + sbdataset, + semeion, + stl10, + svhn, + usps, + celeba, + widerface, + kinetics, + kitti, + places365, + sbu, +) +def test_url_is_accessible(url): + """ + If you see this test failing, find the offending dataset in the parametrization and move it to + ``test_url_is_not_accessible`` and link an issue detailing the problem. + """ + retry(lambda: assert_url_is_accessible(url)) + + +# TODO: if e.g. caltech101 starts failing, remove the pytest.mark.parametrize below and use +# @url_parametrization(caltech101) +@pytest.mark.parametrize("url", ("http://url_that_doesnt_exist.com",)) # here until we actually have a failing dataset +@pytest.mark.xfail +def test_url_is_not_accessible(url): + """ + As the name implies, this test is the 'inverse' of ``test_url_is_accessible``. Since the download servers are + beyond our control, some files might not be accessible for longer stretches of time. Still, we want to know if they + come back up, or if we need to remove the download functionality of the dataset for good. + + If you see this test failing, find the offending dataset in the parametrization and move it to + ``test_url_is_accessible``. 
+ """ + assert_url_is_accessible(url) diff --git a/test/test_datasets_samplers.py b/test/test_datasets_samplers.py index 90f3f3806aa..9e3826b2c13 100644 --- a/test/test_datasets_samplers.py +++ b/test/test_datasets_samplers.py @@ -1,117 +1,86 @@ -import contextlib -import sys -import os +import pytest import torch -import unittest - +from common_utils import assert_equal, get_list_of_videos from torchvision import io -from torchvision.datasets.samplers import ( - DistributedSampler, - RandomClipSampler, - UniformClipSampler, -) -from torchvision.datasets.video_utils import VideoClips, unfold -from torchvision import get_video_backend - -from common_utils import get_tmp_dir - - -@contextlib.contextmanager -def get_list_of_videos(num_videos=5, sizes=None, fps=None): - with get_tmp_dir() as tmp_dir: - names = [] - for i in range(num_videos): - if sizes is None: - size = 5 * (i + 1) - else: - size = sizes[i] - if fps is None: - f = 5 - else: - f = fps[i] - data = torch.randint(0, 255, (size, 300, 400, 3), dtype=torch.uint8) - name = os.path.join(tmp_dir, "{}.mp4".format(i)) - names.append(name) - io.write_video(name, data, fps=f) - - yield names +from torchvision.datasets.samplers import DistributedSampler, RandomClipSampler, UniformClipSampler +from torchvision.datasets.video_utils import VideoClips -@unittest.skipIf(not io.video._av_available(), "this test requires av") -class Tester(unittest.TestCase): - def test_random_clip_sampler(self): - with get_list_of_videos(num_videos=3, sizes=[25, 25, 25]) as video_list: - video_clips = VideoClips(video_list, 5, 5) - sampler = RandomClipSampler(video_clips, 3) - self.assertEqual(len(sampler), 3 * 3) - indices = torch.tensor(list(iter(sampler))) - videos = indices // 5 - v_idxs, count = torch.unique(videos, return_counts=True) - self.assertTrue(v_idxs.equal(torch.tensor([0, 1, 2]))) - self.assertTrue(count.equal(torch.tensor([3, 3, 3]))) +@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av") 
+class TestDatasetsSamplers: + def test_random_clip_sampler(self, tmpdir): + video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25]) + video_clips = VideoClips(video_list, 5, 5) + sampler = RandomClipSampler(video_clips, 3) + assert len(sampler) == 3 * 3 + indices = torch.tensor(list(iter(sampler))) + videos = torch.div(indices, 5, rounding_mode="floor") + v_idxs, count = torch.unique(videos, return_counts=True) + assert_equal(v_idxs, torch.tensor([0, 1, 2])) + assert_equal(count, torch.tensor([3, 3, 3])) - def test_random_clip_sampler_unequal(self): - with get_list_of_videos(num_videos=3, sizes=[10, 25, 25]) as video_list: - video_clips = VideoClips(video_list, 5, 5) - sampler = RandomClipSampler(video_clips, 3) - self.assertEqual(len(sampler), 2 + 3 + 3) - indices = list(iter(sampler)) - self.assertIn(0, indices) - self.assertIn(1, indices) - # remove elements of the first video, to simplify testing - indices.remove(0) - indices.remove(1) - indices = torch.tensor(indices) - 2 - videos = indices // 5 - v_idxs, count = torch.unique(videos, return_counts=True) - self.assertTrue(v_idxs.equal(torch.tensor([0, 1]))) - self.assertTrue(count.equal(torch.tensor([3, 3]))) + def test_random_clip_sampler_unequal(self, tmpdir): + video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25]) + video_clips = VideoClips(video_list, 5, 5) + sampler = RandomClipSampler(video_clips, 3) + assert len(sampler) == 2 + 3 + 3 + indices = list(iter(sampler)) + assert 0 in indices + assert 1 in indices + # remove elements of the first video, to simplify testing + indices.remove(0) + indices.remove(1) + indices = torch.tensor(indices) - 2 + videos = torch.div(indices, 5, rounding_mode="floor") + v_idxs, count = torch.unique(videos, return_counts=True) + assert_equal(v_idxs, torch.tensor([0, 1])) + assert_equal(count, torch.tensor([3, 3])) - def test_uniform_clip_sampler(self): - with get_list_of_videos(num_videos=3, sizes=[25, 25, 25]) as video_list: - 
video_clips = VideoClips(video_list, 5, 5) - sampler = UniformClipSampler(video_clips, 3) - self.assertEqual(len(sampler), 3 * 3) - indices = torch.tensor(list(iter(sampler))) - videos = indices // 5 - v_idxs, count = torch.unique(videos, return_counts=True) - self.assertTrue(v_idxs.equal(torch.tensor([0, 1, 2]))) - self.assertTrue(count.equal(torch.tensor([3, 3, 3]))) - self.assertTrue(indices.equal(torch.tensor([0, 2, 4, 5, 7, 9, 10, 12, 14]))) + def test_uniform_clip_sampler(self, tmpdir): + video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25]) + video_clips = VideoClips(video_list, 5, 5) + sampler = UniformClipSampler(video_clips, 3) + assert len(sampler) == 3 * 3 + indices = torch.tensor(list(iter(sampler))) + videos = torch.div(indices, 5, rounding_mode="floor") + v_idxs, count = torch.unique(videos, return_counts=True) + assert_equal(v_idxs, torch.tensor([0, 1, 2])) + assert_equal(count, torch.tensor([3, 3, 3])) + assert_equal(indices, torch.tensor([0, 2, 4, 5, 7, 9, 10, 12, 14])) - def test_uniform_clip_sampler_insufficient_clips(self): - with get_list_of_videos(num_videos=3, sizes=[10, 25, 25]) as video_list: - video_clips = VideoClips(video_list, 5, 5) - sampler = UniformClipSampler(video_clips, 3) - self.assertEqual(len(sampler), 3 * 3) - indices = torch.tensor(list(iter(sampler))) - self.assertTrue(indices.equal(torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11]))) + def test_uniform_clip_sampler_insufficient_clips(self, tmpdir): + video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25]) + video_clips = VideoClips(video_list, 5, 5) + sampler = UniformClipSampler(video_clips, 3) + assert len(sampler) == 3 * 3 + indices = torch.tensor(list(iter(sampler))) + assert_equal(indices, torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11])) - def test_distributed_sampler_and_uniform_clip_sampler(self): - with get_list_of_videos(num_videos=3, sizes=[25, 25, 25]) as video_list: - video_clips = VideoClips(video_list, 5, 5) - clip_sampler = 
UniformClipSampler(video_clips, 3) + def test_distributed_sampler_and_uniform_clip_sampler(self, tmpdir): + video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25]) + video_clips = VideoClips(video_list, 5, 5) + clip_sampler = UniformClipSampler(video_clips, 3) - distributed_sampler_rank0 = DistributedSampler( - clip_sampler, - num_replicas=2, - rank=0, - group_size=3, - ) - indices = torch.tensor(list(iter(distributed_sampler_rank0))) - self.assertEqual(len(distributed_sampler_rank0), 6) - self.assertTrue(indices.equal(torch.tensor([0, 2, 4, 10, 12, 14]))) + distributed_sampler_rank0 = DistributedSampler( + clip_sampler, + num_replicas=2, + rank=0, + group_size=3, + ) + indices = torch.tensor(list(iter(distributed_sampler_rank0))) + assert len(distributed_sampler_rank0) == 6 + assert_equal(indices, torch.tensor([0, 2, 4, 10, 12, 14])) - distributed_sampler_rank1 = DistributedSampler( - clip_sampler, - num_replicas=2, - rank=1, - group_size=3, - ) - indices = torch.tensor(list(iter(distributed_sampler_rank1))) - self.assertEqual(len(distributed_sampler_rank1), 6) - self.assertTrue(indices.equal(torch.tensor([5, 7, 9, 0, 2, 4]))) + distributed_sampler_rank1 = DistributedSampler( + clip_sampler, + num_replicas=2, + rank=1, + group_size=3, + ) + indices = torch.tensor(list(iter(distributed_sampler_rank1))) + assert len(distributed_sampler_rank1) == 6 + assert_equal(indices, torch.tensor([5, 7, 9, 0, 2, 4])) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_datasets_transforms.py b/test/test_datasets_transforms.py deleted file mode 100644 index 6cffd4f76a9..00000000000 --- a/test/test_datasets_transforms.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -import shutil -import contextlib -import tempfile -import unittest -from torchvision.datasets import ImageFolder - -FAKEDATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), - 'assets', 'fakedata') - - 
-@contextlib.contextmanager -def tmp_dir(src=None, **kwargs): - tmp_dir = tempfile.mkdtemp(**kwargs) - if src is not None: - os.rmdir(tmp_dir) - shutil.copytree(src, tmp_dir) - try: - yield tmp_dir - finally: - shutil.rmtree(tmp_dir) - - -def mock_transform(return_value, arg_list): - def mock(arg): - arg_list.append(arg) - return return_value - return mock - - -class Tester(unittest.TestCase): - def test_transform(self): - with tmp_dir(src=os.path.join(FAKEDATA_DIR, 'imagefolder')) as root: - class_a_image_files = [os.path.join(root, 'a', file) - for file in ('a1.png', 'a2.png', 'a3.png')] - class_b_image_files = [os.path.join(root, 'b', file) - for file in ('b1.png', 'b2.png', 'b3.png', 'b4.png')] - return_value = os.path.join(root, 'a', 'a1.png') - args = [] - transform = mock_transform(return_value, args) - dataset = ImageFolder(root, loader=lambda x: x, transform=transform) - - outputs = [dataset[i][0] for i in range(len(dataset))] - self.assertEqual([return_value] * len(outputs), outputs) - - imgs = sorted(class_a_image_files + class_b_image_files) - self.assertEqual(imgs, sorted(args)) - - def test_target_transform(self): - with tmp_dir(src=os.path.join(FAKEDATA_DIR, 'imagefolder')) as root: - class_a_image_files = [os.path.join(root, 'a', file) - for file in ('a1.png', 'a2.png', 'a3.png')] - class_b_image_files = [os.path.join(root, 'b', file) - for file in ('b1.png', 'b2.png', 'b3.png', 'b4.png')] - return_value = os.path.join(root, 'a', 'a1.png') - args = [] - target_transform = mock_transform(return_value, args) - dataset = ImageFolder(root, loader=lambda x: x, - target_transform=target_transform) - - outputs = [dataset[i][1] for i in range(len(dataset))] - self.assertEqual([return_value] * len(outputs), outputs) - - class_a_idx = dataset.class_to_idx['a'] - class_b_idx = dataset.class_to_idx['b'] - targets = sorted([class_a_idx] * len(class_a_image_files) + - [class_b_idx] * len(class_b_image_files)) - self.assertEqual(targets, sorted(args)) - - -if 
__name__ == '__main__': - unittest.main() diff --git a/test/test_datasets_utils.py b/test/test_datasets_utils.py index 14a53b75c54..500163dc7d1 100644 --- a/test/test_datasets_utils.py +++ b/test/test_datasets_utils.py @@ -1,140 +1,287 @@ +import contextlib +import gzip import os -import sys -import tempfile -import torchvision.datasets.utils as utils -import unittest -import zipfile +import pathlib +import re import tarfile -import gzip -import warnings -from torch._six import PY2 -from torch._utils_internal import get_file_path_2 +import zipfile -from common_utils import get_tmp_dir +import pytest +import torch +import torchvision.datasets.utils as utils +from common_utils import assert_equal +from torch._utils_internal import get_file_path_2 # @manual=fbcode//caffe2:utils_internal +from torchvision.datasets.folder import make_dataset +from torchvision.datasets.utils import _COMPRESSED_FILE_OPENERS -if sys.version_info < (3,): - from urllib2 import URLError -else: - from urllib.error import URLError +TEST_FILE = get_file_path_2( + os.path.dirname(os.path.abspath(__file__)), "assets", "encode_jpeg", "grace_hopper_517x606.jpg" +) -TEST_FILE = get_file_path_2( - os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') +def patch_url_redirection(mocker, redirect_url): + class Response: + def __init__(self, url): + self.url = url + + @contextlib.contextmanager + def patched_opener(*args, **kwargs): + yield Response(redirect_url) + + return mocker.patch("torchvision.datasets.utils.urllib.request.urlopen", side_effect=patched_opener) + + +class TestDatasetsUtils: + def test_get_redirect_url(self, mocker): + url = "https://url.org" + expected_redirect_url = "https://redirect.url.org" + + mock = patch_url_redirection(mocker, expected_redirect_url) + + actual = utils._get_redirect_url(url) + assert actual == expected_redirect_url + + assert mock.call_count == 2 + call_args_1, call_args_2 = mock.call_args_list + assert call_args_1[0][0].full_url == 
url + assert call_args_2[0][0].full_url == expected_redirect_url + + def test_get_redirect_url_max_hops_exceeded(self, mocker): + url = "https://url.org" + redirect_url = "https://redirect.url.org" + mock = patch_url_redirection(mocker, redirect_url) -class Tester(unittest.TestCase): + with pytest.raises(RecursionError): + utils._get_redirect_url(url, max_hops=0) - def test_check_md5(self): + assert mock.call_count == 1 + assert mock.call_args[0][0].full_url == url + + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_check_md5(self, use_pathlib): fpath = TEST_FILE - correct_md5 = '9c0bb82894bb3af7f7675ef2b3b6dcdc' - false_md5 = '' - self.assertTrue(utils.check_md5(fpath, correct_md5)) - self.assertFalse(utils.check_md5(fpath, false_md5)) + if use_pathlib: + fpath = pathlib.Path(fpath) + correct_md5 = "9c0bb82894bb3af7f7675ef2b3b6dcdc" + false_md5 = "" + assert utils.check_md5(fpath, correct_md5) + assert not utils.check_md5(fpath, false_md5) def test_check_integrity(self): existing_fpath = TEST_FILE - nonexisting_fpath = '' - correct_md5 = '9c0bb82894bb3af7f7675ef2b3b6dcdc' - false_md5 = '' - self.assertTrue(utils.check_integrity(existing_fpath, correct_md5)) - self.assertFalse(utils.check_integrity(existing_fpath, false_md5)) - self.assertTrue(utils.check_integrity(existing_fpath)) - self.assertFalse(utils.check_integrity(nonexisting_fpath)) - - @unittest.skipIf(PY2, "https://github.com/pytorch/vision/issues/1268") - def test_download_url(self): - with get_tmp_dir() as temp_dir: - url = "http://github.com/pytorch/vision/archive/master.zip" - try: - utils.download_url(url, temp_dir) - self.assertFalse(len(os.listdir(temp_dir)) == 0) - except URLError: - msg = "could not download test file '{}'".format(url) - warnings.warn(msg, RuntimeWarning) - raise unittest.SkipTest(msg) - - @unittest.skipIf(PY2, "https://github.com/pytorch/vision/issues/1268") - def test_download_url_retry_http(self): - with get_tmp_dir() as temp_dir: - url = 
"https://github.com/pytorch/vision/archive/master.zip" - try: - utils.download_url(url, temp_dir) - self.assertFalse(len(os.listdir(temp_dir)) == 0) - except URLError: - msg = "could not download test file '{}'".format(url) - warnings.warn(msg, RuntimeWarning) - raise unittest.SkipTest(msg) - - @unittest.skipIf(sys.version_info < (3,), "Python2 doesn't raise error") - def test_download_url_dont_exist(self): - with get_tmp_dir() as temp_dir: - url = "http://github.com/pytorch/vision/archive/this_doesnt_exist.zip" - with self.assertRaises(URLError): - utils.download_url(url, temp_dir) - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_extract_zip(self): - with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile(suffix='.zip') as f: - with zipfile.ZipFile(f, 'w') as zf: - zf.writestr('file.tst', 'this is the content') - utils.extract_archive(f.name, temp_dir) - self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst'))) - with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_extract_tar(self): - for ext, mode in zip(['.tar', '.tar.gz'], ['w', 'w:gz']): - with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile() as bf: - bf.write("this is the content".encode()) - bf.seek(0) - with tempfile.NamedTemporaryFile(suffix=ext) as f: - with tarfile.open(f.name, mode=mode) as zf: - zf.add(bf.name, arcname='file.tst') - utils.extract_archive(f.name, temp_dir) - self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst'))) - with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - @unittest.skipIf(sys.version_info < (3,), "Extracting .tar.xz files is not supported under Python 2.x") - def 
test_extract_tar_xz(self): - for ext, mode in zip(['.tar.xz'], ['w:xz']): - with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile() as bf: - bf.write("this is the content".encode()) - bf.seek(0) - with tempfile.NamedTemporaryFile(suffix=ext) as f: - with tarfile.open(f.name, mode=mode) as zf: - zf.add(bf.name, arcname='file.tst') - utils.extract_archive(f.name, temp_dir) - self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst'))) - with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_extract_gzip(self): - with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile(suffix='.gz') as f: - with gzip.GzipFile(f.name, 'wb') as zf: - zf.write('this is the content'.encode()) - utils.extract_archive(f.name, temp_dir) - f_name = os.path.join(temp_dir, os.path.splitext(os.path.basename(f.name))[0]) - self.assertTrue(os.path.exists(f_name)) - with open(os.path.join(f_name), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') + nonexisting_fpath = "" + correct_md5 = "9c0bb82894bb3af7f7675ef2b3b6dcdc" + false_md5 = "" + assert utils.check_integrity(existing_fpath, correct_md5) + assert not utils.check_integrity(existing_fpath, false_md5) + assert utils.check_integrity(existing_fpath) + assert not utils.check_integrity(nonexisting_fpath) + + def test_get_google_drive_file_id(self): + url = "https://drive.google.com/file/d/1GO-BHUYRuvzr1Gtp2_fqXRsr9TIeYbhV/view" + expected = "1GO-BHUYRuvzr1Gtp2_fqXRsr9TIeYbhV" + + actual = utils._get_google_drive_file_id(url) + assert actual == expected + + def test_get_google_drive_file_id_invalid_url(self): + url = "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz" + + assert utils._get_google_drive_file_id(url) is None + + @pytest.mark.parametrize( + "file, expected", + [ + ("foo.tar.bz2", (".tar.bz2", 
".tar", ".bz2")), + ("foo.tar.xz", (".tar.xz", ".tar", ".xz")), + ("foo.tar", (".tar", ".tar", None)), + ("foo.tar.gz", (".tar.gz", ".tar", ".gz")), + ("foo.tbz", (".tbz", ".tar", ".bz2")), + ("foo.tbz2", (".tbz2", ".tar", ".bz2")), + ("foo.tgz", (".tgz", ".tar", ".gz")), + ("foo.bz2", (".bz2", None, ".bz2")), + ("foo.gz", (".gz", None, ".gz")), + ("foo.zip", (".zip", ".zip", None)), + ("foo.xz", (".xz", None, ".xz")), + ("foo.bar.tar.gz", (".tar.gz", ".tar", ".gz")), + ("foo.bar.gz", (".gz", None, ".gz")), + ("foo.bar.zip", (".zip", ".zip", None)), + ], + ) + def test_detect_file_type(self, file, expected): + assert utils._detect_file_type(file) == expected + + @pytest.mark.parametrize("file", ["foo", "foo.tar.baz", "foo.bar"]) + def test_detect_file_type_incompatible(self, file): + # tests detect file type for no extension, unknown compression and unknown partial extension + with pytest.raises(RuntimeError): + utils._detect_file_type(file) + + @pytest.mark.parametrize("extension", [".bz2", ".gz", ".xz"]) + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_decompress(self, extension, tmpdir, use_pathlib): + def create_compressed(root, content="this is the content"): + file = os.path.join(root, "file") + compressed = f"{file}{extension}" + compressed_file_opener = _COMPRESSED_FILE_OPENERS[extension] + + with compressed_file_opener(compressed, "wb") as fh: + fh.write(content.encode()) + + return compressed, file, content + + compressed, file, content = create_compressed(tmpdir) + if use_pathlib: + compressed = pathlib.Path(compressed) + + utils._decompress(compressed) + + assert os.path.exists(file) + + with open(file) as fh: + assert fh.read() == content + + def test_decompress_no_compression(self): + with pytest.raises(RuntimeError): + utils._decompress("foo.tar") + + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_decompress_remove_finished(self, tmpdir, use_pathlib): + def create_compressed(root, content="this is the content"): 
+ file = os.path.join(root, "file") + compressed = f"{file}.gz" + + with gzip.open(compressed, "wb") as fh: + fh.write(content.encode()) + + return compressed, file, content + + compressed, file, content = create_compressed(tmpdir) + print(f"{type(compressed)=}") + if use_pathlib: + compressed = pathlib.Path(compressed) + tmpdir = pathlib.Path(tmpdir) + + extracted_dir = utils.extract_archive(compressed, tmpdir, remove_finished=True) + + assert not os.path.exists(compressed) + if use_pathlib: + assert isinstance(extracted_dir, pathlib.Path) + assert isinstance(compressed, pathlib.Path) + else: + assert isinstance(extracted_dir, str) + assert isinstance(compressed, str) + + @pytest.mark.parametrize("extension", [".gz", ".xz"]) + @pytest.mark.parametrize("remove_finished", [True, False]) + def test_extract_archive_defer_to_decompress(self, extension, remove_finished, mocker): + filename = "foo" + file = f"{filename}{extension}" + + mocked = mocker.patch("torchvision.datasets.utils._decompress") + utils.extract_archive(file, remove_finished=remove_finished) + + mocked.assert_called_once_with(file, filename, remove_finished=remove_finished) + + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_extract_zip(self, tmpdir, use_pathlib): + def create_archive(root, content="this is the content"): + file = os.path.join(root, "dst.txt") + archive = os.path.join(root, "archive.zip") + + with zipfile.ZipFile(archive, "w") as zf: + zf.writestr(os.path.basename(file), content) + + return archive, file, content + + if use_pathlib: + tmpdir = pathlib.Path(tmpdir) + archive, file, content = create_archive(tmpdir) + + utils.extract_archive(archive, tmpdir) + + assert os.path.exists(file) + + with open(file) as fh: + assert fh.read() == content + + @pytest.mark.parametrize( + "extension, mode", [(".tar", "w"), (".tar.gz", "w:gz"), (".tgz", "w:gz"), (".tar.xz", "w:xz")] + ) + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_extract_tar(self, extension, 
mode, tmpdir, use_pathlib): + def create_archive(root, extension, mode, content="this is the content"): + src = os.path.join(root, "src.txt") + dst = os.path.join(root, "dst.txt") + archive = os.path.join(root, f"archive{extension}") + + with open(src, "w") as fh: + fh.write(content) + + with tarfile.open(archive, mode=mode) as fh: + fh.add(src, arcname=os.path.basename(dst)) + + return archive, dst, content + + if use_pathlib: + tmpdir = pathlib.Path(tmpdir) + archive, file, content = create_archive(tmpdir, extension, mode) + + utils.extract_archive(archive, tmpdir) + + assert os.path.exists(file) + + with open(file) as fh: + assert fh.read() == content def test_verify_str_arg(self): - self.assertEqual("a", utils.verify_str_arg("a", "arg", ("a",))) - self.assertRaises(ValueError, utils.verify_str_arg, 0, ("a",), "arg") - self.assertRaises(ValueError, utils.verify_str_arg, "b", ("a",), "arg") + assert "a" == utils.verify_str_arg("a", "arg", ("a",)) + pytest.raises(ValueError, utils.verify_str_arg, 0, ("a",), "arg") + pytest.raises(ValueError, utils.verify_str_arg, "b", ("a",), "arg") + + @pytest.mark.parametrize( + ("dtype", "actual_hex", "expected_hex"), + [ + (torch.uint8, "01 23 45 67 89 AB CD EF", "01 23 45 67 89 AB CD EF"), + (torch.float16, "01 23 45 67 89 AB CD EF", "23 01 67 45 AB 89 EF CD"), + (torch.int32, "01 23 45 67 89 AB CD EF", "67 45 23 01 EF CD AB 89"), + (torch.float64, "01 23 45 67 89 AB CD EF", "EF CD AB 89 67 45 23 01"), + ], + ) + def test_flip_byte_order(self, dtype, actual_hex, expected_hex): + def to_tensor(hex): + return torch.frombuffer(bytes.fromhex(hex), dtype=dtype) + + assert_equal( + utils._flip_byte_order(to_tensor(actual_hex)), + to_tensor(expected_hex), + ) + + +@pytest.mark.parametrize( + ("kwargs", "expected_error_msg"), + [ + (dict(is_valid_file=lambda path: pathlib.Path(path).suffix in {".png", ".jpeg"}), "classes c"), + (dict(extensions=".png"), re.escape("classes b, c. 
Supported extensions are: .png")), + (dict(extensions=(".png", ".jpeg")), re.escape("classes c. Supported extensions are: .png, .jpeg")), + ], +) +def test_make_dataset_no_valid_files(tmpdir, kwargs, expected_error_msg): + tmpdir = pathlib.Path(tmpdir) + + (tmpdir / "a").mkdir() + (tmpdir / "a" / "a.png").touch() + + (tmpdir / "b").mkdir() + (tmpdir / "b" / "b.jpeg").touch() + + (tmpdir / "c").mkdir() + (tmpdir / "c" / "c.unknown").touch() + + with pytest.raises(FileNotFoundError, match=expected_error_msg): + make_dataset(str(tmpdir), **kwargs) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_datasets_video_utils.py b/test/test_datasets_video_utils.py index 2488edc613d..51330911e50 100644 --- a/test/test_datasets_video_utils.py +++ b/test/test_datasets_video_utils.py @@ -1,101 +1,71 @@ -import contextlib -import sys -import os +import pytest import torch -import unittest - +from common_utils import assert_equal, get_list_of_videos from torchvision import io -from torchvision.datasets.video_utils import VideoClips, unfold - -from common_utils import get_tmp_dir - - -@contextlib.contextmanager -def get_list_of_videos(num_videos=5, sizes=None, fps=None): - with get_tmp_dir() as tmp_dir: - names = [] - for i in range(num_videos): - if sizes is None: - size = 5 * (i + 1) - else: - size = sizes[i] - if fps is None: - f = 5 - else: - f = fps[i] - data = torch.randint(0, 255, (size, 300, 400, 3), dtype=torch.uint8) - name = os.path.join(tmp_dir, "{}.mp4".format(i)) - names.append(name) - io.write_video(name, data, fps=f) +from torchvision.datasets.video_utils import unfold, VideoClips - yield names - - -class Tester(unittest.TestCase): +class TestVideo: def test_unfold(self): a = torch.arange(7) r = unfold(a, 3, 3, 1) - expected = torch.tensor([ - [0, 1, 2], - [3, 4, 5], - ]) - self.assertTrue(r.equal(expected)) + expected = torch.tensor( + [ + [0, 1, 2], + [3, 4, 5], + ] + ) + assert_equal(r, 
expected) r = unfold(a, 3, 2, 1) - expected = torch.tensor([ - [0, 1, 2], - [2, 3, 4], - [4, 5, 6] - ]) - self.assertTrue(r.equal(expected)) + expected = torch.tensor([[0, 1, 2], [2, 3, 4], [4, 5, 6]]) + assert_equal(r, expected) r = unfold(a, 3, 2, 2) - expected = torch.tensor([ - [0, 2, 4], - [2, 4, 6], - ]) - self.assertTrue(r.equal(expected)) - - @unittest.skipIf(not io.video._av_available(), "this test requires av") - @unittest.skipIf(sys.platform == 'win32', 'temporarily disabled on Windows') - def test_video_clips(self): - with get_list_of_videos(num_videos=3) as video_list: - video_clips = VideoClips(video_list, 5, 5) - self.assertEqual(video_clips.num_clips(), 1 + 2 + 3) - for i, (v_idx, c_idx) in enumerate([(0, 0), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)]): - video_idx, clip_idx = video_clips.get_clip_location(i) - self.assertEqual(video_idx, v_idx) - self.assertEqual(clip_idx, c_idx) - - video_clips = VideoClips(video_list, 6, 6) - self.assertEqual(video_clips.num_clips(), 0 + 1 + 2) - for i, (v_idx, c_idx) in enumerate([(1, 0), (2, 0), (2, 1)]): - video_idx, clip_idx = video_clips.get_clip_location(i) - self.assertEqual(video_idx, v_idx) - self.assertEqual(clip_idx, c_idx) - - video_clips = VideoClips(video_list, 6, 1) - self.assertEqual(video_clips.num_clips(), 0 + (10 - 6 + 1) + (15 - 6 + 1)) - for i, v_idx, c_idx in [(0, 1, 0), (4, 1, 4), (5, 2, 0), (6, 2, 1)]: - video_idx, clip_idx = video_clips.get_clip_location(i) - self.assertEqual(video_idx, v_idx) - self.assertEqual(clip_idx, c_idx) - - @unittest.skipIf(not io.video._av_available(), "this test requires av") - @unittest.skipIf(sys.platform == 'win32', 'temporarily disabled on Windows') - def test_video_clips_custom_fps(self): - with get_list_of_videos(num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) as video_list: - num_frames = 4 - for fps in [1, 3, 4, 10]: - video_clips = VideoClips(video_list, num_frames, num_frames, fps) - for i in range(video_clips.num_clips()): - video, audio, info, 
video_idx = video_clips.get_clip(i) - self.assertEqual(video.shape[0], num_frames) - self.assertEqual(info["video_fps"], fps) - self.assertEqual(info, {"video_fps": fps}) - # TODO add tests checking that the content is right + expected = torch.tensor( + [ + [0, 2, 4], + [2, 4, 6], + ] + ) + assert_equal(r, expected) + + @pytest.mark.skipif(not io.video._av_available(), reason="this test requires av") + def test_video_clips(self, tmpdir): + video_list = get_list_of_videos(tmpdir, num_videos=3) + video_clips = VideoClips(video_list, 5, 5, num_workers=2) + assert video_clips.num_clips() == 1 + 2 + 3 + for i, (v_idx, c_idx) in enumerate([(0, 0), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)]): + video_idx, clip_idx = video_clips.get_clip_location(i) + assert video_idx == v_idx + assert clip_idx == c_idx + + video_clips = VideoClips(video_list, 6, 6) + assert video_clips.num_clips() == 0 + 1 + 2 + for i, (v_idx, c_idx) in enumerate([(1, 0), (2, 0), (2, 1)]): + video_idx, clip_idx = video_clips.get_clip_location(i) + assert video_idx == v_idx + assert clip_idx == c_idx + + video_clips = VideoClips(video_list, 6, 1) + assert video_clips.num_clips() == 0 + (10 - 6 + 1) + (15 - 6 + 1) + for i, v_idx, c_idx in [(0, 1, 0), (4, 1, 4), (5, 2, 0), (6, 2, 1)]: + video_idx, clip_idx = video_clips.get_clip_location(i) + assert video_idx == v_idx + assert clip_idx == c_idx + + @pytest.mark.skipif(not io.video._av_available(), reason="this test requires av") + def test_video_clips_custom_fps(self, tmpdir): + video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) + num_frames = 4 + for fps in [1, 3, 4, 10]: + video_clips = VideoClips(video_list, num_frames, num_frames, fps) + for i in range(video_clips.num_clips()): + video, audio, info, video_idx = video_clips.get_clip(i) + assert video.shape[0] == num_frames + assert info["video_fps"] == fps + # TODO add tests checking that the content is right def test_compute_clips_for_video(self): video_pts = 
torch.arange(30) @@ -104,25 +74,32 @@ def test_compute_clips_for_video(self): orig_fps = 30 duration = float(len(video_pts)) / orig_fps new_fps = 13 - clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, - orig_fps, new_fps) + clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, orig_fps, new_fps) resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps) - self.assertEqual(len(clips), 1) - self.assertTrue(clips.equal(idxs)) - self.assertTrue(idxs[0].equal(resampled_idxs)) + assert len(clips) == 1 + assert_equal(clips, idxs) + assert_equal(idxs[0], resampled_idxs) # case 2: all frames appear only once num_frames = 4 orig_fps = 30 duration = float(len(video_pts)) / orig_fps new_fps = 12 - clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, - orig_fps, new_fps) + clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, orig_fps, new_fps) resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps) - self.assertEqual(len(clips), 3) - self.assertTrue(clips.equal(idxs)) - self.assertTrue(idxs.flatten().equal(resampled_idxs)) + assert len(clips) == 3 + assert_equal(clips, idxs) + assert_equal(idxs.flatten(), resampled_idxs) + + # case 3: frames aren't enough for a clip + num_frames = 32 + orig_fps = 30 + new_fps = 13 + with pytest.warns(UserWarning): + clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, orig_fps, new_fps) + assert len(clips) == 0 + assert len(idxs) == 0 -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_datasets_video_utils_opt.py b/test/test_datasets_video_utils_opt.py index f94af400838..5e6b19bfb95 100644 --- a/test/test_datasets_video_utils_opt.py +++ b/test/test_datasets_video_utils_opt.py @@ -1,11 +1,12 @@ import unittest -from torchvision import set_video_backend 
-import test_datasets_video_utils +import test_datasets_video_utils +from torchvision import set_video_backend # noqa: 401 -set_video_backend('video_reader') +# Disabling the video backend switching temporarily +# set_video_backend('video_reader') -if __name__ == '__main__': +if __name__ == "__main__": suite = unittest.TestLoader().loadTestsFromModule(test_datasets_video_utils) unittest.TextTestRunner(verbosity=1).run(suite) diff --git a/test/test_extended_models.py b/test/test_extended_models.py new file mode 100644 index 00000000000..0c918c0afd1 --- /dev/null +++ b/test/test_extended_models.py @@ -0,0 +1,503 @@ +import copy +import os +import pickle + +import pytest +import test_models as TM +import torch +from common_extended_utils import get_file_size_mb, get_ops +from torchvision import models +from torchvision.models import get_model_weights, Weights, WeightsEnum +from torchvision.models._utils import handle_legacy_interface +from torchvision.models.detection.backbone_utils import mobilenet_backbone, resnet_fpn_backbone + +run_if_test_with_extended = pytest.mark.skipif( + os.getenv("PYTORCH_TEST_WITH_EXTENDED", "0") != "1", + reason="Extended tests are disabled by default. 
Set PYTORCH_TEST_WITH_EXTENDED=1 to run them.", +) + + +@pytest.mark.parametrize( + "name, model_class", + [ + ("resnet50", models.ResNet), + ("retinanet_resnet50_fpn_v2", models.detection.RetinaNet), + ("raft_large", models.optical_flow.RAFT), + ("quantized_resnet50", models.quantization.QuantizableResNet), + ("lraspp_mobilenet_v3_large", models.segmentation.LRASPP), + ("mvit_v1_b", models.video.MViT), + ], +) +def test_get_model(name, model_class): + assert isinstance(models.get_model(name), model_class) + + +@pytest.mark.parametrize( + "name, model_fn", + [ + ("resnet50", models.resnet50), + ("retinanet_resnet50_fpn_v2", models.detection.retinanet_resnet50_fpn_v2), + ("raft_large", models.optical_flow.raft_large), + ("quantized_resnet50", models.quantization.resnet50), + ("lraspp_mobilenet_v3_large", models.segmentation.lraspp_mobilenet_v3_large), + ("mvit_v1_b", models.video.mvit_v1_b), + ], +) +def test_get_model_builder(name, model_fn): + assert models.get_model_builder(name) == model_fn + + +@pytest.mark.parametrize( + "name, weight", + [ + ("resnet50", models.ResNet50_Weights), + ("retinanet_resnet50_fpn_v2", models.detection.RetinaNet_ResNet50_FPN_V2_Weights), + ("raft_large", models.optical_flow.Raft_Large_Weights), + ("quantized_resnet50", models.quantization.ResNet50_QuantizedWeights), + ("lraspp_mobilenet_v3_large", models.segmentation.LRASPP_MobileNet_V3_Large_Weights), + ("mvit_v1_b", models.video.MViT_V1_B_Weights), + ], +) +def test_get_model_weights(name, weight): + assert models.get_model_weights(name) == weight + + +@pytest.mark.parametrize("copy_fn", [copy.copy, copy.deepcopy]) +@pytest.mark.parametrize( + "name", + [ + "resnet50", + "retinanet_resnet50_fpn_v2", + "raft_large", + "quantized_resnet50", + "lraspp_mobilenet_v3_large", + "mvit_v1_b", + ], +) +def test_weights_copyable(copy_fn, name): + for weights in list(models.get_model_weights(name)): + # It is somewhat surprising that (deep-)copying is an identity operation here, but this is 
the default behavior + # of enums: https://docs.python.org/3/howto/enum.html#enum-members-aka-instances + # Checking for equality, i.e. `==`, is sufficient (and even preferable) for our use case, should we need to drop + # support for the identity operation in the future. + assert copy_fn(weights) is weights + + +@pytest.mark.parametrize( + "name", + [ + "resnet50", + "retinanet_resnet50_fpn_v2", + "raft_large", + "quantized_resnet50", + "lraspp_mobilenet_v3_large", + "mvit_v1_b", + ], +) +def test_weights_deserializable(name): + for weights in list(models.get_model_weights(name)): + # It is somewhat surprising that deserialization is an identity operation here, but this is the default behavior + # of enums: https://docs.python.org/3/howto/enum.html#enum-members-aka-instances + # Checking for equality, i.e. `==`, is sufficient (and even preferable) for our use case, should we need to drop + # support for the identity operation in the future. + assert pickle.loads(pickle.dumps(weights)) is weights + + +def get_models_from_module(module): + return [ + v.__name__ + for k, v in module.__dict__.items() + if callable(v) and k[0].islower() and k[0] != "_" and k not in models._api.__all__ + ] + + +@pytest.mark.parametrize( + "module", [models, models.detection, models.quantization, models.segmentation, models.video, models.optical_flow] +) +def test_list_models(module): + a = set(get_models_from_module(module)) + b = set(x.replace("quantized_", "") for x in models.list_models(module)) + + assert len(b) > 0 + assert a == b + + +@pytest.mark.parametrize( + "include_filters", + [ + None, + [], + (), + "", + "*resnet*", + ["*alexnet*"], + "*not-existing-model-for-test?", + ["*resnet*", "*alexnet*"], + ["*resnet*", "*alexnet*", "*not-existing-model-for-test?"], + ("*resnet*", "*alexnet*"), + set(["*resnet*", "*alexnet*"]), + ], +) +@pytest.mark.parametrize( + "exclude_filters", + [ + None, + [], + (), + "", + "*resnet*", + ["*alexnet*"], + ["*not-existing-model-for-test?"], + 
["resnet34", "*not-existing-model-for-test?"], + ["resnet34", "*resnet1*"], + ("resnet34", "*resnet1*"), + set(["resnet34", "*resnet1*"]), + ], +) +def test_list_models_filters(include_filters, exclude_filters): + actual = set(models.list_models(models, include=include_filters, exclude=exclude_filters)) + classification_models = set(get_models_from_module(models)) + + if isinstance(include_filters, str): + include_filters = [include_filters] + if isinstance(exclude_filters, str): + exclude_filters = [exclude_filters] + + if include_filters: + expected = set() + for include_f in include_filters: + include_f = include_f.strip("*?") + expected = expected | set(x for x in classification_models if include_f in x) + else: + expected = classification_models + + if exclude_filters: + for exclude_f in exclude_filters: + exclude_f = exclude_f.strip("*?") + if exclude_f != "": + a_exclude = set(x for x in classification_models if exclude_f in x) + expected = expected - a_exclude + + assert expected == actual + + +@pytest.mark.parametrize( + "name, weight", + [ + ("ResNet50_Weights.IMAGENET1K_V1", models.ResNet50_Weights.IMAGENET1K_V1), + ("ResNet50_Weights.DEFAULT", models.ResNet50_Weights.IMAGENET1K_V2), + ( + "ResNet50_QuantizedWeights.DEFAULT", + models.quantization.ResNet50_QuantizedWeights.IMAGENET1K_FBGEMM_V2, + ), + ( + "ResNet50_QuantizedWeights.IMAGENET1K_FBGEMM_V1", + models.quantization.ResNet50_QuantizedWeights.IMAGENET1K_FBGEMM_V1, + ), + ], +) +def test_get_weight(name, weight): + assert models.get_weight(name) == weight + + +@pytest.mark.parametrize( + "model_fn", + TM.list_model_fns(models) + + TM.list_model_fns(models.detection) + + TM.list_model_fns(models.quantization) + + TM.list_model_fns(models.segmentation) + + TM.list_model_fns(models.video) + + TM.list_model_fns(models.optical_flow), +) +def test_naming_conventions(model_fn): + weights_enum = get_model_weights(model_fn) + assert weights_enum is not None + assert len(weights_enum) == 0 or 
hasattr(weights_enum, "DEFAULT") + + +detection_models_input_dims = { + "fasterrcnn_mobilenet_v3_large_320_fpn": (320, 320), + "fasterrcnn_mobilenet_v3_large_fpn": (800, 800), + "fasterrcnn_resnet50_fpn": (800, 800), + "fasterrcnn_resnet50_fpn_v2": (800, 800), + "fcos_resnet50_fpn": (800, 800), + "keypointrcnn_resnet50_fpn": (1333, 1333), + "maskrcnn_resnet50_fpn": (800, 800), + "maskrcnn_resnet50_fpn_v2": (800, 800), + "retinanet_resnet50_fpn": (800, 800), + "retinanet_resnet50_fpn_v2": (800, 800), + "ssd300_vgg16": (300, 300), + "ssdlite320_mobilenet_v3_large": (320, 320), +} + + +@pytest.mark.parametrize( + "model_fn", + TM.list_model_fns(models) + + TM.list_model_fns(models.detection) + + TM.list_model_fns(models.quantization) + + TM.list_model_fns(models.segmentation) + + TM.list_model_fns(models.video) + + TM.list_model_fns(models.optical_flow), +) +@run_if_test_with_extended +def test_schema_meta_validation(model_fn): + if model_fn.__name__ == "maskrcnn_resnet50_fpn_v2": + pytest.skip(reason="FIXME https://github.com/pytorch/vision/issues/7349") + + # list of all possible supported high-level fields for weights meta-data + permitted_fields = { + "backend", + "categories", + "keypoint_names", + "license", + "_metrics", + "min_size", + "min_temporal_size", + "num_params", + "recipe", + "unquantized", + "_docs", + "_ops", + "_file_size", + } + # mandatory fields for each computer vision task + classification_fields = {"categories", ("_metrics", "ImageNet-1K", "acc@1"), ("_metrics", "ImageNet-1K", "acc@5")} + defaults = { + "all": {"_metrics", "min_size", "num_params", "recipe", "_docs", "_file_size", "_ops"}, + "models": classification_fields, + "detection": {"categories", ("_metrics", "COCO-val2017", "box_map")}, + "quantization": classification_fields | {"backend", "unquantized"}, + "segmentation": { + "categories", + ("_metrics", "COCO-val2017-VOC-labels", "miou"), + ("_metrics", "COCO-val2017-VOC-labels", "pixel_acc"), + }, + "video": {"categories", 
("_metrics", "Kinetics-400", "acc@1"), ("_metrics", "Kinetics-400", "acc@5")}, + "optical_flow": set(), + } + model_name = model_fn.__name__ + module_name = model_fn.__module__.split(".")[-2] + expected_fields = defaults["all"] | defaults[module_name] + + weights_enum = get_model_weights(model_fn) + if len(weights_enum) == 0: + pytest.skip(f"Model '{model_name}' doesn't have any pre-trained weights.") + + problematic_weights = {} + incorrect_meta = [] + bad_names = [] + for w in weights_enum: + actual_fields = set(w.meta.keys()) + actual_fields |= set( + ("_metrics", dataset, metric_key) + for dataset in w.meta.get("_metrics", {}).keys() + for metric_key in w.meta.get("_metrics", {}).get(dataset, {}).keys() + ) + missing_fields = expected_fields - actual_fields + unsupported_fields = set(w.meta.keys()) - permitted_fields + if missing_fields or unsupported_fields: + problematic_weights[w] = {"missing": missing_fields, "unsupported": unsupported_fields} + + if w == weights_enum.DEFAULT or any(w.meta[k] != weights_enum.DEFAULT.meta[k] for k in ["num_params", "_ops"]): + if module_name == "quantization": + # parameters() count doesn't work well with quantization, so we check against the non-quantized + unquantized_w = w.meta.get("unquantized") + if unquantized_w is not None: + if w.meta.get("num_params") != unquantized_w.meta.get("num_params"): + incorrect_meta.append((w, "num_params")) + + # the methodology for quantized ops count doesn't work as well, so we take unquantized FLOPs + # instead + if w.meta["_ops"] != unquantized_w.meta.get("_ops"): + incorrect_meta.append((w, "_ops")) + + else: + # loading the model and using it for parameter and ops verification + model = model_fn(weights=w) + + if w.meta.get("num_params") != sum(p.numel() for p in model.parameters()): + incorrect_meta.append((w, "num_params")) + + kwargs = {} + if model_name in detection_models_input_dims: + # detection models have non default height and width + height, width = 
detection_models_input_dims[model_name] + kwargs = {"height": height, "width": width} + + if not model_fn.__name__.startswith("vit"): + # FIXME: https://github.com/pytorch/vision/issues/7871 + calculated_ops = get_ops(model=model, weight=w, **kwargs) + if calculated_ops != w.meta["_ops"]: + incorrect_meta.append((w, "_ops")) + + if not w.name.isupper(): + bad_names.append(w) + + if get_file_size_mb(w) != w.meta.get("_file_size"): + incorrect_meta.append((w, "_file_size")) + + assert not problematic_weights + assert not incorrect_meta + assert not bad_names + + +@pytest.mark.parametrize( + "model_fn", + TM.list_model_fns(models) + + TM.list_model_fns(models.detection) + + TM.list_model_fns(models.quantization) + + TM.list_model_fns(models.segmentation) + + TM.list_model_fns(models.video) + + TM.list_model_fns(models.optical_flow), +) +@run_if_test_with_extended +def test_transforms_jit(model_fn): + model_name = model_fn.__name__ + weights_enum = get_model_weights(model_fn) + if len(weights_enum) == 0: + pytest.skip(f"Model '{model_name}' doesn't have any pre-trained weights.") + + defaults = { + "models": { + "input_shape": (1, 3, 224, 224), + }, + "detection": { + "input_shape": (3, 300, 300), + }, + "quantization": { + "input_shape": (1, 3, 224, 224), + }, + "segmentation": { + "input_shape": (1, 3, 520, 520), + }, + "video": { + "input_shape": (1, 3, 4, 112, 112), + }, + "optical_flow": { + "input_shape": (1, 3, 128, 128), + }, + } + module_name = model_fn.__module__.split(".")[-2] + + kwargs = {**defaults[module_name], **TM._model_params.get(model_name, {})} + input_shape = kwargs.pop("input_shape") + x = torch.rand(input_shape) + if module_name == "optical_flow": + args = (x, x) + else: + if module_name == "video": + x = x.permute(0, 2, 1, 3, 4) + args = (x,) + + problematic_weights = [] + for w in weights_enum: + transforms = w.transforms() + try: + TM._check_jit_scriptable(transforms, args) + except Exception: + problematic_weights.append(w) + + assert not 
problematic_weights + + +# With this filter, every unexpected warning will be turned into an error +@pytest.mark.filterwarnings("error") +class TestHandleLegacyInterface: + class ModelWeights(WeightsEnum): + Sentinel = Weights(url="https://pytorch.org", transforms=lambda x: x, meta=dict()) + + @pytest.mark.parametrize( + "kwargs", + [ + pytest.param(dict(), id="empty"), + pytest.param(dict(weights=None), id="None"), + pytest.param(dict(weights=ModelWeights.Sentinel), id="Weights"), + ], + ) + def test_no_warn(self, kwargs): + @handle_legacy_interface(weights=("pretrained", self.ModelWeights.Sentinel)) + def builder(*, weights=None): + pass + + builder(**kwargs) + + @pytest.mark.parametrize("pretrained", (True, False)) + def test_pretrained_pos(self, pretrained): + @handle_legacy_interface(weights=("pretrained", self.ModelWeights.Sentinel)) + def builder(*, weights=None): + pass + + with pytest.warns(UserWarning, match="positional"): + builder(pretrained) + + @pytest.mark.parametrize("pretrained", (True, False)) + def test_pretrained_kw(self, pretrained): + @handle_legacy_interface(weights=("pretrained", self.ModelWeights.Sentinel)) + def builder(*, weights=None): + pass + + with pytest.warns(UserWarning, match="deprecated"): + builder(pretrained) + + @pytest.mark.parametrize("pretrained", (True, False)) + @pytest.mark.parametrize("positional", (True, False)) + def test_equivalent_behavior_weights(self, pretrained, positional): + @handle_legacy_interface(weights=("pretrained", self.ModelWeights.Sentinel)) + def builder(*, weights=None): + pass + + args, kwargs = ((pretrained,), dict()) if positional else ((), dict(pretrained=pretrained)) + with pytest.warns(UserWarning, match=f"weights={self.ModelWeights.Sentinel if pretrained else None}"): + builder(*args, **kwargs) + + def test_multi_params(self): + weights_params = ("weights", "weights_other") + pretrained_params = [param.replace("weights", "pretrained") for param in weights_params] + + @handle_legacy_interface( 
+ **{ + weights_param: (pretrained_param, self.ModelWeights.Sentinel) + for weights_param, pretrained_param in zip(weights_params, pretrained_params) + } + ) + def builder(*, weights=None, weights_other=None): + pass + + for pretrained_param in pretrained_params: + with pytest.warns(UserWarning, match="deprecated"): + builder(**{pretrained_param: True}) + + def test_default_callable(self): + @handle_legacy_interface( + weights=( + "pretrained", + lambda kwargs: self.ModelWeights.Sentinel if kwargs["flag"] else None, + ) + ) + def builder(*, weights=None, flag): + pass + + with pytest.warns(UserWarning, match="deprecated"): + builder(pretrained=True, flag=True) + + with pytest.raises(ValueError, match="weights"): + builder(pretrained=True, flag=False) + + @pytest.mark.parametrize( + "model_fn", + [fn for fn in TM.list_model_fns(models) if fn.__name__ not in {"vit_h_14", "regnet_y_128gf"}] + + TM.list_model_fns(models.detection) + + TM.list_model_fns(models.quantization) + + TM.list_model_fns(models.segmentation) + + TM.list_model_fns(models.video) + + TM.list_model_fns(models.optical_flow) + + [ + lambda pretrained: resnet_fpn_backbone(backbone_name="resnet50", pretrained=pretrained), + lambda pretrained: mobilenet_backbone(backbone_name="mobilenet_v2", fpn=False, pretrained=pretrained), + ], + ) + @run_if_test_with_extended + def test_pretrained_deprecation(self, model_fn): + with pytest.warns(UserWarning, match="deprecated"): + model_fn(pretrained=True) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index e318420102b..b5352f18f21 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -1,81 +1,1282 @@ -from __future__ import division +import colorsys +import itertools +import math +import os +from functools import partial +from typing import Sequence + +import numpy as np +import PIL.Image +import pytest import torch -import torchvision.transforms as transforms -import 
torchvision.transforms.functional_tensor as F_t +import torchvision.transforms as T +import torchvision.transforms._functional_pil as F_pil +import torchvision.transforms._functional_tensor as F_t import torchvision.transforms.functional as F -import numpy as np -import unittest -import random - - -class Tester(unittest.TestCase): - - def test_vflip(self): - img_tensor = torch.randn(3, 16, 16) - vflipped_img = F_t.vflip(img_tensor) - vflipped_img_again = F_t.vflip(vflipped_img) - self.assertEqual(vflipped_img.shape, img_tensor.shape) - self.assertTrue(torch.equal(img_tensor, vflipped_img_again)) - - def test_hflip(self): - img_tensor = torch.randn(3, 16, 16) - hflipped_img = F_t.hflip(img_tensor) - hflipped_img_again = F_t.hflip(hflipped_img) - self.assertEqual(hflipped_img.shape, img_tensor.shape) - self.assertTrue(torch.equal(img_tensor, hflipped_img_again)) - - def test_crop(self): - img_tensor = torch.randint(0, 255, (3, 16, 16), dtype=torch.uint8) - top = random.randint(0, 15) - left = random.randint(0, 15) - height = random.randint(1, 16 - top) - width = random.randint(1, 16 - left) - img_cropped = F_t.crop(img_tensor, top, left, height, width) - img_PIL = transforms.ToPILImage()(img_tensor) - img_PIL_cropped = F.crop(img_PIL, top, left, height, width) - img_cropped_GT = transforms.ToTensor()(img_PIL_cropped) - - self.assertTrue(torch.equal(img_cropped, (img_cropped_GT * 255).to(torch.uint8)), - "functional_tensor crop not working") - - def test_adjustments(self): - fns = ((F.adjust_brightness, F_t.adjust_brightness), - (F.adjust_contrast, F_t.adjust_contrast), - (F.adjust_saturation, F_t.adjust_saturation)) - - for _ in range(20): - channels = 3 - dims = torch.randint(1, 50, (2,)) - shape = (channels, dims[0], dims[1]) - - if torch.randint(0, 2, (1,)) == 0: - img = torch.rand(*shape, dtype=torch.float) - else: - img = torch.randint(0, 256, shape, dtype=torch.uint8) - - factor = 3 * torch.rand(1) - for f, ft in fns: - - ft_img = ft(img, factor) - if not 
img.dtype.is_floating_point: - ft_img = ft_img.to(torch.float) / 255 - - img_pil = transforms.ToPILImage()(img) - f_img_pil = f(img_pil, factor) - f_img = transforms.ToTensor()(f_img_pil) - - # F uses uint8 and F_t uses float, so there is a small - # difference in values caused by (at most 5) truncations. - max_diff = (ft_img - f_img).abs().max() - self.assertLess(max_diff, 5 / 255 + 1e-5) - - def test_rgb_to_grayscale(self): - img_tensor = torch.randint(0, 255, (3, 16, 16), dtype=torch.uint8) - grayscale_tensor = F_t.rgb_to_grayscale(img_tensor).to(int) - grayscale_pil_img = torch.tensor(np.array(F.to_grayscale(F.to_pil_image(img_tensor)))).to(int) - max_diff = (grayscale_tensor - grayscale_pil_img).abs().max() - self.assertLess(max_diff, 1.0001) - - -if __name__ == '__main__': - unittest.main() +from common_utils import ( + _assert_approx_equal_tensor_to_pil, + _assert_equal_tensor_to_pil, + _create_data, + _create_data_batch, + _test_fn_on_batch, + assert_equal, + cpu_and_cuda, + needs_cuda, +) +from torchvision.transforms import InterpolationMode + +NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC = ( + InterpolationMode.NEAREST, + InterpolationMode.NEAREST_EXACT, + InterpolationMode.BILINEAR, + InterpolationMode.BICUBIC, +) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("fn", [F.get_image_size, F.get_image_num_channels, F.get_dimensions]) +def test_image_sizes(device, fn): + script_F = torch.jit.script(fn) + + img_tensor, pil_img = _create_data(16, 18, 3, device=device) + value_img = fn(img_tensor) + value_pil_img = fn(pil_img) + assert value_img == value_pil_img + + value_img_script = script_F(img_tensor) + assert value_img == value_img_script + + batch_tensors = _create_data_batch(16, 18, 3, num_samples=4, device=device) + value_img_batch = fn(batch_tensors) + assert value_img == value_img_batch + + +@needs_cuda +def test_scale_channel(): + """Make sure that _scale_channel gives the same results on CPU and GPU as + histc or 
bincount are used depending on the device. + """ + # TODO: when # https://github.com/pytorch/pytorch/issues/53194 is fixed, + # only use bincount and remove that test. + size = (1_000,) + img_chan = torch.randint(0, 256, size=size).to("cpu") + scaled_cpu = F_t._scale_channel(img_chan) + scaled_cuda = F_t._scale_channel(img_chan.to("cuda")) + assert_equal(scaled_cpu, scaled_cuda.to("cpu")) + + +class TestRotate: + + ALL_DTYPES = [None, torch.float32, torch.float64, torch.float16] + scripted_rotate = torch.jit.script(F.rotate) + IMG_W = 26 + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("height, width", [(7, 33), (26, IMG_W), (32, IMG_W)]) + @pytest.mark.parametrize( + "center", + [ + None, + (int(IMG_W * 0.3), int(IMG_W * 0.4)), + [int(IMG_W * 0.5), int(IMG_W * 0.6)], + ], + ) + @pytest.mark.parametrize("dt", ALL_DTYPES) + @pytest.mark.parametrize("angle", range(-180, 180, 34)) + @pytest.mark.parametrize("expand", [True, False]) + @pytest.mark.parametrize( + "fill", + [ + None, + [0, 0, 0], + (1, 2, 3), + [255, 255, 255], + [ + 1, + ], + (2.0,), + ], + ) + @pytest.mark.parametrize("fn", [F.rotate, scripted_rotate]) + def test_rotate(self, device, height, width, center, dt, angle, expand, fill, fn): + tensor, pil_img = _create_data(height, width, device=device) + + if dt == torch.float16 and torch.device(device).type == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + tensor = tensor.to(dtype=dt) + + f_pil = int(fill[0]) if fill is not None and len(fill) == 1 else fill + out_pil_img = F.rotate(pil_img, angle=angle, interpolation=NEAREST, expand=expand, center=center, fill=f_pil) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + out_tensor = fn(tensor, angle=angle, interpolation=NEAREST, expand=expand, center=center, fill=fill).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + assert ( + out_tensor.shape == out_pil_tensor.shape + ), 
f"{(height, width, NEAREST, dt, angle, expand, center)}: {out_tensor.shape} vs {out_pil_tensor.shape}" + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 3% of different pixels + assert ratio_diff_pixels < 0.03, ( + f"{(height, width, NEAREST, dt, angle, expand, center, fill)}: " + f"{ratio_diff_pixels}\n{out_tensor[0, :7, :7]} vs \n" + f"{out_pil_tensor[0, :7, :7]}" + ) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dt", ALL_DTYPES) + def test_rotate_batch(self, device, dt): + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + batch_tensors = _create_data_batch(26, 36, num_samples=4, device=device) + if dt is not None: + batch_tensors = batch_tensors.to(dtype=dt) + + center = (20, 22) + _test_fn_on_batch(batch_tensors, F.rotate, angle=32, interpolation=NEAREST, expand=True, center=center) + + def test_rotate_interpolation_type(self): + tensor, _ = _create_data(26, 26) + res1 = F.rotate(tensor, 45, interpolation=PIL.Image.BILINEAR) + res2 = F.rotate(tensor, 45, interpolation=BILINEAR) + assert_equal(res1, res2) + + +class TestAffine: + + ALL_DTYPES = [None, torch.float32, torch.float64, torch.float16] + scripted_affine = torch.jit.script(F.affine) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) + @pytest.mark.parametrize("dt", ALL_DTYPES) + def test_identity_map(self, device, height, width, dt): + # Tests on square and rectangular images + tensor, pil_img = _create_data(height, width, device=device) + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + tensor = tensor.to(dtype=dt) + + # 1) identity map + out_tensor = F.affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST) + + assert_equal(tensor, 
out_tensor, msg=f"{out_tensor[0, :5, :5]} vs {tensor[0, :5, :5]}") + out_tensor = self.scripted_affine( + tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST + ) + assert_equal(tensor, out_tensor, msg=f"{out_tensor[0, :5, :5]} vs {tensor[0, :5, :5]}") + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("height, width", [(26, 26)]) + @pytest.mark.parametrize("dt", ALL_DTYPES) + @pytest.mark.parametrize( + "angle, config", + [ + (90, {"k": 1, "dims": (-1, -2)}), + (45, None), + (30, None), + (-30, None), + (-45, None), + (-90, {"k": -1, "dims": (-1, -2)}), + (180, {"k": 2, "dims": (-1, -2)}), + ], + ) + @pytest.mark.parametrize("fn", [F.affine, scripted_affine]) + def test_square_rotations(self, device, height, width, dt, angle, config, fn): + # 2) Test rotation + tensor, pil_img = _create_data(height, width, device=device) + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + tensor = tensor.to(dtype=dt) + + out_pil_img = F.affine( + pil_img, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST + ) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))).to(device) + + out_tensor = fn(tensor, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST) + if config is not None: + assert_equal(torch.rot90(tensor, **config), out_tensor) + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 6% of different pixels + assert ratio_diff_pixels < 0.06 + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("height, width", [(32, 26)]) + @pytest.mark.parametrize("dt", ALL_DTYPES) + @pytest.mark.parametrize("angle", [90, 45, 15, -30, -60, -120]) + 
@pytest.mark.parametrize("fn", [F.affine, scripted_affine]) + @pytest.mark.parametrize("center", [None, [0, 0]]) + def test_rect_rotations(self, device, height, width, dt, angle, fn, center): + # Tests on rectangular images + tensor, pil_img = _create_data(height, width, device=device) + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + tensor = tensor.to(dtype=dt) + + out_pil_img = F.affine( + pil_img, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST, center=center + ) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + out_tensor = fn( + tensor, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST, center=center + ).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 3% of different pixels + assert ratio_diff_pixels < 0.03 + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) + @pytest.mark.parametrize("dt", ALL_DTYPES) + @pytest.mark.parametrize("t", [[10, 12], (-12, -13)]) + @pytest.mark.parametrize("fn", [F.affine, scripted_affine]) + def test_translations(self, device, height, width, dt, t, fn): + # 3) Test translation + tensor, pil_img = _create_data(height, width, device=device) + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + tensor = tensor.to(dtype=dt) + + out_pil_img = F.affine(pil_img, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST) + + out_tensor = fn(tensor, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST) + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + 
_assert_equal_tensor_to_pil(out_tensor, out_pil_img) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) + @pytest.mark.parametrize("dt", ALL_DTYPES) + @pytest.mark.parametrize( + "a, t, s, sh, f", + [ + (45.5, [5, 6], 1.0, [0.0, 0.0], None), + (33, (5, -4), 1.0, [0.0, 0.0], [0, 0, 0]), + (45, [-5, 4], 1.2, [0.0, 0.0], (1, 2, 3)), + (33, (-4, -8), 2.0, [0.0, 0.0], [255, 255, 255]), + (85, (10, -10), 0.7, [0.0, 0.0], [1]), + (0, [0, 0], 1.0, [35.0], (2.0,)), + (-25, [0, 0], 1.2, [0.0, 15.0], None), + (-45, [-10, 0], 0.7, [2.0, 5.0], None), + (-45, [-10, -10], 1.2, [4.0, 5.0], None), + (-90, [0, 0], 1.0, [0.0, 0.0], None), + ], + ) + @pytest.mark.parametrize("fn", [F.affine, scripted_affine]) + def test_all_ops(self, device, height, width, dt, a, t, s, sh, f, fn): + # 4) Test rotation + translation + scale + shear + tensor, pil_img = _create_data(height, width, device=device) + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + tensor = tensor.to(dtype=dt) + + f_pil = int(f[0]) if f is not None and len(f) == 1 else f + out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, interpolation=NEAREST, fill=f_pil) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, interpolation=NEAREST, fill=f).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 5% (cpu), 6% (cuda) of different pixels + tol = 0.06 if device == "cuda" else 0.05 + assert ratio_diff_pixels < tol + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dt", ALL_DTYPES) + def test_batches(self, device, dt): + if dt == torch.float16 
and device == "cpu": + # skip float16 on CPU case + return + + batch_tensors = _create_data_batch(26, 36, num_samples=4, device=device) + if dt is not None: + batch_tensors = batch_tensors.to(dtype=dt) + + _test_fn_on_batch(batch_tensors, F.affine, angle=-43, translate=[-3, 4], scale=1.2, shear=[4.0, 5.0]) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_interpolation_type(self, device): + tensor, pil_img = _create_data(26, 26, device=device) + + res1 = F.affine(tensor, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=PIL.Image.BILINEAR) + res2 = F.affine(tensor, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=BILINEAR) + assert_equal(res1, res2) + + +def _get_data_dims_and_points_for_perspective(): + # Ideally we would parametrize independently over data dims and points, but + # we want to tests on some points that also depend on the data dims. + # Pytest doesn't support covariant parametrization, so we do it somewhat manually here. + + data_dims = [(26, 34), (26, 26)] + points = [ + [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]], + [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]], + [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]], + ] + + dims_and_points = list(itertools.product(data_dims, points)) + + # up to here, we could just have used 2 @parametrized. + # Down below is the covarariant part as the points depend on the data dims. 
+ + n = 10 + for dim in data_dims: + points += [(dim, T.RandomPerspective.get_params(dim[1], dim[0], i / n)) for i in range(n)] + return dims_and_points + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective()) +@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) +@pytest.mark.parametrize("fill", (None, [0, 0, 0], [1, 2, 3], [255, 255, 255], [1], (2.0,))) +@pytest.mark.parametrize("fn", [F.perspective, torch.jit.script(F.perspective)]) +def test_perspective_pil_vs_tensor(device, dims_and_points, dt, fill, fn): + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + data_dims, (spoints, epoints) = dims_and_points + + tensor, pil_img = _create_data(*data_dims, device=device) + if dt is not None: + tensor = tensor.to(dtype=dt) + + interpolation = NEAREST + fill_pil = int(fill[0]) if fill is not None and len(fill) == 1 else fill + out_pil_img = F.perspective( + pil_img, startpoints=spoints, endpoints=epoints, interpolation=interpolation, fill=fill_pil + ) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=interpolation, fill=fill).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 5% of different pixels + assert ratio_diff_pixels < 0.05 + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective()) +@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) +def test_perspective_batch(device, dims_and_points, dt): + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case 
+ return + + data_dims, (spoints, epoints) = dims_and_points + + batch_tensors = _create_data_batch(*data_dims, num_samples=4, device=device) + if dt is not None: + batch_tensors = batch_tensors.to(dtype=dt) + + # Ignore the equivalence between scripted and regular function on float16 cuda. The pixels at + # the border may be entirely different due to small rounding errors. + scripted_fn_atol = -1 if (dt == torch.float16 and device == "cuda") else 1e-8 + _test_fn_on_batch( + batch_tensors, + F.perspective, + scripted_fn_atol=scripted_fn_atol, + startpoints=spoints, + endpoints=epoints, + interpolation=NEAREST, + ) + + +def test_perspective_interpolation_type(): + spoints = [[0, 0], [33, 0], [33, 25], [0, 25]] + epoints = [[3, 2], [32, 3], [30, 24], [2, 25]] + tensor = torch.randint(0, 256, (3, 26, 26)) + + res1 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=PIL.Image.BILINEAR) + res2 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=BILINEAR) + assert_equal(res1, res2) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) +@pytest.mark.parametrize("size", [32, 26, [32], [32, 32], (32, 32), [26, 35]]) +@pytest.mark.parametrize("max_size", [None, 34, 40, 1000]) +@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST, NEAREST_EXACT]) +def test_resize(device, dt, size, max_size, interpolation): + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if max_size is not None and isinstance(size, Sequence) and len(size) != 1: + return # unsupported + + torch.manual_seed(12) + script_fn = torch.jit.script(F.resize) + tensor, pil_img = _create_data(26, 36, device=device) + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + batch_tensors = 
batch_tensors.to(dt) + + resized_tensor = F.resize(tensor, size=size, interpolation=interpolation, max_size=max_size, antialias=True) + resized_pil_img = F.resize(pil_img, size=size, interpolation=interpolation, max_size=max_size, antialias=True) + + assert resized_tensor.size()[1:] == resized_pil_img.size[::-1] + + if interpolation != NEAREST: + # We can not check values if mode = NEAREST, as results are different + # E.g. resized_tensor = [[a, a, b, c, d, d, e, ...]] + # E.g. resized_pil_img = [[a, b, c, c, d, e, f, ...]] + resized_tensor_f = resized_tensor + # we need to cast to uint8 to compare with PIL image + if resized_tensor_f.dtype == torch.uint8: + resized_tensor_f = resized_tensor_f.to(torch.float) + + # Pay attention to high tolerance for MAE + _assert_approx_equal_tensor_to_pil(resized_tensor_f, resized_pil_img, tol=3.0) + + if isinstance(size, int): + script_size = [size] + else: + script_size = size + + resize_result = script_fn(tensor, size=script_size, interpolation=interpolation, max_size=max_size, antialias=True) + assert_equal(resized_tensor, resize_result) + + _test_fn_on_batch( + batch_tensors, F.resize, size=script_size, interpolation=interpolation, max_size=max_size, antialias=True + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_resize_asserts(device): + + tensor, pil_img = _create_data(26, 36, device=device) + + res1 = F.resize(tensor, size=32, interpolation=PIL.Image.BILINEAR) + res2 = F.resize(tensor, size=32, interpolation=BILINEAR) + assert_equal(res1, res2) + + for img in (tensor, pil_img): + exp_msg = "max_size should only be passed if size specifies the length of the smaller edge" + with pytest.raises(ValueError, match=exp_msg): + F.resize(img, size=(32, 34), max_size=35) + with pytest.raises(ValueError, match="max_size = 32 must be strictly greater"): + F.resize(img, size=32, max_size=32) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, 
torch.float16]) +@pytest.mark.parametrize("size", [[96, 72], [96, 420], [420, 72]]) +@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC]) +def test_resize_antialias(device, dt, size, interpolation): + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + torch.manual_seed(12) + script_fn = torch.jit.script(F.resize) + tensor, pil_img = _create_data(320, 290, device=device) + + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + + resized_tensor = F.resize(tensor, size=size, interpolation=interpolation, antialias=True) + resized_pil_img = F.resize(pil_img, size=size, interpolation=interpolation, antialias=True) + + assert resized_tensor.size()[1:] == resized_pil_img.size[::-1] + + resized_tensor_f = resized_tensor + # we need to cast to uint8 to compare with PIL image + if resized_tensor_f.dtype == torch.uint8: + resized_tensor_f = resized_tensor_f.to(torch.float) + + _assert_approx_equal_tensor_to_pil(resized_tensor_f, resized_pil_img, tol=0.5, msg=f"{size}, {interpolation}, {dt}") + + accepted_tol = 1.0 + 1e-5 + if interpolation == BICUBIC: + # this overall mean value to make the tests pass + # High value is mostly required for test cases with + # downsampling and upsampling where we can not exactly + # match PIL implementation. 
+ accepted_tol = 15.0 + + _assert_approx_equal_tensor_to_pil( + resized_tensor_f, resized_pil_img, tol=accepted_tol, agg_method="max", msg=f"{size}, {interpolation}, {dt}" + ) + + if isinstance(size, int): + script_size = [ + size, + ] + else: + script_size = size + + resize_result = script_fn(tensor, size=script_size, interpolation=interpolation, antialias=True) + assert_equal(resized_tensor, resize_result) + + +def check_functional_vs_PIL_vs_scripted( + fn, fn_pil, fn_t, config, device, dtype, channels=3, tol=2.0 + 1e-10, agg_method="max" +): + + script_fn = torch.jit.script(fn) + torch.manual_seed(15) + tensor, pil_img = _create_data(26, 34, channels=channels, device=device) + batch_tensors = _create_data_batch(16, 18, num_samples=4, channels=channels, device=device) + + if dtype is not None: + tensor = F.convert_image_dtype(tensor, dtype) + batch_tensors = F.convert_image_dtype(batch_tensors, dtype) + + out_fn_t = fn_t(tensor, **config) + out_pil = fn_pil(pil_img, **config) + out_scripted = script_fn(tensor, **config) + assert out_fn_t.dtype == out_scripted.dtype + assert out_fn_t.size()[1:] == out_pil.size[::-1] + + rbg_tensor = out_fn_t + + if out_fn_t.dtype != torch.uint8: + rbg_tensor = F.convert_image_dtype(out_fn_t, torch.uint8) + + # Check that max difference does not exceed 2 in [0, 255] range + # Exact matching is not possible due to incompatibility convert_image_dtype and PIL results + _assert_approx_equal_tensor_to_pil(rbg_tensor.float(), out_pil, tol=tol, agg_method=agg_method) + + atol = 1e-6 + if out_fn_t.dtype == torch.uint8 and "cuda" in torch.device(device).type: + atol = 1.0 + assert out_fn_t.allclose(out_scripted, atol=atol) + + # FIXME: fn will be scripted again in _test_fn_on_batch. We could avoid that. 
+ _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=atol, **config) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("config", [{"brightness_factor": f} for f in (0.1, 0.5, 1.0, 1.34, 2.5)]) +@pytest.mark.parametrize("channels", [1, 3]) +def test_adjust_brightness(device, dtype, config, channels): + check_functional_vs_PIL_vs_scripted( + F.adjust_brightness, + F_pil.adjust_brightness, + F_t.adjust_brightness, + config, + device, + dtype, + channels, + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("channels", [1, 3]) +def test_invert(device, dtype, channels): + check_functional_vs_PIL_vs_scripted( + F.invert, F_pil.invert, F_t.invert, {}, device, dtype, channels, tol=1.0, agg_method="max" + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("config", [{"bits": bits} for bits in range(0, 8)]) +@pytest.mark.parametrize("channels", [1, 3]) +def test_posterize(device, config, channels): + check_functional_vs_PIL_vs_scripted( + F.posterize, + F_pil.posterize, + F_t.posterize, + config, + device, + dtype=None, + channels=channels, + tol=1.0, + agg_method="max", + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0, 64, 128, 192, 255]]) +@pytest.mark.parametrize("channels", [1, 3]) +def test_solarize1(device, config, channels): + check_functional_vs_PIL_vs_scripted( + F.solarize, + F_pil.solarize, + F_t.solarize, + config, + device, + dtype=None, + channels=channels, + tol=1.0, + agg_method="max", + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (torch.float32, torch.float64)) +@pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0.0, 0.25, 0.5, 0.75, 1.0]]) 
+@pytest.mark.parametrize("channels", [1, 3]) +def test_solarize2(device, dtype, config, channels): + check_functional_vs_PIL_vs_scripted( + F.solarize, + lambda img, threshold: F_pil.solarize(img, 255 * threshold), + F_t.solarize, + config, + device, + dtype, + channels, + tol=1.0, + agg_method="max", + ) + + +@pytest.mark.parametrize( + ("dtype", "threshold"), + [ + *[ + (dtype, threshold) + for dtype, threshold in itertools.product( + [torch.float32, torch.float16], + [0.0, 0.25, 0.5, 0.75, 1.0], + ) + ], + *[(torch.uint8, threshold) for threshold in [0, 64, 128, 192, 255]], + *[(torch.int64, threshold) for threshold in [0, 2**32, 2**63 - 1]], + ], +) +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_solarize_threshold_within_bound(threshold, dtype, device): + make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max) + img = make_img((3, 12, 23), dtype=dtype, device=device) + F_t.solarize(img, threshold) + + +@pytest.mark.parametrize( + ("dtype", "threshold"), + [ + (torch.float32, 1.5), + (torch.float16, 1.5), + (torch.uint8, 260), + (torch.int64, 2**64), + ], +) +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_solarize_threshold_above_bound(threshold, dtype, device): + make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max) + img = make_img((3, 12, 23), dtype=dtype, device=device) + with pytest.raises(TypeError, match="Threshold should be less than bound of img."): + F_t.solarize(img, threshold) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("config", [{"sharpness_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]]) +@pytest.mark.parametrize("channels", [1, 3]) +def test_adjust_sharpness(device, dtype, config, channels): + check_functional_vs_PIL_vs_scripted( + F.adjust_sharpness, + F_pil.adjust_sharpness, + F_t.adjust_sharpness, + config, + 
device, + dtype, + channels, + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("channels", [1, 3]) +def test_autocontrast(device, dtype, channels): + check_functional_vs_PIL_vs_scripted( + F.autocontrast, F_pil.autocontrast, F_t.autocontrast, {}, device, dtype, channels, tol=1.0, agg_method="max" + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("channels", [1, 3]) +def test_autocontrast_equal_minmax(device, dtype, channels): + a = _create_data_batch(32, 32, num_samples=1, channels=channels, device=device) + a = a / 2.0 + 0.3 + assert (F.autocontrast(a)[0] == F.autocontrast(a[0])).all() + + a[0, 0] = 0.7 + assert (F.autocontrast(a)[0] == F.autocontrast(a[0])).all() + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("channels", [1, 3]) +def test_equalize(device, channels): + torch.use_deterministic_algorithms(False) + check_functional_vs_PIL_vs_scripted( + F.equalize, + F_pil.equalize, + F_t.equalize, + {}, + device, + dtype=None, + channels=channels, + tol=1.0, + agg_method="max", + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("config", [{"contrast_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]]) +@pytest.mark.parametrize("channels", [1, 3]) +def test_adjust_contrast(device, dtype, config, channels): + check_functional_vs_PIL_vs_scripted( + F.adjust_contrast, F_pil.adjust_contrast, F_t.adjust_contrast, config, device, dtype, channels + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("config", [{"saturation_factor": f} for f in [0.5, 0.75, 1.0, 1.5, 2.0]]) +@pytest.mark.parametrize("channels", [1, 3]) 
+def test_adjust_saturation(device, dtype, config, channels): + check_functional_vs_PIL_vs_scripted( + F.adjust_saturation, F_pil.adjust_saturation, F_t.adjust_saturation, config, device, dtype, channels + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("config", [{"hue_factor": f} for f in [-0.45, -0.25, 0.0, 0.25, 0.45]]) +@pytest.mark.parametrize("channels", [1, 3]) +def test_adjust_hue(device, dtype, config, channels): + check_functional_vs_PIL_vs_scripted( + F.adjust_hue, F_pil.adjust_hue, F_t.adjust_hue, config, device, dtype, channels, tol=16.1, agg_method="max" + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) +@pytest.mark.parametrize("config", [{"gamma": g1, "gain": g2} for g1, g2 in zip([0.8, 1.0, 1.2], [0.7, 1.0, 1.3])]) +@pytest.mark.parametrize("channels", [1, 3]) +def test_adjust_gamma(device, dtype, config, channels): + check_functional_vs_PIL_vs_scripted( + F.adjust_gamma, + F_pil.adjust_gamma, + F_t.adjust_gamma, + config, + device, + dtype, + channels, + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) +@pytest.mark.parametrize("pad", [2, [3], [0, 3], (3, 3), [4, 2, 4, 3]]) +@pytest.mark.parametrize( + "config", + [ + {"padding_mode": "constant", "fill": 0}, + {"padding_mode": "constant", "fill": 10}, + {"padding_mode": "constant", "fill": 20.2}, + {"padding_mode": "edge"}, + {"padding_mode": "reflect"}, + {"padding_mode": "symmetric"}, + ], +) +def test_pad(device, dt, pad, config): + script_fn = torch.jit.script(F.pad) + tensor, pil_img = _create_data(7, 8, device=device) + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + # This is 
a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + batch_tensors = batch_tensors.to(dt) + + pad_tensor = F_t.pad(tensor, pad, **config) + pad_pil_img = F_pil.pad(pil_img, pad, **config) + + pad_tensor_8b = pad_tensor + # we need to cast to uint8 to compare with PIL image + if pad_tensor_8b.dtype != torch.uint8: + pad_tensor_8b = pad_tensor_8b.to(torch.uint8) + + _assert_equal_tensor_to_pil(pad_tensor_8b, pad_pil_img, msg=f"{pad}, {config}") + + if isinstance(pad, int): + script_pad = [ + pad, + ] + else: + script_pad = pad + pad_tensor_script = script_fn(tensor, script_pad, **config) + assert_equal(pad_tensor, pad_tensor_script, msg=f"{pad}, {config}") + + _test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **config) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("mode", [NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC]) +def test_resized_crop(device, mode): + # test values of F.resized_crop in several cases: + # 1) resize to the same size, crop to the same size => should be identity + tensor, _ = _create_data(26, 36, device=device) + + out_tensor = F.resized_crop( + tensor, top=0, left=0, height=26, width=36, size=[26, 36], interpolation=mode, antialias=True + ) + assert_equal(tensor, out_tensor, msg=f"{out_tensor[0, :5, :5]} vs {tensor[0, :5, :5]}") + + # 2) resize by half and crop a TL corner + tensor, _ = _create_data(26, 36, device=device) + out_tensor = F.resized_crop(tensor, top=0, left=0, height=20, width=30, size=[10, 15], interpolation=NEAREST) + expected_out_tensor = tensor[:, :20:2, :30:2] + assert_equal( + expected_out_tensor, + out_tensor, + msg=f"{expected_out_tensor[0, :10, :10]} vs {out_tensor[0, :10, :10]}", + ) + + batch_tensors = _create_data_batch(26, 36, num_samples=4, device=device) + _test_fn_on_batch( + batch_tensors, + F.resized_crop, + top=1, + left=2, + height=20, + width=30, + size=[10, 15], + interpolation=NEAREST, + ) + + +@pytest.mark.parametrize("device", 
cpu_and_cuda()) +@pytest.mark.parametrize( + "func, args", + [ + (F_t.get_dimensions, ()), + (F_t.get_image_size, ()), + (F_t.get_image_num_channels, ()), + (F_t.vflip, ()), + (F_t.hflip, ()), + (F_t.crop, (1, 2, 4, 5)), + (F_t.adjust_brightness, (0.0,)), + (F_t.adjust_contrast, (1.0,)), + (F_t.adjust_hue, (-0.5,)), + (F_t.adjust_saturation, (2.0,)), + (F_t.pad, ([2], 2, "constant")), + (F_t.resize, ([10, 11],)), + (F_t.perspective, ([0.2])), + (F_t.gaussian_blur, ((2, 2), (0.7, 0.5))), + (F_t.invert, ()), + (F_t.posterize, (0,)), + (F_t.solarize, (0.3,)), + (F_t.adjust_sharpness, (0.3,)), + (F_t.autocontrast, ()), + (F_t.equalize, ()), + ], +) +def test_assert_image_tensor(device, func, args): + shape = (100,) + tensor = torch.rand(*shape, dtype=torch.float, device=device) + with pytest.raises(Exception, match=r"Tensor is not a torch image."): + func(tensor, *args) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_vflip(device): + script_vflip = torch.jit.script(F.vflip) + + img_tensor, pil_img = _create_data(16, 18, device=device) + vflipped_img = F.vflip(img_tensor) + vflipped_pil_img = F.vflip(pil_img) + _assert_equal_tensor_to_pil(vflipped_img, vflipped_pil_img) + + # scriptable function test + vflipped_img_script = script_vflip(img_tensor) + assert_equal(vflipped_img, vflipped_img_script) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + _test_fn_on_batch(batch_tensors, F.vflip) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_hflip(device): + script_hflip = torch.jit.script(F.hflip) + + img_tensor, pil_img = _create_data(16, 18, device=device) + hflipped_img = F.hflip(img_tensor) + hflipped_pil_img = F.hflip(pil_img) + _assert_equal_tensor_to_pil(hflipped_img, hflipped_pil_img) + + # scriptable function test + hflipped_img_script = script_hflip(img_tensor) + assert_equal(hflipped_img, hflipped_img_script) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + 
_test_fn_on_batch(batch_tensors, F.hflip) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "top, left, height, width", + [ + (1, 2, 4, 5), # crop inside top-left corner + (2, 12, 3, 4), # crop inside top-right corner + (8, 3, 5, 6), # crop inside bottom-left corner + (8, 11, 4, 3), # crop inside bottom-right corner + (50, 50, 10, 10), # crop outside the image + (-50, -50, 10, 10), # crop outside the image + ], +) +def test_crop(device, top, left, height, width): + script_crop = torch.jit.script(F.crop) + + img_tensor, pil_img = _create_data(16, 18, device=device) + + pil_img_cropped = F.crop(pil_img, top, left, height, width) + + img_tensor_cropped = F.crop(img_tensor, top, left, height, width) + _assert_equal_tensor_to_pil(img_tensor_cropped, pil_img_cropped) + + img_tensor_cropped = script_crop(img_tensor, top, left, height, width) + _assert_equal_tensor_to_pil(img_tensor_cropped, pil_img_cropped) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + _test_fn_on_batch(batch_tensors, F.crop, top=top, left=left, height=height, width=width) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("image_size", ("small", "large")) +@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) +@pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)]) +@pytest.mark.parametrize("sigma", [[0.5, 0.5], (0.5, 0.5), (0.8, 0.8), (1.7, 1.7)]) +@pytest.mark.parametrize("fn", [F.gaussian_blur, torch.jit.script(F.gaussian_blur)]) +def test_gaussian_blur(device, image_size, dt, ksize, sigma, fn): + + # true_cv2_results = { + # # np_img = np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3)) + # # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.8) + # "3_3_0.8": ... + # # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.5) + # "3_3_0.5": ... + # # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.8) + # "3_5_0.8": ... 
+ # # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.5) + # "3_5_0.5": ... + # # np_img2 = np.arange(26 * 28, dtype="uint8").reshape((26, 28)) + # # cv2.GaussianBlur(np_img2, ksize=(23, 23), sigmaX=1.7) + # "23_23_1.7": ... + # } + p = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "gaussian_blur_opencv_results.pt") + + true_cv2_results = torch.load(p, weights_only=False) + + if image_size == "small": + tensor = ( + torch.from_numpy(np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3))).permute(2, 0, 1).to(device) + ) + else: + tensor = torch.from_numpy(np.arange(26 * 28, dtype="uint8").reshape((1, 26, 28))).to(device) + + if dt == torch.float16 and device == "cpu": + # skip float16 on CPU case + return + + if dt is not None: + tensor = tensor.to(dtype=dt) + + _ksize = (ksize, ksize) if isinstance(ksize, int) else ksize + _sigma = sigma[0] if sigma is not None else None + shape = tensor.shape + gt_key = f"{shape[-2]}_{shape[-1]}_{shape[-3]}__{_ksize[0]}_{_ksize[1]}_{_sigma}" + if gt_key not in true_cv2_results: + return + + true_out = ( + torch.tensor(true_cv2_results[gt_key]).reshape(shape[-2], shape[-1], shape[-3]).permute(2, 0, 1).to(tensor) + ) + + out = fn(tensor, kernel_size=ksize, sigma=sigma) + torch.testing.assert_close(out, true_out, rtol=0.0, atol=1.0, msg=f"{ksize}, {sigma}") + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_hsv2rgb(device): + scripted_fn = torch.jit.script(F_t._hsv2rgb) + shape = (3, 100, 150) + for _ in range(10): + hsv_img = torch.rand(*shape, dtype=torch.float, device=device) + rgb_img = F_t._hsv2rgb(hsv_img) + ft_img = rgb_img.permute(1, 2, 0).flatten(0, 1) + + ( + h, + s, + v, + ) = hsv_img.unbind(0) + h = h.flatten().cpu().numpy() + s = s.flatten().cpu().numpy() + v = v.flatten().cpu().numpy() + + rgb = [] + for h1, s1, v1 in zip(h, s, v): + rgb.append(colorsys.hsv_to_rgb(h1, s1, v1)) + colorsys_img = torch.tensor(rgb, dtype=torch.float32, device=device) + 
torch.testing.assert_close(ft_img, colorsys_img, rtol=0.0, atol=1e-5) + + s_rgb_img = scripted_fn(hsv_img) + torch.testing.assert_close(rgb_img, s_rgb_img) + + batch_tensors = _create_data_batch(120, 100, num_samples=4, device=device).float() + _test_fn_on_batch(batch_tensors, F_t._hsv2rgb) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_rgb2hsv(device): + scripted_fn = torch.jit.script(F_t._rgb2hsv) + shape = (3, 150, 100) + for _ in range(10): + rgb_img = torch.rand(*shape, dtype=torch.float, device=device) + hsv_img = F_t._rgb2hsv(rgb_img) + ft_hsv_img = hsv_img.permute(1, 2, 0).flatten(0, 1) + + ( + r, + g, + b, + ) = rgb_img.unbind(dim=-3) + r = r.flatten().cpu().numpy() + g = g.flatten().cpu().numpy() + b = b.flatten().cpu().numpy() + + hsv = [] + for r1, g1, b1 in zip(r, g, b): + hsv.append(colorsys.rgb_to_hsv(r1, g1, b1)) + + colorsys_img = torch.tensor(hsv, dtype=torch.float32, device=device) + + ft_hsv_img_h, ft_hsv_img_sv = torch.split(ft_hsv_img, [1, 2], dim=1) + colorsys_img_h, colorsys_img_sv = torch.split(colorsys_img, [1, 2], dim=1) + + max_diff_h = ((colorsys_img_h * 2 * math.pi).sin() - (ft_hsv_img_h * 2 * math.pi).sin()).abs().max() + max_diff_sv = (colorsys_img_sv - ft_hsv_img_sv).abs().max() + max_diff = max(max_diff_h, max_diff_sv) + assert max_diff < 1e-5 + + s_hsv_img = scripted_fn(rgb_img) + torch.testing.assert_close(hsv_img, s_hsv_img, rtol=1e-5, atol=1e-7) + + batch_tensors = _create_data_batch(120, 100, num_samples=4, device=device).float() + _test_fn_on_batch(batch_tensors, F_t._rgb2hsv) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("num_output_channels", (3, 1)) +def test_rgb_to_grayscale(device, num_output_channels): + script_rgb_to_grayscale = torch.jit.script(F.rgb_to_grayscale) + + img_tensor, pil_img = _create_data(32, 34, device=device) + + gray_pil_image = F.rgb_to_grayscale(pil_img, num_output_channels=num_output_channels) + gray_tensor = F.rgb_to_grayscale(img_tensor, 
num_output_channels=num_output_channels) + + _assert_approx_equal_tensor_to_pil(gray_tensor.float(), gray_pil_image, tol=1.0 + 1e-10, agg_method="max") + + s_gray_tensor = script_rgb_to_grayscale(img_tensor, num_output_channels=num_output_channels) + assert_equal(s_gray_tensor, gray_tensor) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + _test_fn_on_batch(batch_tensors, F.rgb_to_grayscale, num_output_channels=num_output_channels) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_center_crop(device): + script_center_crop = torch.jit.script(F.center_crop) + + img_tensor, pil_img = _create_data(32, 34, device=device) + + cropped_pil_image = F.center_crop(pil_img, [10, 11]) + + cropped_tensor = F.center_crop(img_tensor, [10, 11]) + _assert_equal_tensor_to_pil(cropped_tensor, cropped_pil_image) + + cropped_tensor = script_center_crop(img_tensor, [10, 11]) + _assert_equal_tensor_to_pil(cropped_tensor, cropped_pil_image) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + _test_fn_on_batch(batch_tensors, F.center_crop, output_size=[10, 11]) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_five_crop(device): + script_five_crop = torch.jit.script(F.five_crop) + + img_tensor, pil_img = _create_data(32, 34, device=device) + + cropped_pil_images = F.five_crop(pil_img, [10, 11]) + + cropped_tensors = F.five_crop(img_tensor, [10, 11]) + for i in range(5): + _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i]) + + cropped_tensors = script_five_crop(img_tensor, [10, 11]) + for i in range(5): + _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i]) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + tuple_transformed_batches = F.five_crop(batch_tensors, [10, 11]) + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] 
+ tuple_transformed_imgs = F.five_crop(img_tensor, [10, 11]) + assert len(tuple_transformed_imgs) == len(tuple_transformed_batches) + + for j in range(len(tuple_transformed_imgs)): + true_transformed_img = tuple_transformed_imgs[j] + transformed_img = tuple_transformed_batches[j][i, ...] + assert_equal(true_transformed_img, transformed_img) + + # scriptable function test + s_tuple_transformed_batches = script_five_crop(batch_tensors, [10, 11]) + for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches): + assert_equal(transformed_batch, s_transformed_batch) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_ten_crop(device): + script_ten_crop = torch.jit.script(F.ten_crop) + + img_tensor, pil_img = _create_data(32, 34, device=device) + + cropped_pil_images = F.ten_crop(pil_img, [10, 11]) + + cropped_tensors = F.ten_crop(img_tensor, [10, 11]) + for i in range(10): + _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i]) + + cropped_tensors = script_ten_crop(img_tensor, [10, 11]) + for i in range(10): + _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i]) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + tuple_transformed_batches = F.ten_crop(batch_tensors, [10, 11]) + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] + tuple_transformed_imgs = F.ten_crop(img_tensor, [10, 11]) + assert len(tuple_transformed_imgs) == len(tuple_transformed_batches) + + for j in range(len(tuple_transformed_imgs)): + true_transformed_img = tuple_transformed_imgs[j] + transformed_img = tuple_transformed_batches[j][i, ...] 
+ assert_equal(true_transformed_img, transformed_img) + + # scriptable function test + s_tuple_transformed_batches = script_ten_crop(batch_tensors, [10, 11]) + for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches): + assert_equal(transformed_batch, s_transformed_batch) + + +def test_elastic_transform_asserts(): + with pytest.raises(TypeError, match="Argument displacement should be a Tensor"): + _ = F.elastic_transform("abc", displacement=None) + + with pytest.raises(TypeError, match="img should be PIL Image or Tensor"): + _ = F.elastic_transform("abc", displacement=torch.rand(1)) + + img_tensor = torch.rand(1, 3, 32, 24) + with pytest.raises(ValueError, match="Argument displacement shape should"): + _ = F.elastic_transform(img_tensor, displacement=torch.rand(1, 2)) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC]) +@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) +@pytest.mark.parametrize( + "fill", + [None, [255, 255, 255], (2.0,)], +) +def test_elastic_transform_consistency(device, interpolation, dt, fill): + script_elastic_transform = torch.jit.script(F.elastic_transform) + img_tensor, _ = _create_data(32, 34, device=device) + # As there is no PIL implementation for elastic_transform, + # thus we do not run tests tensor vs pillow + + if dt is not None: + img_tensor = img_tensor.to(dt) + + displacement = T.ElasticTransform.get_params([1.5, 1.5], [2.0, 2.0], [32, 34]) + kwargs = dict( + displacement=displacement, + interpolation=interpolation, + fill=fill, + ) + + out_tensor1 = F.elastic_transform(img_tensor, **kwargs) + out_tensor2 = script_elastic_transform(img_tensor, **kwargs) + assert_equal(out_tensor1, out_tensor2) + + batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device) + displacement = T.ElasticTransform.get_params([1.5, 1.5], [2.0, 2.0], [16, 18]) + kwargs["displacement"] 
= displacement + if dt is not None: + batch_tensors = batch_tensors.to(dt) + _test_fn_on_batch(batch_tensors, F.elastic_transform, **kwargs) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_hub.py b/test/test_hub.py deleted file mode 100644 index 4ae9e51021b..00000000000 --- a/test/test_hub.py +++ /dev/null @@ -1,56 +0,0 @@ -import torch.hub as hub -import tempfile -import shutil -import os -import sys -import unittest - - -def sum_of_model_parameters(model): - s = 0 - for p in model.parameters(): - s += p.sum() - return s - - -SUM_OF_PRETRAINED_RESNET18_PARAMS = -12703.99609375 - - -@unittest.skipIf('torchvision' in sys.modules, - 'TestHub must start without torchvision imported') -class TestHub(unittest.TestCase): - # Only run this check ONCE before all tests start. - # - If torchvision is imported before all tests start, e.g. we might find _C.so - # which doesn't exist in downloaded zip but in the installed wheel. - # - After the first test is run, torchvision is already in sys.modules due to - # Python cache as we run all hub tests in the same python process. 
- - def test_load_from_github(self): - hub_model = hub.load( - 'pytorch/vision', - 'resnet18', - pretrained=True, - progress=False) - self.assertEqual(sum_of_model_parameters(hub_model).item(), - SUM_OF_PRETRAINED_RESNET18_PARAMS) - - def test_set_dir(self): - temp_dir = tempfile.gettempdir() - hub.set_dir(temp_dir) - hub_model = hub.load( - 'pytorch/vision', - 'resnet18', - pretrained=True, - progress=False) - self.assertEqual(sum_of_model_parameters(hub_model).item(), - SUM_OF_PRETRAINED_RESNET18_PARAMS) - self.assertTrue(os.path.exists(temp_dir + '/pytorch_vision_master')) - shutil.rmtree(temp_dir + '/pytorch_vision_master') - - def test_list_entrypoints(self): - entry_lists = hub.list('pytorch/vision', force_reload=True) - self.assertIn('resnet18', entry_lists) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/test_image.py b/test/test_image.py new file mode 100644 index 00000000000..b8e96773267 --- /dev/null +++ b/test/test_image.py @@ -0,0 +1,1060 @@ +import concurrent.futures +import glob +import io +import os +import re +import sys +from pathlib import Path + +import numpy as np +import pytest +import requests +import torch +import torchvision.transforms.v2.functional as F +from common_utils import assert_equal, cpu_and_cuda, IN_OSS_CI, needs_cuda +from PIL import __version__ as PILLOW_VERSION, Image, ImageOps, ImageSequence +from torchvision.io.image import ( + decode_avif, + decode_gif, + decode_heic, + decode_image, + decode_jpeg, + decode_png, + decode_webp, + encode_jpeg, + encode_png, + ImageReadMode, + read_file, + read_image, + write_file, + write_jpeg, + write_png, +) + +IMAGE_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") +FAKEDATA_DIR = os.path.join(IMAGE_ROOT, "fakedata") +IMAGE_DIR = os.path.join(FAKEDATA_DIR, "imagefolder") +DAMAGED_JPEG = os.path.join(IMAGE_ROOT, "damaged_jpeg") +DAMAGED_PNG = os.path.join(IMAGE_ROOT, "damaged_png") +ENCODE_JPEG = os.path.join(IMAGE_ROOT, "encode_jpeg") 
+INTERLACED_PNG = os.path.join(IMAGE_ROOT, "interlaced_png") +TOOSMALL_PNG = os.path.join(IMAGE_ROOT, "toosmall_png") +IS_WINDOWS = sys.platform in ("win32", "cygwin") +IS_MACOS = sys.platform == "darwin" +IS_LINUX = sys.platform == "linux" +PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split(".")) +WEBP_TEST_IMAGES_DIR = os.environ.get("WEBP_TEST_IMAGES_DIR", "") +# See https://github.com/pytorch/vision/pull/8724#issuecomment-2503964558 +HEIC_AVIF_MESSAGE = "AVIF and HEIF only available on linux." + + +def _get_safe_image_name(name): + # Used when we need to change the pytest "id" for an "image path" parameter. + # If we don't, the test id (i.e. its name) will contain the whole path to the image, which is machine-specific, + # and this creates issues when the test is running in a different machine than where it was collected + # (typically, in fb internal infra) + return name.split(os.path.sep)[-1] + + +def get_images(directory, img_ext): + assert os.path.isdir(directory) + image_paths = glob.glob(directory + f"/**/*{img_ext}", recursive=True) + for path in image_paths: + if path.split(os.sep)[-2] not in ["damaged_jpeg", "jpeg_write"]: + yield path + + +def pil_read_image(img_path): + with Image.open(img_path) as img: + return torch.from_numpy(np.array(img)) + + +def normalize_dimensions(img_pil): + if len(img_pil.shape) == 3: + img_pil = img_pil.permute(2, 0, 1) + else: + img_pil = img_pil.unsqueeze(0) + return img_pil + + +@pytest.mark.parametrize( + "img_path", + [pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path)) for jpeg_path in get_images(IMAGE_ROOT, ".jpg")], +) +@pytest.mark.parametrize( + "pil_mode, mode", + [ + (None, ImageReadMode.UNCHANGED), + ("L", ImageReadMode.GRAY), + ("RGB", ImageReadMode.RGB), + ], +) +@pytest.mark.parametrize("scripted", (False, True)) +@pytest.mark.parametrize("decode_fun", (decode_jpeg, decode_image)) +def test_decode_jpeg(img_path, pil_mode, mode, scripted, decode_fun): + + with Image.open(img_path) as img: + 
is_cmyk = img.mode == "CMYK" + if pil_mode is not None: + img = img.convert(pil_mode) + img_pil = torch.from_numpy(np.array(img)) + if is_cmyk and mode == ImageReadMode.UNCHANGED: + # flip the colors to match libjpeg + img_pil = 255 - img_pil + + img_pil = normalize_dimensions(img_pil) + data = read_file(img_path) + if scripted: + decode_fun = torch.jit.script(decode_fun) + img_ljpeg = decode_fun(data, mode=mode) + + # Permit a small variation on pixel values to account for implementation + # differences between Pillow and LibJPEG. + abs_mean_diff = (img_ljpeg.type(torch.float32) - img_pil).abs().mean().item() + assert abs_mean_diff < 2 + + +@pytest.mark.parametrize("codec", ["png", "jpeg"]) +@pytest.mark.parametrize("orientation", [1, 2, 3, 4, 5, 6, 7, 8, 0]) +def test_decode_with_exif_orientation(tmpdir, codec, orientation): + fp = os.path.join(tmpdir, f"exif_oriented_{orientation}.{codec}") + t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8) + im = F.to_pil_image(t) + exif = im.getexif() + exif[0x0112] = orientation # set exif orientation + im.save(fp, codec.upper(), exif=exif.tobytes()) + + data = read_file(fp) + output = decode_image(data, apply_exif_orientation=True) + + pimg = Image.open(fp) + pimg = ImageOps.exif_transpose(pimg) + + expected = F.pil_to_tensor(pimg) + torch.testing.assert_close(expected, output) + + +@pytest.mark.parametrize("size", [65533, 1, 7, 10, 23, 33]) +def test_invalid_exif(tmpdir, size): + # Inspired from a PIL test: + # https://github.com/python-pillow/Pillow/blob/8f63748e50378424628155994efd7e0739a4d1d1/Tests/test_file_jpeg.py#L299 + fp = os.path.join(tmpdir, "invalid_exif.jpg") + t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8) + im = F.to_pil_image(t) + im.save(fp, "JPEG", exif=b"1" * size) + + data = read_file(fp) + output = decode_image(data, apply_exif_orientation=True) + + pimg = Image.open(fp) + pimg = ImageOps.exif_transpose(pimg) + + expected = F.pil_to_tensor(pimg) + 
torch.testing.assert_close(expected, output) + + +def test_decode_bad_huffman_images(): + # sanity check: make sure we can decode the bad Huffman encoding + bad_huff = read_file(os.path.join(DAMAGED_JPEG, "bad_huffman.jpg")) + decode_jpeg(bad_huff) + + +@pytest.mark.parametrize( + "img_path", + [ + pytest.param(truncated_image, id=_get_safe_image_name(truncated_image)) + for truncated_image in glob.glob(os.path.join(DAMAGED_JPEG, "corrupt*.jpg")) + ], +) +def test_damaged_corrupt_images(img_path): + # Truncated images should raise an exception + data = read_file(img_path) + if "corrupt34" in img_path: + match_message = "Image is incomplete or truncated" + else: + match_message = "Unsupported marker type" + with pytest.raises(RuntimeError, match=match_message): + decode_jpeg(data) + + +@pytest.mark.parametrize( + "img_path", + [pytest.param(png_path, id=_get_safe_image_name(png_path)) for png_path in get_images(FAKEDATA_DIR, ".png")], +) +@pytest.mark.parametrize( + "pil_mode, mode", + [ + (None, ImageReadMode.UNCHANGED), + ("L", ImageReadMode.GRAY), + ("LA", ImageReadMode.GRAY_ALPHA), + ("RGB", ImageReadMode.RGB), + ("RGBA", ImageReadMode.RGB_ALPHA), + ], +) +@pytest.mark.parametrize("scripted", (False, True)) +@pytest.mark.parametrize("decode_fun", (decode_png, decode_image)) +def test_decode_png(img_path, pil_mode, mode, scripted, decode_fun): + + if scripted: + decode_fun = torch.jit.script(decode_fun) + + with Image.open(img_path) as img: + if pil_mode is not None: + img = img.convert(pil_mode) + img_pil = torch.from_numpy(np.array(img)) + + img_pil = normalize_dimensions(img_pil) + + if img_path.endswith("16.png"): + data = read_file(img_path) + img_lpng = decode_fun(data, mode=mode) + assert img_lpng.dtype == torch.uint16 + # PIL converts 16 bits pngs to uint8 + img_lpng = F.to_dtype(img_lpng, torch.uint8, scale=True) + else: + data = read_file(img_path) + img_lpng = decode_fun(data, mode=mode) + + tol = 0 if pil_mode is None else 1 + + if PILLOW_VERSION >= 
(8, 3) and pil_mode == "LA": + # Avoid checking the transparency channel until + # https://github.com/python-pillow/Pillow/issues/5593#issuecomment-878244910 + # is fixed. + # TODO: remove once fix is released in PIL. Should be > 8.3.1. + img_lpng, img_pil = img_lpng[0], img_pil[0] + + torch.testing.assert_close(img_lpng, img_pil, atol=tol, rtol=0) + + +def test_decode_png_errors(): + with pytest.raises(RuntimeError, match="Out of bound read in decode_png"): + decode_png(read_file(os.path.join(DAMAGED_PNG, "sigsegv.png"))) + with pytest.raises(RuntimeError, match="Content is too small for png"): + decode_png(read_file(os.path.join(TOOSMALL_PNG, "heapbof.png"))) + + +@pytest.mark.parametrize( + "img_path", + [pytest.param(png_path, id=_get_safe_image_name(png_path)) for png_path in get_images(IMAGE_DIR, ".png")], +) +@pytest.mark.parametrize("scripted", (True, False)) +def test_encode_png(img_path, scripted): + pil_image = Image.open(img_path) + img_pil = torch.from_numpy(np.array(pil_image)) + img_pil = img_pil.permute(2, 0, 1) + encode = torch.jit.script(encode_png) if scripted else encode_png + png_buf = encode(img_pil, compression_level=6) + + rec_img = Image.open(io.BytesIO(bytes(png_buf.tolist()))) + rec_img = torch.from_numpy(np.array(rec_img)) + rec_img = rec_img.permute(2, 0, 1) + + assert_equal(img_pil, rec_img) + + +def test_encode_png_errors(): + with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"): + encode_png(torch.empty((3, 100, 100), dtype=torch.float32)) + + with pytest.raises(RuntimeError, match="Compression level should be between 0 and 9"): + encode_png(torch.empty((3, 100, 100), dtype=torch.uint8), compression_level=-1) + + with pytest.raises(RuntimeError, match="Compression level should be between 0 and 9"): + encode_png(torch.empty((3, 100, 100), dtype=torch.uint8), compression_level=10) + + with pytest.raises(RuntimeError, match="The number of channels should be 1 or 3, got: 5"): + encode_png(torch.empty((5, 100, 
100), dtype=torch.uint8)) + + +@pytest.mark.parametrize( + "img_path", + [pytest.param(png_path, id=_get_safe_image_name(png_path)) for png_path in get_images(IMAGE_DIR, ".png")], +) +@pytest.mark.parametrize("scripted", (True, False)) +def test_write_png(img_path, tmpdir, scripted): + pil_image = Image.open(img_path) + img_pil = torch.from_numpy(np.array(pil_image)) + img_pil = img_pil.permute(2, 0, 1) + + filename, _ = os.path.splitext(os.path.basename(img_path)) + torch_png = os.path.join(tmpdir, f"{filename}_torch.png") + write = torch.jit.script(write_png) if scripted else write_png + write(img_pil, torch_png, compression_level=6) + saved_image = torch.from_numpy(np.array(Image.open(torch_png))) + saved_image = saved_image.permute(2, 0, 1) + + assert_equal(img_pil, saved_image) + + +def test_read_image(): + # Just testing torchcsript, the functionality is somewhat tested already in other tests. + path = next(get_images(IMAGE_ROOT, ".jpg")) + out = read_image(path) + out_scripted = torch.jit.script(read_image)(path) + torch.testing.assert_close(out, out_scripted, atol=0, rtol=0) + + +@pytest.mark.parametrize("scripted", (True, False)) +def test_read_file(tmpdir, scripted): + fname, content = "test1.bin", b"TorchVision\211\n" + fpath = os.path.join(tmpdir, fname) + with open(fpath, "wb") as f: + f.write(content) + + fun = torch.jit.script(read_file) if scripted else read_file + data = fun(fpath) + expected = torch.tensor(list(content), dtype=torch.uint8) + os.unlink(fpath) + assert_equal(data, expected) + + with pytest.raises(RuntimeError, match="No such file or directory: 'tst'"): + read_file("tst") + + +def test_read_file_non_ascii(tmpdir): + fname, content = "日本語(Japanese).bin", b"TorchVision\211\n" + fpath = os.path.join(tmpdir, fname) + with open(fpath, "wb") as f: + f.write(content) + + data = read_file(fpath) + expected = torch.tensor(list(content), dtype=torch.uint8) + os.unlink(fpath) + assert_equal(data, expected) + + 
+@pytest.mark.parametrize("scripted", (True, False)) +def test_write_file(tmpdir, scripted): + fname, content = "test1.bin", b"TorchVision\211\n" + fpath = os.path.join(tmpdir, fname) + content_tensor = torch.tensor(list(content), dtype=torch.uint8) + write = torch.jit.script(write_file) if scripted else write_file + write(fpath, content_tensor) + + with open(fpath, "rb") as f: + saved_content = f.read() + os.unlink(fpath) + assert content == saved_content + + +def test_write_file_non_ascii(tmpdir): + fname, content = "日本語(Japanese).bin", b"TorchVision\211\n" + fpath = os.path.join(tmpdir, fname) + content_tensor = torch.tensor(list(content), dtype=torch.uint8) + write_file(fpath, content_tensor) + + with open(fpath, "rb") as f: + saved_content = f.read() + os.unlink(fpath) + assert content == saved_content + + +@pytest.mark.parametrize( + "shape", + [ + (27, 27), + (60, 60), + (105, 105), + ], +) +def test_read_1_bit_png(shape, tmpdir): + np_rng = np.random.RandomState(0) + image_path = os.path.join(tmpdir, f"test_{shape}.png") + pixels = np_rng.rand(*shape) > 0.5 + img = Image.fromarray(pixels) + img.save(image_path) + img1 = read_image(image_path) + img2 = normalize_dimensions(torch.as_tensor(pixels * 255, dtype=torch.uint8)) + assert_equal(img1, img2) + + +@pytest.mark.parametrize( + "shape", + [ + (27, 27), + (60, 60), + (105, 105), + ], +) +@pytest.mark.parametrize( + "mode", + [ + ImageReadMode.UNCHANGED, + ImageReadMode.GRAY, + ], +) +def test_read_1_bit_png_consistency(shape, mode, tmpdir): + np_rng = np.random.RandomState(0) + image_path = os.path.join(tmpdir, f"test_{shape}.png") + pixels = np_rng.rand(*shape) > 0.5 + img = Image.fromarray(pixels) + img.save(image_path) + img1 = read_image(image_path, mode) + img2 = read_image(image_path, mode) + assert_equal(img1, img2) + + +def test_read_interlaced_png(): + imgs = list(get_images(INTERLACED_PNG, ".png")) + with Image.open(imgs[0]) as im1, Image.open(imgs[1]) as im2: + assert not 
(im1.info.get("interlace") is im2.info.get("interlace")) + img1 = read_image(imgs[0]) + img2 = read_image(imgs[1]) + assert_equal(img1, img2) + + +@needs_cuda +@pytest.mark.parametrize("mode", [ImageReadMode.UNCHANGED, ImageReadMode.GRAY, ImageReadMode.RGB]) +@pytest.mark.parametrize("scripted", (False, True)) +def test_decode_jpegs_cuda(mode, scripted): + encoded_images = [] + for jpeg_path in get_images(IMAGE_ROOT, ".jpg"): + if "cmyk" in jpeg_path: + continue + encoded_image = read_file(jpeg_path) + encoded_images.append(encoded_image) + decoded_images_cpu = decode_jpeg(encoded_images, mode=mode) + decode_fn = torch.jit.script(decode_jpeg) if scripted else decode_jpeg + + # test multithreaded decoding + # in the current version we prevent this by using a lock but we still want to test it + num_workers = 10 + + with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [executor.submit(decode_fn, encoded_images, mode, "cuda") for _ in range(num_workers)] + decoded_images_threaded = [future.result() for future in futures] + assert len(decoded_images_threaded) == num_workers + for decoded_images in decoded_images_threaded: + assert len(decoded_images) == len(encoded_images) + for decoded_image_cuda, decoded_image_cpu in zip(decoded_images, decoded_images_cpu): + assert decoded_image_cuda.shape == decoded_image_cpu.shape + assert decoded_image_cuda.dtype == decoded_image_cpu.dtype == torch.uint8 + assert (decoded_image_cuda.cpu().float() - decoded_image_cpu.cpu().float()).abs().mean() < 2 + + +@needs_cuda +def test_decode_image_cuda_raises(): + data = torch.randint(0, 127, size=(255,), device="cuda", dtype=torch.uint8) + with pytest.raises(RuntimeError): + decode_image(data) + + +@needs_cuda +def test_decode_jpeg_cuda_device_param(): + path = next(path for path in get_images(IMAGE_ROOT, ".jpg") if "cmyk" not in path) + data = read_file(path) + current_device = torch.cuda.current_device() + current_stream = 
torch.cuda.current_stream() + num_devices = torch.cuda.device_count() + devices = ["cuda", torch.device("cuda")] + [torch.device(f"cuda:{i}") for i in range(num_devices)] + results = [] + for device in devices: + results.append(decode_jpeg(data, device=device)) + assert len(results) == len(devices) + for result in results: + assert torch.all(result.cpu() == results[0].cpu()) + assert current_device == torch.cuda.current_device() + assert current_stream == torch.cuda.current_stream() + + +@needs_cuda +def test_decode_jpeg_cuda_errors(): + data = read_file(next(get_images(IMAGE_ROOT, ".jpg"))) + with pytest.raises(RuntimeError, match="Expected a non empty 1-dimensional tensor"): + decode_jpeg(data.reshape(-1, 1), device="cuda") + with pytest.raises(ValueError, match="must be tensors"): + decode_jpeg([1, 2, 3]) + with pytest.raises(ValueError, match="Input tensor must be a CPU tensor"): + decode_jpeg(data.to("cuda"), device="cuda") + with pytest.raises(RuntimeError, match="Expected a torch.uint8 tensor"): + decode_jpeg(data.to(torch.float), device="cuda") + with pytest.raises(RuntimeError, match="Expected the device parameter to be a cuda device"): + torch.ops.image.decode_jpegs_cuda([data], ImageReadMode.UNCHANGED.value, "cpu") + with pytest.raises(ValueError, match="Input tensor must be a CPU tensor"): + decode_jpeg( + torch.empty((100,), dtype=torch.uint8, device="cuda"), + ) + with pytest.raises(ValueError, match="Input list must contain tensors on CPU"): + decode_jpeg( + [ + torch.empty((100,), dtype=torch.uint8, device="cuda"), + torch.empty((100,), dtype=torch.uint8, device="cuda"), + ] + ) + + with pytest.raises(ValueError, match="Input list must contain tensors on CPU"): + decode_jpeg( + [ + torch.empty((100,), dtype=torch.uint8, device="cuda"), + torch.empty((100,), dtype=torch.uint8, device="cuda"), + ], + device="cuda", + ) + + with pytest.raises(ValueError, match="Input list must contain tensors on CPU"): + decode_jpeg( + [ + torch.empty((100,), 
dtype=torch.uint8, device="cpu"), + torch.empty((100,), dtype=torch.uint8, device="cuda"), + ], + device="cuda", + ) + + with pytest.raises(RuntimeError, match="Expected a torch.uint8 tensor"): + decode_jpeg( + [ + torch.empty((100,), dtype=torch.uint8), + torch.empty((100,), dtype=torch.float32), + ], + device="cuda", + ) + + with pytest.raises(RuntimeError, match="Expected a non empty 1-dimensional tensor"): + decode_jpeg( + [ + torch.empty((100,), dtype=torch.uint8), + torch.empty((1, 100), dtype=torch.uint8), + ], + device="cuda", + ) + + with pytest.raises(RuntimeError, match="Error while decoding JPEG images"): + decode_jpeg( + [ + torch.empty((100,), dtype=torch.uint8), + torch.empty((100,), dtype=torch.uint8), + ], + device="cuda", + ) + + with pytest.raises(ValueError, match="Input list must contain at least one element"): + decode_jpeg([], device="cuda") + + +def test_encode_jpeg_errors(): + + with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"): + encode_jpeg(torch.empty((3, 100, 100), dtype=torch.float32)) + + with pytest.raises(ValueError, match="Image quality should be a positive number between 1 and 100"): + encode_jpeg(torch.empty((3, 100, 100), dtype=torch.uint8), quality=-1) + + with pytest.raises(ValueError, match="Image quality should be a positive number between 1 and 100"): + encode_jpeg(torch.empty((3, 100, 100), dtype=torch.uint8), quality=101) + + with pytest.raises(RuntimeError, match="The number of channels should be 1 or 3, got: 5"): + encode_jpeg(torch.empty((5, 100, 100), dtype=torch.uint8)) + + with pytest.raises(RuntimeError, match="Input data should be a 3-dimensional tensor"): + encode_jpeg(torch.empty((1, 3, 100, 100), dtype=torch.uint8)) + + with pytest.raises(RuntimeError, match="Input data should be a 3-dimensional tensor"): + encode_jpeg(torch.empty((100, 100), dtype=torch.uint8)) + + +@pytest.mark.skipif(IS_MACOS, reason="https://github.com/pytorch/vision/issues/8031") +@pytest.mark.parametrize( + 
"img_path", + [pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path)) for jpeg_path in get_images(ENCODE_JPEG, ".jpg")], +) +@pytest.mark.parametrize("scripted", (True, False)) +def test_encode_jpeg(img_path, scripted): + img = read_image(img_path) + + pil_img = F.to_pil_image(img) + buf = io.BytesIO() + pil_img.save(buf, format="JPEG", quality=75) + + encoded_jpeg_pil = torch.frombuffer(buf.getvalue(), dtype=torch.uint8) + + encode = torch.jit.script(encode_jpeg) if scripted else encode_jpeg + for src_img in [img, img.contiguous()]: + encoded_jpeg_torch = encode(src_img, quality=75) + assert_equal(encoded_jpeg_torch, encoded_jpeg_pil) + + +@needs_cuda +def test_encode_jpeg_cuda_device_param(): + path = next(path for path in get_images(IMAGE_ROOT, ".jpg") if "cmyk" not in path) + + data = read_image(path) + + current_device = torch.cuda.current_device() + current_stream = torch.cuda.current_stream() + num_devices = torch.cuda.device_count() + devices = ["cuda", torch.device("cuda")] + [torch.device(f"cuda:{i}") for i in range(num_devices)] + results = [] + for device in devices: + results.append(encode_jpeg(data.to(device=device))) + assert len(results) == len(devices) + for result in results: + assert torch.all(result.cpu() == results[0].cpu()) + assert current_device == torch.cuda.current_device() + assert current_stream == torch.cuda.current_stream() + + +@needs_cuda +@pytest.mark.parametrize( + "img_path", + [pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path)) for jpeg_path in get_images(IMAGE_ROOT, ".jpg")], +) +@pytest.mark.parametrize("scripted", (False, True)) +@pytest.mark.parametrize("contiguous", (False, True)) +def test_encode_jpeg_cuda(img_path, scripted, contiguous): + decoded_image_tv = read_image(img_path) + encode_fn = torch.jit.script(encode_jpeg) if scripted else encode_jpeg + + if "cmyk" in img_path: + pytest.xfail("Encoding a CMYK jpeg isn't supported") + if decoded_image_tv.shape[0] == 1: + pytest.xfail("Decoding a grayscale jpeg 
isn't supported") + # For more detail as to why check out: https://github.com/NVIDIA/cuda-samples/issues/23#issuecomment-559283013 + if contiguous: + decoded_image_tv = decoded_image_tv[None].contiguous(memory_format=torch.contiguous_format)[0] + else: + decoded_image_tv = decoded_image_tv[None].contiguous(memory_format=torch.channels_last)[0] + encoded_jpeg_cuda_tv = encode_fn(decoded_image_tv.cuda(), quality=75) + decoded_jpeg_cuda_tv = decode_jpeg(encoded_jpeg_cuda_tv.cpu()) + + # the actual encoded bytestreams from libnvjpeg and libjpeg-turbo differ for the same quality + # instead, we re-decode the encoded image and compare to the original + abs_mean_diff = (decoded_jpeg_cuda_tv.float() - decoded_image_tv.float()).abs().mean().item() + assert abs_mean_diff < 3 + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("scripted", (True, False)) +@pytest.mark.parametrize("contiguous", (True, False)) +def test_encode_jpegs_batch(scripted, contiguous, device): + if device == "cpu" and IS_MACOS: + pytest.skip("https://github.com/pytorch/vision/issues/8031") + decoded_images_tv = [] + for jpeg_path in get_images(IMAGE_ROOT, ".jpg"): + if "cmyk" in jpeg_path: + continue + decoded_image = read_image(jpeg_path) + if decoded_image.shape[0] == 1: + continue + if contiguous: + decoded_image = decoded_image[None].contiguous(memory_format=torch.contiguous_format)[0] + else: + decoded_image = decoded_image[None].contiguous(memory_format=torch.channels_last)[0] + decoded_images_tv.append(decoded_image) + + encode_fn = torch.jit.script(encode_jpeg) if scripted else encode_jpeg + + decoded_images_tv_device = [img.to(device=device) for img in decoded_images_tv] + encoded_jpegs_tv_device = encode_fn(decoded_images_tv_device, quality=75) + encoded_jpegs_tv_device = [decode_jpeg(img.cpu()) for img in encoded_jpegs_tv_device] + + for original, encoded_decoded in zip(decoded_images_tv, encoded_jpegs_tv_device): + c, h, w = original.shape + abs_mean_diff = 
(original.float() - encoded_decoded.float()).abs().mean().item() + assert abs_mean_diff < 3 + + # test multithreaded decoding + # in the current version we prevent this by using a lock but we still want to test it + num_workers = 10 + with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [executor.submit(encode_fn, decoded_images_tv_device) for _ in range(num_workers)] + encoded_images_threaded = [future.result() for future in futures] + assert len(encoded_images_threaded) == num_workers + for encoded_images in encoded_images_threaded: + assert len(decoded_images_tv_device) == len(encoded_images) + for i, (encoded_image_cuda, decoded_image_tv) in enumerate(zip(encoded_images, decoded_images_tv_device)): + # make sure all the threads produce identical outputs + assert torch.all(encoded_image_cuda == encoded_images_threaded[0][i]) + + # make sure the outputs are identical or close enough to baseline + decoded_cuda_encoded_image = decode_jpeg(encoded_image_cuda.cpu()) + assert decoded_cuda_encoded_image.shape == decoded_image_tv.shape + assert decoded_cuda_encoded_image.dtype == decoded_image_tv.dtype + assert (decoded_cuda_encoded_image.cpu().float() - decoded_image_tv.cpu().float()).abs().mean() < 3 + + +@needs_cuda +def test_single_encode_jpeg_cuda_errors(): + with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"): + encode_jpeg(torch.empty((3, 100, 100), dtype=torch.float32, device="cuda")) + + with pytest.raises(RuntimeError, match="The number of channels should be 3, got: 5"): + encode_jpeg(torch.empty((5, 100, 100), dtype=torch.uint8, device="cuda")) + + with pytest.raises(RuntimeError, match="The number of channels should be 3, got: 1"): + encode_jpeg(torch.empty((1, 100, 100), dtype=torch.uint8, device="cuda")) + + with pytest.raises(RuntimeError, match="Input data should be a 3-dimensional tensor"): + encode_jpeg(torch.empty((1, 3, 100, 100), dtype=torch.uint8, device="cuda")) + + with 
pytest.raises(RuntimeError, match="Input data should be a 3-dimensional tensor"): + encode_jpeg(torch.empty((100, 100), dtype=torch.uint8, device="cuda")) + + +@needs_cuda +def test_batch_encode_jpegs_cuda_errors(): + with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda"), + torch.empty((3, 100, 100), dtype=torch.float32, device="cuda"), + ] + ) + + with pytest.raises(RuntimeError, match="The number of channels should be 3, got: 5"): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda"), + torch.empty((5, 100, 100), dtype=torch.uint8, device="cuda"), + ] + ) + + with pytest.raises(RuntimeError, match="The number of channels should be 3, got: 1"): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda"), + torch.empty((1, 100, 100), dtype=torch.uint8, device="cuda"), + ] + ) + + with pytest.raises(RuntimeError, match="Input data should be a 3-dimensional tensor"): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda"), + torch.empty((1, 3, 100, 100), dtype=torch.uint8, device="cuda"), + ] + ) + + with pytest.raises(RuntimeError, match="Input data should be a 3-dimensional tensor"): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda"), + torch.empty((100, 100), dtype=torch.uint8, device="cuda"), + ] + ) + + with pytest.raises(RuntimeError, match="Input tensor should be on CPU"): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cpu"), + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda"), + ] + ) + + with pytest.raises( + RuntimeError, match="All input tensors must be on the same CUDA device when encoding with nvjpeg" + ): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda"), + torch.empty((3, 100, 100), dtype=torch.uint8, device="cpu"), + ] + ) + + if torch.cuda.device_count() >= 2: + with 
pytest.raises( + RuntimeError, match="All input tensors must be on the same CUDA device when encoding with nvjpeg" + ): + encode_jpeg( + [ + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda:0"), + torch.empty((3, 100, 100), dtype=torch.uint8, device="cuda:1"), + ] + ) + + with pytest.raises(ValueError, match="encode_jpeg requires at least one input tensor when a list is passed"): + encode_jpeg([]) + + +@pytest.mark.skipif(IS_MACOS, reason="https://github.com/pytorch/vision/issues/8031") +@pytest.mark.parametrize( + "img_path", + [pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path)) for jpeg_path in get_images(ENCODE_JPEG, ".jpg")], +) +@pytest.mark.parametrize("scripted", (True, False)) +def test_write_jpeg(img_path, tmpdir, scripted): + tmpdir = Path(tmpdir) + img = read_image(img_path) + pil_img = F.to_pil_image(img) + + torch_jpeg = str(tmpdir / "torch.jpg") + pil_jpeg = str(tmpdir / "pil.jpg") + + write = torch.jit.script(write_jpeg) if scripted else write_jpeg + write(img, torch_jpeg, quality=75) + pil_img.save(pil_jpeg, quality=75) + + with open(torch_jpeg, "rb") as f: + torch_bytes = f.read() + + with open(pil_jpeg, "rb") as f: + pil_bytes = f.read() + + assert_equal(torch_bytes, pil_bytes) + + +def test_pathlib_support(tmpdir): + # Just make sure pathlib.Path is supported where relevant + + jpeg_path = Path(next(get_images(ENCODE_JPEG, ".jpg"))) + + read_file(jpeg_path) + read_image(jpeg_path) + + write_path = Path(tmpdir) / "whatever" + img = torch.randint(0, 10, size=(3, 4, 4), dtype=torch.uint8) + + write_file(write_path, data=img.flatten()) + write_jpeg(img, write_path) + write_png(img, write_path) + + +@pytest.mark.parametrize( + "name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans", "earth") +) +@pytest.mark.parametrize("scripted", (True, False)) +def test_decode_gif(tmpdir, name, scripted): + # Using test images from GIFLIB + # https://sourceforge.net/p/giflib/code/ci/master/tree/pic/, we 
assert PIL + # and torchvision decoded outputs are equal. + # We're not testing against "welcome2" because PIL and GIFLIB disagee on what + # the background color should be (likely a difference in the way they handle + # transparency?) + # 'earth' image is from wikipedia, licensed under CC BY-SA 3.0 + # https://creativecommons.org/licenses/by-sa/3.0/ + # it allows to properly test for transparency, TOP-LEFT offsets, and + # disposal modes. + + path = tmpdir / f"{name}.gif" + if name == "earth": + if IN_OSS_CI: + # TODO: Fix this... one day. + pytest.skip("Skipping 'earth' test as it's flaky on OSS CI") + url = "https://upload.wikimedia.org/wikipedia/commons/2/2c/Rotating_earth_%28large%29.gif" + else: + url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw" + with open(path, "wb") as f: + f.write(requests.get(url).content) + + encoded_bytes = read_file(path) + f = torch.jit.script(decode_gif) if scripted else decode_gif + tv_out = f(encoded_bytes) + if tv_out.ndim == 3: + tv_out = tv_out[None] + + assert tv_out.is_contiguous(memory_format=torch.channels_last) + + # For some reason, not using Image.open() as a CM causes "ResourceWarning: unclosed file" + with Image.open(path) as pil_img: + pil_seq = ImageSequence.Iterator(pil_img) + + for pil_frame, tv_frame in zip(pil_seq, tv_out): + pil_frame = F.pil_to_tensor(pil_frame.convert("RGB")) + torch.testing.assert_close(tv_frame, pil_frame, atol=0, rtol=0) + + +@pytest.mark.parametrize( + "decode_fun, match", + [ + (decode_png, "Content is not png"), + (decode_jpeg, "Not a JPEG file"), + (decode_gif, re.escape("DGifOpenFileName() failed - 103")), + (decode_webp, "WebPGetFeatures failed."), + pytest.param( + decode_avif, "BMFF parsing failed", marks=pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE) + ), + pytest.param( + decode_heic, + "Invalid input: No 'ftyp' box", + marks=pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE), + ), + ], +) +def 
test_decode_bad_encoded_data(decode_fun, match): + encoded_data = torch.randint(0, 256, (100,), dtype=torch.uint8) + with pytest.raises(RuntimeError, match="Input tensor must be 1-dimensional"): + decode_fun(encoded_data[None]) + with pytest.raises(RuntimeError, match="Input tensor must have uint8 data type"): + decode_fun(encoded_data.float()) + with pytest.raises(RuntimeError, match="Input tensor must be contiguous"): + decode_fun(encoded_data[::2]) + with pytest.raises(RuntimeError, match=match): + decode_fun(encoded_data) + + +@pytest.mark.parametrize("decode_fun", (decode_webp, decode_image)) +@pytest.mark.parametrize("scripted", (False, True)) +def test_decode_webp(decode_fun, scripted): + encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".webp"))) + if scripted: + decode_fun = torch.jit.script(decode_fun) + img = decode_fun(encoded_bytes) + assert img.shape == (3, 100, 100) + assert img[None].is_contiguous(memory_format=torch.channels_last) + img += 123 # make sure image buffer wasn't freed by underlying decoding lib + + +# This test is skipped by default because it requires webp images that we're not +# including within the repo. The test images were downloaded manually from the +# different pages of https://developers.google.com/speed/webp/gallery +@pytest.mark.skipif(not WEBP_TEST_IMAGES_DIR, reason="WEBP_TEST_IMAGES_DIR is not set") +@pytest.mark.parametrize("decode_fun", (decode_webp, decode_image)) +@pytest.mark.parametrize("scripted", (False, True)) +@pytest.mark.parametrize( + "mode, pil_mode", + ( + # Note that converting an RGBA image to RGB leads to bad results because the + # transparent pixels aren't necessarily set to "black" or "white", they can be + # random stuff. This is consistent with PIL results. 
+ (ImageReadMode.RGB, "RGB"), + (ImageReadMode.RGB_ALPHA, "RGBA"), + (ImageReadMode.UNCHANGED, None), + ), +) +@pytest.mark.parametrize("filename", Path(WEBP_TEST_IMAGES_DIR).glob("*.webp"), ids=lambda p: p.name) +def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename): + encoded_bytes = read_file(filename) + if scripted: + decode_fun = torch.jit.script(decode_fun) + img = decode_fun(encoded_bytes, mode=mode) + assert img[None].is_contiguous(memory_format=torch.channels_last) + + pil_img = Image.open(filename).convert(pil_mode) + from_pil = F.pil_to_tensor(pil_img) + assert_equal(img, from_pil) + img += 123 # make sure image buffer wasn't freed by underlying decoding lib + + +@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE) +@pytest.mark.parametrize("decode_fun", (decode_avif,)) +def test_decode_avif(decode_fun): + encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif"))) + img = decode_fun(encoded_bytes) + assert img.shape == (3, 100, 100) + assert img[None].is_contiguous(memory_format=torch.channels_last) + img += 123 # make sure image buffer wasn't freed by underlying decoding lib + + +# Note: decode_image fails because some of these files have a (valid) signature +# we don't recognize. We should probably use libmagic.... +@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE) +@pytest.mark.parametrize("decode_fun", (decode_avif, decode_heic)) +@pytest.mark.parametrize( + "mode, pil_mode", + ( + (ImageReadMode.RGB, "RGB"), + (ImageReadMode.RGB_ALPHA, "RGBA"), + (ImageReadMode.UNCHANGED, None), + ), +) +@pytest.mark.parametrize( + "filename", Path("/home/nicolashug/dev/libavif/tests/data/").glob("*.avif"), ids=lambda p: p.name +) +def test_decode_avif_heic_against_pil(decode_fun, mode, pil_mode, filename): + if "reversed_dimg_order" in str(filename): + # Pillow properly decodes this one, but we don't (order of parts of the + # image is wrong). This is due to a bug that was recently fixed in + # libavif. 
Hopefully this test will end up passing soon with a new + # libavif version https://github.com/AOMediaCodec/libavif/issues/2311 + pytest.xfail() + import pillow_avif # noqa + + encoded_bytes = read_file(filename) + try: + img = decode_fun(encoded_bytes, mode=mode) + except RuntimeError as e: + if any( + s in str(e) + for s in ( + "BMFF parsing failed", + "avifDecoderParse failed: ", + "file contains more than one image", + "no 'ispe' property", + "'iref' has double references", + "Invalid image grid", + "decode_heif failed: Invalid input: No 'meta' box", + ) + ): + pytest.skip(reason="Expected failure, that's OK") + else: + raise e + assert img[None].is_contiguous(memory_format=torch.channels_last) + if mode == ImageReadMode.RGB: + assert img.shape[0] == 3 + if mode == ImageReadMode.RGB_ALPHA: + assert img.shape[0] == 4 + + if img.dtype == torch.uint16: + img = F.to_dtype(img, dtype=torch.uint8, scale=True) + try: + from_pil = F.pil_to_tensor(Image.open(filename).convert(pil_mode)) + except RuntimeError as e: + if any(s in str(e) for s in ("Invalid image grid", "Failed to decode image: Not implemented")): + pytest.skip(reason="PIL failure") + else: + raise e + + if True: + from torchvision.utils import make_grid + + g = make_grid([img, from_pil]) + F.to_pil_image(g).save((f"/home/nicolashug/out_images/{filename.name}.{pil_mode}.png")) + + is_decode_heic = getattr(decode_fun, "__name__", getattr(decode_fun, "name", None)) == "decode_heic" + if mode == ImageReadMode.RGB and not is_decode_heic: + # We don't compare torchvision's AVIF against PIL for RGB because + # results look pretty different on RGBA images (other images are fine). + # The result on torchvision basically just plainly ignores the alpha + # channel, resuting in transparent pixels looking dark. 
PIL seems to be + # using a sort of k-nn thing (Take a look at the resuting images) + return + if filename.name == "sofa_grid1x5_420.avif" and is_decode_heic: + return + + torch.testing.assert_close(img, from_pil, rtol=0, atol=3) + + +@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE) +@pytest.mark.parametrize("decode_fun", (decode_heic,)) +def test_decode_heic(decode_fun): + encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".heic"))) + img = decode_fun(encoded_bytes) + assert img.shape == (3, 100, 100) + assert img[None].is_contiguous(memory_format=torch.channels_last) + img += 123 # make sure image buffer wasn't freed by underlying decoding lib + + +@pytest.mark.parametrize("input_type", ("Path", "str", "tensor")) +@pytest.mark.parametrize("scripted", (False, True)) +def test_decode_image_path(input_type, scripted): + # Check that decode_image can support not just tensors as input + path = next(get_images(IMAGE_ROOT, ".jpg")) + if input_type == "Path": + input = Path(path) + elif input_type == "str": + input = path + elif input_type == "tensor": + input = read_file(path) + else: + raise ValueError("Oops") + + if scripted and input_type == "Path": + pytest.xfail(reason="Can't pass a Path when scripting") + + decode_fun = torch.jit.script(decode_image) if scripted else decode_image + decode_fun(input) + + +def test_mode_str(): + # Make sure decode_image supports string modes. We just test decode_image, + # not all of the decoding functions, but they should all support that too. + # Torchscript fails when passing strings, which is expected. 
+ path = next(get_images(IMAGE_ROOT, ".png")) + assert decode_image(path, mode="RGB").shape[0] == 3 + assert decode_image(path, mode="rGb").shape[0] == 3 + assert decode_image(path, mode="GRAY").shape[0] == 1 + assert decode_image(path, mode="RGBA").shape[0] == 4 + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_internal_utils.py b/test/test_internal_utils.py new file mode 100644 index 00000000000..f5f8a040db9 --- /dev/null +++ b/test/test_internal_utils.py @@ -0,0 +1,17 @@ +import pytest +from torchvision._utils import sequence_to_str + + +@pytest.mark.parametrize( + ("seq", "separate_last", "expected"), + [ + ([], "", ""), + (["foo"], "", "'foo'"), + (["foo", "bar"], "", "'foo', 'bar'"), + (["foo", "bar"], "and ", "'foo' and 'bar'"), + (["foo", "bar", "baz"], "", "'foo', 'bar', 'baz'"), + (["foo", "bar", "baz"], "and ", "'foo', 'bar', and 'baz'"), + ], +) +def test_sequence_to_str(seq, separate_last, expected): + assert sequence_to_str(seq, separate_last=separate_last) == expected diff --git a/test/test_internet.py b/test/test_internet.py new file mode 100644 index 00000000000..34fc3d4aa08 --- /dev/null +++ b/test/test_internet.py @@ -0,0 +1,64 @@ +"""This file should contain all tests that need access to the internet (apart +from the ones in test_datasets_download.py) + +We want to bundle all internet-related tests in one file, so the file can be +cleanly ignored in FB internal test infra. 
+""" + +import os +import pathlib +from urllib.error import URLError + +import pytest +import torchvision.datasets.utils as utils + + +class TestDatasetUtils: + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_download_url(self, tmpdir, use_pathlib): + if use_pathlib: + tmpdir = pathlib.Path(tmpdir) + url = "http://github.com/pytorch/vision/archive/master.zip" + try: + utils.download_url(url, tmpdir) + assert len(os.listdir(tmpdir)) != 0 + except URLError: + pytest.skip(f"could not download test file '{url}'") + + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_download_url_retry_http(self, tmpdir, use_pathlib): + if use_pathlib: + tmpdir = pathlib.Path(tmpdir) + url = "https://github.com/pytorch/vision/archive/master.zip" + try: + utils.download_url(url, tmpdir) + assert len(os.listdir(tmpdir)) != 0 + except URLError: + pytest.skip(f"could not download test file '{url}'") + + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_download_url_dont_exist(self, tmpdir, use_pathlib): + if use_pathlib: + tmpdir = pathlib.Path(tmpdir) + url = "http://github.com/pytorch/vision/archive/this_doesnt_exist.zip" + with pytest.raises(URLError): + utils.download_url(url, tmpdir) + + @pytest.mark.parametrize("use_pathlib", (True, False)) + def test_download_url_dispatch_download_from_google_drive(self, mocker, tmpdir, use_pathlib): + if use_pathlib: + tmpdir = pathlib.Path(tmpdir) + url = "https://drive.google.com/file/d/1GO-BHUYRuvzr1Gtp2_fqXRsr9TIeYbhV/view" + + id = "1GO-BHUYRuvzr1Gtp2_fqXRsr9TIeYbhV" + filename = "filename" + md5 = "md5" + + mocked = mocker.patch("torchvision.datasets.utils.download_file_from_google_drive") + utils.download_url(url, tmpdir, filename, md5) + + mocked.assert_called_once_with(id, os.path.expanduser(tmpdir), filename, md5) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_io.py b/test/test_io.py index db292b73e0f..d2950ac9595 100644 --- a/test/test_io.py +++ 
b/test/test_io.py @@ -1,31 +1,29 @@ -import os import contextlib +import os +import sys import tempfile + +import pytest import torch -import torchvision.datasets.utils as utils import torchvision.io as io +from common_utils import assert_equal, cpu_and_cuda from torchvision import get_video_backend -import unittest -import sys -import warnings - -from common_utils import get_tmp_dir -if sys.version_info < (3,): - from urllib2 import URLError -else: - from urllib.error import URLError try: import av + # Do a version test too io.video._check_av_available() except ImportError: av = None +VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos") + + def _create_video_frames(num_frames, height, width): - y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width)) + y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width), indexing="ij") data = [] for i in range(num_frames): xc = float(i) / num_frames @@ -43,30 +41,32 @@ def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, raise ValueError("video_codec can't be specified together with lossless") if options is not None: raise ValueError("options can't be specified together with lossless") - video_codec = 'libx264rgb' - options = {'crf': '0'} + video_codec = "libx264rgb" + options = {"crf": "0"} if video_codec is None: if get_video_backend() == "pyav": - video_codec = 'libx264' + video_codec = "libx264" else: # when video_codec is not set, we assume it is libx264rgb which accepts # RGB pixel formats as input instead of YUV - video_codec = 'libx264rgb' + video_codec = "libx264rgb" if options is None: options = {} data = _create_video_frames(num_frames, height, width) - with tempfile.NamedTemporaryFile(suffix='.mp4') as f: + with tempfile.NamedTemporaryFile(suffix=".mp4") as f: + f.close() io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options) yield f.name, data + os.unlink(f.name) 
-@unittest.skipIf(get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, - "video_reader backend not available") -@unittest.skipIf(av is None, "PyAV unavailable") -@unittest.skipIf(sys.platform == 'win32', 'temporarily disabled on Windows') -class Tester(unittest.TestCase): +@pytest.mark.skipif( + get_video_backend() != "pyav" and not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend not available" +) +@pytest.mark.skipif(av is None, reason="PyAV unavailable") +class TestVideo: # compression adds artifacts, thus we add a tolerance of # 6 in 0-255 range TOLERANCE = 6 @@ -74,24 +74,24 @@ class Tester(unittest.TestCase): def test_write_read_video(self): with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data): lv, _, info = io.read_video(f_name) - self.assertTrue(data.equal(lv)) - self.assertEqual(info["video_fps"], 5) + assert_equal(data, lv) + assert info["video_fps"] == 5 - @unittest.skipIf(not io._HAS_VIDEO_OPT, "video_reader backend is not chosen") + @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen") def test_probe_video_from_file(self): with temp_video(10, 300, 300, 5) as (f_name, data): video_info = io._probe_video_from_file(f_name) - self.assertAlmostEqual(video_info["video_duration"], 2, delta=0.1) - self.assertAlmostEqual(video_info["video_fps"], 5, delta=0.1) + assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration + assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps - @unittest.skipIf(not io._HAS_VIDEO_OPT, "video_reader backend is not chosen") + @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen") def test_probe_video_from_memory(self): with temp_video(10, 300, 300, 5) as (f_name, data): with open(f_name, "rb") as fp: filebuffer = fp.read() video_info = io._probe_video_from_memory(filebuffer) - self.assertAlmostEqual(video_info["video_duration"], 2, delta=0.1) - self.assertAlmostEqual(video_info["video_fps"], 5, 
delta=0.1) + assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration + assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps def test_read_timestamps(self): with temp_video(10, 300, 300, 5) as (f_name, data): @@ -99,167 +99,194 @@ def test_read_timestamps(self): # note: not all formats/codecs provide accurate information for computing the # timestamps. For the format that we use here, this information is available, # so we use it as a baseline - container = av.open(f_name) - stream = container.streams[0] - pts_step = int(round(float(1 / (stream.average_rate * stream.time_base)))) - num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration))) - expected_pts = [i * pts_step for i in range(num_frames)] + with av.open(f_name) as container: + stream = container.streams[0] + pts_step = int(round(float(1 / (stream.average_rate * stream.time_base)))) + num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration))) + expected_pts = [i * pts_step for i in range(num_frames)] - self.assertEqual(pts, expected_pts) + assert pts == expected_pts - def test_read_partial_video(self): + @pytest.mark.parametrize("start", range(5)) + @pytest.mark.parametrize("offset", range(1, 4)) + def test_read_partial_video(self, start, offset): with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data): pts, _ = io.read_video_timestamps(f_name) - for start in range(5): - for l in range(1, 4): - lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1]) - s_data = data[start:(start + l)] - self.assertEqual(len(lv), l) - self.assertTrue(s_data.equal(lv)) + + lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1]) + s_data = data[start : (start + offset)] + assert len(lv) == offset + assert_equal(s_data, lv) if get_video_backend() == "pyav": # for "video_reader" backend, we don't decode the closest early frame # when the given start pts is not matching any frame pts lv, _, _ = 
io.read_video(f_name, pts[4] + 1, pts[7]) - self.assertEqual(len(lv), 4) - self.assertTrue(data[4:8].equal(lv)) + assert len(lv) == 4 + assert_equal(data[4:8], lv) - def test_read_partial_video_bframes(self): + @pytest.mark.parametrize("start", range(0, 80, 20)) + @pytest.mark.parametrize("offset", range(1, 4)) + def test_read_partial_video_bframes(self, start, offset): # do not use lossless encoding, to test the presence of B-frames - options = {'bframes': '16', 'keyint': '10', 'min-keyint': '4'} + options = {"bframes": "16", "keyint": "10", "min-keyint": "4"} with temp_video(100, 300, 300, 5, options=options) as (f_name, data): pts, _ = io.read_video_timestamps(f_name) - for start in range(0, 80, 20): - for l in range(1, 4): - lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1]) - s_data = data[start:(start + l)] - self.assertEqual(len(lv), l) - self.assertTrue((s_data.float() - lv.float()).abs().max() < self.TOLERANCE) + + lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1]) + s_data = data[start : (start + offset)] + assert len(lv) == offset + assert_equal(s_data, lv, rtol=0.0, atol=self.TOLERANCE) lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7]) # TODO fix this - if get_video_backend() == 'pyav': - self.assertEqual(len(lv), 4) - self.assertTrue((data[4:8].float() - lv.float()).abs().max() < self.TOLERANCE) + if get_video_backend() == "pyav": + assert len(lv) == 4 + assert_equal(data[4:8], lv, rtol=0.0, atol=self.TOLERANCE) else: - self.assertEqual(len(lv), 3) - self.assertTrue((data[5:8].float() - lv.float()).abs().max() < self.TOLERANCE) + assert len(lv) == 3 + assert_equal(data[5:8], lv, rtol=0.0, atol=self.TOLERANCE) def test_read_packed_b_frames_divx_file(self): - with get_tmp_dir() as temp_dir: - name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi" - f_name = os.path.join(temp_dir, name) - url = "https://download.pytorch.org/vision_tests/io/" + name - try: - utils.download_url(url, temp_dir) - pts, fps = 
io.read_video_timestamps(f_name) - - self.assertEqual(pts, sorted(pts)) - self.assertEqual(fps, 30) - except URLError: - msg = "could not download test file '{}'".format(url) - warnings.warn(msg, RuntimeWarning) - raise unittest.SkipTest(msg) + name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi" + f_name = os.path.join(VIDEO_DIR, name) + pts, fps = io.read_video_timestamps(f_name) + + assert pts == sorted(pts) + assert fps == 30 def test_read_timestamps_from_packet(self): - with temp_video(10, 300, 300, 5, video_codec='mpeg4') as (f_name, data): + with temp_video(10, 300, 300, 5, video_codec="mpeg4") as (f_name, data): pts, _ = io.read_video_timestamps(f_name) # note: not all formats/codecs provide accurate information for computing the # timestamps. For the format that we use here, this information is available, # so we use it as a baseline - container = av.open(f_name) - stream = container.streams[0] - # make sure we went through the optimized codepath - self.assertIn(b'Lavc', stream.codec_context.extradata) - pts_step = int(round(float(1 / (stream.average_rate * stream.time_base)))) - num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration))) - expected_pts = [i * pts_step for i in range(num_frames)] + with av.open(f_name) as container: + stream = container.streams[0] + # make sure we went through the optimized codepath + assert b"Lavc" in stream.codec_context.extradata + pts_step = int(round(float(1 / (stream.average_rate * stream.time_base)))) + num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration))) + expected_pts = [i * pts_step for i in range(num_frames)] - self.assertEqual(pts, expected_pts) + assert pts == expected_pts def test_read_video_pts_unit_sec(self): with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data): - lv, _, info = io.read_video(f_name, pts_unit='sec') + lv, _, info = io.read_video(f_name, pts_unit="sec") - self.assertTrue(data.equal(lv)) - 
self.assertEqual(info["video_fps"], 5) - self.assertEqual(info, {"video_fps": 5}) + assert_equal(data, lv) + assert info["video_fps"] == 5 + assert info == {"video_fps": 5} def test_read_timestamps_pts_unit_sec(self): with temp_video(10, 300, 300, 5) as (f_name, data): - pts, _ = io.read_video_timestamps(f_name, pts_unit='sec') + pts, _ = io.read_video_timestamps(f_name, pts_unit="sec") - container = av.open(f_name) - stream = container.streams[0] - pts_step = int(round(float(1 / (stream.average_rate * stream.time_base)))) - num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration))) - expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)] + with av.open(f_name) as container: + stream = container.streams[0] + pts_step = int(round(float(1 / (stream.average_rate * stream.time_base)))) + num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration))) + expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)] - self.assertEqual(pts, expected_pts) + assert pts == expected_pts - def test_read_partial_video_pts_unit_sec(self): + @pytest.mark.parametrize("start", range(5)) + @pytest.mark.parametrize("offset", range(1, 4)) + def test_read_partial_video_pts_unit_sec(self, start, offset): with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data): - pts, _ = io.read_video_timestamps(f_name, pts_unit='sec') - - for start in range(5): - for l in range(1, 4): - lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1], pts_unit='sec') - s_data = data[start:(start + l)] - self.assertEqual(len(lv), l) - self.assertTrue(s_data.equal(lv)) - - container = av.open(f_name) - stream = container.streams[0] - lv, _, _ = io.read_video(f_name, - int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base, pts[7], - pts_unit='sec') + pts, _ = io.read_video_timestamps(f_name, pts_unit="sec") + + lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit="sec") + 
s_data = data[start : (start + offset)] + assert len(lv) == offset + assert_equal(s_data, lv) + + with av.open(f_name) as container: + stream = container.streams[0] + lv, _, _ = io.read_video( + f_name, int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base, pts[7], pts_unit="sec" + ) if get_video_backend() == "pyav": # for "video_reader" backend, we don't decode the closest early frame # when the given start pts is not matching any frame pts - self.assertEqual(len(lv), 4) - self.assertTrue(data[4:8].equal(lv)) + assert len(lv) == 4 + assert_equal(data[4:8], lv) def test_read_video_corrupted_file(self): - with tempfile.NamedTemporaryFile(suffix='.mp4') as f: - f.write(b'This is not an mpg4 file') + with tempfile.NamedTemporaryFile(suffix=".mp4") as f: + f.write(b"This is not an mpg4 file") video, audio, info = io.read_video(f.name) - self.assertIsInstance(video, torch.Tensor) - self.assertIsInstance(audio, torch.Tensor) - self.assertEqual(video.numel(), 0) - self.assertEqual(audio.numel(), 0) - self.assertEqual(info, {}) + assert isinstance(video, torch.Tensor) + assert isinstance(audio, torch.Tensor) + assert video.numel() == 0 + assert audio.numel() == 0 + assert info == {} def test_read_video_timestamps_corrupted_file(self): - with tempfile.NamedTemporaryFile(suffix='.mp4') as f: - f.write(b'This is not an mpg4 file') + with tempfile.NamedTemporaryFile(suffix=".mp4") as f: + f.write(b"This is not an mpg4 file") video_pts, video_fps = io.read_video_timestamps(f.name) - self.assertEqual(video_pts, []) - self.assertIs(video_fps, None) + assert video_pts == [] + assert video_fps is None + @pytest.mark.skip(reason="Temporarily disabled due to new pyav") def test_read_video_partially_corrupted_file(self): with temp_video(5, 4, 4, 5, lossless=True) as (f_name, data): - with open(f_name, 'r+b') as f: + with open(f_name, "r+b") as f: size = os.path.getsize(f_name) bytes_to_overwrite = size // 10 # seek to the middle of the file f.seek(5 * bytes_to_overwrite) # 
corrupt 10% of the file from the middle - f.write(b'\xff' * bytes_to_overwrite) + f.write(b"\xff" * bytes_to_overwrite) # this exercises the container.decode assertion check - video, audio, info = io.read_video(f.name, pts_unit='sec') + video, audio, info = io.read_video(f.name, pts_unit="sec") # check that size is not equal to 5, but 3 # TODO fix this - if get_video_backend() == 'pyav': - self.assertEqual(len(video), 3) + if get_video_backend() == "pyav": + assert len(video) == 3 else: - self.assertEqual(len(video), 4) + assert len(video) == 4 # but the valid decoded content is still correct - self.assertTrue(video[:3].equal(data[:3])) + assert_equal(video[:3], data[:3]) # and the last few frames are wrong - self.assertFalse(video.equal(data)) + with pytest.raises(AssertionError): + assert_equal(video, data) + + @pytest.mark.skipif(sys.platform == "win32", reason="temporarily disabled on Windows") + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_write_video_with_audio(self, device, tmpdir): + f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4") + video_tensor, audio_tensor, info = io.read_video(f_name, pts_unit="sec") + + out_f_name = os.path.join(tmpdir, "testing.mp4") + io.video.write_video( + out_f_name, + video_tensor.to(device), + round(info["video_fps"]), + video_codec="libx264rgb", + options={"crf": "0"}, + audio_array=audio_tensor.to(device), + audio_fps=info["audio_fps"], + audio_codec="aac", + ) + + out_video_tensor, out_audio_tensor, out_info = io.read_video(out_f_name, pts_unit="sec") + + assert info["video_fps"] == out_info["video_fps"] + assert_equal(video_tensor, out_video_tensor) + + audio_stream = av.open(f_name).streams.audio[0] + out_audio_stream = av.open(out_f_name).streams.audio[0] + + assert info["audio_fps"] == out_info["audio_fps"] + assert audio_stream.rate == out_audio_stream.rate + assert pytest.approx(out_audio_stream.frames, rel=0.0, abs=1) == audio_stream.frames + assert audio_stream.frame_size == 
out_audio_stream.frame_size # TODO add tests for audio -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + pytest.main(__file__) diff --git a/test/test_io_opt.py b/test/test_io_opt.py index 1ad3dea8fa2..f4e3d305295 100644 --- a/test/test_io_opt.py +++ b/test/test_io_opt.py @@ -1,11 +1,13 @@ import unittest -from torchvision import set_video_backend + import test_io +from torchvision import set_video_backend # noqa: 401 -set_video_backend('video_reader') +# Disabling the video backend switching temporarily +# set_video_backend('video_reader') -if __name__ == '__main__': +if __name__ == "__main__": suite = unittest.TestLoader().loadTestsFromModule(test_io) unittest.TextTestRunner(verbosity=1).run(suite) diff --git a/test/test_models.cpp b/test/test_models.cpp deleted file mode 100644 index 092fc567ac2..00000000000 --- a/test/test_models.cpp +++ /dev/null @@ -1,209 +0,0 @@ -#include -#include -#include - -#include "../torchvision/csrc/models/models.h" - -using namespace vision::models; - -template -torch::Tensor forward_model(const std::string& input_path, torch::Tensor x) { - Model network; - torch::load(network, input_path); - network->eval(); - return network->forward(x); -} - -torch::Tensor forward_alexnet(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} - -torch::Tensor forward_vgg11(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_vgg13(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_vgg16(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_vgg19(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} - -torch::Tensor forward_vgg11bn(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_vgg13bn(const 
std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_vgg16bn(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_vgg19bn(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} - -torch::Tensor forward_resnet18(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_resnet34(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_resnet50(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_resnet101( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_resnet152( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_resnext50_32x4d( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_resnext101_32x8d( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_wide_resnet50_2( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_wide_resnet101_2( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} - -torch::Tensor forward_squeezenet1_0( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_squeezenet1_1( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} - -torch::Tensor forward_densenet121( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_densenet169( - const std::string& input_path, - 
torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_densenet201( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_densenet161( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} - -torch::Tensor forward_mobilenetv2( - const std::string& input_path, - torch::Tensor x) { - return forward_model(input_path, x); -} - -torch::Tensor forward_googlenet( - const std::string& input_path, - torch::Tensor x) { - GoogLeNet network; - torch::load(network, input_path); - network->eval(); - return network->forward(x).output; -} -torch::Tensor forward_inceptionv3( - const std::string& input_path, - torch::Tensor x) { - InceptionV3 network; - torch::load(network, input_path); - network->eval(); - return network->forward(x).output; -} - -torch::Tensor forward_mnasnet0_5(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_mnasnet0_75(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_mnasnet1_0(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} -torch::Tensor forward_mnasnet1_3(const std::string& input_path, torch::Tensor x) { - return forward_model(input_path, x); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("forward_alexnet", &forward_alexnet, "forward_alexnet"); - - m.def("forward_vgg11", &forward_vgg11, "forward_vgg11"); - m.def("forward_vgg13", &forward_vgg13, "forward_vgg13"); - m.def("forward_vgg16", &forward_vgg16, "forward_vgg16"); - m.def("forward_vgg19", &forward_vgg19, "forward_vgg19"); - - m.def("forward_vgg11bn", &forward_vgg11bn, "forward_vgg11bn"); - m.def("forward_vgg13bn", &forward_vgg13bn, "forward_vgg13bn"); - m.def("forward_vgg16bn", &forward_vgg16bn, "forward_vgg16bn"); - m.def("forward_vgg19bn", &forward_vgg19bn, "forward_vgg19bn"); - - 
m.def("forward_resnet18", &forward_resnet18, "forward_resnet18"); - m.def("forward_resnet34", &forward_resnet34, "forward_resnet34"); - m.def("forward_resnet50", &forward_resnet50, "forward_resnet50"); - m.def("forward_resnet101", &forward_resnet101, "forward_resnet101"); - m.def("forward_resnet152", &forward_resnet152, "forward_resnet152"); - m.def( - "forward_resnext50_32x4d", - &forward_resnext50_32x4d, - "forward_resnext50_32x4d"); - m.def( - "forward_resnext101_32x8d", - &forward_resnext101_32x8d, - "forward_resnext101_32x8d"); - m.def( - "forward_wide_resnet50_2", - &forward_wide_resnet50_2, - "forward_wide_resnet50_2"); - m.def( - "forward_wide_resnet101_2", - &forward_wide_resnet101_2, - "forward_wide_resnet101_2"); - - m.def( - "forward_squeezenet1_0", &forward_squeezenet1_0, "forward_squeezenet1_0"); - m.def( - "forward_squeezenet1_1", &forward_squeezenet1_1, "forward_squeezenet1_1"); - - m.def("forward_densenet121", &forward_densenet121, "forward_densenet121"); - m.def("forward_densenet169", &forward_densenet169, "forward_densenet169"); - m.def("forward_densenet201", &forward_densenet201, "forward_densenet201"); - m.def("forward_densenet161", &forward_densenet161, "forward_densenet161"); - - m.def("forward_mobilenetv2", &forward_mobilenetv2, "forward_mobilenetv2"); - - m.def("forward_googlenet", &forward_googlenet, "forward_googlenet"); - m.def("forward_inceptionv3", &forward_inceptionv3, "forward_inceptionv3"); - - m.def("forward_mnasnet0_5", &forward_mnasnet0_5, "forward_mnasnet0_5"); - m.def("forward_mnasnet0_75", &forward_mnasnet0_75, "forward_mnasnet0_75"); - m.def("forward_mnasnet1_0", &forward_mnasnet1_0, "forward_mnasnet1_0"); - m.def("forward_mnasnet1_3", &forward_mnasnet1_3, "forward_mnasnet1_3"); -} diff --git a/test/test_models.py b/test/test_models.py index c70ef6830bf..202bbdbd0cd 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1,118 +1,831 @@ -from common_utils import TestCase, map_nested_tensor_object +import contextlib 
+import functools +import operator +import os +import pkgutil +import platform +import sys +import warnings from collections import OrderedDict -from itertools import product +from tempfile import TemporaryDirectory +from typing import Any + +import pytest import torch -import numpy as np -from torchvision import models -import unittest -import traceback -import random +import torch.fx +import torch.nn as nn +from _utils_internal import get_relative_path +from common_utils import cpu_and_cuda, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed +from PIL import Image +from torchvision import models, transforms +from torchvision.models import get_model_builder, list_models + + +ACCEPT = os.getenv("EXPECTTEST_ACCEPT", "0") == "1" +SKIP_BIG_MODEL = os.getenv("SKIP_BIG_MODEL", "1") == "1" + + +def list_model_fns(module): + return [get_model_builder(name) for name in list_models(module)] + + +def _get_image(input_shape, real_image, device, dtype=None): + """This routine loads a real or random image based on `real_image` argument. + Currently, the real image is utilized for the following list of models: + - `retinanet_resnet50_fpn`, + - `retinanet_resnet50_fpn_v2`, + - `keypointrcnn_resnet50_fpn`, + - `fasterrcnn_resnet50_fpn`, + - `fasterrcnn_resnet50_fpn_v2`, + - `fcos_resnet50_fpn`, + - `maskrcnn_resnet50_fpn`, + - `maskrcnn_resnet50_fpn_v2`, + in `test_classification_model` and `test_detection_model`. 
+ To do so, a keyword argument `real_image` was added to the abovelisted models in `_model_params` + """ + if real_image: + # TODO: Maybe unify file discovery logic with test_image.py + GRACE_HOPPER = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "assets", "encode_jpeg", "grace_hopper_517x606.jpg" + ) + + img = Image.open(GRACE_HOPPER) + + original_width, original_height = img.size + + # make the image square + img = img.crop((0, 0, original_width, original_width)) + img = img.resize(input_shape[1:3]) + + convert_tensor = transforms.ToTensor() + image = convert_tensor(img) + assert tuple(image.size()) == input_shape + return image.to(device=device, dtype=dtype) + + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + return torch.rand(input_shape).to(device=device, dtype=dtype) + + +@pytest.fixture +def disable_weight_loading(mocker): + """When testing models, the two slowest operations are the downloading of the weights to a file and loading them + into the model. Unless, you want to test against specific weights, these steps can be disabled without any + drawbacks. + + Including this fixture into the signature of your test, i.e. `test_foo(disable_weight_loading)`, will recurse + through all models in `torchvision.models` and will patch all occurrences of the function + `download_state_dict_from_url` as well as the method `load_state_dict` on all subclasses of `nn.Module` to be + no-ops. + + .. warning: + + Loaded models are still executable as normal, but will always have random weights. Make sure to not use this + fixture if you want to compare the model output against reference values. 
+ + """ + starting_point = models + function_name = "load_state_dict_from_url" + method_name = "load_state_dict" + + module_names = {info.name for info in pkgutil.walk_packages(starting_point.__path__, f"{starting_point.__name__}.")} + targets = {f"torchvision._internally_replaced_utils.{function_name}", f"torch.nn.Module.{method_name}"} + for name in module_names: + module = sys.modules.get(name) + if not module: + continue + + if function_name in module.__dict__: + targets.add(f"{module.__name__}.{function_name}") + targets.update( + { + f"{module.__name__}.{obj.__name__}.{method_name}" + for obj in module.__dict__.values() + if isinstance(obj, type) and issubclass(obj, nn.Module) and method_name in obj.__dict__ + } + ) -def set_rng_seed(seed): - torch.manual_seed(seed) - random.seed(seed) - np.random.seed(seed) + for target in targets: + # See https://github.com/pytorch/vision/pull/4867#discussion_r743677802 for details + with contextlib.suppress(AttributeError): + mocker.patch(target) -def get_available_classification_models(): - # TODO add a registration mechanism to torchvision.models - return [k for k, v in models.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"] +def _get_expected_file(name=None): + # Determine expected file based on environment + expected_file_base = get_relative_path(os.path.realpath(__file__), "expect") + # Note: for legacy reasons, the reference file names all had "ModelTest.test_" in their names + # We hardcode it here to avoid having to re-generate the reference files + expected_file = os.path.join(expected_file_base, "ModelTester.test_" + name) + expected_file += "_expect.pkl" -def get_available_segmentation_models(): - # TODO add a registration mechanism to torchvision.models - return [k for k, v in models.segmentation.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"] + if not ACCEPT and not os.path.exists(expected_file): + raise RuntimeError( + f"No expect file exists for 
{os.path.basename(expected_file)} in {expected_file}; " + "to accept the current output, re-run the failing test after setting the EXPECTTEST_ACCEPT " + "env variable. For example: EXPECTTEST_ACCEPT=1 pytest test/test_models.py -k alexnet" + ) + return expected_file + + +def _assert_expected(output, name, prec=None, atol=None, rtol=None): + """Test that a python value matches the recorded contents of a file + based on a "check" name. The value must be + pickable with `torch.save`. This file + is placed in the 'expect' directory in the same directory + as the test script. You can automatically update the recorded test + output using an EXPECTTEST_ACCEPT=1 env variable. + """ + expected_file = _get_expected_file(name) + + if ACCEPT: + filename = {os.path.basename(expected_file)} + print(f"Accepting updated output for {filename}:\n\n{output}") + torch.save(output, expected_file) + MAX_PICKLE_SIZE = 50 * 1000 # 50 KB + binary_size = os.path.getsize(expected_file) + if binary_size > MAX_PICKLE_SIZE: + raise RuntimeError(f"The output for {filename}, is larger than 50kb - got {binary_size}kb") + else: + expected = torch.load(expected_file, weights_only=True) + rtol = rtol or prec # keeping prec param for legacy reason, but could be removed ideally + atol = atol or prec + torch.testing.assert_close(output, expected, rtol=rtol, atol=atol, check_dtype=False, check_device=False) + + +def _check_jit_scriptable(nn_module, args, unwrapper=None, eager_out=None): + """Check that a nn.Module's results in TorchScript match eager and that it can be exported""" + + def get_export_import_copy(m): + """Save and load a TorchScript model""" + with TemporaryDirectory() as dir: + path = os.path.join(dir, "script.pt") + m.save(path) + imported = torch.jit.load(path) + return imported + + sm = torch.jit.script(nn_module) + sm.eval() + + if eager_out is None: + with torch.no_grad(), freeze_rng_state(): + eager_out = nn_module(*args) + + with torch.no_grad(), freeze_rng_state(): + script_out = 
sm(*args) + if unwrapper: + script_out = unwrapper(script_out) + + torch.testing.assert_close(eager_out, script_out, atol=1e-4, rtol=1e-4) + + m_import = get_export_import_copy(sm) + with torch.no_grad(), freeze_rng_state(): + imported_script_out = m_import(*args) + if unwrapper: + imported_script_out = unwrapper(imported_script_out) + + torch.testing.assert_close(script_out, imported_script_out, atol=3e-4, rtol=3e-4) + + +def _check_fx_compatible(model, inputs, eager_out=None): + model_fx = torch.fx.symbolic_trace(model) + if eager_out is None: + eager_out = model(inputs) + with torch.no_grad(), freeze_rng_state(): + fx_out = model_fx(inputs) + torch.testing.assert_close(eager_out, fx_out) + + +def _check_input_backprop(model, inputs): + if isinstance(inputs, list): + requires_grad = list() + for inp in inputs: + requires_grad.append(inp.requires_grad) + inp.requires_grad_(True) + else: + requires_grad = inputs.requires_grad + inputs.requires_grad_(True) + + out = model(inputs) + + if isinstance(out, dict): + out["out"].sum().backward() + else: + if isinstance(out[0], dict): + out[0]["scores"].sum().backward() + else: + out[0].sum().backward() -def get_available_detection_models(): - # TODO add a registration mechanism to torchvision.models - return [k for k, v in models.detection.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"] + if isinstance(inputs, list): + for i, inp in enumerate(inputs): + assert inputs[i].grad is not None + inp.requires_grad_(requires_grad[i]) + else: + assert inputs.grad is not None + inputs.requires_grad_(requires_grad) -def get_available_video_models(): - # TODO add a registration mechanism to torchvision.models - return [k for k, v in models.video.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"] +# If 'unwrapper' is provided it will be called with the script model outputs +# before they are compared to the eager model outputs. 
This is useful if the +# model outputs are different between TorchScript / Eager mode +script_model_unwrapper = { + "googlenet": lambda x: x.logits, + "inception_v3": lambda x: x.logits, + "fasterrcnn_resnet50_fpn": lambda x: x[1], + "fasterrcnn_resnet50_fpn_v2": lambda x: x[1], + "fasterrcnn_mobilenet_v3_large_fpn": lambda x: x[1], + "fasterrcnn_mobilenet_v3_large_320_fpn": lambda x: x[1], + "maskrcnn_resnet50_fpn": lambda x: x[1], + "maskrcnn_resnet50_fpn_v2": lambda x: x[1], + "keypointrcnn_resnet50_fpn": lambda x: x[1], + "retinanet_resnet50_fpn": lambda x: x[1], + "retinanet_resnet50_fpn_v2": lambda x: x[1], + "ssd300_vgg16": lambda x: x[1], + "ssdlite320_mobilenet_v3_large": lambda x: x[1], + "fcos_resnet50_fpn": lambda x: x[1], +} -# models that are in torch hub, as well as r3d_18. we tried testing all models -# but the test was too slow. not included are detection models, because -# they are not yet supported in JIT. -script_test_models = [ +# The following models exhibit flaky numerics under autocast in _test_*_model harnesses. +# This may be caused by the harness environment (e.g. num classes, input initialization +# via torch.rand), and does not prove autocast is unsuitable when training with real data +# (autocast has been used successfully with real data for some of these models). +# TODO: investigate why autocast numerics are flaky in the harnesses. +# +# For the following models, _test_*_model harnesses skip numerical checks on outputs when +# trying autocast. However, they still try an autocasted forward pass, so they still ensure +# autocast coverage suffices to prevent dtype errors in each model. 
+autocast_flaky_numerics = ( + "inception_v3", + "resnet101", + "resnet152", + "wide_resnet101_2", + "deeplabv3_resnet50", "deeplabv3_resnet101", - "mobilenet_v2", - "resnext50_32x4d", + "deeplabv3_mobilenet_v3_large", + "fcn_resnet50", "fcn_resnet101", - "googlenet", - "densenet121", - "resnet18", - "alexnet", - "shufflenet_v2_x1_0", - "squeezenet1_0", - "vgg11", - "inception_v3", - 'r3d_18', + "lraspp_mobilenet_v3_large", + "maskrcnn_resnet50_fpn", + "maskrcnn_resnet50_fpn_v2", + "keypointrcnn_resnet50_fpn", +) + +# The tests for the following quantized models are flaky possibly due to inconsistent +# rounding errors in different platforms. For this reason the input/output consistency +# tests under test_quantized_classification_model will be skipped for the following models. +quantized_flaky_models = ("inception_v3", "resnet50") + +# The tests for the following detection models are flaky. +# We run those tests on float64 to avoid floating point errors. +# FIXME: we shouldn't have to do that :'/ +detection_flaky_models = ("keypointrcnn_resnet50_fpn", "maskrcnn_resnet50_fpn", "maskrcnn_resnet50_fpn_v2") + + +# The following contains configuration parameters for all models which are used by +# the _test_*_model methods. 
+_model_params = { + "inception_v3": {"input_shape": (1, 3, 299, 299), "init_weights": True}, + "retinanet_resnet50_fpn": { + "num_classes": 20, + "score_thresh": 0.01, + "min_size": 224, + "max_size": 224, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "retinanet_resnet50_fpn_v2": { + "num_classes": 20, + "score_thresh": 0.01, + "min_size": 224, + "max_size": 224, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "keypointrcnn_resnet50_fpn": { + "num_classes": 2, + "min_size": 224, + "max_size": 224, + "box_score_thresh": 0.17, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "fasterrcnn_resnet50_fpn": { + "num_classes": 20, + "min_size": 224, + "max_size": 224, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "fasterrcnn_resnet50_fpn_v2": { + "num_classes": 20, + "min_size": 224, + "max_size": 224, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "fcos_resnet50_fpn": { + "num_classes": 2, + "score_thresh": 0.05, + "min_size": 224, + "max_size": 224, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "maskrcnn_resnet50_fpn": { + "num_classes": 10, + "min_size": 224, + "max_size": 224, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "maskrcnn_resnet50_fpn_v2": { + "num_classes": 10, + "min_size": 224, + "max_size": 224, + "input_shape": (3, 224, 224), + "real_image": True, + }, + "fasterrcnn_mobilenet_v3_large_fpn": { + "box_score_thresh": 0.02076, + }, + "fasterrcnn_mobilenet_v3_large_320_fpn": { + "box_score_thresh": 0.02076, + "rpn_pre_nms_top_n_test": 1000, + "rpn_post_nms_top_n_test": 1000, + }, + "vit_h_14": { + "image_size": 56, + "input_shape": (1, 3, 56, 56), + }, + "mvit_v1_b": { + "input_shape": (1, 3, 16, 224, 224), + }, + "mvit_v2_s": { + "input_shape": (1, 3, 16, 224, 224), + }, + "s3d": { + "input_shape": (1, 3, 16, 224, 224), + }, + "googlenet": {"init_weights": True}, +} +# speeding up slow models: +slow_models = [ + "convnext_base", + "convnext_large", + 
"resnext101_32x8d", + "resnext101_64x4d", + "wide_resnet101_2", + "efficientnet_b6", + "efficientnet_b7", + "efficientnet_v2_m", + "efficientnet_v2_l", + "regnet_y_16gf", + "regnet_y_32gf", + "regnet_y_128gf", + "regnet_x_16gf", + "regnet_x_32gf", + "swin_t", + "swin_s", + "swin_b", + "swin_v2_t", + "swin_v2_s", + "swin_v2_b", ] +for m in slow_models: + _model_params[m] = {"input_shape": (1, 3, 64, 64)} -class ModelTester(TestCase): - def check_script(self, model, name): - if name not in script_test_models: - return - scriptable = True - msg = "" - try: - torch.jit.script(model) - except Exception as e: - tb = traceback.format_exc() - scriptable = False - msg = str(e) + str(tb) - self.assertTrue(scriptable, msg) - - def _test_classification_model(self, name, input_shape): - # passing num_class equal to a number other than 1000 helps in making the test - # more enforcing in nature - set_rng_seed(0) - model = models.__dict__[name](num_classes=50) - self.check_script(model, name) - model.eval() - x = torch.rand(input_shape) - out = model(x) - self.assertExpected(out, rtol=1e-2, atol=0.) - self.assertEqual(out.shape[-1], 50) - - def _test_segmentation_model(self, name): - # passing num_class equal to a number other than 1000 helps in making the test - # more enforcing in nature - model = models.segmentation.__dict__[name](num_classes=50, pretrained_backbone=False) - self.check_script(model, name) - model.eval() - input_shape = (1, 3, 300, 300) - x = torch.rand(input_shape) +# skip big models to reduce memory usage on CI test. We can exclude combinations of (platform-system, device). 
+skipped_big_models = { + "vit_h_14": {("Windows", "cpu"), ("Windows", "cuda")}, + "regnet_y_128gf": {("Windows", "cpu"), ("Windows", "cuda")}, + "mvit_v1_b": {("Windows", "cuda"), ("Linux", "cuda")}, + "mvit_v2_s": {("Windows", "cuda"), ("Linux", "cuda")}, +} + + +def is_skippable(model_name, device): + if model_name not in skipped_big_models: + return False + + platform_system = platform.system() + device_name = str(device).split(":")[0] + + return (platform_system, device_name) in skipped_big_models[model_name] + + +# The following contains configuration and expected values to be used tests that are model specific +_model_tests_values = { + "retinanet_resnet50_fpn": { + "max_trainable": 5, + "n_trn_params_per_layer": [36, 46, 65, 78, 88, 89], + }, + "retinanet_resnet50_fpn_v2": { + "max_trainable": 5, + "n_trn_params_per_layer": [44, 74, 131, 170, 200, 203], + }, + "keypointrcnn_resnet50_fpn": { + "max_trainable": 5, + "n_trn_params_per_layer": [48, 58, 77, 90, 100, 101], + }, + "fasterrcnn_resnet50_fpn": { + "max_trainable": 5, + "n_trn_params_per_layer": [30, 40, 59, 72, 82, 83], + }, + "fasterrcnn_resnet50_fpn_v2": { + "max_trainable": 5, + "n_trn_params_per_layer": [50, 80, 137, 176, 206, 209], + }, + "maskrcnn_resnet50_fpn": { + "max_trainable": 5, + "n_trn_params_per_layer": [42, 52, 71, 84, 94, 95], + }, + "maskrcnn_resnet50_fpn_v2": { + "max_trainable": 5, + "n_trn_params_per_layer": [66, 96, 153, 192, 222, 225], + }, + "fasterrcnn_mobilenet_v3_large_fpn": { + "max_trainable": 6, + "n_trn_params_per_layer": [22, 23, 44, 70, 91, 97, 100], + }, + "fasterrcnn_mobilenet_v3_large_320_fpn": { + "max_trainable": 6, + "n_trn_params_per_layer": [22, 23, 44, 70, 91, 97, 100], + }, + "ssd300_vgg16": { + "max_trainable": 5, + "n_trn_params_per_layer": [45, 51, 57, 63, 67, 71], + }, + "ssdlite320_mobilenet_v3_large": { + "max_trainable": 6, + "n_trn_params_per_layer": [96, 99, 138, 200, 239, 257, 266], + }, + "fcos_resnet50_fpn": { + "max_trainable": 5, + 
"n_trn_params_per_layer": [54, 64, 83, 96, 106, 107], + }, +} + + +def _make_sliced_model(model, stop_layer): + layers = OrderedDict() + for name, layer in model.named_children(): + layers[name] = layer + if name == stop_layer: + break + new_model = torch.nn.Sequential(layers) + return new_model + + +@pytest.mark.parametrize("model_fn", [models.densenet121, models.densenet169, models.densenet201, models.densenet161]) +def test_memory_efficient_densenet(model_fn): + input_shape = (1, 3, 300, 300) + x = torch.rand(input_shape) + + model1 = model_fn(num_classes=50, memory_efficient=True) + params = model1.state_dict() + num_params = sum(x.numel() for x in model1.parameters()) + model1.eval() + out1 = model1(x) + out1.sum().backward() + num_grad = sum(x.grad.numel() for x in model1.parameters() if x.grad is not None) + + model2 = model_fn(num_classes=50, memory_efficient=False) + model2.load_state_dict(params) + model2.eval() + out2 = model2(x) + + assert num_params == num_grad + torch.testing.assert_close(out1, out2, rtol=0.0, atol=1e-5) + + _check_input_backprop(model1, x) + _check_input_backprop(model2, x) + + +@pytest.mark.parametrize("dilate_layer_2", (True, False)) +@pytest.mark.parametrize("dilate_layer_3", (True, False)) +@pytest.mark.parametrize("dilate_layer_4", (True, False)) +def test_resnet_dilation(dilate_layer_2, dilate_layer_3, dilate_layer_4): + # TODO improve tests to also check that each layer has the right dimensionality + model = models.resnet50(replace_stride_with_dilation=(dilate_layer_2, dilate_layer_3, dilate_layer_4)) + model = _make_sliced_model(model, stop_layer="layer4") + model.eval() + x = torch.rand(1, 3, 224, 224) + out = model(x) + f = 2 ** sum((dilate_layer_2, dilate_layer_3, dilate_layer_4)) + assert out.shape == (1, 2048, 7 * f, 7 * f) + + +def test_mobilenet_v2_residual_setting(): + model = models.mobilenet_v2(inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]]) + model.eval() + x = torch.rand(1, 3, 224, 224) + out = model(x) 
+ assert out.shape[-1] == 1000 + + +@pytest.mark.parametrize("model_fn", [models.mobilenet_v2, models.mobilenet_v3_large, models.mobilenet_v3_small]) +def test_mobilenet_norm_layer(model_fn): + model = model_fn() + assert any(isinstance(x, nn.BatchNorm2d) for x in model.modules()) + + def get_gn(num_channels): + return nn.GroupNorm(1, num_channels) + + model = model_fn(norm_layer=get_gn) + assert not (any(isinstance(x, nn.BatchNorm2d) for x in model.modules())) + assert any(isinstance(x, nn.GroupNorm) for x in model.modules()) + + +def test_inception_v3_eval(): + kwargs = {} + kwargs["transform_input"] = True + kwargs["aux_logits"] = True + kwargs["init_weights"] = False + name = "inception_v3" + model = models.Inception3(**kwargs) + model.aux_logits = False + model.AuxLogits = None + model = model.eval() + x = torch.rand(1, 3, 299, 299) + _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None)) + _check_input_backprop(model, x) + + +def test_fasterrcnn_double(): + model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, weights=None, weights_backbone=None) + model.double() + model.eval() + input_shape = (3, 300, 300) + x = torch.rand(input_shape, dtype=torch.float64) + model_input = [x] + out = model(model_input) + assert model_input[0] is x + assert len(out) == 1 + assert "boxes" in out[0] + assert "scores" in out[0] + assert "labels" in out[0] + _check_input_backprop(model, model_input) + + +def test_googlenet_eval(): + kwargs = {} + kwargs["transform_input"] = True + kwargs["aux_logits"] = True + kwargs["init_weights"] = False + name = "googlenet" + model = models.GoogLeNet(**kwargs) + model.aux_logits = False + model.aux1 = None + model.aux2 = None + model = model.eval() + x = torch.rand(1, 3, 224, 224) + _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None)) + _check_input_backprop(model, x) + + +@needs_cuda +def test_fasterrcnn_switch_devices(): + def checkOut(out): + assert len(out) == 1 + 
assert "boxes" in out[0] + assert "scores" in out[0] + assert "labels" in out[0] + + model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, weights=None, weights_backbone=None) + model.cuda() + model.eval() + input_shape = (3, 300, 300) + x = torch.rand(input_shape, device="cuda") + model_input = [x] + out = model(model_input) + assert model_input[0] is x + + checkOut(out) + + with torch.cuda.amp.autocast(): + out = model(model_input) + + checkOut(out) + + _check_input_backprop(model, model_input) + + # now switch to cpu and make sure it works + model.cpu() + x = x.cpu() + out_cpu = model([x]) + + checkOut(out_cpu) + + _check_input_backprop(model, [x]) + + +def test_generalizedrcnn_transform_repr(): + + min_size, max_size = 224, 299 + image_mean = [0.485, 0.456, 0.406] + image_std = [0.229, 0.224, 0.225] + + t = models.detection.transform.GeneralizedRCNNTransform( + min_size=min_size, max_size=max_size, image_mean=image_mean, image_std=image_std + ) + + # Check integrity of object __repr__ attribute + expected_string = "GeneralizedRCNNTransform(" + _indent = "\n " + expected_string += f"{_indent}Normalize(mean={image_mean}, std={image_std})" + expected_string += f"{_indent}Resize(min_size=({min_size},), max_size={max_size}, " + expected_string += "mode='bilinear')\n)" + assert t.__repr__() == expected_string + + +test_vit_conv_stem_configs = [ + models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=64), + models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=128), + models.vision_transformer.ConvStemConfig(kernel_size=3, stride=1, out_channels=128), + models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=256), + models.vision_transformer.ConvStemConfig(kernel_size=3, stride=1, out_channels=256), + models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=512), +] + + +def vitc_b_16(**kwargs: Any): + return models.VisionTransformer( + image_size=224, + 
patch_size=16, + num_layers=12, + num_heads=12, + hidden_dim=768, + mlp_dim=3072, + conv_stem_configs=test_vit_conv_stem_configs, + **kwargs, + ) + + +@pytest.mark.parametrize("model_fn", [vitc_b_16]) +@pytest.mark.parametrize("dev", cpu_and_cuda()) +def test_vitc_models(model_fn, dev): + test_classification_model(model_fn, dev) + + +@torch.backends.cudnn.flags(allow_tf32=False) # see: https://github.com/pytorch/vision/issues/7618 +@pytest.mark.parametrize("model_fn", list_model_fns(models)) +@pytest.mark.parametrize("dev", cpu_and_cuda()) +def test_classification_model(model_fn, dev): + set_rng_seed(0) + defaults = { + "num_classes": 50, + "input_shape": (1, 3, 224, 224), + } + model_name = model_fn.__name__ + if SKIP_BIG_MODEL and is_skippable(model_name, dev): + pytest.skip("Skipped to reduce memory usage. Set env var SKIP_BIG_MODEL=0 to enable test for this model") + kwargs = {**defaults, **_model_params.get(model_name, {})} + num_classes = kwargs.get("num_classes") + input_shape = kwargs.pop("input_shape") + real_image = kwargs.pop("real_image", False) + + model = model_fn(**kwargs) + model.eval().to(device=dev) + x = _get_image(input_shape=input_shape, real_image=real_image, device=dev) + out = model(x) + # FIXME: this if/else is nasty and only here to please our CI prior to the + # release. We rethink these tests altogether. + if model_name == "resnet101": + prec = 0.2 + else: + # FIXME: this is probably still way too high. + prec = 0.1 + _assert_expected(out.cpu(), model_name, prec=prec) + assert out.shape[-1] == num_classes + _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out) + _check_fx_compatible(model, x, eager_out=out) + + if dev == "cuda": + with torch.cuda.amp.autocast(): + out = model(x) + # See autocast_flaky_numerics comment at top of file. 
+ if model_name not in autocast_flaky_numerics: + _assert_expected(out.cpu(), model_name, prec=0.1) + assert out.shape[-1] == 50 + + _check_input_backprop(model, x) + + +@pytest.mark.parametrize("model_fn", list_model_fns(models.segmentation)) +@pytest.mark.parametrize("dev", cpu_and_cuda()) +def test_segmentation_model(model_fn, dev): + set_rng_seed(0) + defaults = { + "num_classes": 10, + "weights_backbone": None, + "input_shape": (1, 3, 32, 32), + } + model_name = model_fn.__name__ + kwargs = {**defaults, **_model_params.get(model_name, {})} + input_shape = kwargs.pop("input_shape") + + model = model_fn(**kwargs) + model.eval().to(device=dev) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) + with torch.no_grad(), freeze_rng_state(): out = model(x) - self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300)) - - def _test_detection_model(self, name): - set_rng_seed(0) - model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False) - self.check_script(model, name) - model.eval() - input_shape = (3, 300, 300) - x = torch.rand(input_shape) - model_input = [x] + + def check_out(out): + prec = 0.01 + try: + # We first try to assert the entire output if possible. This is not + # only the best way to assert results but also handles the cases + # where we need to create a new expected result. + _assert_expected(out.cpu(), model_name, prec=prec) + except AssertionError: + # Unfortunately some segmentation models are flaky with autocast + # so instead of validating the probability scores, check that the class + # predictions match. 
+ expected_file = _get_expected_file(model_name) + expected = torch.load(expected_file, weights_only=True) + torch.testing.assert_close( + out.argmax(dim=1), expected.argmax(dim=1), rtol=prec, atol=prec, check_device=False + ) + return False # Partial validation performed + + return True # Full validation performed + + full_validation = check_out(out["out"]) + + _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out) + _check_fx_compatible(model, x, eager_out=out) + + if dev == "cuda": + with torch.cuda.amp.autocast(), torch.no_grad(), freeze_rng_state(): + out = model(x) + # See autocast_flaky_numerics comment at top of file. + if model_name not in autocast_flaky_numerics: + full_validation &= check_out(out["out"]) + + if not full_validation: + msg = ( + f"The output of {test_segmentation_model.__name__} could only be partially validated. " + "This is likely due to unit-test flakiness, but you may " + "want to do additional manual checks if you made " + "significant changes to the codebase." 
+ ) + warnings.warn(msg, RuntimeWarning) + pytest.skip(msg) + + _check_input_backprop(model, x) + + +@pytest.mark.parametrize("model_fn", list_model_fns(models.detection)) +@pytest.mark.parametrize("dev", cpu_and_cuda()) +def test_detection_model(model_fn, dev): + set_rng_seed(0) + defaults = { + "num_classes": 50, + "weights_backbone": None, + "input_shape": (3, 300, 300), + } + model_name = model_fn.__name__ + if model_name in detection_flaky_models: + dtype = torch.float64 + else: + dtype = torch.get_default_dtype() + kwargs = {**defaults, **_model_params.get(model_name, {})} + input_shape = kwargs.pop("input_shape") + real_image = kwargs.pop("real_image", False) + + model = model_fn(**kwargs) + model.eval().to(device=dev, dtype=dtype) + x = _get_image(input_shape=input_shape, real_image=real_image, device=dev, dtype=dtype) + model_input = [x] + with torch.no_grad(), freeze_rng_state(): out = model(model_input) - self.assertIs(model_input[0], x) - self.assertEqual(len(out), 1) + assert model_input[0] is x + + def check_out(out): + assert len(out) == 1 + + def compact(tensor): + tensor = tensor.cpu() + size = tensor.size() + elements_per_sample = functools.reduce(operator.mul, size[1:], 1) + if elements_per_sample > 30: + return compute_mean_std(tensor) + else: + return subsample_tensor(tensor) def subsample_tensor(tensor): - num_elems = tensor.numel() + num_elems = tensor.size(0) num_samples = 20 if num_elems <= num_samples: return tensor - flat_tensor = tensor.flatten() ith_index = num_elems // num_samples - return flat_tensor[ith_index - 1::ith_index] + return tensor[ith_index - 1 :: ith_index] def compute_mean_std(tensor): # can't compute mean of integral tensor @@ -121,128 +834,218 @@ def compute_mean_std(tensor): std = torch.std(tensor) return {"mean": mean, "std": std} - # maskrcnn_resnet_50_fpn numerically unstable across platforms, so for now - # compare results with mean and std - if name == "maskrcnn_resnet50_fpn": - test_value = 
map_nested_tensor_object(out, tensor_map_fn=compute_mean_std) - # mean values are small, use large rtol - self.assertExpected(test_value, rtol=.01, atol=.01) - else: - self.assertExpected(map_nested_tensor_object(out, tensor_map_fn=subsample_tensor)) - - self.assertTrue("boxes" in out[0]) - self.assertTrue("scores" in out[0]) - self.assertTrue("labels" in out[0]) - - def _test_video_model(self, name): - # the default input shape is - # bs * num_channels * clip_len * h *w - input_shape = (1, 3, 4, 112, 112) - # test both basicblock and Bottleneck - model = models.video.__dict__[name](num_classes=50) - self.check_script(model, name) - x = torch.rand(input_shape) - out = model(x) - self.assertEqual(out.shape[-1], 50) - - def _make_sliced_model(self, model, stop_layer): - layers = OrderedDict() - for name, layer in model.named_children(): - layers[name] = layer - if name == stop_layer: - break - new_model = torch.nn.Sequential(layers) - return new_model - - def test_memory_efficient_densenet(self): - input_shape = (1, 3, 300, 300) - x = torch.rand(input_shape) - - for name in ['densenet121', 'densenet169', 'densenet201', 'densenet161']: - model1 = models.__dict__[name](num_classes=50, memory_efficient=True) - params = model1.state_dict() - model1.eval() - out1 = model1(x) - out1.sum().backward() - - model2 = models.__dict__[name](num_classes=50, memory_efficient=False) - model2.load_state_dict(params) - model2.eval() - out2 = model2(x) - - max_diff = (out1 - out2).abs().max() - - self.assertTrue(max_diff < 1e-5) - - def test_resnet_dilation(self): - # TODO improve tests to also check that each layer has the right dimensionality - for i in product([False, True], [False, True], [False, True]): - model = models.__dict__["resnet50"](replace_stride_with_dilation=i) - model = self._make_sliced_model(model, stop_layer="layer4") - model.eval() - x = torch.rand(1, 3, 224, 224) + output = map_nested_tensor_object(out, tensor_map_fn=compact) + prec = 0.01 + try: + # We first try 
to assert the entire output if possible. This is not + # only the best way to assert results but also handles the cases + # where we need to create a new expected result. + _assert_expected(output, model_name, prec=prec) + except AssertionError: + # Unfortunately detection models are flaky due to the unstable sort + # in NMS. If matching across all outputs fails, use the same approach + # as in NMSTester.test_nms_cuda to see if this is caused by duplicate + # scores. + expected_file = _get_expected_file(model_name) + expected = torch.load(expected_file, weights_only=True) + torch.testing.assert_close( + output[0]["scores"], expected[0]["scores"], rtol=prec, atol=prec, check_device=False, check_dtype=False + ) + + # Note: Fmassa proposed turning off NMS by adapting the threshold + # and then using the Hungarian algorithm as in DETR to find the + # best match between output and expected boxes and eliminate some + # of the flakiness. Worth exploring. + return False # Partial validation performed + + return True # Full validation performed + + full_validation = check_out(out) + _check_jit_scriptable(model, ([x],), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out) + + if dev == "cuda": + with torch.cuda.amp.autocast(), torch.no_grad(), freeze_rng_state(): + out = model(model_input) + # See autocast_flaky_numerics comment at top of file. + if model_name not in autocast_flaky_numerics: + full_validation &= check_out(out) + + if not full_validation: + msg = ( + f"The output of {test_detection_model.__name__} could only be partially validated. " + "This is likely due to unit-test flakiness, but you may " + "want to do additional manual checks if you made " + "significant changes to the codebase." 
+ ) + warnings.warn(msg, RuntimeWarning) + pytest.skip(msg) + + _check_input_backprop(model, model_input) + + +@pytest.mark.parametrize("model_fn", list_model_fns(models.detection)) +def test_detection_model_validation(model_fn): + set_rng_seed(0) + model = model_fn(num_classes=50, weights=None, weights_backbone=None) + input_shape = (3, 300, 300) + x = [torch.rand(input_shape)] + + # validate that targets are present in training + with pytest.raises(AssertionError): + model(x) + + # validate type + targets = [{"boxes": 0.0}] + with pytest.raises(AssertionError): + model(x, targets=targets) + + # validate boxes shape + for boxes in (torch.rand((4,)), torch.rand((1, 5))): + targets = [{"boxes": boxes}] + with pytest.raises(AssertionError): + model(x, targets=targets) + + # validate that no degenerate boxes are present + boxes = torch.tensor([[1, 3, 1, 4], [2, 4, 3, 4]]) + targets = [{"boxes": boxes}] + with pytest.raises(AssertionError): + model(x, targets=targets) + + +@pytest.mark.parametrize("model_fn", list_model_fns(models.video)) +@pytest.mark.parametrize("dev", cpu_and_cuda()) +def test_video_model(model_fn, dev): + set_rng_seed(0) + # the default input shape is + # bs * num_channels * clip_len * h *w + defaults = { + "input_shape": (1, 3, 4, 112, 112), + "num_classes": 50, + } + model_name = model_fn.__name__ + if SKIP_BIG_MODEL and is_skippable(model_name, dev): + pytest.skip("Skipped to reduce memory usage. 
Set env var SKIP_BIG_MODEL=0 to enable test for this model") + kwargs = {**defaults, **_model_params.get(model_name, {})} + num_classes = kwargs.get("num_classes") + input_shape = kwargs.pop("input_shape") + # test both basicblock and Bottleneck + model = model_fn(**kwargs) + model.eval().to(device=dev) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) + out = model(x) + _assert_expected(out.cpu(), model_name, prec=0.1) + assert out.shape[-1] == num_classes + _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out) + _check_fx_compatible(model, x, eager_out=out) + assert out.shape[-1] == num_classes + + if dev == "cuda": + with torch.cuda.amp.autocast(): out = model(x) - f = 2 ** sum(i) - self.assertEqual(out.shape, (1, 2048, 7 * f, 7 * f)) + # See autocast_flaky_numerics comment at top of file. + if model_name not in autocast_flaky_numerics: + _assert_expected(out.cpu(), model_name, prec=0.1) + assert out.shape[-1] == num_classes - def test_mobilenetv2_residual_setting(self): - model = models.__dict__["mobilenet_v2"](inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]]) - model.eval() - x = torch.rand(1, 3, 224, 224) - out = model(x) - self.assertEqual(out.shape[-1], 1000) - - def test_fasterrcnn_double(self): - model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False) - model.double() - model.eval() - input_shape = (3, 300, 300) - x = torch.rand(input_shape, dtype=torch.float64) - model_input = [x] - out = model(model_input) - self.assertIs(model_input[0], x) - self.assertEqual(len(out), 1) - self.assertTrue("boxes" in out[0]) - self.assertTrue("scores" in out[0]) - self.assertTrue("labels" in out[0]) + _check_input_backprop(model, x) + + +@pytest.mark.skipif( + not ( + "fbgemm" in torch.backends.quantized.supported_engines + and "qnnpack" in torch.backends.quantized.supported_engines + ), + 
reason="This Pytorch Build has not been built with fbgemm and qnnpack", +) +@pytest.mark.parametrize("model_fn", list_model_fns(models.quantization)) +def test_quantized_classification_model(model_fn): + set_rng_seed(0) + defaults = { + "num_classes": 5, + "input_shape": (1, 3, 224, 224), + "quantize": True, + } + model_name = model_fn.__name__ + kwargs = {**defaults, **_model_params.get(model_name, {})} + input_shape = kwargs.pop("input_shape") + + # First check if quantize=True provides models that can run with input data + model = model_fn(**kwargs) + model.eval() + x = torch.rand(input_shape) + out = model(x) + if model_name not in quantized_flaky_models: + _assert_expected(out.cpu(), model_name + "_quantized", prec=2e-2) + assert out.shape[-1] == 5 + _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out) + _check_fx_compatible(model, x, eager_out=out) + else: + try: + torch.jit.script(model) + except Exception as e: + raise AssertionError("model cannot be scripted.") from e + + kwargs["quantize"] = False + for eval_mode in [True, False]: + model = model_fn(**kwargs) + if eval_mode: + model.eval() + model.qconfig = torch.ao.quantization.default_qconfig + else: + model.train() + model.qconfig = torch.ao.quantization.default_qat_qconfig + + model.fuse_model(is_qat=not eval_mode) + if eval_mode: + torch.ao.quantization.prepare(model, inplace=True) + else: + torch.ao.quantization.prepare_qat(model, inplace=True) + model.eval() -for model_name in get_available_classification_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - input_shape = (1, 3, 224, 224) - if model_name in ['inception_v3']: - input_shape = (1, 3, 299, 299) - self._test_classification_model(model_name, input_shape) + torch.ao.quantization.convert(model, inplace=True) - setattr(ModelTester, "test_" + model_name, do_test) 
+@pytest.mark.parametrize("model_fn", list_model_fns(models.detection)) +def test_detection_model_trainable_backbone_layers(model_fn, disable_weight_loading): + model_name = model_fn.__name__ + max_trainable = _model_tests_values[model_name]["max_trainable"] + n_trainable_params = [] + for trainable_layers in range(0, max_trainable + 1): + model = model_fn(weights=None, weights_backbone="DEFAULT", trainable_backbone_layers=trainable_layers) -for model_name in get_available_segmentation_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - self._test_segmentation_model(model_name) + n_trainable_params.append(len([p for p in model.parameters() if p.requires_grad])) + assert n_trainable_params == _model_tests_values[model_name]["n_trn_params_per_layer"] - setattr(ModelTester, "test_" + model_name, do_test) +@needs_cuda +@pytest.mark.parametrize("model_fn", list_model_fns(models.optical_flow)) +@pytest.mark.parametrize("scripted", (False, True)) +def test_raft(model_fn, scripted): -for model_name in get_available_detection_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - self._test_detection_model(model_name) + torch.manual_seed(0) - setattr(ModelTester, "test_" + model_name, do_test) + # We need very small images, otherwise the pickle size would exceed the 50KB + # As a result we need to override the correlation pyramid to not downsample + # too much, otherwise we would get nan values (effective H and W would be + # reduced to 1) + corr_block = models.optical_flow.raft.CorrBlock(num_levels=2, radius=2) + model = model_fn(corr_block=corr_block).eval().to("cuda") + if scripted: + model = torch.jit.script(model) -for model_name in get_available_video_models(): + bs = 1 + img1 = torch.rand(bs, 3, 80, 72).cuda() + img2 = torch.rand(bs, 3, 80, 
72).cuda() - def do_test(self, model_name=model_name): - self._test_video_model(model_name) + preds = model(img1, img2) + flow_pred = preds[-1] + # Tolerance is fairly high, but there are 2 * H * W outputs to check + # The .pkl were generated on the AWS cluter, on the CI it looks like the results are slightly different + _assert_expected(flow_pred.cpu(), name=model_fn.__name__, atol=1e-2, rtol=1) - setattr(ModelTester, "test_" + model_name, do_test) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_models_detection_anchor_utils.py b/test/test_models_detection_anchor_utils.py new file mode 100644 index 00000000000..645d4624d64 --- /dev/null +++ b/test/test_models_detection_anchor_utils.py @@ -0,0 +1,99 @@ +import pytest +import torch +from common_utils import assert_equal +from torchvision.models.detection.anchor_utils import AnchorGenerator, DefaultBoxGenerator +from torchvision.models.detection.image_list import ImageList + + +class Tester: + def test_incorrect_anchors(self): + incorrect_sizes = ( + (2, 4, 8), + (32, 8), + ) + incorrect_aspects = (0.5, 1.0) + anc = AnchorGenerator(incorrect_sizes, incorrect_aspects) + image1 = torch.randn(3, 800, 800) + image_list = ImageList(image1, [(800, 800)]) + feature_maps = [torch.randn(1, 50)] + pytest.raises(AssertionError, anc, image_list, feature_maps) + + def _init_test_anchor_generator(self): + anchor_sizes = ((10,),) + aspect_ratios = ((1,),) + anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios) + + return anchor_generator + + def _init_test_defaultbox_generator(self): + aspect_ratios = [[2]] + dbox_generator = DefaultBoxGenerator(aspect_ratios) + + return dbox_generator + + def get_features(self, images): + s0, s1 = images.shape[-2:] + features = [torch.rand(2, 8, s0 // 5, s1 // 5)] + return features + + def test_anchor_generator(self): + images = torch.randn(2, 3, 15, 15) + features = self.get_features(images) + image_shapes = 
[i.shape[-2:] for i in images] + images = ImageList(images, image_shapes) + + model = self._init_test_anchor_generator() + model.eval() + anchors = model(images, features) + + # Estimate the number of target anchors + grid_sizes = [f.shape[-2:] for f in features] + num_anchors_estimated = 0 + for sizes, num_anchors_per_loc in zip(grid_sizes, model.num_anchors_per_location()): + num_anchors_estimated += sizes[0] * sizes[1] * num_anchors_per_loc + + anchors_output = torch.tensor( + [ + [-5.0, -5.0, 5.0, 5.0], + [0.0, -5.0, 10.0, 5.0], + [5.0, -5.0, 15.0, 5.0], + [-5.0, 0.0, 5.0, 10.0], + [0.0, 0.0, 10.0, 10.0], + [5.0, 0.0, 15.0, 10.0], + [-5.0, 5.0, 5.0, 15.0], + [0.0, 5.0, 10.0, 15.0], + [5.0, 5.0, 15.0, 15.0], + ] + ) + + assert num_anchors_estimated == 9 + assert len(anchors) == 2 + assert tuple(anchors[0].shape) == (9, 4) + assert tuple(anchors[1].shape) == (9, 4) + assert_equal(anchors[0], anchors_output) + assert_equal(anchors[1], anchors_output) + + def test_defaultbox_generator(self): + images = torch.zeros(2, 3, 15, 15) + features = [torch.zeros(2, 8, 1, 1)] + image_shapes = [i.shape[-2:] for i in images] + images = ImageList(images, image_shapes) + + model = self._init_test_defaultbox_generator() + model.eval() + dboxes = model(images, features) + + dboxes_output = torch.tensor( + [ + [6.3750, 6.3750, 8.6250, 8.6250], + [4.7443, 4.7443, 10.2557, 10.2557], + [5.9090, 6.7045, 9.0910, 8.2955], + [6.7045, 5.9090, 8.2955, 9.0910], + ] + ) + + assert len(dboxes) == 2 + assert tuple(dboxes[0].shape) == (4, 4) + assert tuple(dboxes[1].shape) == (4, 4) + torch.testing.assert_close(dboxes[0], dboxes_output, rtol=1e-5, atol=1e-8) + torch.testing.assert_close(dboxes[1], dboxes_output, rtol=1e-5, atol=1e-8) diff --git a/test/test_models_detection_negative_samples.py b/test/test_models_detection_negative_samples.py new file mode 100644 index 00000000000..c91cfdf20a7 --- /dev/null +++ b/test/test_models_detection_negative_samples.py @@ -0,0 +1,167 @@ +import pytest 
+import torch +import torchvision.models +from common_utils import assert_equal +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead +from torchvision.models.detection.roi_heads import RoIHeads +from torchvision.models.detection.rpn import AnchorGenerator, RegionProposalNetwork, RPNHead +from torchvision.ops import MultiScaleRoIAlign + + +class TestModelsDetectionNegativeSamples: + def _make_empty_sample(self, add_masks=False, add_keypoints=False): + images = [torch.rand((3, 100, 100), dtype=torch.float32)] + boxes = torch.zeros((0, 4), dtype=torch.float32) + negative_target = { + "boxes": boxes, + "labels": torch.zeros(0, dtype=torch.int64), + "image_id": 4, + "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]), + "iscrowd": torch.zeros((0,), dtype=torch.int64), + } + + if add_masks: + negative_target["masks"] = torch.zeros(0, 100, 100, dtype=torch.uint8) + + if add_keypoints: + negative_target["keypoints"] = torch.zeros(17, 0, 3, dtype=torch.float32) + + targets = [negative_target] + return images, targets + + def test_targets_to_anchors(self): + _, targets = self._make_empty_sample() + anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)] + + anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) + aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) + rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios) + rpn_head = RPNHead(4, rpn_anchor_generator.num_anchors_per_location()[0]) + + head = RegionProposalNetwork(rpn_anchor_generator, rpn_head, 0.5, 0.3, 256, 0.5, 2000, 2000, 0.7, 0.05) + + labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets) + + assert labels[0].sum() == 0 + assert labels[0].shape == torch.Size([anchors[0].shape[0]]) + assert labels[0].dtype == torch.float32 + + assert matched_gt_boxes[0].sum() == 0 + assert matched_gt_boxes[0].shape == anchors[0].shape + assert matched_gt_boxes[0].dtype == torch.float32 + + def test_assign_targets_to_proposals(self): + 
+ proposals = [torch.randint(-50, 50, (20, 4), dtype=torch.float32)] + gt_boxes = [torch.zeros((0, 4), dtype=torch.float32)] + gt_labels = [torch.tensor([[0]], dtype=torch.int64)] + + box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2) + + resolution = box_roi_pool.output_size[0] + representation_size = 1024 + box_head = TwoMLPHead(4 * resolution**2, representation_size) + + representation_size = 1024 + box_predictor = FastRCNNPredictor(representation_size, 2) + + roi_heads = RoIHeads( + # Box + box_roi_pool, + box_head, + box_predictor, + 0.5, + 0.5, + 512, + 0.25, + None, + 0.05, + 0.5, + 100, + ) + + matched_idxs, labels = roi_heads.assign_targets_to_proposals(proposals, gt_boxes, gt_labels) + + assert matched_idxs[0].sum() == 0 + assert matched_idxs[0].shape == torch.Size([proposals[0].shape[0]]) + assert matched_idxs[0].dtype == torch.int64 + + assert labels[0].sum() == 0 + assert labels[0].shape == torch.Size([proposals[0].shape[0]]) + assert labels[0].dtype == torch.int64 + + @pytest.mark.parametrize( + "name", + [ + "fasterrcnn_resnet50_fpn", + "fasterrcnn_mobilenet_v3_large_fpn", + "fasterrcnn_mobilenet_v3_large_320_fpn", + ], + ) + def test_forward_negative_sample_frcnn(self, name): + model = torchvision.models.get_model( + name, weights=None, weights_backbone=None, num_classes=2, min_size=100, max_size=100 + ) + + images, targets = self._make_empty_sample() + loss_dict = model(images, targets) + + assert_equal(loss_dict["loss_box_reg"], torch.tensor(0.0)) + assert_equal(loss_dict["loss_rpn_box_reg"], torch.tensor(0.0)) + + def test_forward_negative_sample_mrcnn(self): + model = torchvision.models.detection.maskrcnn_resnet50_fpn( + weights=None, weights_backbone=None, num_classes=2, min_size=100, max_size=100 + ) + + images, targets = self._make_empty_sample(add_masks=True) + loss_dict = model(images, targets) + + assert_equal(loss_dict["loss_box_reg"], torch.tensor(0.0)) + 
assert_equal(loss_dict["loss_rpn_box_reg"], torch.tensor(0.0)) + assert_equal(loss_dict["loss_mask"], torch.tensor(0.0)) + + def test_forward_negative_sample_krcnn(self): + model = torchvision.models.detection.keypointrcnn_resnet50_fpn( + weights=None, weights_backbone=None, num_classes=2, min_size=100, max_size=100 + ) + + images, targets = self._make_empty_sample(add_keypoints=True) + loss_dict = model(images, targets) + + assert_equal(loss_dict["loss_box_reg"], torch.tensor(0.0)) + assert_equal(loss_dict["loss_rpn_box_reg"], torch.tensor(0.0)) + assert_equal(loss_dict["loss_keypoint"], torch.tensor(0.0)) + + def test_forward_negative_sample_retinanet(self): + model = torchvision.models.detection.retinanet_resnet50_fpn( + weights=None, weights_backbone=None, num_classes=2, min_size=100, max_size=100 + ) + + images, targets = self._make_empty_sample() + loss_dict = model(images, targets) + + assert_equal(loss_dict["bbox_regression"], torch.tensor(0.0)) + + def test_forward_negative_sample_fcos(self): + model = torchvision.models.detection.fcos_resnet50_fpn( + weights=None, weights_backbone=None, num_classes=2, min_size=100, max_size=100 + ) + + images, targets = self._make_empty_sample() + loss_dict = model(images, targets) + + assert_equal(loss_dict["bbox_regression"], torch.tensor(0.0)) + assert_equal(loss_dict["bbox_ctrness"], torch.tensor(0.0)) + + def test_forward_negative_sample_ssd(self): + model = torchvision.models.detection.ssd300_vgg16(weights=None, weights_backbone=None, num_classes=2) + + images, targets = self._make_empty_sample() + loss_dict = model(images, targets) + + assert_equal(loss_dict["bbox_regression"], torch.tensor(0.0)) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_models_detection_utils.py b/test/test_models_detection_utils.py new file mode 100644 index 00000000000..69703ab5817 --- /dev/null +++ b/test/test_models_detection_utils.py @@ -0,0 +1,85 @@ +import copy + +import pytest +import torch +from 
common_utils import assert_equal +from torchvision.models.detection import _utils, backbone_utils +from torchvision.models.detection.transform import GeneralizedRCNNTransform + + +class TestModelsDetectionUtils: + def test_balanced_positive_negative_sampler(self): + sampler = _utils.BalancedPositiveNegativeSampler(4, 0.25) + # keep all 6 negatives first, then add 3 positives, last two are ignore + matched_idxs = [torch.tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, -1, -1])] + pos, neg = sampler(matched_idxs) + # we know the number of elements that should be sampled for the positive (1) + # and the negative (3), and their location. Let's make sure that they are + # there + assert pos[0].sum() == 1 + assert pos[0][6:9].sum() == 1 + assert neg[0].sum() == 3 + assert neg[0][0:6].sum() == 3 + + def test_box_linear_coder(self): + box_coder = _utils.BoxLinearCoder(normalize_by_size=True) + # Generate a random 10x4 boxes tensor, with coordinates < 50. + boxes = torch.rand(10, 4) * 50 + boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression + boxes[:, 2:] += boxes[:, :2] + + proposals = torch.tensor([0, 0, 101, 101] * 10).reshape(10, 4).float() + + rel_codes = box_coder.encode(boxes, proposals) + pred_boxes = box_coder.decode(rel_codes, boxes) + torch.allclose(proposals, pred_boxes) + + @pytest.mark.parametrize("train_layers, exp_froz_params", [(0, 53), (1, 43), (2, 24), (3, 11), (4, 1), (5, 0)]) + def test_resnet_fpn_backbone_frozen_layers(self, train_layers, exp_froz_params): + # we know how many initial layers and parameters of the network should + # be frozen for each trainable_backbone_layers parameter value + # i.e. 
all 53 params are frozen if trainable_backbone_layers=0 + # ad first 24 params are frozen if trainable_backbone_layers=2 + model = backbone_utils.resnet_fpn_backbone("resnet50", weights=None, trainable_layers=train_layers) + # boolean list that is true if the param at that index is frozen + is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()] + # check that expected initial number of layers are frozen + assert all(is_frozen[:exp_froz_params]) + + def test_validate_resnet_inputs_detection(self): + # default number of backbone layers to train + ret = backbone_utils._validate_trainable_layers( + is_trained=True, trainable_backbone_layers=None, max_value=5, default_value=3 + ) + assert ret == 3 + # can't go beyond 5 + with pytest.raises(ValueError, match=r"Trainable backbone layers should be in the range"): + ret = backbone_utils._validate_trainable_layers( + is_trained=True, trainable_backbone_layers=6, max_value=5, default_value=3 + ) + # if not trained, should use all trainable layers and warn + with pytest.warns(UserWarning): + ret = backbone_utils._validate_trainable_layers( + is_trained=False, trainable_backbone_layers=0, max_value=5, default_value=3 + ) + assert ret == 5 + + def test_transform_copy_targets(self): + transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3)) + image = [torch.rand(3, 200, 300), torch.rand(3, 200, 200)] + targets = [{"boxes": torch.rand(3, 4)}, {"boxes": torch.rand(2, 4)}] + targets_copy = copy.deepcopy(targets) + out = transform(image, targets) # noqa: F841 + assert_equal(targets[0]["boxes"], targets_copy[0]["boxes"]) + assert_equal(targets[1]["boxes"], targets_copy[1]["boxes"]) + + def test_not_float_normalize(self): + transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3)) + image = [torch.randint(0, 255, (3, 200, 300), dtype=torch.uint8)] + targets = [{"boxes": torch.rand(3, 4)}] + with pytest.raises(TypeError): + out = transform(image, targets) # noqa: 
F841 + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_onnx.py b/test/test_onnx.py index 090f16cc550..0350c817ff8 100644 --- a/test/test_onnx.py +++ b/test/test_onnx.py @@ -1,53 +1,71 @@ import io +from collections import OrderedDict +from typing import List, Optional, Tuple + +import pytest import torch -from torchvision import ops -from torchvision import models +from common_utils import assert_equal, set_rng_seed +from torchvision import models, ops +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead from torchvision.models.detection.image_list import ImageList -from torchvision.models.detection.transform import GeneralizedRCNNTransform -from torchvision.models.detection.rpn import AnchorGenerator, RPNHead, RegionProposalNetwork -from torchvision.models.detection.backbone_utils import resnet_fpn_backbone from torchvision.models.detection.roi_heads import RoIHeads -from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead -from torchvision.models.detection.mask_rcnn import MaskRCNNHeads, MaskRCNNPredictor - -from collections import OrderedDict - -# onnxruntime requires python 3.5 or above -try: - import onnxruntime -except ImportError: - onnxruntime = None +from torchvision.models.detection.rpn import AnchorGenerator, RegionProposalNetwork, RPNHead +from torchvision.models.detection.transform import GeneralizedRCNNTransform +from torchvision.ops import _register_onnx_ops -import unittest -from torchvision.ops._register_onnx_ops import _onnx_opset_version +# In environments without onnxruntime we prefer to +# invoke all tests in the repo and have this one skipped rather than fail. 
+onnxruntime = pytest.importorskip("onnxruntime") -@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable') -class ONNXExporterTester(unittest.TestCase): +class TestONNXExporter: @classmethod - def setUpClass(cls): + def setup_class(cls): torch.manual_seed(123) - def run_model(self, model, inputs_list, tolerate_small_mismatch=False): + def run_model( + self, + model, + inputs_list, + do_constant_folding=True, + dynamic_axes=None, + output_names=None, + input_names=None, + opset_version: Optional[int] = None, + ): + if opset_version is None: + opset_version = _register_onnx_ops.BASE_ONNX_OPSET_VERSION + model.eval() onnx_io = io.BytesIO() + if isinstance(inputs_list[0][-1], dict): + torch_onnx_input = inputs_list[0] + ({},) + else: + torch_onnx_input = inputs_list[0] # export to onnx with the first input - torch.onnx.export(model, inputs_list[0], onnx_io, - do_constant_folding=True, opset_version=_onnx_opset_version) - + torch.onnx.export( + model, + torch_onnx_input, + onnx_io, + do_constant_folding=do_constant_folding, + opset_version=opset_version, + dynamic_axes=dynamic_axes, + input_names=input_names, + output_names=output_names, + verbose=True, + ) # validate the exported model with onnx runtime for test_inputs in inputs_list: with torch.no_grad(): - if isinstance(test_inputs, torch.Tensor) or \ - isinstance(test_inputs, list): + if isinstance(test_inputs, torch.Tensor) or isinstance(test_inputs, list): test_inputs = (test_inputs,) test_ouputs = model(*test_inputs) if isinstance(test_ouputs, torch.Tensor): test_ouputs = (test_ouputs,) - self.ort_validate(onnx_io, test_inputs, test_ouputs, tolerate_small_mismatch) + self.ort_validate(onnx_io, test_inputs, test_ouputs) - def ort_validate(self, onnx_io, inputs, outputs, tolerate_small_mismatch=False): + def ort_validate(self, onnx_io, inputs, outputs): inputs, _ = torch.jit._flatten(inputs) outputs, _ = torch.jit._flatten(outputs) @@ -61,23 +79,19 @@ def to_numpy(tensor): inputs = list(map(to_numpy, 
inputs)) outputs = list(map(to_numpy, outputs)) - ort_session = onnxruntime.InferenceSession(onnx_io.getvalue()) + ort_session = onnxruntime.InferenceSession(onnx_io.getvalue(), providers=onnxruntime.get_available_providers()) # compute onnxruntime output prediction - ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs)) + ort_inputs = {ort_session.get_inputs()[i].name: inpt for i, inpt in enumerate(inputs)} ort_outs = ort_session.run(None, ort_inputs) + for i in range(0, len(outputs)): - try: - torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05) - except AssertionError as error: - if tolerate_small_mismatch: - self.assertIn("(0.00%)", str(error), str(error)) - else: - raise + torch.testing.assert_close(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05) def test_nms(self): - boxes = torch.rand(5, 4) - boxes[:, 2:] += torch.rand(5, 2) - scores = torch.randn(5) + num_boxes = 100 + boxes = torch.rand(num_boxes, 4) + boxes[:, 2:] += boxes[:, :2] + scores = torch.randn(num_boxes) class Module(torch.nn.Module): def forward(self, boxes, scores): @@ -85,12 +99,79 @@ def forward(self, boxes, scores): self.run_model(Module(), [(boxes, scores)]) + def test_batched_nms(self): + num_boxes = 100 + boxes = torch.rand(num_boxes, 4) + boxes[:, 2:] += boxes[:, :2] + scores = torch.randn(num_boxes) + idxs = torch.randint(0, 5, size=(num_boxes,)) + + class Module(torch.nn.Module): + def forward(self, boxes, scores, idxs): + return ops.batched_nms(boxes, scores, idxs, 0.5) + + self.run_model(Module(), [(boxes, scores, idxs)]) + + def test_clip_boxes_to_image(self): + boxes = torch.randn(5, 4) * 500 + boxes[:, 2:] += boxes[:, :2] + size = torch.randn(200, 300) + + size_2 = torch.randn(300, 400) + + class Module(torch.nn.Module): + def forward(self, boxes, size): + return ops.boxes.clip_boxes_to_image(boxes, size.shape) + + self.run_model( + Module(), [(boxes, size), (boxes, size_2)], input_names=["boxes", "size"], 
dynamic_axes={"size": [0, 1]} + ) + def test_roi_align(self): x = torch.rand(1, 1, 10, 10, dtype=torch.float32) single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32) model = ops.RoIAlign((5, 5), 1, 2) self.run_model(model, [(x, single_roi)]) + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1, -1) + self.run_model(model, [(x, single_roi)]) + + def test_roi_align_aligned(self): + supported_onnx_version = _register_onnx_ops._ONNX_OPSET_VERSION_16 + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 1.5, 1.5, 3, 3]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1, 2, aligned=True) + self.run_model(model, [(x, single_roi)], opset_version=supported_onnx_version) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 0.5, 3, aligned=True) + self.run_model(model, [(x, single_roi)], opset_version=supported_onnx_version) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1.8, 2, aligned=True) + self.run_model(model, [(x, single_roi)], opset_version=supported_onnx_version) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((2, 2), 2.5, 0, aligned=True) + self.run_model(model, [(x, single_roi)], opset_version=supported_onnx_version) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((2, 2), 2.5, -1, aligned=True) + self.run_model(model, [(x, single_roi)], opset_version=supported_onnx_version) + + def test_roi_align_malformed_boxes(self): + supported_onnx_version = _register_onnx_ops._ONNX_OPSET_VERSION_16 + x = 
torch.randn(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 2, 0.3, 1.5, 1.5]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1, 1, aligned=True) + self.run_model(model, [(x, single_roi)], opset_version=supported_onnx_version) + def test_roi_pool(self): x = torch.rand(1, 1, 10, 10, dtype=torch.float32) rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32) @@ -99,19 +180,33 @@ def test_roi_pool(self): model = ops.RoIPool((pool_h, pool_w), 2) self.run_model(model, [(x, rois)]) - def test_transform_images(self): + def test_resize_images(self): + class TransformModule(torch.nn.Module): + def __init__(self_module): + super().__init__() + self_module.transform = self._init_test_generalized_rcnn_transform() + + def forward(self_module, images): + return self_module.transform.resize(images, None)[0] + + input = torch.rand(3, 10, 20) + input_test = torch.rand(3, 100, 150) + self.run_model( + TransformModule(), [(input,), (input_test,)], input_names=["input1"], dynamic_axes={"input1": [0, 1, 2]} + ) + def test_transform_images(self): class TransformModule(torch.nn.Module): def __init__(self_module): - super(TransformModule, self_module).__init__() + super().__init__() self_module.transform = self._init_test_generalized_rcnn_transform() def forward(self_module, images): return self_module.transform(images)[0].tensors - input = [torch.rand(3, 100, 200), torch.rand(3, 200, 200)] - input_test = [torch.rand(3, 100, 200), torch.rand(3, 200, 200)] - self.run_model(TransformModule(), [input, input_test]) + input = torch.rand(3, 100, 200), torch.rand(3, 200, 200) + input_test = torch.rand(3, 100, 200), torch.rand(3, 200, 200) + self.run_model(TransformModule(), [(input,), (input_test,)]) def _init_test_generalized_rcnn_transform(self): min_size = 100 @@ -134,12 +229,20 @@ def _init_test_rpn(self): rpn_pre_nms_top_n = dict(training=2000, testing=1000) rpn_post_nms_top_n = dict(training=2000, testing=1000) rpn_nms_thresh = 0.7 + rpn_score_thresh = 0.0 
rpn = RegionProposalNetwork( - rpn_anchor_generator, rpn_head, - rpn_fg_iou_thresh, rpn_bg_iou_thresh, - rpn_batch_size_per_image, rpn_positive_fraction, - rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh) + rpn_anchor_generator, + rpn_head, + rpn_fg_iou_thresh, + rpn_bg_iou_thresh, + rpn_batch_size_per_image, + rpn_positive_fraction, + rpn_pre_nms_top_n, + rpn_post_nms_top_n, + rpn_nms_thresh, + score_thresh=rpn_score_thresh, + ) return rpn def _init_test_roi_heads_faster_rcnn(self): @@ -155,145 +258,194 @@ def _init_test_roi_heads_faster_rcnn(self): box_nms_thresh = 0.5 box_detections_per_img = 100 - box_roi_pool = ops.MultiScaleRoIAlign( - featmap_names=['0', '1', '2', '3'], - output_size=7, - sampling_ratio=2) + box_roi_pool = ops.MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2) resolution = box_roi_pool.output_size[0] representation_size = 1024 - box_head = TwoMLPHead( - out_channels * resolution ** 2, - representation_size) + box_head = TwoMLPHead(out_channels * resolution**2, representation_size) representation_size = 1024 - box_predictor = FastRCNNPredictor( - representation_size, - num_classes) + box_predictor = FastRCNNPredictor(representation_size, num_classes) roi_heads = RoIHeads( - box_roi_pool, box_head, box_predictor, - box_fg_iou_thresh, box_bg_iou_thresh, - box_batch_size_per_image, box_positive_fraction, + box_roi_pool, + box_head, + box_predictor, + box_fg_iou_thresh, + box_bg_iou_thresh, + box_batch_size_per_image, + box_positive_fraction, bbox_reg_weights, - box_score_thresh, box_nms_thresh, box_detections_per_img) + box_score_thresh, + box_nms_thresh, + box_detections_per_img, + ) return roi_heads def get_features(self, images): s0, s1 = images.shape[-2:] features = [ - ('0', torch.rand(2, 256, s0 // 4, s1 // 4)), - ('1', torch.rand(2, 256, s0 // 8, s1 // 8)), - ('2', torch.rand(2, 256, s0 // 16, s1 // 16)), - ('3', torch.rand(2, 256, s0 // 32, s1 // 32)), - ('4', torch.rand(2, 256, s0 // 64, s1 // 
64)), + ("0", torch.rand(2, 256, s0 // 4, s1 // 4)), + ("1", torch.rand(2, 256, s0 // 8, s1 // 8)), + ("2", torch.rand(2, 256, s0 // 16, s1 // 16)), + ("3", torch.rand(2, 256, s0 // 32, s1 // 32)), + ("4", torch.rand(2, 256, s0 // 64, s1 // 64)), ] features = OrderedDict(features) return features def test_rpn(self): + set_rng_seed(0) + class RPNModule(torch.nn.Module): - def __init__(self_module, images): - super(RPNModule, self_module).__init__() + def __init__(self_module): + super().__init__() self_module.rpn = self._init_test_rpn() - self_module.images = ImageList(images, [i.shape[-2:] for i in images]) - def forward(self_module, features): - return self_module.rpn(self_module.images, features) + def forward(self_module, images, features): + images = ImageList(images, [i.shape[-2:] for i in images]) + return self_module.rpn(images, features) - images = torch.rand(2, 3, 600, 600) + images = torch.rand(2, 3, 150, 150) features = self.get_features(images) - test_features = self.get_features(images) + images2 = torch.rand(2, 3, 80, 80) + test_features = self.get_features(images2) - model = RPNModule(images) + model = RPNModule() model.eval() - model(features) - self.run_model(model, [(features,), (test_features,)], tolerate_small_mismatch=True) + model(images, features) + + self.run_model( + model, + [(images, features), (images2, test_features)], + input_names=["input1", "input2", "input3", "input4", "input5", "input6"], + dynamic_axes={ + "input1": [0, 1, 2, 3], + "input2": [0, 1, 2, 3], + "input3": [0, 1, 2, 3], + "input4": [0, 1, 2, 3], + "input5": [0, 1, 2, 3], + "input6": [0, 1, 2, 3], + }, + ) def test_multi_scale_roi_align(self): - class TransformModule(torch.nn.Module): def __init__(self): - super(TransformModule, self).__init__() - self.model = ops.MultiScaleRoIAlign(['feat1', 'feat2'], 3, 2) + super().__init__() + self.model = ops.MultiScaleRoIAlign(["feat1", "feat2"], 3, 2) self.image_sizes = [(512, 512)] def forward(self, input, boxes): return 
self.model(input, boxes, self.image_sizes) i = OrderedDict() - i['feat1'] = torch.rand(1, 5, 64, 64) - i['feat2'] = torch.rand(1, 5, 16, 16) + i["feat1"] = torch.rand(1, 5, 64, 64) + i["feat2"] = torch.rand(1, 5, 16, 16) boxes = torch.rand(6, 4) * 256 boxes[:, 2:] += boxes[:, :2] i1 = OrderedDict() - i1['feat1'] = torch.rand(1, 5, 64, 64) - i1['feat2'] = torch.rand(1, 5, 16, 16) + i1["feat1"] = torch.rand(1, 5, 64, 64) + i1["feat2"] = torch.rand(1, 5, 16, 16) boxes1 = torch.rand(6, 4) * 256 boxes1[:, 2:] += boxes1[:, :2] - self.run_model(TransformModule(), [(i, [boxes],), (i1, [boxes1],)]) + self.run_model( + TransformModule(), + [ + ( + i, + [boxes], + ), + ( + i1, + [boxes1], + ), + ], + ) def test_roi_heads(self): class RoiHeadsModule(torch.nn.Module): - def __init__(self_module, images): - super(RoiHeadsModule, self_module).__init__() + def __init__(self_module): + super().__init__() self_module.transform = self._init_test_generalized_rcnn_transform() self_module.rpn = self._init_test_rpn() self_module.roi_heads = self._init_test_roi_heads_faster_rcnn() - self_module.original_image_sizes = [img.shape[-2:] for img in images] - self_module.images = ImageList(images, [i.shape[-2:] for i in images]) - - def forward(self_module, features): - proposals, _ = self_module.rpn(self_module.images, features) - detections, _ = self_module.roi_heads(features, proposals, self_module.images.image_sizes) - detections = self_module.transform.postprocess(detections, - self_module.images.image_sizes, - self_module.original_image_sizes) + + def forward(self_module, images, features): + original_image_sizes = [img.shape[-2:] for img in images] + images = ImageList(images, [i.shape[-2:] for i in images]) + proposals, _ = self_module.rpn(images, features) + detections, _ = self_module.roi_heads(features, proposals, images.image_sizes) + detections = self_module.transform.postprocess(detections, images.image_sizes, original_image_sizes) return detections - images = torch.rand(2, 3, 
600, 600) + images = torch.rand(2, 3, 100, 100) features = self.get_features(images) - test_features = self.get_features(images) + images2 = torch.rand(2, 3, 150, 150) + test_features = self.get_features(images2) - model = RoiHeadsModule(images) + model = RoiHeadsModule() model.eval() - model(features) - self.run_model(model, [(features,), (test_features,)]) + model(images, features) + + self.run_model( + model, + [(images, features), (images2, test_features)], + input_names=["input1", "input2", "input3", "input4", "input5", "input6"], + dynamic_axes={ + "input1": [0, 1, 2, 3], + "input2": [0, 1, 2, 3], + "input3": [0, 1, 2, 3], + "input4": [0, 1, 2, 3], + "input5": [0, 1, 2, 3], + "input6": [0, 1, 2, 3], + }, + ) + + def get_image(self, rel_path: str, size: Tuple[int, int]) -> torch.Tensor: + import os - def get_image_from_url(self, url): - import requests - import numpy from PIL import Image - from io import BytesIO - from torchvision import transforms + from torchvision.transforms import functional as F - data = requests.get(url) - image = Image.open(BytesIO(data.content)).convert("RGB") - image = image.resize((300, 200), Image.BILINEAR) + data_dir = os.path.join(os.path.dirname(__file__), "assets") + path = os.path.join(data_dir, *rel_path.split("/")) + image = Image.open(path).convert("RGB").resize(size, Image.BILINEAR) - to_tensor = transforms.ToTensor() - return to_tensor(image) + return F.convert_image_dtype(F.pil_to_tensor(image)) - def get_test_images(self): - image_url = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg" - image = self.get_image_from_url(url=image_url) - image_url2 = "https://pytorch.org/tutorials/_static/img/tv_tutorial/tv_image05.png" - image2 = self.get_image_from_url(url=image_url2) - images = [image] - test_images = [image2] - return images, test_images + def get_test_images(self) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + return ( + [self.get_image("encode_jpeg/grace_hopper_517x606.jpg", (100, 320))], + 
[self.get_image("fakedata/logos/rgb_pytorch.png", (250, 380))], + ) def test_faster_rcnn(self): images, test_images = self.get_test_images() - - model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True, - min_size=200, - max_size=300) + dummy_image = [torch.ones(3, 100, 100) * 0.3] + model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn( + weights=models.detection.faster_rcnn.FasterRCNN_ResNet50_FPN_Weights.DEFAULT, min_size=200, max_size=300 + ) model.eval() model(images) - self.run_model(model, [(images,), (test_images,)]) + # Test exported model on images of different size, or dummy input + self.run_model( + model, + [(images,), (test_images,), (dummy_image,)], + input_names=["images_tensors"], + output_names=["outputs"], + dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]}, + ) + # Test exported model for an image with no detections on other images + self.run_model( + model, + [(dummy_image,), (images,)], + input_names=["images_tensors"], + output_names=["outputs"], + dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]}, + ) # Verify that paste_mask_in_image beahves the same in tracing. 
# This test also compares both paste_masks_in_image and _onnx_paste_masks_in_image @@ -305,11 +457,11 @@ def test_paste_mask_in_image(self): boxes *= 50 o_im_s = (100, 100) from torchvision.models.detection.roi_heads import paste_masks_in_image + out = paste_masks_in_image(masks, boxes, o_im_s) - jit_trace = torch.jit.trace(paste_masks_in_image, - (masks, boxes, - [torch.tensor(o_im_s[0]), - torch.tensor(o_im_s[1])])) + jit_trace = torch.jit.trace( + paste_masks_in_image, (masks, boxes, [torch.tensor(o_im_s[0]), torch.tensor(o_im_s[1])]) + ) out_trace = jit_trace(masks, boxes, [torch.tensor(o_im_s[0]), torch.tensor(o_im_s[1])]) assert torch.all(out.eq(out_trace)) @@ -320,20 +472,111 @@ def test_paste_mask_in_image(self): boxes2 *= 100 o_im_s2 = (200, 200) from torchvision.models.detection.roi_heads import paste_masks_in_image + out2 = paste_masks_in_image(masks2, boxes2, o_im_s2) out_trace2 = jit_trace(masks2, boxes2, [torch.tensor(o_im_s2[0]), torch.tensor(o_im_s2[1])]) assert torch.all(out2.eq(out_trace2)) - @unittest.skip("Disable test until Resize opset 11 is implemented in ONNX Runtime") def test_mask_rcnn(self): images, test_images = self.get_test_images() - - model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn(pretrained=True) + dummy_image = [torch.ones(3, 100, 100) * 0.3] + model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn( + weights=models.detection.mask_rcnn.MaskRCNN_ResNet50_FPN_Weights.DEFAULT, min_size=200, max_size=300 + ) model.eval() model(images) - self.run_model(model, [(images,), (test_images,)]) - - -if __name__ == '__main__': - unittest.main() + # Test exported model on images of different size, or dummy input + self.run_model( + model, + [(images,), (test_images,), (dummy_image,)], + input_names=["images_tensors"], + output_names=["boxes", "labels", "scores", "masks"], + dynamic_axes={ + "images_tensors": [0, 1, 2], + "boxes": [0, 1], + "labels": [0], + "scores": [0], + "masks": [0, 1, 2], + }, + ) + # Test exported model for an 
image with no detections on other images + self.run_model( + model, + [(dummy_image,), (images,)], + input_names=["images_tensors"], + output_names=["boxes", "labels", "scores", "masks"], + dynamic_axes={ + "images_tensors": [0, 1, 2], + "boxes": [0, 1], + "labels": [0], + "scores": [0], + "masks": [0, 1, 2], + }, + ) + + # Verify that heatmaps_to_keypoints behaves the same in tracing. + # This test also compares both heatmaps_to_keypoints and _onnx_heatmaps_to_keypoints + # (since jit_trace witll call _heatmaps_to_keypoints). + def test_heatmaps_to_keypoints(self): + maps = torch.rand(10, 1, 26, 26) + rois = torch.rand(10, 4) + from torchvision.models.detection.roi_heads import heatmaps_to_keypoints + + out = heatmaps_to_keypoints(maps, rois) + jit_trace = torch.jit.trace(heatmaps_to_keypoints, (maps, rois)) + out_trace = jit_trace(maps, rois) + + assert_equal(out[0], out_trace[0]) + assert_equal(out[1], out_trace[1]) + + maps2 = torch.rand(20, 2, 21, 21) + rois2 = torch.rand(20, 4) + from torchvision.models.detection.roi_heads import heatmaps_to_keypoints + + out2 = heatmaps_to_keypoints(maps2, rois2) + out_trace2 = jit_trace(maps2, rois2) + + assert_equal(out2[0], out_trace2[0]) + assert_equal(out2[1], out_trace2[1]) + + def test_keypoint_rcnn(self): + images, test_images = self.get_test_images() + dummy_images = [torch.ones(3, 100, 100) * 0.3] + model = models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn( + weights=models.detection.keypoint_rcnn.KeypointRCNN_ResNet50_FPN_Weights.DEFAULT, min_size=200, max_size=300 + ) + model.eval() + model(images) + self.run_model( + model, + [(images,), (test_images,), (dummy_images,)], + input_names=["images_tensors"], + output_names=["outputs1", "outputs2", "outputs3", "outputs4"], + dynamic_axes={"images_tensors": [0, 1, 2]}, + ) + + self.run_model( + model, + [(dummy_images,), (test_images,)], + input_names=["images_tensors"], + output_names=["outputs1", "outputs2", "outputs3", "outputs4"], + 
dynamic_axes={"images_tensors": [0, 1, 2]}, + ) + + def test_shufflenet_v2_dynamic_axes(self): + model = models.shufflenet_v2_x0_5(weights=models.ShuffleNet_V2_X0_5_Weights.DEFAULT) + dummy_input = torch.randn(1, 3, 224, 224, requires_grad=True) + test_inputs = torch.cat([dummy_input, dummy_input, dummy_input], 0) + + self.run_model( + model, + [(dummy_input,), (test_inputs,)], + input_names=["input_images"], + output_names=["output"], + dynamic_axes={"input_images": {0: "batch_size"}, "output": {0: "batch_size"}}, + ) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_ops.py b/test/test_ops.py index c4cc3fe0bd6..1ba7a2c9efa 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1,109 +1,293 @@ -from __future__ import division +import math +import os +from abc import ABC, abstractmethod +from functools import lru_cache +from itertools import product +from typing import Callable, List, Tuple + import numpy as np +import pytest import torch +import torch.fx +import torch.nn.functional as F +import torch.testing._internal.optests as optests +from common_utils import assert_equal, cpu_and_cuda, cpu_and_cuda_and_mps, needs_cuda, needs_mps +from PIL import Image +from torch import nn, Tensor +from torch._dynamo.utils import is_compile_supported from torch.autograd import gradcheck +from torch.nn.modules.utils import _pair +from torchvision import models, ops +from torchvision.models.feature_extraction import get_graph_node_names -from torchvision import ops -from itertools import product -import unittest +OPTESTS = [ + "test_schema", + "test_autograd_registration", + "test_faketensor", + "test_aot_dispatch_dynamic", +] + + +# Context manager for setting deterministic flag and automatically +# resetting it to its original value +class DeterministicGuard: + def __init__(self, deterministic, *, warn_only=False): + self.deterministic = deterministic + self.warn_only = warn_only + + def __enter__(self): + self.deterministic_restore = 
torch.are_deterministic_algorithms_enabled() + self.warn_only_restore = torch.is_deterministic_algorithms_warn_only_enabled() + torch.use_deterministic_algorithms(self.deterministic, warn_only=self.warn_only) + + def __exit__(self, exception_type, exception_value, traceback): + torch.use_deterministic_algorithms(self.deterministic_restore, warn_only=self.warn_only_restore) + + +class RoIOpTesterModuleWrapper(nn.Module): + def __init__(self, obj): + super().__init__() + self.layer = obj + self.n_inputs = 2 + + def forward(self, a, b): + self.layer(a, b) + + +class MultiScaleRoIAlignModuleWrapper(nn.Module): + def __init__(self, obj): + super().__init__() + self.layer = obj + self.n_inputs = 3 + + def forward(self, a, b, c): + self.layer(a, b, c) + + +class DeformConvModuleWrapper(nn.Module): + def __init__(self, obj): + super().__init__() + self.layer = obj + self.n_inputs = 3 + def forward(self, a, b, c): + self.layer(a, b, c) -class RoIOpTester(object): - @classmethod - def setUpClass(cls): - cls.dtype = torch.float64 - def test_forward_cpu_contiguous(self): - self._test_forward(device=torch.device('cpu'), contiguous=True) +class StochasticDepthWrapper(nn.Module): + def __init__(self, obj): + super().__init__() + self.layer = obj + self.n_inputs = 1 - def test_forward_cpu_non_contiguous(self): - self._test_forward(device=torch.device('cpu'), contiguous=False) + def forward(self, a): + self.layer(a) - def test_backward_cpu_contiguous(self): - self._test_backward(device=torch.device('cpu'), contiguous=True) - def test_backward_cpu_non_contiguous(self): - self._test_backward(device=torch.device('cpu'), contiguous=False) +class DropBlockWrapper(nn.Module): + def __init__(self, obj): + super().__init__() + self.layer = obj + self.n_inputs = 1 - @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_forward_cuda_contiguous(self): - self._test_forward(device=torch.device('cuda'), contiguous=True) + def forward(self, a): + self.layer(a) - 
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_forward_cuda_non_contiguous(self): - self._test_forward(device=torch.device('cuda'), contiguous=False) - @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_backward_cuda_contiguous(self): - self._test_backward(device=torch.device('cuda'), contiguous=True) +class PoolWrapper(nn.Module): + def __init__(self, pool: nn.Module): + super().__init__() + self.pool = pool - @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_backward_cuda_non_contiguous(self): - self._test_backward(device=torch.device('cuda'), contiguous=False) + def forward(self, imgs: Tensor, boxes: List[Tensor]) -> Tensor: + return self.pool(imgs, boxes) + + +class RoIOpTester(ABC): + dtype = torch.float64 + mps_dtype = torch.float32 + mps_backward_atol = 2e-2 + + @pytest.mark.parametrize("device", cpu_and_cuda_and_mps()) + @pytest.mark.parametrize("contiguous", (True, False)) + @pytest.mark.parametrize( + "x_dtype", + ( + torch.float16, + torch.float32, + torch.float64, + ), + ids=str, + ) + def test_forward(self, device, contiguous, x_dtype, rois_dtype=None, deterministic=False, **kwargs): + if device == "mps" and x_dtype is torch.float64: + pytest.skip("MPS does not support float64") + + rois_dtype = x_dtype if rois_dtype is None else rois_dtype + + tol = 1e-5 + if x_dtype is torch.half: + if device == "mps": + tol = 5e-3 + else: + tol = 4e-3 + elif x_dtype == torch.bfloat16: + tol = 5e-3 - def _test_forward(self, device, contiguous): pool_size = 5 - # n_channels % (pool_size ** 2) == 0 required for PS opeartions. - n_channels = 2 * (pool_size ** 2) - x = torch.rand(2, n_channels, 10, 10, dtype=self.dtype, device=device) + # n_channels % (pool_size ** 2) == 0 required for PS operations. 
+ n_channels = 2 * (pool_size**2) + x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device) if not contiguous: x = x.permute(0, 1, 3, 2) - rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy) - [0, 0, 5, 4, 9], - [0, 5, 5, 9, 9], - [1, 0, 0, 9, 9]], - dtype=self.dtype, device=device) + rois = torch.tensor( + [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]], # format is (xyxy) + dtype=rois_dtype, + device=device, + ) pool_h, pool_w = pool_size, pool_size - y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1) - gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1, - sampling_ratio=-1, device=device, dtype=self.dtype) - - self.assertTrue(torch.allclose(gt_y, y)) - - def _test_backward(self, device, contiguous): + with DeterministicGuard(deterministic): + y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs) + # the following should be true whether we're running an autocast test or not. + assert y.dtype == x.dtype + gt_y = self.expected_fn( + x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=device, dtype=x_dtype, **kwargs + ) + + torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_is_leaf_node(self, device): + op_obj = self.make_obj(wrap=True).to(device=device) + graph_node_names = get_graph_node_names(op_obj) + + assert len(graph_node_names) == 2 + assert len(graph_node_names[0]) == len(graph_node_names[1]) + assert len(graph_node_names[0]) == 1 + op_obj.n_inputs + + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_torch_fx_trace(self, device, x_dtype=torch.float, rois_dtype=torch.float): + op_obj = self.make_obj().to(device=device) + graph_module = torch.fx.symbolic_trace(op_obj) + pool_size = 5 + n_channels = 2 * (pool_size**2) + x = torch.rand(2, n_channels, 5, 5, dtype=x_dtype, device=device) + rois = torch.tensor( + [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 
9, 9], [1, 0, 0, 9, 9]], # format is (xyxy) + dtype=rois_dtype, + device=device, + ) + output_gt = op_obj(x, rois) + assert output_gt.dtype == x.dtype + output_fx = graph_module(x, rois) + assert output_fx.dtype == x.dtype + tol = 1e-5 + torch.testing.assert_close(output_gt, output_fx, rtol=tol, atol=tol) + + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.parametrize("device", cpu_and_cuda_and_mps()) + @pytest.mark.parametrize("contiguous", (True, False)) + def test_backward(self, seed, device, contiguous, deterministic=False): + atol = self.mps_backward_atol if device == "mps" else 1e-05 + dtype = self.mps_dtype if device == "mps" else self.dtype + + torch.random.manual_seed(seed) pool_size = 2 - x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True) + x = torch.rand(1, 2 * (pool_size**2), 5, 5, dtype=dtype, device=device, requires_grad=True) if not contiguous: x = x.permute(0, 1, 3, 2) - rois = torch.tensor([[0, 0, 0, 4, 4], # format is (xyxy) - [0, 0, 2, 3, 4], - [0, 2, 2, 4, 4]], - dtype=self.dtype, device=device) + rois = torch.tensor( + [[0, 0, 0, 4, 4], [0, 0, 2, 3, 4], [0, 2, 2, 4, 4]], dtype=dtype, device=device # format is (xyxy) + ) def func(z): return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1) script_func = self.get_script_fn(rois, pool_size) - self.assertTrue(gradcheck(func, (x,))) - self.assertTrue(gradcheck(script_func, (x,))) - return + with DeterministicGuard(deterministic): + gradcheck(func, (x,), atol=atol) + + gradcheck(script_func, (x,), atol=atol) + + @needs_mps + def test_mps_error_inputs(self): + pool_size = 2 + x = torch.rand(1, 2 * (pool_size**2), 5, 5, dtype=torch.float16, device="mps", requires_grad=True) + rois = torch.tensor( + [[0, 0, 0, 4, 4], [0, 0, 2, 3, 4], [0, 2, 2, 4, 4]], dtype=torch.float16, device="mps" # format is (xyxy) + ) + + def func(z): + return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1) + with 
pytest.raises( + RuntimeError, match="MPS does not support (?:ps_)?roi_(?:align|pool)? backward with float16 inputs." + ): + gradcheck(func, (x,)) + + @needs_cuda + @pytest.mark.parametrize("x_dtype", (torch.float, torch.half)) + @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half)) + def test_autocast(self, x_dtype, rois_dtype): + with torch.cuda.amp.autocast(): + self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype) + + def _helper_boxes_shape(self, func): + # test boxes as Tensor[N, 5] + with pytest.raises(AssertionError): + a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8) + boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype) + func(a, boxes, output_size=(2, 2)) + + # test boxes as List[Tensor[N, 4]] + with pytest.raises(AssertionError): + a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8) + boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype) + ops.roi_pool(a, [boxes], output_size=(2, 2)) + + def _helper_jit_boxes_list(self, model): + x = torch.rand(2, 1, 10, 10) + roi = torch.tensor([[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]], dtype=torch.float).t() + rois = [roi, roi] + scriped = torch.jit.script(model) + y = scriped(x, rois) + assert y.shape == (10, 1, 3, 3) + + @abstractmethod def fn(*args, **kwargs): pass + @abstractmethod + def make_obj(*args, **kwargs): + pass + + @abstractmethod def get_script_fn(*args, **kwargs): pass + @abstractmethod def expected_fn(*args, **kwargs): pass -class RoIPoolTester(RoIOpTester, unittest.TestCase): +class TestRoiPool(RoIOpTester): def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois) + def make_obj(self, pool_h=5, pool_w=5, spatial_scale=1, wrap=False): + obj = ops.RoIPool((pool_h, pool_w), spatial_scale) + return RoIOpTesterModuleWrapper(obj) if wrap else obj + def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, 
rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.roi_pool(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) + scriped = torch.jit.script(ops.roi_pool) + return lambda x: scriped(x, rois, pool_size) - def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, - device=None, dtype=torch.float64): + def expected_fn( + self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64 + ): if device is None: device = torch.device("cpu") @@ -116,7 +300,7 @@ def get_slice(k, block): for roi_idx, roi in enumerate(rois): batch_idx = int(roi[0]) j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:]) - roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1] + roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1] roi_h, roi_w = roi_x.shape[-2:] bin_h = roi_h / pool_h @@ -129,24 +313,35 @@ def get_slice(k, block): y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0] return y + def test_boxes_shape(self): + self._helper_boxes_shape(ops.roi_pool) + + def test_jit_boxes_list(self): + model = PoolWrapper(ops.RoIPool(output_size=[3, 3], spatial_scale=1.0)) + self._helper_jit_boxes_list(model) + + +class TestPSRoIPool(RoIOpTester): + mps_backward_atol = 5e-2 -class PSRoIPoolTester(RoIOpTester, unittest.TestCase): def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois) + def make_obj(self, pool_h=5, pool_w=5, spatial_scale=1, wrap=False): + obj = ops.PSRoIPool((pool_h, pool_w), spatial_scale) + return RoIOpTesterModuleWrapper(obj) if wrap else obj + def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.ps_roi_pool(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) 
+ scriped = torch.jit.script(ops.ps_roi_pool) + return lambda x: scriped(x, rois, pool_size) - def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, - device=None, dtype=torch.float64): + def expected_fn( + self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64 + ): if device is None: device = torch.device("cpu") n_input_channels = x.size(1) - self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw") + assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw" n_output_channels = int(n_input_channels / (pool_h * pool_w)) y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device) @@ -156,7 +351,7 @@ def get_slice(k, block): for roi_idx, roi in enumerate(rois): batch_idx = int(roi[0]) j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:]) - roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1] + roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1] roi_height = max(i_end - i_begin, 1) roi_width = max(j_end - j_begin, 1) @@ -173,55 +368,82 @@ def get_slice(k, block): y[roi_idx, c_out, i, j] = t / area return y + def test_boxes_shape(self): + self._helper_boxes_shape(ops.ps_roi_pool) -def bilinear_interpolate(data, height, width, y, x): - if y < -1.0 or y > height or x < -1.0 or x > width: - return 0. 
- y = min(max(0, y), height - 1) - x = min(max(0, x), width - 1) +def bilinear_interpolate(data, y, x, snap_border=False): + height, width = data.shape - y_low = int(y) - y_high = min(y_low + 1, height - 1) + if snap_border: + if -1 < y <= 0: + y = 0 + elif height - 1 <= y < height: + y = height - 1 - x_low = int(x) - x_high = min(x_low + 1, width - 1) + if -1 < x <= 0: + x = 0 + elif width - 1 <= x < width: + x = width - 1 - wy_h = y - y_low - wy_l = 1 - wy_h + y_low = int(math.floor(y)) + x_low = int(math.floor(x)) + y_high = y_low + 1 + x_high = x_low + 1 + wy_h = y - y_low wx_h = x - x_low + wy_l = 1 - wy_h wx_l = 1 - wx_h val = 0 - for wx, x in zip((wx_l, wx_h), (x_low, x_high)): - for wy, y in zip((wy_l, wy_h), (y_low, y_high)): - val += wx * wy * data[y * width + x] + for wx, xp in zip((wx_l, wx_h), (x_low, x_high)): + for wy, yp in zip((wy_l, wy_h), (y_low, y_high)): + if 0 <= yp < height and 0 <= xp < width: + val += wx * wy * data[yp, xp] return val -class RoIAlignTester(RoIOpTester, unittest.TestCase): - def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): - return ops.RoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, - sampling_ratio=sampling_ratio)(x, rois) +class TestRoIAlign(RoIOpTester): + mps_backward_atol = 6e-2 - def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.roi_align(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) + def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs): + return ops.RoIAlign( + (pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio, aligned=aligned + )(x, rois) + + def make_obj(self, pool_h=5, pool_w=5, spatial_scale=1, sampling_ratio=-1, aligned=False, wrap=False): + obj = ops.RoIAlign( + (pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio, 
aligned=aligned + ) + return RoIOpTesterModuleWrapper(obj) if wrap else obj - def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, - device=None, dtype=torch.float64): + def get_script_fn(self, rois, pool_size): + scriped = torch.jit.script(ops.roi_align) + return lambda x: scriped(x, rois, pool_size) + + def expected_fn( + self, + in_data, + rois, + pool_h, + pool_w, + spatial_scale=1, + sampling_ratio=-1, + aligned=False, + device=None, + dtype=torch.float64, + ): if device is None: device = torch.device("cpu") n_channels = in_data.size(1) out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device) + offset = 0.5 if aligned else 0.0 + for r, roi in enumerate(rois): batch_idx = int(roi[0]) - j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale for x in roi[1:]) + j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - offset for x in roi[1:]) roi_h = i_end - i_begin roi_w = j_end - j_begin @@ -236,42 +458,185 @@ def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_r grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w)) for channel in range(0, n_channels): - val = 0 for iy in range(0, grid_h): y = start_h + (iy + 0.5) * bin_h / grid_h for ix in range(0, grid_w): x = start_w + (ix + 0.5) * bin_w / grid_w - val += bilinear_interpolate( - in_data[batch_idx, channel, :, :].flatten(), - in_data.size(-2), - in_data.size(-1), - y, x - ) + val += bilinear_interpolate(in_data[batch_idx, channel, :, :], y, x, snap_border=True) val /= grid_h * grid_w out_data[r, channel, i, j] = val return out_data + def test_boxes_shape(self): + self._helper_boxes_shape(ops.roi_align) + + @pytest.mark.parametrize("aligned", (True, False)) + @pytest.mark.parametrize("device", cpu_and_cuda_and_mps()) + @pytest.mark.parametrize("x_dtype", (torch.float16, torch.float32, torch.float64)) # , ids=str) + @pytest.mark.parametrize("contiguous", (True, False)) + 
@pytest.mark.parametrize("deterministic", (True, False)) + @pytest.mark.opcheck_only_one() + def test_forward(self, device, contiguous, deterministic, aligned, x_dtype, rois_dtype=None): + if deterministic and device == "cpu": + pytest.skip("cpu is always deterministic, don't retest") + super().test_forward( + device=device, + contiguous=contiguous, + deterministic=deterministic, + x_dtype=x_dtype, + rois_dtype=rois_dtype, + aligned=aligned, + ) + + @needs_cuda + @pytest.mark.parametrize("aligned", (True, False)) + @pytest.mark.parametrize("deterministic", (True, False)) + @pytest.mark.parametrize("x_dtype", (torch.float, torch.half)) + @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half)) + @pytest.mark.opcheck_only_one() + def test_autocast(self, aligned, deterministic, x_dtype, rois_dtype): + with torch.cuda.amp.autocast(): + self.test_forward( + torch.device("cuda"), + contiguous=False, + deterministic=deterministic, + aligned=aligned, + x_dtype=x_dtype, + rois_dtype=rois_dtype, + ) + + @pytest.mark.skip(reason="1/5000 flaky failure") + @pytest.mark.parametrize("aligned", (True, False)) + @pytest.mark.parametrize("deterministic", (True, False)) + @pytest.mark.parametrize("x_dtype", (torch.float, torch.bfloat16)) + @pytest.mark.parametrize("rois_dtype", (torch.float, torch.bfloat16)) + def test_autocast_cpu(self, aligned, deterministic, x_dtype, rois_dtype): + with torch.cpu.amp.autocast(): + self.test_forward( + torch.device("cpu"), + contiguous=False, + deterministic=deterministic, + aligned=aligned, + x_dtype=x_dtype, + rois_dtype=rois_dtype, + ) + + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.parametrize("device", cpu_and_cuda_and_mps()) + @pytest.mark.parametrize("contiguous", (True, False)) + @pytest.mark.parametrize("deterministic", (True, False)) + @pytest.mark.opcheck_only_one() + def test_backward(self, seed, device, contiguous, deterministic): + if deterministic and device == "cpu": + pytest.skip("cpu is always 
deterministic, don't retest") + if deterministic and device == "mps": + pytest.skip("no deterministic implementation for mps") + if deterministic and not is_compile_supported(device): + pytest.skip("deterministic implementation only if torch.compile supported") + super().test_backward(seed, device, contiguous, deterministic) + + def _make_rois(self, img_size, num_imgs, dtype, num_rois=1000): + rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype) + rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,)) # set batch index + rois[:, 3:] += rois[:, 1:3] # make sure boxes aren't degenerate + return rois + + @pytest.mark.parametrize("aligned", (True, False)) + @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 10), (0.1, 50))) + @pytest.mark.parametrize("qdtype", (torch.qint8, torch.quint8, torch.qint32)) + @pytest.mark.opcheck_only_one() + def test_qroialign(self, aligned, scale, zero_point, qdtype): + """Make sure quantized version of RoIAlign is close to float version""" + pool_size = 5 + img_size = 10 + n_channels = 2 + num_imgs = 1 + dtype = torch.float + + x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype) + qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype) + + rois = self._make_rois(img_size, num_imgs, dtype) + qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype) + + x, rois = qx.dequantize(), qrois.dequantize() # we want to pass the same inputs + + y = ops.roi_align( + x, + rois, + output_size=pool_size, + spatial_scale=1, + sampling_ratio=-1, + aligned=aligned, + ) + qy = ops.roi_align( + qx, + qrois, + output_size=pool_size, + spatial_scale=1, + sampling_ratio=-1, + aligned=aligned, + ) + + # The output qy is itself a quantized tensor and there might have been a loss of info when it was + # quantized. 
For a fair comparison we need to quantize y as well + quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype) + + try: + # Ideally, we would assert this, which passes with (scale, zero) == (1, 0) + assert (qy == quantized_float_y).all() + except AssertionError: + # But because the computation aren't exactly the same between the 2 RoIAlign procedures, some + # rounding error may lead to a difference of 2 in the output. + # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44 + # but 45.00000001 will be rounded to 46. We make sure below that: + # - such discrepancies between qy and quantized_float_y are very rare (less then 5%) + # - any difference between qy and quantized_float_y is == scale + diff_idx = torch.where(qy != quantized_float_y) + num_diff = diff_idx[0].numel() + assert num_diff / qy.numel() < 0.05 + + abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize()) + t_scale = torch.full_like(abs_diff, fill_value=scale) + torch.testing.assert_close(abs_diff, t_scale, rtol=1e-5, atol=1e-5) + + def test_qroi_align_multiple_images(self): + dtype = torch.float + x = torch.randint(50, 100, size=(2, 3, 10, 10)).to(dtype) + qx = torch.quantize_per_tensor(x, scale=1, zero_point=0, dtype=torch.qint8) + rois = self._make_rois(img_size=10, num_imgs=2, dtype=dtype, num_rois=10) + qrois = torch.quantize_per_tensor(rois, scale=1, zero_point=0, dtype=torch.qint8) + with pytest.raises(RuntimeError, match="Only one image per batch is allowed"): + ops.roi_align(qx, qrois, output_size=5) + + def test_jit_boxes_list(self): + model = PoolWrapper(ops.RoIAlign(output_size=[3, 3], spatial_scale=1.0, sampling_ratio=-1)) + self._helper_jit_boxes_list(model) + + +class TestPSRoIAlign(RoIOpTester): + mps_backward_atol = 5e-2 -class PSRoIAlignTester(RoIOpTester, unittest.TestCase): def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): - return 
ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, - sampling_ratio=sampling_ratio)(x, rois) + return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio)(x, rois) + + def make_obj(self, pool_h=5, pool_w=5, spatial_scale=1, sampling_ratio=-1, wrap=False): + obj = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio) + return RoIOpTesterModuleWrapper(obj) if wrap else obj def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.ps_roi_align(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) + scriped = torch.jit.script(ops.ps_roi_align) + return lambda x: scriped(x, rois, pool_size) - def expected_fn(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, - sampling_ratio=-1, dtype=torch.float64): + def expected_fn( + self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, sampling_ratio=-1, dtype=torch.float64 + ): if device is None: device = torch.device("cpu") n_input_channels = in_data.size(1) - self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw") + assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw" n_output_channels = int(n_input_channels / (pool_h * pool_w)) out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device) @@ -298,24 +663,91 @@ def expected_fn(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, y = start_h + (iy + 0.5) * bin_h / grid_h for ix in range(0, grid_w): x = start_w + (ix + 0.5) * bin_w / grid_w - val += bilinear_interpolate( - in_data[batch_idx, c_in, :, :].flatten(), - in_data.size(-2), - in_data.size(-1), - y, x - ) + val += bilinear_interpolate(in_data[batch_idx, c_in, :, :], y, x, snap_border=True) val /= grid_h * grid_w out_data[r, c_out, i, j] 
= val return out_data - -class NMSTester(unittest.TestCase): - def reference_nms(self, boxes, scores, iou_threshold): + def test_boxes_shape(self): + self._helper_boxes_shape(ops.ps_roi_align) + + +@pytest.mark.parametrize( + "op", + ( + torch.ops.torchvision.roi_pool, + torch.ops.torchvision.ps_roi_pool, + torch.ops.torchvision.roi_align, + torch.ops.torchvision.ps_roi_align, + ), +) +@pytest.mark.parametrize("dtype", (torch.float16, torch.float32, torch.float64)) +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("requires_grad", (True, False)) +def test_roi_opcheck(op, dtype, device, requires_grad): + # This manually calls opcheck() on the roi ops. We do that instead of + # relying on opcheck.generate_opcheck_tests() as e.g. done for nms, because + # pytest and generate_opcheck_tests() don't interact very well when it comes + # to skipping tests - and these ops need to skip the MPS tests since MPS we + # don't support dynamic shapes yet for MPS. + rois = torch.tensor( + [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]], + dtype=dtype, + device=device, + requires_grad=requires_grad, + ) + pool_size = 5 + num_channels = 2 * (pool_size**2) + x = torch.rand(2, num_channels, 10, 10, dtype=dtype, device=device) + + kwargs = dict(rois=rois, spatial_scale=1, pooled_height=pool_size, pooled_width=pool_size) + if op in (torch.ops.torchvision.roi_align, torch.ops.torchvision.ps_roi_align): + kwargs["sampling_ratio"] = -1 + if op is torch.ops.torchvision.roi_align: + kwargs["aligned"] = True + + optests.opcheck(op, args=(x,), kwargs=kwargs) + + +class TestMultiScaleRoIAlign: + def make_obj(self, fmap_names=None, output_size=(7, 7), sampling_ratio=2, wrap=False): + if fmap_names is None: + fmap_names = ["0"] + obj = ops.poolers.MultiScaleRoIAlign(fmap_names, output_size, sampling_ratio) + return MultiScaleRoIAlignModuleWrapper(obj) if wrap else obj + + def test_msroialign_repr(self): + fmap_names = ["0"] + output_size = (7, 7) + 
sampling_ratio = 2 + # Pass mock feature map names + t = self.make_obj(fmap_names, output_size, sampling_ratio, wrap=False) + + # Check integrity of object __repr__ attribute + expected_string = ( + f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, " + f"sampling_ratio={sampling_ratio})" + ) + assert repr(t) == expected_string + + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_is_leaf_node(self, device): + op_obj = self.make_obj(wrap=True).to(device=device) + graph_node_names = get_graph_node_names(op_obj) + + assert len(graph_node_names) == 2 + assert len(graph_node_names[0]) == len(graph_node_names[1]) + assert len(graph_node_names[0]) == 1 + op_obj.n_inputs + + +class TestNMS: + def _reference_nms(self, boxes, scores, iou_threshold): """ Args: - box_scores (N, 5): boxes in corner-form and probabilities. - iou_threshold: intersection over union threshold. + boxes: boxes in corner-form + scores: probabilities + iou_threshold: intersection over union threshold Returns: picked: a list of indexes of the kept boxes """ @@ -339,33 +771,1207 @@ def _create_tensors_with_iou(self, N, iou_thresh): # let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1], # then, in order to satisfy ops.iou(b0, b1) == iou_thresh, # we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh + # Adjust the threshold upward a bit with the intent of creating + # at least one box that exceeds (barely) the threshold and so + # should be suppressed. 
boxes = torch.rand(N, 4) * 100 boxes[:, 2:] += boxes[:, :2] boxes[-1, :] = boxes[0, :] x0, y0, x1, y1 = boxes[-1].tolist() + iou_thresh += 1e-5 boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh scores = torch.rand(N) return boxes, scores - def test_nms(self): - err_msg = 'NMS incompatible between CPU and reference implementation for IoU={}' - for iou in [0.2, 0.5, 0.8]: - boxes, scores = self._create_tensors_with_iou(1000, iou) - keep_ref = self.reference_nms(boxes, scores, iou) - keep = ops.nms(boxes, scores, iou) - self.assertTrue(torch.allclose(keep, keep_ref), err_msg.format(iou)) + @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8)) + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.opcheck_only_one() + def test_nms_ref(self, iou, seed): + torch.random.manual_seed(seed) + err_msg = "NMS incompatible between CPU and reference implementation for IoU={}" + boxes, scores = self._create_tensors_with_iou(1000, iou) + keep_ref = self._reference_nms(boxes, scores, iou) + keep = ops.nms(boxes, scores, iou) + torch.testing.assert_close(keep, keep_ref, msg=err_msg.format(iou)) + + def test_nms_input_errors(self): + with pytest.raises(RuntimeError): + ops.nms(torch.rand(4), torch.rand(3), 0.5) + with pytest.raises(RuntimeError): + ops.nms(torch.rand(3, 5), torch.rand(3), 0.5) + with pytest.raises(RuntimeError): + ops.nms(torch.rand(3, 4), torch.rand(3, 2), 0.5) + with pytest.raises(RuntimeError): + ops.nms(torch.rand(3, 4), torch.rand(4), 0.5) + + @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8)) + @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 50), (3, 10))) + @pytest.mark.opcheck_only_one() + def test_qnms(self, iou, scale, zero_point): + # Note: we compare qnms vs nms instead of qnms vs reference implementation. 
+ # This is because with the int conversion, the trick used in _create_tensors_with_iou + # doesn't really work (in fact, nms vs reference implem will also fail with ints) + err_msg = "NMS and QNMS give different results for IoU={}" + boxes, scores = self._create_tensors_with_iou(1000, iou) + scores *= 100 # otherwise most scores would be 0 or 1 after int conversion + + qboxes = torch.quantize_per_tensor(boxes, scale=scale, zero_point=zero_point, dtype=torch.quint8) + qscores = torch.quantize_per_tensor(scores, scale=scale, zero_point=zero_point, dtype=torch.quint8) + + boxes = qboxes.dequantize() + scores = qscores.dequantize() + + keep = ops.nms(boxes, scores, iou) + qkeep = ops.nms(qboxes, qscores, iou) + + torch.testing.assert_close(qkeep, keep, msg=err_msg.format(iou)) + + @pytest.mark.parametrize( + "device", + ( + pytest.param("cuda", marks=pytest.mark.needs_cuda), + pytest.param("mps", marks=pytest.mark.needs_mps), + ), + ) + @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8)) + @pytest.mark.opcheck_only_one() + def test_nms_gpu(self, iou, device, dtype=torch.float64): + dtype = torch.float32 if device == "mps" else dtype + tol = 1e-3 if dtype is torch.half else 1e-5 + err_msg = "NMS incompatible between CPU and CUDA for IoU={}" + + boxes, scores = self._create_tensors_with_iou(1000, iou) + r_cpu = ops.nms(boxes, scores, iou) + r_gpu = ops.nms(boxes.to(device), scores.to(device), iou) + + is_eq = torch.allclose(r_cpu, r_gpu.cpu()) + if not is_eq: + # if the indices are not the same, ensure that it's because the scores + # are duplicate + is_eq = torch.allclose(scores[r_cpu], scores[r_gpu.cpu()], rtol=tol, atol=tol) + assert is_eq, err_msg.format(iou) + + @needs_cuda + @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8)) + @pytest.mark.parametrize("dtype", (torch.float, torch.half)) + @pytest.mark.opcheck_only_one() + def test_autocast(self, iou, dtype): + with torch.cuda.amp.autocast(): + self.test_nms_gpu(iou=iou, dtype=dtype, device="cuda") + + 
@pytest.mark.parametrize("iou", (0.2, 0.5, 0.8)) + @pytest.mark.parametrize("dtype", (torch.float, torch.bfloat16)) + def test_autocast_cpu(self, iou, dtype): + boxes, scores = self._create_tensors_with_iou(1000, iou) + with torch.cpu.amp.autocast(): + keep_ref_float = ops.nms(boxes.to(dtype).float(), scores.to(dtype).float(), iou) + keep_dtype = ops.nms(boxes.to(dtype), scores.to(dtype), iou) + torch.testing.assert_close(keep_ref_float, keep_dtype) + + @pytest.mark.parametrize( + "device", + ( + pytest.param("cuda", marks=pytest.mark.needs_cuda), + pytest.param("mps", marks=pytest.mark.needs_mps), + ), + ) + @pytest.mark.opcheck_only_one() + def test_nms_float16(self, device): + boxes = torch.tensor( + [ + [285.3538, 185.5758, 1193.5110, 851.4551], + [285.1472, 188.7374, 1192.4984, 851.0669], + [279.2440, 197.9812, 1189.4746, 849.2019], + ] + ).to(device) + scores = torch.tensor([0.6370, 0.7569, 0.3966]).to(device) + + iou_thres = 0.2 + keep32 = ops.nms(boxes, scores, iou_thres) + keep16 = ops.nms(boxes.to(torch.float16), scores.to(torch.float16), iou_thres) + assert_equal(keep32, keep16) + + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.opcheck_only_one() + def test_batched_nms_implementations(self, seed): + """Make sure that both implementations of batched_nms yield identical results""" + torch.random.manual_seed(seed) + + num_boxes = 1000 + iou_threshold = 0.9 + + boxes = torch.cat((torch.rand(num_boxes, 2), torch.rand(num_boxes, 2) + 10), dim=1) + assert max(boxes[:, 0]) < min(boxes[:, 2]) # x1 < x2 + assert max(boxes[:, 1]) < min(boxes[:, 3]) # y1 < y2 + + scores = torch.rand(num_boxes) + idxs = torch.randint(0, 4, size=(num_boxes,)) + keep_vanilla = ops.boxes._batched_nms_vanilla(boxes, scores, idxs, iou_threshold) + keep_trick = ops.boxes._batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold) + + torch.testing.assert_close( + keep_vanilla, keep_trick, msg="The vanilla and the trick implementation yield different nms outputs." 
+ ) + + # Also make sure an empty tensor is returned if boxes is empty + empty = torch.empty((0,), dtype=torch.int64) + torch.testing.assert_close(empty, ops.batched_nms(empty, None, None, None)) + + +optests.generate_opcheck_tests( + testcase=TestNMS, + namespaces=["torchvision"], + failures_dict_path=os.path.join(os.path.dirname(__file__), "optests_failures_dict.json"), + additional_decorators=[], + test_utils=OPTESTS, +) + + +class TestDeformConv: + dtype = torch.float64 + + def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1): + stride_h, stride_w = _pair(stride) + pad_h, pad_w = _pair(padding) + dil_h, dil_w = _pair(dilation) + weight_h, weight_w = weight.shape[-2:] + + n_batches, n_in_channels, in_h, in_w = x.shape + n_out_channels = weight.shape[0] + + out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1 + out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1 + + n_offset_grps = offset.shape[1] // (2 * weight_h * weight_w) + in_c_per_offset_grp = n_in_channels // n_offset_grps + + n_weight_grps = n_in_channels // weight.shape[1] + in_c_per_weight_grp = weight.shape[1] + out_c_per_weight_grp = n_out_channels // n_weight_grps + + out = torch.zeros(n_batches, n_out_channels, out_h, out_w, device=x.device, dtype=x.dtype) + for b in range(n_batches): + for c_out in range(n_out_channels): + for i in range(out_h): + for j in range(out_w): + for di in range(weight_h): + for dj in range(weight_w): + for c in range(in_c_per_weight_grp): + weight_grp = c_out // out_c_per_weight_grp + c_in = weight_grp * in_c_per_weight_grp + c + + offset_grp = c_in // in_c_per_offset_grp + mask_idx = offset_grp * (weight_h * weight_w) + di * weight_w + dj + offset_idx = 2 * mask_idx + + pi = stride_h * i - pad_h + dil_h * di + offset[b, offset_idx, i, j] + pj = stride_w * j - pad_w + dil_w * dj + offset[b, offset_idx + 1, i, j] + + mask_value = 1.0 + if mask is not None: + mask_value = mask[b, mask_idx, i, j] + + 
out[b, c_out, i, j] += ( + mask_value + * weight[c_out, c, di, dj] + * bilinear_interpolate(x[b, c_in, :, :], pi, pj) + ) + out += bias.view(1, n_out_channels, 1, 1) + return out + + @lru_cache(maxsize=None) + def get_fn_args(self, device, contiguous, batch_sz, dtype): + n_in_channels = 6 + n_out_channels = 2 + n_weight_grps = 2 + n_offset_grps = 3 + + stride = (2, 1) + pad = (1, 0) + dilation = (2, 1) + + stride_h, stride_w = stride + pad_h, pad_w = pad + dil_h, dil_w = dilation + weight_h, weight_w = (3, 2) + in_h, in_w = (5, 4) + + out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1 + out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1 + + x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=dtype, requires_grad=True) + + offset = torch.randn( + batch_sz, + n_offset_grps * 2 * weight_h * weight_w, + out_h, + out_w, + device=device, + dtype=dtype, + requires_grad=True, + ) + + mask = torch.randn( + batch_sz, n_offset_grps * weight_h * weight_w, out_h, out_w, device=device, dtype=dtype, requires_grad=True + ) + + weight = torch.randn( + n_out_channels, + n_in_channels // n_weight_grps, + weight_h, + weight_w, + device=device, + dtype=dtype, + requires_grad=True, + ) + + bias = torch.randn(n_out_channels, device=device, dtype=dtype, requires_grad=True) - @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_nms_cuda(self): - err_msg = 'NMS incompatible between CPU and CUDA for IoU={}' - - for iou in [0.2, 0.5, 0.8]: - boxes, scores = self._create_tensors_with_iou(1000, iou) - r_cpu = ops.nms(boxes, scores, iou) - r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou) + if not contiguous: + x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2) + offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + weight = weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0) + + return x, weight, offset, 
mask, bias, stride, pad, dilation + + def make_obj(self, in_channels=6, out_channels=2, kernel_size=(3, 2), groups=2, wrap=False): + obj = ops.DeformConv2d( + in_channels, out_channels, kernel_size, stride=(2, 1), padding=(1, 0), dilation=(2, 1), groups=groups + ) + return DeformConvModuleWrapper(obj) if wrap else obj + + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_is_leaf_node(self, device): + op_obj = self.make_obj(wrap=True).to(device=device) + graph_node_names = get_graph_node_names(op_obj) + + assert len(graph_node_names) == 2 + assert len(graph_node_names[0]) == len(graph_node_names[1]) + assert len(graph_node_names[0]) == 1 + op_obj.n_inputs + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("contiguous", (True, False)) + @pytest.mark.parametrize("batch_sz", (0, 33)) + @pytest.mark.opcheck_only_one() + def test_forward(self, device, contiguous, batch_sz, dtype=None): + dtype = dtype or self.dtype + x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype) + in_channels = 6 + out_channels = 2 + kernel_size = (3, 2) + groups = 2 + tol = 2e-3 if dtype is torch.half else 1e-5 + + layer = self.make_obj(in_channels, out_channels, kernel_size, groups, wrap=False).to( + device=x.device, dtype=dtype + ) + res = layer(x, offset, mask) + + weight = layer.weight.data + bias = layer.bias.data + expected = self.expected_fn(x, weight, offset, mask, bias, stride=stride, padding=padding, dilation=dilation) + + torch.testing.assert_close( + res.to(expected), expected, rtol=tol, atol=tol, msg=f"\nres:\n{res}\nexpected:\n{expected}" + ) + + # no modulation test + res = layer(x, offset) + expected = self.expected_fn(x, weight, offset, None, bias, stride=stride, padding=padding, dilation=dilation) + + torch.testing.assert_close( + res.to(expected), expected, rtol=tol, atol=tol, msg=f"\nres:\n{res}\nexpected:\n{expected}" + ) + + def test_wrong_sizes(self): + in_channels = 6 + 
out_channels = 2 + kernel_size = (3, 2) + groups = 2 + x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args( + "cpu", contiguous=True, batch_sz=10, dtype=self.dtype + ) + layer = ops.DeformConv2d( + in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups + ) + with pytest.raises(RuntimeError, match="the shape of the offset"): + wrong_offset = torch.rand_like(offset[:, :2]) + layer(x, wrong_offset) + + with pytest.raises(RuntimeError, match=r"mask.shape\[1\] is not valid"): + wrong_mask = torch.rand_like(mask[:, :2]) + layer(x, offset, wrong_mask) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("contiguous", (True, False)) + @pytest.mark.parametrize("batch_sz", (0, 33)) + @pytest.mark.opcheck_only_one() + def test_backward(self, device, contiguous, batch_sz): + x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args( + device, contiguous, batch_sz, self.dtype + ) + + def func(x_, offset_, mask_, weight_, bias_): + return ops.deform_conv2d( + x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=mask_ + ) + + gradcheck(func, (x, offset, mask, weight, bias), nondet_tol=1e-5, fast_mode=True) + + def func_no_mask(x_, offset_, weight_, bias_): + return ops.deform_conv2d( + x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=None + ) + + gradcheck(func_no_mask, (x, offset, weight, bias), nondet_tol=1e-5, fast_mode=True) - self.assertTrue(torch.allclose(r_cpu, r_cuda.cpu()), err_msg.format(iou)) + @torch.jit.script + def script_func(x_, offset_, mask_, weight_, bias_, stride_, pad_, dilation_): + # type:(Tensor, Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor + return ops.deform_conv2d( + x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=mask_ + ) + + gradcheck( + lambda z, off, msk, wei, bi: script_func(z, off, msk, 
wei, bi, stride, padding, dilation), + (x, offset, mask, weight, bias), + nondet_tol=1e-5, + fast_mode=True, + ) + @torch.jit.script + def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_): + # type:(Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor + return ops.deform_conv2d( + x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=None + ) + + gradcheck( + lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation), + (x, offset, weight, bias), + nondet_tol=1e-5, + fast_mode=True, + ) + + @needs_cuda + @pytest.mark.parametrize("contiguous", (True, False)) + @pytest.mark.opcheck_only_one() + def test_compare_cpu_cuda_grads(self, contiguous): + # Test from https://github.com/pytorch/vision/issues/2598 + # Run on CUDA only + + # compare grads computed on CUDA with grads computed on CPU + true_cpu_grads = None + + init_weight = torch.randn(9, 9, 3, 3, requires_grad=True) + img = torch.randn(8, 9, 1000, 110) + offset = torch.rand(8, 2 * 3 * 3, 1000, 110) + mask = torch.rand(8, 3 * 3, 1000, 110) -if __name__ == '__main__': - unittest.main() + if not contiguous: + img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2) + offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0) + else: + weight = init_weight + + for d in ["cpu", "cuda"]: + out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d)) + out.mean().backward() + if true_cpu_grads is None: + true_cpu_grads = init_weight.grad + assert true_cpu_grads is not None + else: + assert init_weight.grad is not None + res_grads = init_weight.grad.to("cpu") + torch.testing.assert_close(true_cpu_grads, res_grads) + + @needs_cuda + @pytest.mark.parametrize("batch_sz", (0, 33)) + @pytest.mark.parametrize("dtype", 
(torch.float, torch.half)) + @pytest.mark.opcheck_only_one() + def test_autocast(self, batch_sz, dtype): + with torch.cuda.amp.autocast(): + self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype) + + def test_forward_scriptability(self): + # Non-regression test for https://github.com/pytorch/vision/issues/4078 + torch.jit.script(ops.DeformConv2d(in_channels=8, out_channels=8, kernel_size=3)) + + +optests.generate_opcheck_tests( + testcase=TestDeformConv, + namespaces=["torchvision"], + failures_dict_path=os.path.join(os.path.dirname(__file__), "optests_failures_dict.json"), + additional_decorators=[], + test_utils=OPTESTS, +) + + +class TestFrozenBNT: + def test_frozenbatchnorm2d_repr(self): + num_features = 32 + eps = 1e-5 + t = ops.misc.FrozenBatchNorm2d(num_features, eps=eps) + + # Check integrity of object __repr__ attribute + expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})" + assert repr(t) == expected_string + + @pytest.mark.parametrize("seed", range(10)) + def test_frozenbatchnorm2d_eps(self, seed): + torch.random.manual_seed(seed) + sample_size = (4, 32, 28, 28) + x = torch.rand(sample_size) + state_dict = dict( + weight=torch.rand(sample_size[1]), + bias=torch.rand(sample_size[1]), + running_mean=torch.rand(sample_size[1]), + running_var=torch.rand(sample_size[1]), + num_batches_tracked=torch.tensor(100), + ) + + # Check that default eps is equal to the one of BN + fbn = ops.misc.FrozenBatchNorm2d(sample_size[1]) + fbn.load_state_dict(state_dict, strict=False) + bn = torch.nn.BatchNorm2d(sample_size[1]).eval() + bn.load_state_dict(state_dict) + # Difference is expected to fall in an acceptable range + torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6) + + # Check computation for eps > 0 + fbn = ops.misc.FrozenBatchNorm2d(sample_size[1], eps=1e-5) + fbn.load_state_dict(state_dict, strict=False) + bn = torch.nn.BatchNorm2d(sample_size[1], eps=1e-5).eval() + bn.load_state_dict(state_dict) + 
torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6) + + +class TestBoxConversionToRoi: + def _get_box_sequences(): + # Define here the argument type of `boxes` supported by region pooling operations + box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float) + box_list = [ + torch.tensor([[0, 0, 100, 100]], dtype=torch.float), + torch.tensor([[0, 0, 100, 100]], dtype=torch.float), + ] + box_tuple = tuple(box_list) + return box_tensor, box_list, box_tuple + + @pytest.mark.parametrize("box_sequence", _get_box_sequences()) + def test_check_roi_boxes_shape(self, box_sequence): + # Ensure common sequences of tensors are supported + ops._utils.check_roi_boxes_shape(box_sequence) + + @pytest.mark.parametrize("box_sequence", _get_box_sequences()) + def test_convert_boxes_to_roi_format(self, box_sequence): + # Ensure common sequences of tensors yield the same result + ref_tensor = None + if ref_tensor is None: + ref_tensor = box_sequence + else: + assert_equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence)) + + +class TestBoxConvert: + def test_bbox_same(self): + box_tensor = torch.tensor( + [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float + ) + + exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + + assert exp_xyxy.size() == torch.Size([4, 4]) + assert_equal(ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy"), exp_xyxy) + assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy) + assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy) + + def test_bbox_xyxy_xywh(self): + # Simple test convert boxes to xywh and back. Make sure they are same. + # box_tensor is in x1 y1 x2 y2 format. 
+ box_tensor = torch.tensor( + [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float + ) + exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float) + + assert exp_xywh.size() == torch.Size([4, 4]) + box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh") + assert_equal(box_xywh, exp_xywh) + + # Reverse conversion + box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy") + assert_equal(box_xyxy, box_tensor) + + def test_bbox_xyxy_cxcywh(self): + # Simple test convert boxes to cxcywh and back. Make sure they are same. + # box_tensor is in x1 y1 x2 y2 format. + box_tensor = torch.tensor( + [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float + ) + exp_cxcywh = torch.tensor( + [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float + ) + + assert exp_cxcywh.size() == torch.Size([4, 4]) + box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh") + assert_equal(box_cxcywh, exp_cxcywh) + + # Reverse conversion + box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy") + assert_equal(box_xyxy, box_tensor) + + def test_bbox_xywh_cxcywh(self): + box_tensor = torch.tensor( + [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float + ) + + exp_cxcywh = torch.tensor( + [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float + ) + + assert exp_cxcywh.size() == torch.Size([4, 4]) + box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh") + assert_equal(box_cxcywh, exp_cxcywh) + + # Reverse conversion + box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh") + assert_equal(box_xywh, box_tensor) + + @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh"]) + @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy"]) + def test_bbox_invalid(self, inv_infmt, inv_outfmt): + 
box_tensor = torch.tensor( + [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float + ) + + with pytest.raises(ValueError): + ops.box_convert(box_tensor, inv_infmt, inv_outfmt) + + def test_bbox_convert_jit(self): + box_tensor = torch.tensor( + [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float + ) + + scripted_fn = torch.jit.script(ops.box_convert) + + box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh") + scripted_xywh = scripted_fn(box_tensor, "xyxy", "xywh") + torch.testing.assert_close(scripted_xywh, box_xywh) + + box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh") + scripted_cxcywh = scripted_fn(box_tensor, "xyxy", "cxcywh") + torch.testing.assert_close(scripted_cxcywh, box_cxcywh) + + +class TestBoxArea: + def area_check(self, box, expected, atol=1e-4): + out = ops.box_area(box) + torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=atol) + + @pytest.mark.parametrize("dtype", [torch.int8, torch.int16, torch.int32, torch.int64]) + def test_int_boxes(self, dtype): + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=dtype) + expected = torch.tensor([10000, 0], dtype=torch.int32) + self.area_check(box_tensor, expected) + + @pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) + def test_float_boxes(self, dtype): + box_tensor = torch.tensor(FLOAT_BOXES, dtype=dtype) + expected = torch.tensor([604723.0806, 600965.4666, 592761.0085], dtype=dtype) + self.area_check(box_tensor, expected) + + def test_float16_box(self): + box_tensor = torch.tensor( + [[2.825, 1.8625, 3.90, 4.85], [2.825, 4.875, 19.20, 5.10], [2.925, 1.80, 8.90, 4.90]], dtype=torch.float16 + ) + + expected = torch.tensor([3.2170, 3.7108, 18.5071], dtype=torch.float16) + self.area_check(box_tensor, expected, atol=0.01) + + def test_box_area_jit(self): + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=torch.float) + expected = 
ops.box_area(box_tensor) + scripted_fn = torch.jit.script(ops.box_area) + scripted_area = scripted_fn(box_tensor) + torch.testing.assert_close(scripted_area, expected) + + +INT_BOXES = [[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300], [0, 0, 25, 25]] +INT_BOXES2 = [[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]] +FLOAT_BOXES = [ + [285.3538, 185.5758, 1193.5110, 851.4551], + [285.1472, 188.7374, 1192.4984, 851.0669], + [279.2440, 197.9812, 1189.4746, 849.2019], +] + + +def gen_box(size, dtype=torch.float): + xy1 = torch.rand((size, 2), dtype=dtype) + xy2 = xy1 + torch.rand((size, 2), dtype=dtype) + return torch.cat([xy1, xy2], axis=-1) + + +class TestIouBase: + @staticmethod + def _run_test(target_fn: Callable, actual_box1, actual_box2, dtypes, atol, expected): + for dtype in dtypes: + actual_box1 = torch.tensor(actual_box1, dtype=dtype) + actual_box2 = torch.tensor(actual_box2, dtype=dtype) + expected_box = torch.tensor(expected) + out = target_fn(actual_box1, actual_box2) + torch.testing.assert_close(out, expected_box, rtol=0.0, check_dtype=False, atol=atol) + + @staticmethod + def _run_jit_test(target_fn: Callable, actual_box: List): + box_tensor = torch.tensor(actual_box, dtype=torch.float) + expected = target_fn(box_tensor, box_tensor) + scripted_fn = torch.jit.script(target_fn) + scripted_out = scripted_fn(box_tensor, box_tensor) + torch.testing.assert_close(scripted_out, expected) + + @staticmethod + def _cartesian_product(boxes1, boxes2, target_fn: Callable): + N = boxes1.size(0) + M = boxes2.size(0) + result = torch.zeros((N, M)) + for i in range(N): + for j in range(M): + result[i, j] = target_fn(boxes1[i].unsqueeze(0), boxes2[j].unsqueeze(0)) + return result + + @staticmethod + def _run_cartesian_test(target_fn: Callable): + boxes1 = gen_box(5) + boxes2 = gen_box(7) + a = TestIouBase._cartesian_product(boxes1, boxes2, target_fn) + b = target_fn(boxes1, boxes2) + torch.testing.assert_close(a, b) + + +class TestBoxIou(TestIouBase): + 
int_expected = [[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0], [0.0625, 0.25, 0.0]] + float_expected = [[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]] + + @pytest.mark.parametrize( + "actual_box1, actual_box2, dtypes, atol, expected", + [ + pytest.param(INT_BOXES, INT_BOXES2, [torch.int16, torch.int32, torch.int64], 1e-4, int_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float16], 0.002, float_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float32, torch.float64], 1e-3, float_expected), + ], + ) + def test_iou(self, actual_box1, actual_box2, dtypes, atol, expected): + self._run_test(ops.box_iou, actual_box1, actual_box2, dtypes, atol, expected) + + def test_iou_jit(self): + self._run_jit_test(ops.box_iou, INT_BOXES) + + def test_iou_cartesian(self): + self._run_cartesian_test(ops.box_iou) + + +class TestGeneralizedBoxIou(TestIouBase): + int_expected = [[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0], [0.0625, 0.25, -0.8819]] + float_expected = [[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]] + + @pytest.mark.parametrize( + "actual_box1, actual_box2, dtypes, atol, expected", + [ + pytest.param(INT_BOXES, INT_BOXES2, [torch.int16, torch.int32, torch.int64], 1e-4, int_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float16], 0.002, float_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float32, torch.float64], 1e-3, float_expected), + ], + ) + def test_iou(self, actual_box1, actual_box2, dtypes, atol, expected): + self._run_test(ops.generalized_box_iou, actual_box1, actual_box2, dtypes, atol, expected) + + def test_iou_jit(self): + self._run_jit_test(ops.generalized_box_iou, INT_BOXES) + + def test_iou_cartesian(self): + self._run_cartesian_test(ops.generalized_box_iou) + + +class TestDistanceBoxIoU(TestIouBase): + int_expected = [ + [1.0000, 0.1875, -0.4444], + [0.1875, 1.0000, -0.5625], + [-0.4444, -0.5625, 1.0000], + [-0.0781, 0.1875, -0.6267], + ] + 
float_expected = [[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]] + + @pytest.mark.parametrize( + "actual_box1, actual_box2, dtypes, atol, expected", + [ + pytest.param(INT_BOXES, INT_BOXES2, [torch.int16, torch.int32, torch.int64], 1e-4, int_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float16], 0.002, float_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float32, torch.float64], 1e-3, float_expected), + ], + ) + def test_iou(self, actual_box1, actual_box2, dtypes, atol, expected): + self._run_test(ops.distance_box_iou, actual_box1, actual_box2, dtypes, atol, expected) + + def test_iou_jit(self): + self._run_jit_test(ops.distance_box_iou, INT_BOXES) + + def test_iou_cartesian(self): + self._run_cartesian_test(ops.distance_box_iou) + + +class TestCompleteBoxIou(TestIouBase): + int_expected = [ + [1.0000, 0.1875, -0.4444], + [0.1875, 1.0000, -0.5625], + [-0.4444, -0.5625, 1.0000], + [-0.0781, 0.1875, -0.6267], + ] + float_expected = [[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]] + + @pytest.mark.parametrize( + "actual_box1, actual_box2, dtypes, atol, expected", + [ + pytest.param(INT_BOXES, INT_BOXES2, [torch.int16, torch.int32, torch.int64], 1e-4, int_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float16], 0.002, float_expected), + pytest.param(FLOAT_BOXES, FLOAT_BOXES, [torch.float32, torch.float64], 1e-3, float_expected), + ], + ) + def test_iou(self, actual_box1, actual_box2, dtypes, atol, expected): + self._run_test(ops.complete_box_iou, actual_box1, actual_box2, dtypes, atol, expected) + + def test_iou_jit(self): + self._run_jit_test(ops.complete_box_iou, INT_BOXES) + + def test_iou_cartesian(self): + self._run_cartesian_test(ops.complete_box_iou) + + +def get_boxes(dtype, device): + box1 = torch.tensor([-1, -1, 1, 1], dtype=dtype, device=device) + box2 = torch.tensor([0, 0, 1, 1], dtype=dtype, device=device) + box3 = torch.tensor([0, 1, 1, 2], dtype=dtype, device=device) + box4 
= torch.tensor([1, 1, 2, 2], dtype=dtype, device=device) + + box1s = torch.stack([box2, box2], dim=0) + box2s = torch.stack([box3, box4], dim=0) + + return box1, box2, box3, box4, box1s, box2s + + +def assert_iou_loss(iou_fn, box1, box2, expected_loss, device, reduction="none"): + computed_loss = iou_fn(box1, box2, reduction=reduction) + expected_loss = torch.tensor(expected_loss, device=device) + torch.testing.assert_close(computed_loss, expected_loss) + + +def assert_empty_loss(iou_fn, dtype, device): + box1 = torch.randn([0, 4], dtype=dtype, device=device).requires_grad_() + box2 = torch.randn([0, 4], dtype=dtype, device=device).requires_grad_() + loss = iou_fn(box1, box2, reduction="mean") + loss.backward() + torch.testing.assert_close(loss, torch.tensor(0.0, device=device)) + assert box1.grad is not None, "box1.grad should not be None after backward is called" + assert box2.grad is not None, "box2.grad should not be None after backward is called" + loss = iou_fn(box1, box2, reduction="none") + assert loss.numel() == 0, f"{str(iou_fn)} for two empty box should be empty" + + +class TestGeneralizedBoxIouLoss: + # We refer to original test: https://github.com/facebookresearch/fvcore/blob/main/tests/test_giou_loss.py + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + def test_giou_loss(self, dtype, device): + box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device) + + # Identical boxes should have loss of 0 + assert_iou_loss(ops.generalized_box_iou_loss, box1, box1, 0.0, device=device) + + # quarter size box inside other box = IoU of 0.25 + assert_iou_loss(ops.generalized_box_iou_loss, box1, box2, 0.75, device=device) + + # Two side by side boxes, area=union + # IoU=0 and GIoU=0 (loss 1.0) + assert_iou_loss(ops.generalized_box_iou_loss, box2, box3, 1.0, device=device) + + # Two diagonally adjacent boxes, area=2*union + # IoU=0 and GIoU=-0.5 (loss 1.5) + 
assert_iou_loss(ops.generalized_box_iou_loss, box2, box4, 1.5, device=device) + + # Test batched loss and reductions + assert_iou_loss(ops.generalized_box_iou_loss, box1s, box2s, 2.5, device=device, reduction="sum") + assert_iou_loss(ops.generalized_box_iou_loss, box1s, box2s, 1.25, device=device, reduction="mean") + + # Test reduction value + # reduction value other than ["none", "mean", "sum"] should raise a ValueError + with pytest.raises(ValueError, match="Invalid"): + ops.generalized_box_iou_loss(box1s, box2s, reduction="xyz") + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + def test_empty_inputs(self, dtype, device): + assert_empty_loss(ops.generalized_box_iou_loss, dtype, device) + + +class TestCompleteBoxIouLoss: + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_ciou_loss(self, dtype, device): + box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device) + + assert_iou_loss(ops.complete_box_iou_loss, box1, box1, 0.0, device=device) + assert_iou_loss(ops.complete_box_iou_loss, box1, box2, 0.8125, device=device) + assert_iou_loss(ops.complete_box_iou_loss, box1, box3, 1.1923, device=device) + assert_iou_loss(ops.complete_box_iou_loss, box1, box4, 1.2500, device=device) + assert_iou_loss(ops.complete_box_iou_loss, box1s, box2s, 1.2250, device=device, reduction="mean") + assert_iou_loss(ops.complete_box_iou_loss, box1s, box2s, 2.4500, device=device, reduction="sum") + + with pytest.raises(ValueError, match="Invalid"): + ops.complete_box_iou_loss(box1s, box2s, reduction="xyz") + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + def test_empty_inputs(self, dtype, device): + assert_empty_loss(ops.complete_box_iou_loss, dtype, device) + + +class TestDistanceBoxIouLoss: + @pytest.mark.parametrize("device", cpu_and_cuda()) + 
@pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + def test_distance_iou_loss(self, dtype, device): + box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device) + + assert_iou_loss(ops.distance_box_iou_loss, box1, box1, 0.0, device=device) + assert_iou_loss(ops.distance_box_iou_loss, box1, box2, 0.8125, device=device) + assert_iou_loss(ops.distance_box_iou_loss, box1, box3, 1.1923, device=device) + assert_iou_loss(ops.distance_box_iou_loss, box1, box4, 1.2500, device=device) + assert_iou_loss(ops.distance_box_iou_loss, box1s, box2s, 1.2250, device=device, reduction="mean") + assert_iou_loss(ops.distance_box_iou_loss, box1s, box2s, 2.4500, device=device, reduction="sum") + + with pytest.raises(ValueError, match="Invalid"): + ops.distance_box_iou_loss(box1s, box2s, reduction="xyz") + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + def test_empty_distance_iou_inputs(self, dtype, device): + assert_empty_loss(ops.distance_box_iou_loss, dtype, device) + + +class TestFocalLoss: + def _generate_diverse_input_target_pair(self, shape=(5, 2), **kwargs): + def logit(p): + return torch.log(p / (1 - p)) + + def generate_tensor_with_range_type(shape, range_type, **kwargs): + if range_type != "random_binary": + low, high = { + "small": (0.0, 0.2), + "big": (0.8, 1.0), + "zeros": (0.0, 0.0), + "ones": (1.0, 1.0), + "random": (0.0, 1.0), + }[range_type] + return torch.testing.make_tensor(shape, low=low, high=high, **kwargs) + else: + return torch.randint(0, 2, shape, **kwargs) + + # This function will return inputs and targets with shape: (shape[0]*9, shape[1]) + inputs = [] + targets = [] + for input_range_type, target_range_type in [ + ("small", "zeros"), + ("small", "ones"), + ("small", "random_binary"), + ("big", "zeros"), + ("big", "ones"), + ("big", "random_binary"), + ("random", "zeros"), + ("random", "ones"), + ("random", "random_binary"), + ]: + 
inputs.append(logit(generate_tensor_with_range_type(shape, input_range_type, **kwargs))) + targets.append(generate_tensor_with_range_type(shape, target_range_type, **kwargs)) + + return torch.cat(inputs), torch.cat(targets) + + @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0]) + @pytest.mark.parametrize("gamma", [0, 2]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + @pytest.mark.parametrize("seed", [0, 1]) + def test_correct_ratio(self, alpha, gamma, device, dtype, seed): + if device == "cpu" and dtype is torch.half: + pytest.skip("Currently torch.half is not fully supported on cpu") + # For testing the ratio with manual calculation, we require the reduction to be "none" + reduction = "none" + torch.random.manual_seed(seed) + inputs, targets = self._generate_diverse_input_target_pair(dtype=dtype, device=device) + focal_loss = ops.sigmoid_focal_loss(inputs, targets, gamma=gamma, alpha=alpha, reduction=reduction) + ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction=reduction) + + assert torch.all( + focal_loss <= ce_loss + ), "focal loss must be less or equal to cross entropy loss with same input" + + loss_ratio = (focal_loss / ce_loss).squeeze() + prob = torch.sigmoid(inputs) + p_t = prob * targets + (1 - prob) * (1 - targets) + correct_ratio = (1.0 - p_t) ** gamma + if alpha >= 0: + alpha_t = alpha * targets + (1 - alpha) * (1 - targets) + correct_ratio = correct_ratio * alpha_t + + tol = 1e-3 if dtype is torch.half else 1e-5 + torch.testing.assert_close(correct_ratio, loss_ratio, atol=tol, rtol=tol) + + @pytest.mark.parametrize("reduction", ["mean", "sum"]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + @pytest.mark.parametrize("seed", [2, 3]) + def test_equal_ce_loss(self, reduction, device, dtype, seed): + if device == "cpu" and dtype is torch.half: + pytest.skip("Currently torch.half is 
not fully supported on cpu") + # focal loss should be equal ce_loss if alpha=-1 and gamma=0 + alpha = -1 + gamma = 0 + torch.random.manual_seed(seed) + inputs, targets = self._generate_diverse_input_target_pair(dtype=dtype, device=device) + inputs_fl = inputs.clone().requires_grad_() + targets_fl = targets.clone() + inputs_ce = inputs.clone().requires_grad_() + targets_ce = targets.clone() + focal_loss = ops.sigmoid_focal_loss(inputs_fl, targets_fl, gamma=gamma, alpha=alpha, reduction=reduction) + ce_loss = F.binary_cross_entropy_with_logits(inputs_ce, targets_ce, reduction=reduction) + + torch.testing.assert_close(focal_loss, ce_loss) + + focal_loss.backward() + ce_loss.backward() + torch.testing.assert_close(inputs_fl.grad, inputs_ce.grad) + + @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0]) + @pytest.mark.parametrize("gamma", [0, 2]) + @pytest.mark.parametrize("reduction", ["none", "mean", "sum"]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + @pytest.mark.parametrize("seed", [4, 5]) + def test_jit(self, alpha, gamma, reduction, device, dtype, seed): + if device == "cpu" and dtype is torch.half: + pytest.skip("Currently torch.half is not fully supported on cpu") + script_fn = torch.jit.script(ops.sigmoid_focal_loss) + torch.random.manual_seed(seed) + inputs, targets = self._generate_diverse_input_target_pair(dtype=dtype, device=device) + focal_loss = ops.sigmoid_focal_loss(inputs, targets, gamma=gamma, alpha=alpha, reduction=reduction) + scripted_focal_loss = script_fn(inputs, targets, gamma=gamma, alpha=alpha, reduction=reduction) + + tol = 1e-3 if dtype is torch.half else 1e-5 + torch.testing.assert_close(focal_loss, scripted_focal_loss, rtol=tol, atol=tol) + + # Raise ValueError for anonymous reduction mode + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) + def test_reduction_mode(self, device, dtype, 
reduction="xyz"): + if device == "cpu" and dtype is torch.half: + pytest.skip("Currently torch.half is not fully supported on cpu") + torch.random.manual_seed(0) + inputs, targets = self._generate_diverse_input_target_pair(device=device, dtype=dtype) + with pytest.raises(ValueError, match="Invalid"): + ops.sigmoid_focal_loss(inputs, targets, 0.25, 2, reduction) + + +class TestMasksToBoxes: + def test_masks_box(self): + def masks_box_check(masks, expected, atol=1e-4): + out = ops.masks_to_boxes(masks) + assert out.dtype == torch.float + torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=True, atol=atol) + + # Check for int type boxes. + def _get_image(): + assets_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") + mask_path = os.path.join(assets_directory, "masks.tiff") + image = Image.open(mask_path) + return image + + def _create_masks(image, masks): + for index in range(image.n_frames): + image.seek(index) + frame = np.array(image) + masks[index] = torch.tensor(frame) + + return masks + + expected = torch.tensor( + [ + [127, 2, 165, 40], + [2, 50, 44, 92], + [56, 63, 98, 100], + [139, 68, 175, 104], + [160, 112, 198, 145], + [49, 138, 99, 182], + [108, 148, 152, 213], + ], + dtype=torch.float, + ) + + image = _get_image() + for dtype in [torch.float16, torch.float32, torch.float64]: + masks = torch.zeros((image.n_frames, image.height, image.width), dtype=dtype) + masks = _create_masks(image, masks) + masks_box_check(masks, expected) + + +class TestStochasticDepth: + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.parametrize("p", [0.2, 0.5, 0.8]) + @pytest.mark.parametrize("mode", ["batch", "row"]) + def test_stochastic_depth_random(self, seed, mode, p): + torch.manual_seed(seed) + stats = pytest.importorskip("scipy.stats") + batch_size = 5 + x = torch.ones(size=(batch_size, 3, 4, 4)) + layer = ops.StochasticDepth(p=p, mode=mode) + layer.__repr__() + + trials = 250 + num_samples = 0 + counts = 0 + for _ in 
range(trials): + out = layer(x) + non_zero_count = out.sum(dim=(1, 2, 3)).nonzero().size(0) + if mode == "batch": + if non_zero_count == 0: + counts += 1 + num_samples += 1 + elif mode == "row": + counts += batch_size - non_zero_count + num_samples += batch_size + + p_value = stats.binomtest(counts, num_samples, p=p).pvalue + assert p_value > 0.01 + + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.parametrize("p", (0, 1)) + @pytest.mark.parametrize("mode", ["batch", "row"]) + def test_stochastic_depth(self, seed, mode, p): + torch.manual_seed(seed) + batch_size = 5 + x = torch.ones(size=(batch_size, 3, 4, 4)) + layer = ops.StochasticDepth(p=p, mode=mode) + + out = layer(x) + if p == 0: + assert out.equal(x) + elif p == 1: + assert out.equal(torch.zeros_like(x)) + + def make_obj(self, p, mode, wrap=False): + obj = ops.StochasticDepth(p, mode) + return StochasticDepthWrapper(obj) if wrap else obj + + @pytest.mark.parametrize("p", (0, 1)) + @pytest.mark.parametrize("mode", ["batch", "row"]) + def test_is_leaf_node(self, p, mode): + op_obj = self.make_obj(p, mode, wrap=True) + graph_node_names = get_graph_node_names(op_obj) + + assert len(graph_node_names) == 2 + assert len(graph_node_names[0]) == len(graph_node_names[1]) + assert len(graph_node_names[0]) == 1 + op_obj.n_inputs + + +class TestUtils: + @pytest.mark.parametrize("norm_layer", [None, nn.BatchNorm2d, nn.LayerNorm]) + def test_split_normalization_params(self, norm_layer): + model = models.mobilenet_v3_large(norm_layer=norm_layer) + params = ops._utils.split_normalization_params(model, None if norm_layer is None else [norm_layer]) + + assert len(params[0]) == 92 + assert len(params[1]) == 82 + + +class TestDropBlock: + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.parametrize("dim", [2, 3]) + @pytest.mark.parametrize("p", [0, 0.5]) + @pytest.mark.parametrize("block_size", [5, 11]) + @pytest.mark.parametrize("inplace", [True, False]) + def test_drop_block(self, seed, dim, p, 
block_size, inplace): + torch.manual_seed(seed) + batch_size = 5 + channels = 3 + height = 11 + width = height + depth = height + if dim == 2: + x = torch.ones(size=(batch_size, channels, height, width)) + layer = ops.DropBlock2d(p=p, block_size=block_size, inplace=inplace) + feature_size = height * width + elif dim == 3: + x = torch.ones(size=(batch_size, channels, depth, height, width)) + layer = ops.DropBlock3d(p=p, block_size=block_size, inplace=inplace) + feature_size = depth * height * width + layer.__repr__() + + out = layer(x) + if p == 0: + assert out.equal(x) + if block_size == height: + for b, c in product(range(batch_size), range(channels)): + assert out[b, c].count_nonzero() in (0, feature_size) + + @pytest.mark.parametrize("seed", range(10)) + @pytest.mark.parametrize("dim", [2, 3]) + @pytest.mark.parametrize("p", [0.1, 0.2]) + @pytest.mark.parametrize("block_size", [3]) + @pytest.mark.parametrize("inplace", [False]) + def test_drop_block_random(self, seed, dim, p, block_size, inplace): + torch.manual_seed(seed) + batch_size = 5 + channels = 3 + height = 11 + width = height + depth = height + if dim == 2: + x = torch.ones(size=(batch_size, channels, height, width)) + layer = ops.DropBlock2d(p=p, block_size=block_size, inplace=inplace) + elif dim == 3: + x = torch.ones(size=(batch_size, channels, depth, height, width)) + layer = ops.DropBlock3d(p=p, block_size=block_size, inplace=inplace) + + trials = 250 + num_samples = 0 + counts = 0 + cell_numel = torch.tensor(x.shape).prod() + for _ in range(trials): + with torch.no_grad(): + out = layer(x) + non_zero_count = out.nonzero().size(0) + counts += cell_numel - non_zero_count + num_samples += cell_numel + + assert abs(p - counts / num_samples) / p < 0.15 + + def make_obj(self, dim, p, block_size, inplace, wrap=False): + if dim == 2: + obj = ops.DropBlock2d(p, block_size, inplace) + elif dim == 3: + obj = ops.DropBlock3d(p, block_size, inplace) + return DropBlockWrapper(obj) if wrap else obj + + 
@pytest.mark.parametrize("dim", (2, 3)) + @pytest.mark.parametrize("p", [0, 1]) + @pytest.mark.parametrize("block_size", [5, 7]) + @pytest.mark.parametrize("inplace", [True, False]) + def test_is_leaf_node(self, dim, p, block_size, inplace): + op_obj = self.make_obj(dim, p, block_size, inplace, wrap=True) + graph_node_names = get_graph_node_names(op_obj) + + assert len(graph_node_names) == 2 + assert len(graph_node_names[0]) == len(graph_node_names[1]) + assert len(graph_node_names[0]) == 1 + op_obj.n_inputs + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_prototype_datasets_builtin.py b/test/test_prototype_datasets_builtin.py new file mode 100644 index 00000000000..5f8fc90debf --- /dev/null +++ b/test/test_prototype_datasets_builtin.py @@ -0,0 +1,282 @@ +import io +import pickle +from collections import deque +from pathlib import Path + +import pytest +import torch +import torchvision.transforms.v2 as transforms + +from builtin_dataset_mocks import DATASET_MOCKS, parametrize_dataset_mocks +from torch.testing._comparison import not_close_error_metas, ObjectPair, TensorLikePair + +# TODO: replace with torchdata.dataloader2.DataLoader2 as soon as it is stable-ish +from torch.utils.data import DataLoader + +# TODO: replace with torchdata equivalent as soon as it is available +from torch.utils.data.graph_settings import get_all_graph_pipes + +from torchdata.dataloader2.graph.utils import traverse_dps +from torchdata.datapipes.iter import ShardingFilter, Shuffler +from torchdata.datapipes.utils import StreamWrapper +from torchvision import tv_tensors +from torchvision._utils import sequence_to_str +from torchvision.prototype import datasets +from torchvision.prototype.datasets.utils import EncodedImage +from torchvision.prototype.datasets.utils._internal import INFINITE_BUFFER_SIZE +from torchvision.prototype.tv_tensors import Label +from torchvision.transforms.v2._utils import is_pure_tensor + + +def assert_samples_equal(*args, msg=None, 
**kwargs): + error_metas = not_close_error_metas( + *args, pair_types=(TensorLikePair, ObjectPair), rtol=0, atol=0, equal_nan=True, **kwargs + ) + if error_metas: + raise error_metas[0].to_error(msg) + + +def extract_datapipes(dp): + return get_all_graph_pipes(traverse_dps(dp)) + + +def consume(iterator): + # Copied from the official itertools recipes: https://docs.python.org/3/library/itertools.html#itertools-recipes + deque(iterator, maxlen=0) + + +def next_consume(iterator): + item = next(iterator) + consume(iterator) + return item + + +@pytest.fixture(autouse=True) +def test_home(mocker, tmp_path): + mocker.patch("torchvision.prototype.datasets._api.home", return_value=str(tmp_path)) + mocker.patch("torchvision.prototype.datasets.home", return_value=str(tmp_path)) + yield tmp_path + + +def test_coverage(): + untested_datasets = set(datasets.list_datasets()) - DATASET_MOCKS.keys() + if untested_datasets: + raise AssertionError( + f"The dataset(s) {sequence_to_str(sorted(untested_datasets), separate_last='and ')} " + f"are exposed through `torchvision.prototype.datasets.load()`, but are not tested. " + f"Please add mock data to `test/builtin_dataset_mocks.py`." 
+ ) + + +@pytest.mark.filterwarnings("error") +class TestCommon: + @pytest.mark.parametrize("name", datasets.list_datasets()) + def test_info(self, name): + try: + info = datasets.info(name) + except ValueError: + raise AssertionError("No info available.") from None + + if not (isinstance(info, dict) and all(isinstance(key, str) for key in info.keys())): + raise AssertionError("Info should be a dictionary with string keys.") + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_smoke(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + if not isinstance(dataset, datasets.utils.Dataset): + raise AssertionError(f"Loading the dataset should return an Dataset, but got {type(dataset)} instead.") + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_sample(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + try: + sample = next_consume(iter(dataset)) + except StopIteration: + raise AssertionError("Unable to draw any sample.") from None + except Exception as error: + raise AssertionError("Drawing a sample raised the error above.") from error + + if not isinstance(sample, dict): + raise AssertionError(f"Samples should be dictionaries, but got {type(sample)} instead.") + + if not sample: + raise AssertionError("Sample dictionary is empty.") + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_num_samples(self, dataset_mock, config): + dataset, mock_info = dataset_mock.load(config) + + assert len(list(dataset)) == mock_info["num_samples"] + + @pytest.fixture + def log_session_streams(self): + debug_unclosed_streams = StreamWrapper.debug_unclosed_streams + try: + StreamWrapper.debug_unclosed_streams = True + yield + finally: + StreamWrapper.debug_unclosed_streams = debug_unclosed_streams + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_stream_closing(self, log_session_streams, dataset_mock, config): + def make_msg_and_close(head): + unclosed_streams = [] + for stream in list(StreamWrapper.session_streams.keys()): + 
unclosed_streams.append(repr(stream.file_obj)) + stream.close() + unclosed_streams = "\n".join(unclosed_streams) + return f"{head}\n\n{unclosed_streams}" + + if StreamWrapper.session_streams: + raise pytest.UsageError(make_msg_and_close("A previous test did not close the following streams:")) + + dataset, _ = dataset_mock.load(config) + + consume(iter(dataset)) + + if StreamWrapper.session_streams: + raise AssertionError(make_msg_and_close("The following streams were not closed after a full iteration:")) + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_no_unaccompanied_pure_tensors(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + sample = next_consume(iter(dataset)) + + pure_tensors = {key for key, value in sample.items() if is_pure_tensor(value)} + + if pure_tensors and not any( + isinstance(item, (tv_tensors.Image, tv_tensors.Video, EncodedImage)) for item in sample.values() + ): + raise AssertionError( + f"The values of key(s) " + f"{sequence_to_str(sorted(pure_tensors), separate_last='and ')} contained pure tensors, " + f"but didn't find any (encoded) image or video." + ) + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_transformable(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + dataset = dataset.map(transforms.Identity()) + + consume(iter(dataset)) + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_traversable(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + traverse_dps(dataset) + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_serializable(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + pickle.dumps(dataset) + + # This has to be a proper function, since lambda's or local functions + # cannot be pickled, but this is a requirement for the DataLoader with + # multiprocessing, i.e. 
num_workers > 0 + def _collate_fn(self, batch): + return batch + + @pytest.mark.parametrize("num_workers", [0, 1]) + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_data_loader(self, dataset_mock, config, num_workers): + dataset, _ = dataset_mock.load(config) + + dl = DataLoader( + dataset, + batch_size=2, + num_workers=num_workers, + collate_fn=self._collate_fn, + ) + + consume(dl) + + # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also + # that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680 + # contain a custom test for that, but we opted to wait for a potential solution / test from torchdata for now. + @parametrize_dataset_mocks(DATASET_MOCKS) + @pytest.mark.parametrize("annotation_dp_type", (Shuffler, ShardingFilter)) + def test_has_annotations(self, dataset_mock, config, annotation_dp_type): + dataset, _ = dataset_mock.load(config) + + if not any(isinstance(dp, annotation_dp_type) for dp in extract_datapipes(dataset)): + raise AssertionError(f"The dataset doesn't contain a {annotation_dp_type.__name__}() datapipe.") + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_save_load(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + sample = next_consume(iter(dataset)) + + with io.BytesIO() as buffer: + torch.save(sample, buffer) + buffer.seek(0) + assert_samples_equal(torch.load(buffer, weights_only=True), sample) + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_infinite_buffer_size(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + for dp in extract_datapipes(dataset): + if hasattr(dp, "buffer_size"): + # TODO: replace this with the proper sentinel as soon as https://github.com/pytorch/data/issues/335 is + # resolved + assert dp.buffer_size == INFINITE_BUFFER_SIZE + + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_has_length(self, dataset_mock, config): + dataset, _ = 
dataset_mock.load(config) + + assert len(dataset) > 0 + + +@parametrize_dataset_mocks(DATASET_MOCKS["qmnist"]) +class TestQMNIST: + def test_extra_label(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + sample = next_consume(iter(dataset)) + for key, type in ( + ("nist_hsf_series", int), + ("nist_writer_id", int), + ("digit_index", int), + ("nist_label", int), + ("global_digit_index", int), + ("duplicate", bool), + ("unused", bool), + ): + assert key in sample and isinstance(sample[key], type) + + +@parametrize_dataset_mocks(DATASET_MOCKS["gtsrb"]) +class TestGTSRB: + def test_label_matches_path(self, dataset_mock, config): + # We read the labels from the csv files instead. But for the trainset, the labels are also part of the path. + # This test makes sure that they're both the same + if config["split"] != "train": + return + + dataset, _ = dataset_mock.load(config) + + for sample in dataset: + label_from_path = int(Path(sample["path"]).parent.name) + assert sample["label"] == label_from_path + + +@parametrize_dataset_mocks(DATASET_MOCKS["usps"]) +class TestUSPS: + def test_sample_content(self, dataset_mock, config): + dataset, _ = dataset_mock.load(config) + + for sample in dataset: + assert "image" in sample + assert "label" in sample + + assert isinstance(sample["image"], tv_tensors.Image) + assert isinstance(sample["label"], Label) + + assert sample["image"].shape == (1, 16, 16) diff --git a/test/test_prototype_datasets_utils.py b/test/test_prototype_datasets_utils.py new file mode 100644 index 00000000000..2098ac736ac --- /dev/null +++ b/test/test_prototype_datasets_utils.py @@ -0,0 +1,302 @@ +import gzip +import pathlib +import sys + +import numpy as np +import pytest +import torch +from datasets_utils import make_fake_flo_file, make_tar +from torchdata.datapipes.iter import FileOpener, TarArchiveLoader +from torchvision.datasets._optical_flow import _read_flo as read_flo_ref +from torchvision.datasets.utils import _decompress +from 
torchvision.prototype.datasets.utils import Dataset, GDriveResource, HttpResource, OnlineResource +from torchvision.prototype.datasets.utils._internal import fromfile, read_flo + + +@pytest.mark.filterwarnings("error:The given NumPy array is not writeable:UserWarning") +@pytest.mark.parametrize( + ("np_dtype", "torch_dtype", "byte_order"), + [ + (">f4", torch.float32, "big"), + ("i8", torch.int64, "big"), + ("|u1", torch.uint8, sys.byteorder), + ], +) +@pytest.mark.parametrize("count", (-1, 2)) +@pytest.mark.parametrize("mode", ("rb", "r+b")) +def test_fromfile(tmpdir, np_dtype, torch_dtype, byte_order, count, mode): + path = tmpdir / "data.bin" + rng = np.random.RandomState(0) + rng.randn(5 if count == -1 else count + 1).astype(np_dtype).tofile(path) + + for count_ in (-1, count // 2): + expected = torch.from_numpy(np.fromfile(path, dtype=np_dtype, count=count_).astype(np_dtype[1:])) + + with open(path, mode) as file: + actual = fromfile(file, dtype=torch_dtype, byte_order=byte_order, count=count_) + + torch.testing.assert_close(actual, expected) + + +def test_read_flo(tmpdir): + path = tmpdir / "test.flo" + make_fake_flo_file(3, 4, path) + + with open(path, "rb") as file: + actual = read_flo(file) + + expected = torch.from_numpy(read_flo_ref(path).astype("f4", copy=False)) + + torch.testing.assert_close(actual, expected) + + +class TestOnlineResource: + class DummyResource(OnlineResource): + def __init__(self, download_fn=None, **kwargs): + super().__init__(**kwargs) + self._download_fn = download_fn + + def _download(self, root): + if self._download_fn is None: + raise pytest.UsageError( + "`_download()` was called, but `DummyResource(...)` was constructed without `download_fn`." 
+ ) + + return self._download_fn(self, root) + + def _make_file(self, root, *, content, name="file.txt"): + file = root / name + with open(file, "w") as fh: + fh.write(content) + + return file + + def _make_folder(self, root, *, name="folder"): + folder = root / name + subfolder = folder / "subfolder" + subfolder.mkdir(parents=True) + + files = {} + for idx, root in enumerate([folder, folder, subfolder]): + content = f"sentinel{idx}" + file = self._make_file(root, name=f"file{idx}.txt", content=content) + files[str(file)] = content + + return folder, files + + def _make_tar(self, root, *, name="archive.tar", remove=True): + folder, files = self._make_folder(root, name=name.split(".")[0]) + archive = make_tar(root, name, folder, remove=remove) + files = {str(archive / pathlib.Path(file).relative_to(root)): content for file, content in files.items()} + return archive, files + + def test_load_file(self, tmp_path): + content = "sentinel" + file = self._make_file(tmp_path, content=content) + + resource = self.DummyResource(file_name=file.name) + + dp = resource.load(tmp_path) + assert isinstance(dp, FileOpener) + + data = list(dp) + assert len(data) == 1 + + path, buffer = data[0] + assert path == str(file) + assert buffer.read().decode() == content + + def test_load_folder(self, tmp_path): + folder, files = self._make_folder(tmp_path) + + resource = self.DummyResource(file_name=folder.name) + + dp = resource.load(tmp_path) + assert isinstance(dp, FileOpener) + assert {path: buffer.read().decode() for path, buffer in dp} == files + + def test_load_archive(self, tmp_path): + archive, files = self._make_tar(tmp_path) + + resource = self.DummyResource(file_name=archive.name) + + dp = resource.load(tmp_path) + assert isinstance(dp, TarArchiveLoader) + assert {path: buffer.read().decode() for path, buffer in dp} == files + + def test_priority_decompressed_gt_raw(self, tmp_path): + # We don't need to actually compress here. 
Adding the suffix is sufficient + self._make_file(tmp_path, content="raw_sentinel", name="file.txt.gz") + file = self._make_file(tmp_path, content="decompressed_sentinel", name="file.txt") + + resource = self.DummyResource(file_name=file.name) + + dp = resource.load(tmp_path) + path, buffer = next(iter(dp)) + + assert path == str(file) + assert buffer.read().decode() == "decompressed_sentinel" + + def test_priority_extracted_gt_decompressed(self, tmp_path): + archive, _ = self._make_tar(tmp_path, remove=False) + + resource = self.DummyResource(file_name=archive.name) + + dp = resource.load(tmp_path) + # If the archive had been selected, this would be a `TarArchiveReader` + assert isinstance(dp, FileOpener) + + def test_download(self, tmp_path): + download_fn_was_called = False + + def download_fn(resource, root): + nonlocal download_fn_was_called + download_fn_was_called = True + + return self._make_file(root, content="_", name=resource.file_name) + + resource = self.DummyResource( + file_name="file.txt", + download_fn=download_fn, + ) + + resource.load(tmp_path) + + assert download_fn_was_called, "`download_fn()` was never called" + + # This tests the `"decompress"` literal as well as a custom callable + @pytest.mark.parametrize( + "preprocess", + [ + "decompress", + lambda path: _decompress(str(path), remove_finished=True), + ], + ) + def test_preprocess_decompress(self, tmp_path, preprocess): + file_name = "file.txt.gz" + content = "sentinel" + + def download_fn(resource, root): + file = root / resource.file_name + with gzip.open(file, "wb") as fh: + fh.write(content.encode()) + return file + + resource = self.DummyResource(file_name=file_name, preprocess=preprocess, download_fn=download_fn) + + dp = resource.load(tmp_path) + data = list(dp) + assert len(data) == 1 + + path, buffer = data[0] + assert path == str(tmp_path / file_name).replace(".gz", "") + assert buffer.read().decode() == content + + def test_preprocess_extract(self, tmp_path): + files = None + + 
def download_fn(resource, root): + nonlocal files + archive, files = self._make_tar(root, name=resource.file_name) + return archive + + resource = self.DummyResource(file_name="folder.tar", preprocess="extract", download_fn=download_fn) + + dp = resource.load(tmp_path) + assert files is not None, "`download_fn()` was never called" + assert isinstance(dp, FileOpener) + + actual = {path: buffer.read().decode() for path, buffer in dp} + expected = { + path.replace(resource.file_name, resource.file_name.split(".")[0]): content + for path, content in files.items() + } + assert actual == expected + + def test_preprocess_only_after_download(self, tmp_path): + file = self._make_file(tmp_path, content="_") + + def preprocess(path): + raise AssertionError("`preprocess` was called although the file was already present.") + + resource = self.DummyResource( + file_name=file.name, + preprocess=preprocess, + ) + + resource.load(tmp_path) + + +class TestHttpResource: + def test_resolve_to_http(self, mocker): + file_name = "data.tar" + original_url = f"http://downloads.pytorch.org/{file_name}" + + redirected_url = original_url.replace("http", "https") + + sha256_sentinel = "sha256_sentinel" + + def preprocess_sentinel(path): + return path + + original_resource = HttpResource( + original_url, + sha256=sha256_sentinel, + preprocess=preprocess_sentinel, + ) + + mocker.patch("torchvision.prototype.datasets.utils._resource._get_redirect_url", return_value=redirected_url) + redirected_resource = original_resource.resolve() + + assert isinstance(redirected_resource, HttpResource) + assert redirected_resource.url == redirected_url + assert redirected_resource.file_name == file_name + assert redirected_resource.sha256 == sha256_sentinel + assert redirected_resource._preprocess is preprocess_sentinel + + def test_resolve_to_gdrive(self, mocker): + file_name = "data.tar" + original_url = f"http://downloads.pytorch.org/{file_name}" + + id_sentinel = "id-sentinel" + redirected_url = 
f"https://drive.google.com/file/d/{id_sentinel}/view" + + sha256_sentinel = "sha256_sentinel" + + def preprocess_sentinel(path): + return path + + original_resource = HttpResource( + original_url, + sha256=sha256_sentinel, + preprocess=preprocess_sentinel, + ) + + mocker.patch("torchvision.prototype.datasets.utils._resource._get_redirect_url", return_value=redirected_url) + redirected_resource = original_resource.resolve() + + assert isinstance(redirected_resource, GDriveResource) + assert redirected_resource.id == id_sentinel + assert redirected_resource.file_name == file_name + assert redirected_resource.sha256 == sha256_sentinel + assert redirected_resource._preprocess is preprocess_sentinel + + +def test_missing_dependency_error(): + class DummyDataset(Dataset): + def __init__(self): + super().__init__(root="root", dependencies=("fake_dependency",)) + + def _resources(self): + pass + + def _datapipe(self, resource_dps): + pass + + def __len__(self): + pass + + with pytest.raises(ModuleNotFoundError, match="depends on the third-party package 'fake_dependency'"): + DummyDataset() diff --git a/test/test_prototype_models.py b/test/test_prototype_models.py new file mode 100644 index 00000000000..d32df68f1f4 --- /dev/null +++ b/test/test_prototype_models.py @@ -0,0 +1,84 @@ +import pytest +import test_models as TM +import torch +from common_utils import cpu_and_cuda, set_rng_seed +from torchvision.prototype import models + + +@pytest.mark.parametrize("model_fn", (models.depth.stereo.raft_stereo_base,)) +@pytest.mark.parametrize("model_mode", ("standard", "scripted")) +@pytest.mark.parametrize("dev", cpu_and_cuda()) +def test_raft_stereo(model_fn, model_mode, dev): + # A simple test to make sure the model can do forward pass and jit scriptable + set_rng_seed(0) + + # Use corr_pyramid and corr_block with smaller num_levels and radius to prevent nan output + # get the idea from test_models.test_raft + corr_pyramid = 
models.depth.stereo.raft_stereo.CorrPyramid1d(num_levels=2) + corr_block = models.depth.stereo.raft_stereo.CorrBlock1d(num_levels=2, radius=2) + model = model_fn(corr_pyramid=corr_pyramid, corr_block=corr_block).eval().to(dev) + + if model_mode == "scripted": + model = torch.jit.script(model) + + img1 = torch.rand(1, 3, 64, 64).to(dev) + img2 = torch.rand(1, 3, 64, 64).to(dev) + num_iters = 3 + + preds = model(img1, img2, num_iters=num_iters) + depth_pred = preds[-1] + + assert len(preds) == num_iters, "Number of predictions should be the same as model.num_iters" + + assert depth_pred.shape == torch.Size( + [1, 1, 64, 64] + ), f"The output shape of depth_pred should be [1, 1, 64, 64] but instead it is {preds[0].shape}" + + # Test against expected file output + TM._assert_expected(depth_pred, name=model_fn.__name__, atol=1e-2, rtol=1e-2) + + +@pytest.mark.parametrize("model_fn", (models.depth.stereo.crestereo_base,)) +@pytest.mark.parametrize("model_mode", ("standard", "scripted")) +@pytest.mark.parametrize("dev", cpu_and_cuda()) +def test_crestereo(model_fn, model_mode, dev): + set_rng_seed(0) + + model = model_fn().eval().to(dev) + + if model_mode == "scripted": + model = torch.jit.script(model) + + img1 = torch.rand(1, 3, 64, 64).to(dev) + img2 = torch.rand(1, 3, 64, 64).to(dev) + iterations = 3 + + preds = model(img1, img2, flow_init=None, num_iters=iterations) + disparity_pred = preds[-1] + + # all the pyramid levels except the highest res make only half the number of iterations + expected_iterations = (iterations // 2) * (len(model.resolutions) - 1) + expected_iterations += iterations + assert ( + len(preds) == expected_iterations + ), "Number of predictions should be the number of iterations multiplied by the number of pyramid levels" + + assert disparity_pred.shape == torch.Size( + [1, 2, 64, 64] + ), f"Predicted disparity should have the same spatial shape as the input. 
Inputs shape {img1.shape[2:]}, Prediction shape {disparity_pred.shape[2:]}" + + assert all( + d.shape == torch.Size([1, 2, 64, 64]) for d in preds + ), "All predicted disparities are expected to have the same shape" + + # test a backward pass with a dummy loss as well + preds = torch.stack(preds, dim=0) + targets = torch.ones_like(preds, requires_grad=False) + loss = torch.nn.functional.mse_loss(preds, targets) + + try: + loss.backward() + except Exception as e: + assert False, f"Backward pass failed with an unexpected exception: {e.__class__.__name__} {e}" + + TM._assert_expected(disparity_pred, name=model_fn.__name__, atol=1e-2, rtol=1e-2) diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py new file mode 100644 index 00000000000..85ef98cf7b8 --- /dev/null +++ b/test/test_prototype_transforms.py @@ -0,0 +1,429 @@ +import collections.abc +import re + +import PIL.Image +import pytest +import torch + +from common_utils import assert_equal, make_bounding_boxes, make_detection_masks, make_image, make_video + +from torchvision.prototype import transforms, tv_tensors +from torchvision.transforms.v2._utils import check_type, is_pure_tensor +from torchvision.transforms.v2.functional import clamp_bounding_boxes, InterpolationMode, pil_to_tensor, to_pil_image + +from torchvision.tv_tensors import BoundingBoxes, BoundingBoxFormat, Image, Mask, Video + + +def _parse_categories(categories): + if categories is None: + num_categories = int(torch.randint(1, 11, ())) + elif isinstance(categories, int): + num_categories = categories + categories = [f"category{idx}" for idx in range(num_categories)] + elif isinstance(categories, collections.abc.Sequence) and all(isinstance(category, str) for category in categories): + categories = list(categories) + num_categories = len(categories) + else: + raise pytest.UsageError( + f"`categories` can either be `None` (default), an integer, or a sequence of strings, " + f"but got '{categories}' instead." 
+ ) + return categories, num_categories + + +def make_label(*, extra_dims=(), categories=10, dtype=torch.int64, device="cpu"): + categories, num_categories = _parse_categories(categories) + # The idiom `make_tensor(..., dtype=torch.int64).to(dtype)` is intentional to only get integer values, + # regardless of the requested dtype, e.g. 0 or 0.0 rather than 0 or 0.123 + data = torch.testing.make_tensor(extra_dims, low=0, high=num_categories, dtype=torch.int64, device=device).to(dtype) + return tv_tensors.Label(data, categories=categories) + + +class TestSimpleCopyPaste: + def create_fake_image(self, mocker, image_type): + if image_type == PIL.Image.Image: + return PIL.Image.new("RGB", (32, 32), 123) + return mocker.MagicMock(spec=image_type) + + def test__extract_image_targets_assertion(self, mocker): + transform = transforms.SimpleCopyPaste() + + flat_sample = [ + # images, batch size = 2 + self.create_fake_image(mocker, Image), + # labels, bboxes, masks + mocker.MagicMock(spec=tv_tensors.Label), + mocker.MagicMock(spec=BoundingBoxes), + mocker.MagicMock(spec=Mask), + # labels, bboxes, masks + mocker.MagicMock(spec=BoundingBoxes), + mocker.MagicMock(spec=Mask), + ] + + with pytest.raises(TypeError, match="requires input sample to contain equal sized list of Images"): + transform._extract_image_targets(flat_sample) + + @pytest.mark.parametrize("image_type", [Image, PIL.Image.Image, torch.Tensor]) + @pytest.mark.parametrize("label_type", [tv_tensors.Label, tv_tensors.OneHotLabel]) + def test__extract_image_targets(self, image_type, label_type, mocker): + transform = transforms.SimpleCopyPaste() + + flat_sample = [ + # images, batch size = 2 + self.create_fake_image(mocker, image_type), + self.create_fake_image(mocker, image_type), + # labels, bboxes, masks + mocker.MagicMock(spec=label_type), + mocker.MagicMock(spec=BoundingBoxes), + mocker.MagicMock(spec=Mask), + # labels, bboxes, masks + mocker.MagicMock(spec=label_type), + mocker.MagicMock(spec=BoundingBoxes), + 
mocker.MagicMock(spec=Mask), + ] + + images, targets = transform._extract_image_targets(flat_sample) + + assert len(images) == len(targets) == 2 + if image_type == PIL.Image.Image: + torch.testing.assert_close(images[0], pil_to_tensor(flat_sample[0])) + torch.testing.assert_close(images[1], pil_to_tensor(flat_sample[1])) + else: + assert images[0] == flat_sample[0] + assert images[1] == flat_sample[1] + + for target in targets: + for key, type_ in [ + ("boxes", BoundingBoxes), + ("masks", Mask), + ("labels", label_type), + ]: + assert key in target + assert isinstance(target[key], type_) + assert target[key] in flat_sample + + @pytest.mark.parametrize("label_type", [tv_tensors.Label, tv_tensors.OneHotLabel]) + def test__copy_paste(self, label_type): + image = 2 * torch.ones(3, 32, 32) + masks = torch.zeros(2, 32, 32) + masks[0, 3:9, 2:8] = 1 + masks[1, 20:30, 20:30] = 1 + labels = torch.tensor([1, 2]) + blending = True + resize_interpolation = InterpolationMode.BILINEAR + antialias = None + if label_type == tv_tensors.OneHotLabel: + labels = torch.nn.functional.one_hot(labels, num_classes=5) + target = { + "boxes": BoundingBoxes( + torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", canvas_size=(32, 32) + ), + "masks": Mask(masks), + "labels": label_type(labels), + } + + paste_image = 10 * torch.ones(3, 32, 32) + paste_masks = torch.zeros(2, 32, 32) + paste_masks[0, 13:19, 12:18] = 1 + paste_masks[1, 15:19, 1:8] = 1 + paste_labels = torch.tensor([3, 4]) + if label_type == tv_tensors.OneHotLabel: + paste_labels = torch.nn.functional.one_hot(paste_labels, num_classes=5) + paste_target = { + "boxes": BoundingBoxes( + torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", canvas_size=(32, 32) + ), + "masks": Mask(paste_masks), + "labels": label_type(paste_labels), + } + + transform = transforms.SimpleCopyPaste() + random_selection = torch.tensor([0, 1]) + output_image, output_target = transform._copy_paste( + image, 
target, paste_image, paste_target, random_selection, blending, resize_interpolation, antialias + ) + + assert output_image.unique().tolist() == [2, 10] + assert output_target["boxes"].shape == (4, 4) + torch.testing.assert_close(output_target["boxes"][:2, :], target["boxes"]) + torch.testing.assert_close(output_target["boxes"][2:, :], paste_target["boxes"]) + + expected_labels = torch.tensor([1, 2, 3, 4]) + if label_type == tv_tensors.OneHotLabel: + expected_labels = torch.nn.functional.one_hot(expected_labels, num_classes=5) + torch.testing.assert_close(output_target["labels"], label_type(expected_labels)) + + assert output_target["masks"].shape == (4, 32, 32) + torch.testing.assert_close(output_target["masks"][:2, :], target["masks"]) + torch.testing.assert_close(output_target["masks"][2:, :], paste_target["masks"]) + + +class TestFixedSizeCrop: + def test_make_params(self, mocker): + crop_size = (7, 7) + batch_shape = (10,) + canvas_size = (11, 5) + + transform = transforms.FixedSizeCrop(size=crop_size) + + flat_inputs = [ + make_image(size=canvas_size, color_space="RGB"), + make_bounding_boxes(format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, num_boxes=batch_shape[0]), + ] + params = transform.make_params(flat_inputs) + + assert params["needs_crop"] + assert params["height"] <= crop_size[0] + assert params["width"] <= crop_size[1] + + assert ( + isinstance(params["is_valid"], torch.Tensor) + and params["is_valid"].dtype is torch.bool + and params["is_valid"].shape == batch_shape + ) + + assert params["needs_pad"] + assert any(pad > 0 for pad in params["padding"]) + + def test__transform_culling(self, mocker): + batch_size = 10 + canvas_size = (10, 10) + + is_valid = torch.randint(0, 2, (batch_size,), dtype=torch.bool) + mocker.patch( + "torchvision.prototype.transforms._geometry.FixedSizeCrop.make_params", + return_value=dict( + needs_crop=True, + top=0, + left=0, + height=canvas_size[0], + width=canvas_size[1], + is_valid=is_valid, + needs_pad=False, + 
), + ) + + bounding_boxes = make_bounding_boxes( + format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, num_boxes=batch_size + ) + masks = make_detection_masks(size=canvas_size, num_masks=batch_size) + labels = make_label(extra_dims=(batch_size,)) + + transform = transforms.FixedSizeCrop((-1, -1)) + mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True) + + output = transform( + dict( + bounding_boxes=bounding_boxes, + masks=masks, + labels=labels, + ) + ) + + assert_equal(output["bounding_boxes"], bounding_boxes[is_valid]) + assert_equal(output["masks"], masks[is_valid]) + assert_equal(output["labels"], labels[is_valid]) + + def test__transform_bounding_boxes_clamping(self, mocker): + batch_size = 3 + canvas_size = (10, 10) + + mocker.patch( + "torchvision.prototype.transforms._geometry.FixedSizeCrop.make_params", + return_value=dict( + needs_crop=True, + top=0, + left=0, + height=canvas_size[0], + width=canvas_size[1], + is_valid=torch.full((batch_size,), fill_value=True), + needs_pad=False, + ), + ) + + bounding_boxes = make_bounding_boxes( + format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, num_boxes=batch_size + ) + mock = mocker.patch( + "torchvision.prototype.transforms._geometry.F.clamp_bounding_boxes", wraps=clamp_bounding_boxes + ) + + transform = transforms.FixedSizeCrop((-1, -1)) + mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True) + + transform(bounding_boxes) + + mock.assert_called_once() + + +class TestLabelToOneHot: + def test__transform(self): + categories = ["apple", "pear", "pineapple"] + labels = tv_tensors.Label(torch.tensor([0, 1, 2, 1]), categories=categories) + transform = transforms.LabelToOneHot() + ohe_labels = transform(labels) + assert isinstance(ohe_labels, tv_tensors.OneHotLabel) + assert ohe_labels.shape == (4, 3) + assert ohe_labels.categories == labels.categories == categories + + +class TestPermuteDimensions: + @pytest.mark.parametrize( + ("dims", 
"inverse_dims"), + [ + ( + {Image: (2, 1, 0), Video: None}, + {Image: (2, 1, 0), Video: None}, + ), + ( + {Image: (2, 1, 0), Video: (1, 2, 3, 0)}, + {Image: (2, 1, 0), Video: (3, 0, 1, 2)}, + ), + ], + ) + def test_call(self, dims, inverse_dims): + sample = dict( + image=make_image(), + bounding_boxes=make_bounding_boxes(format=BoundingBoxFormat.XYXY), + video=make_video(), + str="str", + int=0, + ) + + transform = transforms.PermuteDimensions(dims) + transformed_sample = transform(sample) + + for key, value in sample.items(): + value_type = type(value) + transformed_value = transformed_sample[key] + + if check_type(value, (Image, is_pure_tensor, Video)): + if transform.dims.get(value_type) is not None: + assert transformed_value.permute(inverse_dims[value_type]).equal(value) + assert type(transformed_value) == torch.Tensor + else: + assert transformed_value is value + + @pytest.mark.filterwarnings("error") + def test_plain_tensor_call(self): + tensor = torch.empty((2, 3, 4)) + transform = transforms.PermuteDimensions(dims=(1, 2, 0)) + + assert transform(tensor).shape == (3, 4, 2) + + @pytest.mark.parametrize("other_type", [Image, Video]) + def test_plain_tensor_warning(self, other_type): + with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")): + transforms.PermuteDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)}) + + +class TestTransposeDimensions: + @pytest.mark.parametrize( + "dims", + [ + (-1, -2), + {Image: (1, 2), Video: None}, + ], + ) + def test_call(self, dims): + sample = dict( + image=make_image(), + bounding_boxes=make_bounding_boxes(format=BoundingBoxFormat.XYXY), + video=make_video(), + str="str", + int=0, + ) + + transform = transforms.TransposeDimensions(dims) + transformed_sample = transform(sample) + + for key, value in sample.items(): + value_type = type(value) + transformed_value = transformed_sample[key] + + transposed_dims = transform.dims.get(value_type) + if check_type(value, (Image, 
is_pure_tensor, Video)): + if transposed_dims is not None: + assert transformed_value.transpose(*transposed_dims).equal(value) + assert type(transformed_value) == torch.Tensor + else: + assert transformed_value is value + + @pytest.mark.filterwarnings("error") + def test_plain_tensor_call(self): + tensor = torch.empty((2, 3, 4)) + transform = transforms.TransposeDimensions(dims=(0, 2)) + + assert transform(tensor).shape == (4, 3, 2) + + @pytest.mark.parametrize("other_type", [Image, Video]) + def test_plain_tensor_warning(self, other_type): + with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")): + transforms.TransposeDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)}) + + +import importlib.machinery +import importlib.util +from pathlib import Path + + +def import_transforms_from_references(reference): + HERE = Path(__file__).parent + PROJECT_ROOT = HERE.parent + + loader = importlib.machinery.SourceFileLoader( + "transforms", str(PROJECT_ROOT / "references" / reference / "transforms.py") + ) + spec = importlib.util.spec_from_loader("transforms", loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + return module + + +det_transforms = import_transforms_from_references("detection") + + +def test_fixed_sized_crop_against_detection_reference(): + def make_tv_tensors(): + size = (600, 800) + num_objects = 22 + + pil_image = to_pil_image(make_image(size=size, color_space="RGB")) + target = { + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", num_boxes=num_objects, dtype=torch.float), + "labels": make_label(extra_dims=(num_objects,), categories=80), + "masks": make_detection_masks(size=size, num_masks=num_objects, dtype=torch.long), + } + + yield (pil_image, target) + + tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) + target = { + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", num_boxes=num_objects, dtype=torch.float), + "labels": 
make_label(extra_dims=(num_objects,), categories=80), + "masks": make_detection_masks(size=size, num_masks=num_objects, dtype=torch.long), + } + + yield (tensor_image, target) + + tv_tensor_image = make_image(size=size, color_space="RGB") + target = { + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", num_boxes=num_objects, dtype=torch.float), + "labels": make_label(extra_dims=(num_objects,), categories=80), + "masks": make_detection_masks(size=size, num_masks=num_objects, dtype=torch.long), + } + + yield (tv_tensor_image, target) + + t = transforms.FixedSizeCrop((1024, 1024), fill=0) + t_ref = det_transforms.FixedSizeCrop((1024, 1024), fill=0) + + for dp in make_tv_tensors(): + # We should use prototype transform first as reference transform performs inplace target update + torch.manual_seed(12) + output = t(dp) + + torch.manual_seed(12) + expected_output = t_ref(*dp) + + assert_equal(expected_output, output) diff --git a/test/test_quantized_models.py b/test/test_quantized_models.py deleted file mode 100644 index f20cc369276..00000000000 --- a/test/test_quantized_models.py +++ /dev/null @@ -1,90 +0,0 @@ -import torchvision -from common_utils import TestCase, map_nested_tensor_object -from collections import OrderedDict -from itertools import product -import torch -import numpy as np -from torchvision import models -import unittest -import traceback -import random - - -def set_rng_seed(seed): - torch.manual_seed(seed) - random.seed(seed) - np.random.seed(seed) - - -def get_available_quantizable_models(): - # TODO add a registration mechanism to torchvision.models - return [k for k, v in models.quantization.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"] - - -# list of models that are not scriptable -scriptable_quantizable_models_blacklist = [] - - -@unittest.skipUnless('fbgemm' in torch.backends.quantized.supported_engines and - 'qnnpack' in torch.backends.quantized.supported_engines, - "This Pytorch Build has not been built 
with fbgemm and qnnpack") -class ModelTester(TestCase): - def check_quantized_model(self, model, input_shape): - x = torch.rand(input_shape) - model(x) - return - - def check_script(self, model, name): - if name in scriptable_quantizable_models_blacklist: - return - scriptable = True - msg = "" - try: - torch.jit.script(model) - except Exception as e: - tb = traceback.format_exc() - scriptable = False - msg = str(e) + str(tb) - self.assertTrue(scriptable, msg) - - def _test_classification_model(self, name, input_shape): - # First check if quantize=True provides models that can run with input data - - model = torchvision.models.quantization.__dict__[name](pretrained=False, quantize=True) - self.check_quantized_model(model, input_shape) - - for eval_mode in [True, False]: - model = torchvision.models.quantization.__dict__[name](pretrained=False, quantize=False) - if eval_mode: - model.eval() - model.qconfig = torch.quantization.default_qconfig - else: - model.train() - model.qconfig = torch.quantization.default_qat_qconfig - - model.fuse_model() - if eval_mode: - torch.quantization.prepare(model, inplace=True) - else: - torch.quantization.prepare_qat(model, inplace=True) - model.eval() - - torch.quantization.convert(model, inplace=True) - - self.check_script(model, name) - - -for model_name in get_available_quantizable_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - input_shape = (1, 3, 224, 224) - if model_name in ['inception_v3']: - input_shape = (1, 3, 299, 299) - self._test_classification_model(model_name, input_shape) - - setattr(ModelTester, "test_" + model_name, do_test) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_transforms.py b/test/test_transforms.py index 1bbe1165f93..325ffa40b6c 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -1,16 +1,21 @@ -from __future__ import division +import math 
import os +import random +import re +import sys +from functools import partial + +import numpy as np +import pytest import torch import torchvision.transforms as transforms +import torchvision.transforms._functional_tensor as F_t import torchvision.transforms.functional as F -from torch._utils_internal import get_file_path_2 -import unittest -import math -import random -import numpy as np from PIL import Image +from torch._utils_internal import get_file_path_2 # @manual=fbcode//caffe2:utils_internal + try: - import accimage + import accimage # @manual=fbcode//pytorch/accimage:accimage except ImportError: accimage = None @@ -19,272 +24,481 @@ except ImportError: stats = None +from common_utils import assert_equal, cycle_over, float_dtypes, int_dtypes + + GRACE_HOPPER = get_file_path_2( - os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') + os.path.dirname(os.path.abspath(__file__)), "assets", "encode_jpeg", "grace_hopper_517x606.jpg" +) -class Tester(unittest.TestCase): +def _get_grayscale_test_image(img, fill=None): + img = img.convert("L") + fill = (fill[0],) if isinstance(fill, tuple) else fill + return img, fill - def test_crop(self): - height = random.randint(10, 32) * 2 - width = random.randint(10, 32) * 2 - oheight = random.randint(5, (height - 2) / 2) * 2 - owidth = random.randint(5, (width - 2) / 2) * 2 - img = torch.ones(3, height, width) - oh1 = (height - oheight) // 2 - ow1 = (width - owidth) // 2 - imgnarrow = img[:, oh1:oh1 + oheight, ow1:ow1 + owidth] - imgnarrow.fill_(0) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.CenterCrop((oheight, owidth)), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.sum(), 0, - "height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth)) - oheight += 1 - owidth += 1 - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.CenterCrop((oheight, owidth)), - transforms.ToTensor(), - ])(img) - sum1 = result.sum() - 
self.assertGreater(sum1, 1, - "height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth)) - oheight += 1 - owidth += 1 - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.CenterCrop((oheight, owidth)), - transforms.ToTensor(), - ])(img) - sum2 = result.sum() - self.assertGreater(sum2, 0, - "height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth)) - self.assertGreater(sum2, sum1, - "height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth)) - - def test_five_crop(self): - to_pil_image = transforms.ToPILImage() - h = random.randint(5, 25) - w = random.randint(5, 25) - for single_dim in [True, False]: - crop_h = random.randint(1, h) - crop_w = random.randint(1, w) - if single_dim: - crop_h = min(crop_h, crop_w) - crop_w = crop_h - transform = transforms.FiveCrop(crop_h) - else: - transform = transforms.FiveCrop((crop_h, crop_w)) - - img = torch.FloatTensor(3, h, w).uniform_() - results = transform(to_pil_image(img)) - - self.assertEqual(len(results), 5) - for crop in results: - self.assertEqual(crop.size, (crop_w, crop_h)) - - to_pil_image = transforms.ToPILImage() - tl = to_pil_image(img[:, 0:crop_h, 0:crop_w]) - tr = to_pil_image(img[:, 0:crop_h, w - crop_w:]) - bl = to_pil_image(img[:, h - crop_h:, 0:crop_w]) - br = to_pil_image(img[:, h - crop_h:, w - crop_w:]) - center = transforms.CenterCrop((crop_h, crop_w))(to_pil_image(img)) - expected_output = (tl, tr, bl, br, center) - self.assertEqual(results, expected_output) - - def test_ten_crop(self): - to_pil_image = transforms.ToPILImage() - h = random.randint(5, 25) - w = random.randint(5, 25) - for should_vflip in [True, False]: - for single_dim in [True, False]: - crop_h = random.randint(1, h) - crop_w = random.randint(1, w) - if single_dim: - crop_h = min(crop_h, crop_w) - crop_w = crop_h - transform = transforms.TenCrop(crop_h, - vertical_flip=should_vflip) - five_crop = transforms.FiveCrop(crop_h) - else: - 
transform = transforms.TenCrop((crop_h, crop_w), - vertical_flip=should_vflip) - five_crop = transforms.FiveCrop((crop_h, crop_w)) - - img = to_pil_image(torch.FloatTensor(3, h, w).uniform_()) - results = transform(img) - expected_output = five_crop(img) - - # Checking if FiveCrop and TenCrop can be printed as string - transform.__repr__() - five_crop.__repr__() - - if should_vflip: - vflipped_img = img.transpose(Image.FLIP_TOP_BOTTOM) - expected_output += five_crop(vflipped_img) - else: - hflipped_img = img.transpose(Image.FLIP_LEFT_RIGHT) - expected_output += five_crop(hflipped_img) - - self.assertEqual(len(results), 10) - self.assertEqual(results, expected_output) - - def test_randomresized_params(self): - height = random.randint(24, 32) * 2 - width = random.randint(24, 32) * 2 - img = torch.ones(3, height, width) - to_pil_image = transforms.ToPILImage() - img = to_pil_image(img) - size = 100 - epsilon = 0.05 - min_scale = 0.25 - for _ in range(10): - scale_min = max(round(random.random(), 2), min_scale) - scale_range = (scale_min, scale_min + round(random.random(), 2)) - aspect_min = max(round(random.random(), 2), epsilon) - aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2)) - randresizecrop = transforms.RandomResizedCrop(size, scale_range, aspect_ratio_range) - i, j, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range) - aspect_ratio_obtained = w / h - self.assertTrue((min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained and - aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon) or - aspect_ratio_obtained == 1.0) - self.assertIsInstance(i, int) - self.assertIsInstance(j, int) - self.assertIsInstance(h, int) - self.assertIsInstance(w, int) - - def test_randomperspective(self): - for _ in range(10): - height = random.randint(24, 32) * 2 - width = random.randint(24, 32) * 2 - img = torch.ones(3, height, width) - to_pil_image = transforms.ToPILImage() - img = to_pil_image(img) - perp = 
transforms.RandomPerspective() - startpoints, endpoints = perp.get_params(width, height, 0.5) - tr_img = F.perspective(img, startpoints, endpoints) - tr_img2 = F.to_tensor(F.perspective(tr_img, endpoints, startpoints)) - tr_img = F.to_tensor(tr_img) - self.assertEqual(img.size[0], width) - self.assertEqual(img.size[1], height) - self.assertGreater(torch.nn.functional.mse_loss(tr_img, F.to_tensor(img)) + 0.3, - torch.nn.functional.mse_loss(tr_img2, F.to_tensor(img))) - - def test_resize(self): - height = random.randint(24, 32) * 2 - width = random.randint(24, 32) * 2 - osize = random.randint(5, 12) * 2 +class TestConvertImageDtype: + @pytest.mark.parametrize("input_dtype, output_dtype", cycle_over(float_dtypes())) + def test_float_to_float(self, input_dtype, output_dtype): + input_image = torch.tensor((0.0, 1.0), dtype=input_dtype) + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) - img = torch.ones(3, height, width) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize(osize), - transforms.ToTensor(), - ])(img) - self.assertIn(osize, result.size()) - if height < width: - self.assertLessEqual(result.size(1), result.size(2)) - elif width < height: - self.assertGreaterEqual(result.size(1), result.size(2)) - - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize([osize, osize]), - transforms.ToTensor(), - ])(img) - self.assertIn(osize, result.size()) - self.assertEqual(result.size(1), osize) - self.assertEqual(result.size(2), osize) - - oheight = random.randint(5, 12) * 2 - owidth = random.randint(5, 12) * 2 - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize((oheight, owidth)), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), oheight) - self.assertEqual(result.size(2), owidth) + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) - result = 
transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize([oheight, owidth]), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), oheight) - self.assertEqual(result.size(2), owidth) + torch.testing.assert_close(output_image_script, output_image, rtol=0.0, atol=1e-6) - def test_random_crop(self): - height = random.randint(10, 32) * 2 - width = random.randint(10, 32) * 2 - oheight = random.randint(5, (height - 2) / 2) * 2 - owidth = random.randint(5, (width - 2) / 2) * 2 - img = torch.ones(3, height, width) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.RandomCrop((oheight, owidth)), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), oheight) - self.assertEqual(result.size(2), owidth) + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0.0, 1.0 - padding = random.randint(1, 20) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.RandomCrop((oheight, owidth), padding=padding), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), oheight) - self.assertEqual(result.size(2), owidth) + assert abs(actual_min - desired_min) < 1e-7 + assert abs(actual_max - desired_max) < 1e-7 + + @pytest.mark.parametrize("input_dtype", float_dtypes()) + @pytest.mark.parametrize("output_dtype", int_dtypes()) + def test_float_to_int(self, input_dtype, output_dtype): + input_image = torch.tensor((0.0, 1.0), dtype=input_dtype) + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) + + if (input_dtype == torch.float32 and output_dtype in (torch.int32, torch.int64)) or ( + input_dtype == torch.float64 and output_dtype == torch.int64 + ): + with pytest.raises(RuntimeError): + transform(input_image) + else: + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) + + torch.testing.assert_close(output_image_script, output_image, rtol=0.0, atol=1e-6) + + 
actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0, torch.iinfo(output_dtype).max + + assert actual_min == desired_min + assert actual_max == desired_max + + @pytest.mark.parametrize("input_dtype", int_dtypes()) + @pytest.mark.parametrize("output_dtype", float_dtypes()) + def test_int_to_float(self, input_dtype, output_dtype): + input_image = torch.tensor((0, torch.iinfo(input_dtype).max), dtype=input_dtype) + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) + + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) + + torch.testing.assert_close(output_image_script, output_image, rtol=0.0, atol=1e-6) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0.0, 1.0 + + assert abs(actual_min - desired_min) < 1e-7 + assert actual_min >= desired_min + assert abs(actual_max - desired_max) < 1e-7 + assert actual_max <= desired_max + + @pytest.mark.parametrize("input_dtype, output_dtype", cycle_over(int_dtypes())) + def test_dtype_int_to_int(self, input_dtype, output_dtype): + input_max = torch.iinfo(input_dtype).max + input_image = torch.tensor((0, input_max), dtype=input_dtype) + output_max = torch.iinfo(output_dtype).max + + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) + + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) + + torch.testing.assert_close( + output_image_script, + output_image, + rtol=0.0, + atol=1e-6, + msg=f"{output_image_script} vs {output_image}", + ) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0, output_max + + # see https://github.com/pytorch/vision/pull/2078#issuecomment-641036236 for details + if input_max >= output_max: + error_term = 0 + else: + error_term = 1 - (torch.iinfo(output_dtype).max + 1) // 
(torch.iinfo(input_dtype).max + 1) + + assert actual_min == desired_min + assert actual_max == (desired_max + error_term) + + @pytest.mark.parametrize("input_dtype, output_dtype", cycle_over(int_dtypes())) + def test_int_to_int_consistency(self, input_dtype, output_dtype): + input_max = torch.iinfo(input_dtype).max + input_image = torch.tensor((0, input_max), dtype=input_dtype) + + output_max = torch.iinfo(output_dtype).max + if output_max <= input_max: + return + + transform = transforms.ConvertImageDtype(output_dtype) + inverse_transfrom = transforms.ConvertImageDtype(input_dtype) + output_image = inverse_transfrom(transform(input_image)) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0, input_max + + assert actual_min == desired_min + assert actual_max == desired_max - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.RandomCrop((height, width)), - transforms.ToTensor() - ])(img) - self.assertEqual(result.size(1), height) - self.assertEqual(result.size(2), width) - self.assertTrue(np.allclose(img.numpy(), result.numpy())) - - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.RandomCrop((height + 1, width + 1), pad_if_needed=True), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), height + 1) - self.assertEqual(result.size(2), width + 1) - def test_pad(self): +@pytest.mark.skipif(accimage is None, reason="accimage not available") +class TestAccImage: + def test_accimage_to_tensor(self): + trans = transforms.PILToTensor() + + expected_output = trans(Image.open(GRACE_HOPPER).convert("RGB")) + output = trans(accimage.Image(GRACE_HOPPER)) + + torch.testing.assert_close(output, expected_output) + + def test_accimage_pil_to_tensor(self): + trans = transforms.PILToTensor() + + expected_output = trans(Image.open(GRACE_HOPPER).convert("RGB")) + output = trans(accimage.Image(GRACE_HOPPER)) + + assert expected_output.size() == output.size() + torch.testing.assert_close(output, 
expected_output) + + def test_accimage_resize(self): + trans = transforms.Compose( + [ + transforms.Resize(256, interpolation=Image.LINEAR), + transforms.PILToTensor(), + transforms.ConvertImageDtype(dtype=torch.float), + ] + ) + + # Checking if Compose, Resize and ToTensor can be printed as string + trans.__repr__() + + expected_output = trans(Image.open(GRACE_HOPPER).convert("RGB")) + output = trans(accimage.Image(GRACE_HOPPER)) + + assert expected_output.size() == output.size() + assert np.abs((expected_output - output).mean()) < 1e-3 + assert (expected_output - output).var() < 1e-5 + # note the high absolute tolerance + torch.testing.assert_close(output.numpy(), expected_output.numpy(), rtol=1e-5, atol=5e-2) + + def test_accimage_crop(self): + trans = transforms.Compose( + [transforms.CenterCrop(256), transforms.PILToTensor(), transforms.ConvertImageDtype(dtype=torch.float)] + ) + + # Checking if Compose, CenterCrop and ToTensor can be printed as string + trans.__repr__() + + expected_output = trans(Image.open(GRACE_HOPPER).convert("RGB")) + output = trans(accimage.Image(GRACE_HOPPER)) + + assert expected_output.size() == output.size() + torch.testing.assert_close(output, expected_output) + + +class TestToTensor: + @pytest.mark.parametrize("channels", [1, 3, 4]) + def test_to_tensor(self, channels): + height, width = 4, 4 + trans = transforms.ToTensor() + np_rng = np.random.RandomState(0) + + input_data = torch.ByteTensor(channels, height, width).random_(0, 255).float().div_(255) + img = transforms.ToPILImage()(input_data) + output = trans(img) + torch.testing.assert_close(output, input_data) + + ndarray = np_rng.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8) + output = trans(ndarray) + expected_output = ndarray.transpose((2, 0, 1)) / 255.0 + torch.testing.assert_close(output.numpy(), expected_output, check_dtype=False) + + ndarray = np_rng.rand(height, width, channels).astype(np.float32) + output = trans(ndarray) + expected_output = 
ndarray.transpose((2, 0, 1)) + torch.testing.assert_close(output.numpy(), expected_output, check_dtype=False) + + # separate test for mode '1' PIL images + input_data = torch.ByteTensor(1, height, width).bernoulli_() + img = transforms.ToPILImage()(input_data.mul(255)).convert("1") + output = trans(img) + torch.testing.assert_close(input_data, output, check_dtype=False) + + def test_to_tensor_errors(self): + height, width = 4, 4 + trans = transforms.ToTensor() + np_rng = np.random.RandomState(0) + + with pytest.raises(TypeError): + trans(np_rng.rand(1, height, width).tolist()) + + with pytest.raises(ValueError): + trans(np_rng.rand(height)) + + with pytest.raises(ValueError): + trans(np_rng.rand(1, 1, height, width)) + + @pytest.mark.parametrize("dtype", [torch.float16, torch.float, torch.double]) + def test_to_tensor_with_other_default_dtypes(self, dtype): + np_rng = np.random.RandomState(0) + current_def_dtype = torch.get_default_dtype() + + t = transforms.ToTensor() + np_arr = np_rng.randint(0, 255, (32, 32, 3), dtype=np.uint8) + img = Image.fromarray(np_arr) + + torch.set_default_dtype(dtype) + res = t(img) + assert res.dtype == dtype, f"{res.dtype} vs {dtype}" + + torch.set_default_dtype(current_def_dtype) + + @pytest.mark.parametrize("channels", [1, 3, 4]) + def test_pil_to_tensor(self, channels): + height, width = 4, 4 + trans = transforms.PILToTensor() + np_rng = np.random.RandomState(0) + + input_data = torch.ByteTensor(channels, height, width).random_(0, 255) + img = transforms.ToPILImage()(input_data) + output = trans(img) + torch.testing.assert_close(input_data, output) + + input_data = np_rng.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8) + img = transforms.ToPILImage()(input_data) + output = trans(img) + expected_output = input_data.transpose((2, 0, 1)) + torch.testing.assert_close(output.numpy(), expected_output) + + input_data = torch.as_tensor(np_rng.rand(channels, height, width).astype(np.float32)) + img = 
transforms.ToPILImage()(input_data) # CHW -> HWC and (* 255).byte() + output = trans(img) # HWC -> CHW + expected_output = (input_data * 255).byte() + torch.testing.assert_close(output, expected_output) + + # separate test for mode '1' PIL images + input_data = torch.ByteTensor(1, height, width).bernoulli_() + img = transforms.ToPILImage()(input_data.mul(255)).convert("1") + output = trans(img).view(torch.uint8).bool().to(torch.uint8) + torch.testing.assert_close(input_data, output) + + def test_pil_to_tensor_errors(self): + height, width = 4, 4 + trans = transforms.PILToTensor() + np_rng = np.random.RandomState(0) + + with pytest.raises(TypeError): + trans(np_rng.rand(1, height, width).tolist()) + + with pytest.raises(TypeError): + trans(np_rng.rand(1, height, width)) + + +def test_randomresized_params(): + height = random.randint(24, 32) * 2 + width = random.randint(24, 32) * 2 + img = torch.ones(3, height, width) + to_pil_image = transforms.ToPILImage() + img = to_pil_image(img) + size = 100 + epsilon = 0.05 + min_scale = 0.25 + for _ in range(10): + scale_min = max(round(random.random(), 2), min_scale) + scale_range = (scale_min, scale_min + round(random.random(), 2)) + aspect_min = max(round(random.random(), 2), epsilon) + aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2)) + randresizecrop = transforms.RandomResizedCrop(size, scale_range, aspect_ratio_range, antialias=True) + i, j, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range) + aspect_ratio_obtained = w / h + assert ( + min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained + and aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon + ) or aspect_ratio_obtained == 1.0 + assert isinstance(i, int) + assert isinstance(j, int) + assert isinstance(h, int) + assert isinstance(w, int) + + +@pytest.mark.parametrize( + "height, width", + [ + # height, width + # square image + (28, 28), + (27, 27), + # rectangular image: h < w + (28, 34), + (29, 35), + # 
rectangular image: h > w + (34, 28), + (35, 29), + ], +) +@pytest.mark.parametrize( + "osize", + [ + # single integer + 22, + 27, + 28, + 36, + # single integer in tuple/list + [ + 22, + ], + (27,), + ], +) +@pytest.mark.parametrize("max_size", (None, 37, 1000)) +def test_resize(height, width, osize, max_size): + img = Image.new("RGB", size=(width, height), color=127) + + t = transforms.Resize(osize, max_size=max_size, antialias=True) + result = t(img) + + msg = f"{height}, {width} - {osize} - {max_size}" + osize = osize[0] if isinstance(osize, (list, tuple)) else osize + # If size is an int, smaller edge of the image will be matched to this number. + # i.e, if height > width, then image will be rescaled to (size * height / width, size). + if height < width: + exp_w, exp_h = (int(osize * width / height), osize) # (w, h) + if max_size is not None and max_size < exp_w: + exp_w, exp_h = max_size, int(max_size * exp_h / exp_w) + assert result.size == (exp_w, exp_h), msg + elif width < height: + exp_w, exp_h = (osize, int(osize * height / width)) # (w, h) + if max_size is not None and max_size < exp_h: + exp_w, exp_h = int(max_size * exp_w / exp_h), max_size + assert result.size == (exp_w, exp_h), msg + else: + exp_w, exp_h = (osize, osize) # (w, h) + if max_size is not None and max_size < osize: + exp_w, exp_h = max_size, max_size + assert result.size == (exp_w, exp_h), msg + + +@pytest.mark.parametrize( + "height, width", + [ + # height, width + # square image + (28, 28), + (27, 27), + # rectangular image: h < w + (28, 34), + (29, 35), + # rectangular image: h > w + (34, 28), + (35, 29), + ], +) +@pytest.mark.parametrize( + "osize", + [ + # two integers sequence output + [22, 22], + [22, 28], + [22, 36], + [27, 22], + [36, 22], + [28, 28], + [28, 37], + [37, 27], + [37, 37], + ], +) +def test_resize_sequence_output(height, width, osize): + img = Image.new("RGB", size=(width, height), color=127) + oheight, owidth = osize + + t = transforms.Resize(osize, antialias=True) 
+ result = t(img) + + assert (owidth, oheight) == result.size + + +def test_resize_antialias_error(): + osize = [37, 37] + img = Image.new("RGB", size=(35, 29), color=127) + + with pytest.warns(UserWarning, match=r"Anti-alias option is always applied for PIL Image input"): + t = transforms.Resize(osize, antialias=False) + t(img) + + +@pytest.mark.parametrize("height, width", ((32, 64), (64, 32))) +def test_resize_size_equals_small_edge_size(height, width): + # Non-regression test for https://github.com/pytorch/vision/issues/5405 + # max_size used to be ignored if size == small_edge_size + max_size = 40 + img = Image.new("RGB", size=(width, height), color=127) + + small_edge = min(height, width) + t = transforms.Resize(small_edge, max_size=max_size, antialias=True) + result = t(img) + assert max(result.size) == max_size + + +def test_resize_equal_input_output_sizes(): + # Regression test for https://github.com/pytorch/vision/issues/7518 + height, width = 28, 27 + img = Image.new("RGB", size=(width, height)) + + t = transforms.Resize((height, width), antialias=True) + result = t(img) + assert result is img + + +class TestPad: + @pytest.mark.parametrize("fill", [85, 85.0]) + def test_pad(self, fill): height = random.randint(10, 32) * 2 width = random.randint(10, 32) * 2 - img = torch.ones(3, height, width) + img = torch.ones(3, height, width, dtype=torch.uint8) padding = random.randint(1, 20) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Pad(padding), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), height + 2 * padding) - self.assertEqual(result.size(2), width + 2 * padding) + result = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.Pad(padding, fill=fill), + transforms.PILToTensor(), + ] + )(img) + assert result.size(1) == height + 2 * padding + assert result.size(2) == width + 2 * padding + # check that all elements in the padded region correspond + # to the pad value + h_padded = result[:, :padding, :] 
+ w_padded = result[:, :, :padding] + torch.testing.assert_close(h_padded, torch.full_like(h_padded, fill_value=fill), rtol=0.0, atol=0.0) + torch.testing.assert_close(w_padded, torch.full_like(w_padded, fill_value=fill), rtol=0.0, atol=0.0) + pytest.raises(ValueError, transforms.Pad(padding, fill=(1, 2)), transforms.ToPILImage()(img)) def test_pad_with_tuple_of_pad_values(self): height = random.randint(10, 32) * 2 width = random.randint(10, 32) * 2 img = transforms.ToPILImage()(torch.ones(3, height, width)) - padding = tuple([random.randint(1, 20) for _ in range(2)]) + padding = tuple(random.randint(1, 20) for _ in range(2)) output = transforms.Pad(padding)(img) - self.assertEqual(output.size, (width + padding[0] * 2, height + padding[1] * 2)) + assert output.size == (width + padding[0] * 2, height + padding[1] * 2) - padding = tuple([random.randint(1, 20) for _ in range(4)]) + padding = [random.randint(1, 20) for _ in range(4)] output = transforms.Pad(padding)(img) - self.assertEqual(output.size[0], width + padding[0] + padding[2]) - self.assertEqual(output.size[1], height + padding[1] + padding[3]) + assert output.size[0] == width + padding[0] + padding[2] + assert output.size[1] == height + padding[1] + padding[3] # Checking if Padding can be printed as string transforms.Pad(padding).__repr__() @@ -297,1124 +511,1735 @@ def test_pad_with_non_constant_padding_modes(self): img = F.pad(img, 1, (200, 200, 200)) # pad 3 to all sidess - edge_padded_img = F.pad(img, 3, padding_mode='edge') + edge_padded_img = F.pad(img, 3, padding_mode="edge") # First 6 elements of leftmost edge in the middle of the image, values are in order: # edge_pad, edge_pad, edge_pad, constant_pad, constant value added to leftmost edge, 0 edge_middle_slice = np.asarray(edge_padded_img).transpose(2, 0, 1)[0][17][:6] - self.assertTrue(np.all(edge_middle_slice == np.asarray([200, 200, 200, 200, 1, 0]))) - self.assertEqual(transforms.ToTensor()(edge_padded_img).size(), (3, 35, 35)) + 
assert_equal(edge_middle_slice, np.asarray([200, 200, 200, 200, 1, 0], dtype=np.uint8)) + assert transforms.PILToTensor()(edge_padded_img).size() == (3, 35, 35) # Pad 3 to left/right, 2 to top/bottom - reflect_padded_img = F.pad(img, (3, 2), padding_mode='reflect') + reflect_padded_img = F.pad(img, (3, 2), padding_mode="reflect") # First 6 elements of leftmost edge in the middle of the image, values are in order: # reflect_pad, reflect_pad, reflect_pad, constant_pad, constant value added to leftmost edge, 0 reflect_middle_slice = np.asarray(reflect_padded_img).transpose(2, 0, 1)[0][17][:6] - self.assertTrue(np.all(reflect_middle_slice == np.asarray([0, 0, 1, 200, 1, 0]))) - self.assertEqual(transforms.ToTensor()(reflect_padded_img).size(), (3, 33, 35)) + assert_equal(reflect_middle_slice, np.asarray([0, 0, 1, 200, 1, 0], dtype=np.uint8)) + assert transforms.PILToTensor()(reflect_padded_img).size() == (3, 33, 35) # Pad 3 to left, 2 to top, 2 to right, 1 to bottom - symmetric_padded_img = F.pad(img, (3, 2, 2, 1), padding_mode='symmetric') + symmetric_padded_img = F.pad(img, (3, 2, 2, 1), padding_mode="symmetric") # First 6 elements of leftmost edge in the middle of the image, values are in order: # sym_pad, sym_pad, sym_pad, constant_pad, constant value added to leftmost edge, 0 symmetric_middle_slice = np.asarray(symmetric_padded_img).transpose(2, 0, 1)[0][17][:6] - self.assertTrue(np.all(symmetric_middle_slice == np.asarray([0, 1, 200, 200, 1, 0]))) - self.assertEqual(transforms.ToTensor()(symmetric_padded_img).size(), (3, 32, 34)) + assert_equal(symmetric_middle_slice, np.asarray([0, 1, 200, 200, 1, 0], dtype=np.uint8)) + assert transforms.PILToTensor()(symmetric_padded_img).size() == (3, 32, 34) + + # Check negative padding explicitly for symmetric case, since it is not + # implemented for tensor case to compare to + # Crop 1 to left, pad 2 to top, pad 3 to right, crop 3 to bottom + symmetric_padded_img_neg = F.pad(img, (-1, 2, 3, -3), padding_mode="symmetric") + 
symmetric_neg_middle_left = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][:3] + symmetric_neg_middle_right = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][-4:] + assert_equal(symmetric_neg_middle_left, np.asarray([1, 0, 0], dtype=np.uint8)) + assert_equal(symmetric_neg_middle_right, np.asarray([200, 200, 0, 0], dtype=np.uint8)) + assert transforms.PILToTensor()(symmetric_padded_img_neg).size() == (3, 28, 31) def test_pad_raises_with_invalid_pad_sequence_len(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): transforms.Pad(()) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): transforms.Pad((1, 2, 3)) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): transforms.Pad((1, 2, 3, 4, 5)) - def test_lambda(self): - trans = transforms.Lambda(lambda x: x.add(10)) - x = torch.randn(10) - y = trans(x) - self.assertTrue(y.equal(torch.add(x, 10))) - - trans = transforms.Lambda(lambda x: x.add_(10)) - x = torch.randn(10) - y = trans(x) - self.assertTrue(y.equal(x)) - - # Checking if Lambda can be printed as string - trans.__repr__() - - @unittest.skipIf(stats is None, 'scipy.stats not available') - def test_random_apply(self): - random_state = random.getstate() - random.seed(42) - random_apply_transform = transforms.RandomApply( - [ - transforms.RandomRotation((-45, 45)), - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - ], p=0.75 - ) - img = transforms.ToPILImage()(torch.rand(3, 10, 10)) - num_samples = 250 - num_applies = 0 - for _ in range(num_samples): - out = random_apply_transform(img) - if out != img: - num_applies += 1 - - p_value = stats.binom_test(num_applies, num_samples, p=0.75) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - # Checking if RandomApply can be printed as string - random_apply_transform.__repr__() - - @unittest.skipIf(stats is None, 'scipy.stats not available') - def test_random_choice(self): - 
random_state = random.getstate() - random.seed(42) - random_choice_transform = transforms.RandomChoice( - [ - transforms.Resize(15), - transforms.Resize(20), - transforms.CenterCrop(10) - ] - ) - img = transforms.ToPILImage()(torch.rand(3, 25, 25)) - num_samples = 250 - num_resize_15 = 0 - num_resize_20 = 0 - num_crop_10 = 0 - for _ in range(num_samples): - out = random_choice_transform(img) - if out.size == (15, 15): - num_resize_15 += 1 - elif out.size == (20, 20): - num_resize_20 += 1 - elif out.size == (10, 10): - num_crop_10 += 1 - - p_value = stats.binom_test(num_resize_15, num_samples, p=0.33333) - self.assertGreater(p_value, 0.0001) - p_value = stats.binom_test(num_resize_20, num_samples, p=0.33333) - self.assertGreater(p_value, 0.0001) - p_value = stats.binom_test(num_crop_10, num_samples, p=0.33333) - self.assertGreater(p_value, 0.0001) - - random.setstate(random_state) - # Checking if RandomChoice can be printed as string - random_choice_transform.__repr__() - - @unittest.skipIf(stats is None, 'scipy.stats not available') - def test_random_order(self): - random_state = random.getstate() - random.seed(42) - random_order_transform = transforms.RandomOrder( - [ - transforms.Resize(20), - transforms.CenterCrop(10) - ] - ) - img = transforms.ToPILImage()(torch.rand(3, 25, 25)) - num_samples = 250 - num_normal_order = 0 - resize_crop_out = transforms.CenterCrop(10)(transforms.Resize(20)(img)) - for _ in range(num_samples): - out = random_order_transform(img) - if out == resize_crop_out: - num_normal_order += 1 - - p_value = stats.binom_test(num_normal_order, num_samples, p=0.5) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - # Checking if RandomOrder can be printed as string - random_order_transform.__repr__() - - def test_to_tensor(self): - test_channels = [1, 3, 4] - height, width = 4, 4 - trans = transforms.ToTensor() - - with self.assertRaises(TypeError): - trans(np.random.rand(1, height, width).tolist()) - - with 
self.assertRaises(ValueError): - trans(np.random.rand(height)) - trans(np.random.rand(1, 1, height, width)) - - for channels in test_channels: - input_data = torch.ByteTensor(channels, height, width).random_(0, 255).float().div_(255) - img = transforms.ToPILImage()(input_data) - output = trans(img) - self.assertTrue(np.allclose(input_data.numpy(), output.numpy())) - - ndarray = np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8) - output = trans(ndarray) - expected_output = ndarray.transpose((2, 0, 1)) / 255.0 - self.assertTrue(np.allclose(output.numpy(), expected_output)) - - ndarray = np.random.rand(height, width, channels).astype(np.float32) - output = trans(ndarray) - expected_output = ndarray.transpose((2, 0, 1)) - self.assertTrue(np.allclose(output.numpy(), expected_output)) - - # separate test for mode '1' PIL images - input_data = torch.ByteTensor(1, height, width).bernoulli_() - img = transforms.ToPILImage()(input_data.mul(255)).convert('1') - output = trans(img) - self.assertTrue(np.allclose(input_data.numpy(), output.numpy())) - - @unittest.skipIf(accimage is None, 'accimage not available') - def test_accimage_to_tensor(self): - trans = transforms.ToTensor() - - expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB')) - output = trans(accimage.Image(GRACE_HOPPER)) - - self.assertEqual(expected_output.size(), output.size()) - self.assertTrue(np.allclose(output.numpy(), expected_output.numpy())) - - @unittest.skipIf(accimage is None, 'accimage not available') - def test_accimage_resize(self): - trans = transforms.Compose([ - transforms.Resize(256, interpolation=Image.LINEAR), - transforms.ToTensor(), - ]) + def test_pad_with_mode_F_images(self): + pad = 2 + transform = transforms.Pad(pad) + + img = Image.new("F", (10, 10)) + padded_img = transform(img) + assert_equal(padded_img.size, [edge_size + 2 * pad for edge_size in img.size]) + + +@pytest.mark.parametrize( + "fn, trans, kwargs", + [ + (F.invert, 
transforms.RandomInvert, {}), + (F.posterize, transforms.RandomPosterize, {"bits": 4}), + (F.solarize, transforms.RandomSolarize, {"threshold": 192}), + (F.adjust_sharpness, transforms.RandomAdjustSharpness, {"sharpness_factor": 2.0}), + (F.autocontrast, transforms.RandomAutocontrast, {}), + (F.equalize, transforms.RandomEqualize, {}), + (F.vflip, transforms.RandomVerticalFlip, {}), + (F.hflip, transforms.RandomHorizontalFlip, {}), + (partial(F.to_grayscale, num_output_channels=3), transforms.RandomGrayscale, {}), + ], +) +@pytest.mark.parametrize("seed", range(10)) +@pytest.mark.parametrize("p", (0, 1)) +def test_randomness(fn, trans, kwargs, seed, p): + torch.manual_seed(seed) + img = transforms.ToPILImage()(torch.rand(3, 16, 18)) + + expected_transformed_img = fn(img, **kwargs) + randomly_transformed_img = trans(p=p, **kwargs)(img) + + if p == 0: + assert randomly_transformed_img == img + elif p == 1: + assert randomly_transformed_img == expected_transformed_img + + trans(**kwargs).__repr__() + + +def test_autocontrast_equal_minmax(): + img_tensor = torch.tensor([[[10]], [[128]], [[245]]], dtype=torch.uint8).expand(3, 32, 32) + img_pil = F.to_pil_image(img_tensor) + + img_tensor = F.autocontrast(img_tensor) + img_pil = F.autocontrast(img_pil) + torch.testing.assert_close(img_tensor, F.pil_to_tensor(img_pil)) + + +class TestToPil: + def _get_1_channel_tensor_various_types(): + img_data_float = torch.Tensor(1, 4, 4).uniform_() + expected_output = img_data_float.mul(255).int().float().div(255).numpy() + yield img_data_float, expected_output, "L" - # Checking if Compose, Resize and ToTensor can be printed as string - trans.__repr__() + img_data_byte = torch.ByteTensor(1, 4, 4).random_(0, 255) + expected_output = img_data_byte.float().div(255.0).numpy() + yield img_data_byte, expected_output, "L" - expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB')) - output = trans(accimage.Image(GRACE_HOPPER)) + img_data_short = torch.ShortTensor(1, 4, 4).random_() + 
expected_output = img_data_short.numpy() + yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B" - self.assertEqual(expected_output.size(), output.size()) - self.assertLess(np.abs((expected_output - output).mean()), 1e-3) - self.assertLess((expected_output - output).var(), 1e-5) - # note the high absolute tolerance - self.assertTrue(np.allclose(output.numpy(), expected_output.numpy(), atol=5e-2)) + img_data_int = torch.IntTensor(1, 4, 4).random_() + expected_output = img_data_int.numpy() + yield img_data_int, expected_output, "I" - @unittest.skipIf(accimage is None, 'accimage not available') - def test_accimage_crop(self): - trans = transforms.Compose([ - transforms.CenterCrop(256), - transforms.ToTensor(), - ]) + def _get_2d_tensor_various_types(): + img_data_float = torch.Tensor(4, 4).uniform_() + expected_output = img_data_float.mul(255).int().float().div(255).numpy() + yield img_data_float, expected_output, "L" - # Checking if Compose, CenterCrop and ToTensor can be printed as string - trans.__repr__() + img_data_byte = torch.ByteTensor(4, 4).random_(0, 255) + expected_output = img_data_byte.float().div(255.0).numpy() + yield img_data_byte, expected_output, "L" - expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB')) - output = trans(accimage.Image(GRACE_HOPPER)) + img_data_short = torch.ShortTensor(4, 4).random_() + expected_output = img_data_short.numpy() + yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B" - self.assertEqual(expected_output.size(), output.size()) - self.assertTrue(np.allclose(output.numpy(), expected_output.numpy())) + img_data_int = torch.IntTensor(4, 4).random_() + expected_output = img_data_int.numpy() + yield img_data_int, expected_output, "I" - def test_1_channel_tensor_to_pil_image(self): + @pytest.mark.parametrize("with_mode", [False, True]) + @pytest.mark.parametrize("img_data, expected_output, expected_mode", _get_1_channel_tensor_various_types()) + def 
test_1_channel_tensor_to_pil_image(self, with_mode, img_data, expected_output, expected_mode): + transform = transforms.ToPILImage(mode=expected_mode) if with_mode else transforms.ToPILImage() to_tensor = transforms.ToTensor() - img_data_float = torch.Tensor(1, 4, 4).uniform_() - img_data_byte = torch.ByteTensor(1, 4, 4).random_(0, 255) - img_data_short = torch.ShortTensor(1, 4, 4).random_() - img_data_int = torch.IntTensor(1, 4, 4).random_() + img = transform(img_data) + assert img.mode == expected_mode + torch.testing.assert_close(expected_output, to_tensor(img).numpy()) - inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] - expected_outputs = [img_data_float.mul(255).int().float().div(255).numpy(), - img_data_byte.float().div(255.0).numpy(), - img_data_short.numpy(), - img_data_int.numpy()] - expected_modes = ['L', 'L', 'I;16', 'I'] - - for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes): - for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]: - img = transform(img_data) - self.assertEqual(img.mode, mode) - self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy())) + def test_1_channel_float_tensor_to_pil_image(self): + img_data = torch.Tensor(1, 4, 4).uniform_() # 'F' mode for torch.FloatTensor - img_F_mode = transforms.ToPILImage(mode='F')(img_data_float) - self.assertEqual(img_F_mode.mode, 'F') - self.assertTrue(np.allclose(np.array(Image.fromarray(img_data_float.squeeze(0).numpy(), mode='F')), - np.array(img_F_mode))) - - def test_1_channel_ndarray_to_pil_image(self): - img_data_float = torch.Tensor(4, 4, 1).uniform_().numpy() - img_data_byte = torch.ByteTensor(4, 4, 1).random_(0, 255).numpy() - img_data_short = torch.ShortTensor(4, 4, 1).random_().numpy() - img_data_int = torch.IntTensor(4, 4, 1).random_().numpy() - - inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] - expected_modes = ['F', 'L', 'I;16', 'I'] - for img_data, mode in zip(inputs, 
expected_modes): - for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]: - img = transform(img_data) - self.assertEqual(img.mode, mode) - self.assertTrue(np.allclose(img_data[:, :, 0], img)) - - def test_2_channel_ndarray_to_pil_image(self): - def verify_img_data(img_data, mode): - if mode is None: - img = transforms.ToPILImage()(img_data) - self.assertEqual(img.mode, 'LA') # default should assume LA - else: - img = transforms.ToPILImage(mode=mode)(img_data) - self.assertEqual(img.mode, mode) - split = img.split() - for i in range(2): - self.assertTrue(np.allclose(img_data[:, :, i], split[i])) + img_F_mode = transforms.ToPILImage(mode="F")(img_data) + assert img_F_mode.mode == "F" + torch.testing.assert_close( + np.array(Image.fromarray(img_data.squeeze(0).numpy(), mode="F")), np.array(img_F_mode) + ) + @pytest.mark.parametrize("with_mode", [False, True]) + @pytest.mark.parametrize( + "img_data, expected_mode", + [ + (torch.Tensor(4, 4, 1).uniform_().numpy(), "L"), + (torch.ByteTensor(4, 4, 1).random_(0, 255).numpy(), "L"), + (torch.ShortTensor(4, 4, 1).random_().numpy(), "I;16" if sys.byteorder == "little" else "I;16B"), + (torch.IntTensor(4, 4, 1).random_().numpy(), "I"), + ], + ) + def test_1_channel_ndarray_to_pil_image(self, with_mode, img_data, expected_mode): + transform = transforms.ToPILImage(mode=expected_mode) if with_mode else transforms.ToPILImage() + img = transform(img_data) + assert img.mode == expected_mode + if np.issubdtype(img_data.dtype, np.floating): + img_data = (img_data * 255).astype(np.uint8) + # note: we explicitly convert img's dtype because pytorch doesn't support uint16 + # and otherwise assert_close wouldn't be able to construct a tensor from the uint16 array + torch.testing.assert_close(img_data[:, :, 0], np.asarray(img).astype(img_data.dtype)) + + @pytest.mark.parametrize("expected_mode", [None, "LA"]) + def test_2_channel_ndarray_to_pil_image(self, expected_mode): img_data = torch.ByteTensor(4, 4, 
2).random_(0, 255).numpy() - for mode in [None, 'LA']: - verify_img_data(img_data, mode) + if expected_mode is None: + img = transforms.ToPILImage()(img_data) + assert img.mode == "LA" # default should assume LA + else: + img = transforms.ToPILImage(mode=expected_mode)(img_data) + assert img.mode == expected_mode + split = img.split() + for i in range(2): + torch.testing.assert_close(img_data[:, :, i], np.asarray(split[i])) + + def test_2_channel_ndarray_to_pil_image_error(self): + img_data = torch.ByteTensor(4, 4, 2).random_(0, 255).numpy() transforms.ToPILImage().__repr__() - with self.assertRaises(ValueError): - # should raise if we try a mode for 4 or 1 or 3 channel images - transforms.ToPILImage(mode='RGBA')(img_data) - transforms.ToPILImage(mode='P')(img_data) - transforms.ToPILImage(mode='RGB')(img_data) - - def test_2_channel_tensor_to_pil_image(self): - def verify_img_data(img_data, expected_output, mode): - if mode is None: - img = transforms.ToPILImage()(img_data) - self.assertEqual(img.mode, 'LA') # default should assume LA - else: - img = transforms.ToPILImage(mode=mode)(img_data) - self.assertEqual(img.mode, mode) - split = img.split() - for i in range(2): - self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy())) + # should raise if we try a mode for 4 or 1 or 3 channel images + with pytest.raises(ValueError, match=r"Only modes \['LA'\] are supported for 2D inputs"): + transforms.ToPILImage(mode="RGBA")(img_data) + with pytest.raises(ValueError, match=r"Only modes \['LA'\] are supported for 2D inputs"): + transforms.ToPILImage(mode="P")(img_data) + with pytest.raises(ValueError, match=r"Only modes \['LA'\] are supported for 2D inputs"): + transforms.ToPILImage(mode="RGB")(img_data) + @pytest.mark.parametrize("expected_mode", [None, "LA"]) + def test_2_channel_tensor_to_pil_image(self, expected_mode): img_data = torch.Tensor(2, 4, 4).uniform_() expected_output = img_data.mul(255).int().float().div(255) - for mode in 
[None, 'LA']: - verify_img_data(img_data, expected_output, mode=mode) - - with self.assertRaises(ValueError): - # should raise if we try a mode for 4 or 1 or 3 channel images - transforms.ToPILImage(mode='RGBA')(img_data) - transforms.ToPILImage(mode='P')(img_data) - transforms.ToPILImage(mode='RGB')(img_data) - - def test_3_channel_tensor_to_pil_image(self): - def verify_img_data(img_data, expected_output, mode): - if mode is None: - img = transforms.ToPILImage()(img_data) - self.assertEqual(img.mode, 'RGB') # default should assume RGB - else: - img = transforms.ToPILImage(mode=mode)(img_data) - self.assertEqual(img.mode, mode) - split = img.split() - for i in range(3): - self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy())) + if expected_mode is None: + img = transforms.ToPILImage()(img_data) + assert img.mode == "LA" # default should assume LA + else: + img = transforms.ToPILImage(mode=expected_mode)(img_data) + assert img.mode == expected_mode + + split = img.split() + for i in range(2): + torch.testing.assert_close(expected_output[i].numpy(), F.to_tensor(split[i]).squeeze(0).numpy()) + + def test_2_channel_tensor_to_pil_image_error(self): + img_data = torch.Tensor(2, 4, 4).uniform_() + + # should raise if we try a mode for 4 or 1 or 3 channel images + with pytest.raises(ValueError, match=r"Only modes \['LA'\] are supported for 2D inputs"): + transforms.ToPILImage(mode="RGBA")(img_data) + with pytest.raises(ValueError, match=r"Only modes \['LA'\] are supported for 2D inputs"): + transforms.ToPILImage(mode="P")(img_data) + with pytest.raises(ValueError, match=r"Only modes \['LA'\] are supported for 2D inputs"): + transforms.ToPILImage(mode="RGB")(img_data) + + @pytest.mark.parametrize("with_mode", [False, True]) + @pytest.mark.parametrize("img_data, expected_output, expected_mode", _get_2d_tensor_various_types()) + def test_2d_tensor_to_pil_image(self, with_mode, img_data, expected_output, expected_mode): + transform = 
transforms.ToPILImage(mode=expected_mode) if with_mode else transforms.ToPILImage() + to_tensor = transforms.ToTensor() + img = transform(img_data) + assert img.mode == expected_mode + torch.testing.assert_close(expected_output, to_tensor(img).numpy()[0]) + + @pytest.mark.parametrize("with_mode", [False, True]) + @pytest.mark.parametrize( + "img_data, expected_mode", + [ + (torch.Tensor(4, 4).uniform_().numpy(), "L"), + (torch.ByteTensor(4, 4).random_(0, 255).numpy(), "L"), + (torch.ShortTensor(4, 4).random_().numpy(), "I;16" if sys.byteorder == "little" else "I;16B"), + (torch.IntTensor(4, 4).random_().numpy(), "I"), + ], + ) + def test_2d_ndarray_to_pil_image(self, with_mode, img_data, expected_mode): + transform = transforms.ToPILImage(mode=expected_mode) if with_mode else transforms.ToPILImage() + img = transform(img_data) + assert img.mode == expected_mode + if np.issubdtype(img_data.dtype, np.floating): + img_data = (img_data * 255).astype(np.uint8) + np.testing.assert_allclose(img_data, img) + + @pytest.mark.parametrize("expected_mode", [None, "RGB", "HSV", "YCbCr"]) + def test_3_channel_tensor_to_pil_image(self, expected_mode): img_data = torch.Tensor(3, 4, 4).uniform_() expected_output = img_data.mul(255).int().float().div(255) - for mode in [None, 'RGB', 'HSV', 'YCbCr']: - verify_img_data(img_data, expected_output, mode=mode) - - with self.assertRaises(ValueError): - # should raise if we try a mode for 4 or 1 or 2 channel images - transforms.ToPILImage(mode='RGBA')(img_data) - transforms.ToPILImage(mode='P')(img_data) - transforms.ToPILImage(mode='LA')(img_data) - with self.assertRaises(ValueError): + if expected_mode is None: + img = transforms.ToPILImage()(img_data) + assert img.mode == "RGB" # default should assume RGB + else: + img = transforms.ToPILImage(mode=expected_mode)(img_data) + assert img.mode == expected_mode + split = img.split() + for i in range(3): + torch.testing.assert_close(expected_output[i].numpy(), 
F.to_tensor(split[i]).squeeze(0).numpy()) + + def test_3_channel_tensor_to_pil_image_error(self): + img_data = torch.Tensor(3, 4, 4).uniform_() + error_message_3d = r"Only modes \['RGB', 'YCbCr', 'HSV'\] are supported for 3D inputs" + # should raise if we try a mode for 4 or 1 or 2 channel images + with pytest.raises(ValueError, match=error_message_3d): + transforms.ToPILImage(mode="RGBA")(img_data) + with pytest.raises(ValueError, match=error_message_3d): + transforms.ToPILImage(mode="P")(img_data) + with pytest.raises(ValueError, match=error_message_3d): + transforms.ToPILImage(mode="LA")(img_data) + + with pytest.raises(ValueError, match=r"pic should be 2/3 dimensional. Got \d+ dimensions."): transforms.ToPILImage()(torch.Tensor(1, 3, 4, 4).uniform_()) - def test_3_channel_ndarray_to_pil_image(self): - def verify_img_data(img_data, mode): - if mode is None: - img = transforms.ToPILImage()(img_data) - self.assertEqual(img.mode, 'RGB') # default should assume RGB - else: - img = transforms.ToPILImage(mode=mode)(img_data) - self.assertEqual(img.mode, mode) - split = img.split() - for i in range(3): - self.assertTrue(np.allclose(img_data[:, :, i], split[i])) + @pytest.mark.parametrize("expected_mode", [None, "RGB", "HSV", "YCbCr"]) + def test_3_channel_ndarray_to_pil_image(self, expected_mode): + img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy() + if expected_mode is None: + img = transforms.ToPILImage()(img_data) + assert img.mode == "RGB" # default should assume RGB + else: + img = transforms.ToPILImage(mode=expected_mode)(img_data) + assert img.mode == expected_mode + split = img.split() + for i in range(3): + torch.testing.assert_close(img_data[:, :, i], np.asarray(split[i])) + + def test_3_channel_ndarray_to_pil_image_error(self): img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy() - for mode in [None, 'RGB', 'HSV', 'YCbCr']: - verify_img_data(img_data, mode) # Checking if ToPILImage can be printed as string 
transforms.ToPILImage().__repr__() - with self.assertRaises(ValueError): - # should raise if we try a mode for 4 or 1 or 2 channel images - transforms.ToPILImage(mode='RGBA')(img_data) - transforms.ToPILImage(mode='P')(img_data) - transforms.ToPILImage(mode='LA')(img_data) - - def test_4_channel_tensor_to_pil_image(self): - def verify_img_data(img_data, expected_output, mode): - if mode is None: - img = transforms.ToPILImage()(img_data) - self.assertEqual(img.mode, 'RGBA') # default should assume RGBA - else: - img = transforms.ToPILImage(mode=mode)(img_data) - self.assertEqual(img.mode, mode) - - split = img.split() - for i in range(4): - self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy())) - + error_message_3d = r"Only modes \['RGB', 'YCbCr', 'HSV'\] are supported for 3D inputs" + # should raise if we try a mode for 4 or 1 or 2 channel images + with pytest.raises(ValueError, match=error_message_3d): + transforms.ToPILImage(mode="RGBA")(img_data) + with pytest.raises(ValueError, match=error_message_3d): + transforms.ToPILImage(mode="P")(img_data) + with pytest.raises(ValueError, match=error_message_3d): + transforms.ToPILImage(mode="LA")(img_data) + + @pytest.mark.parametrize("expected_mode", [None, "RGBA", "CMYK", "RGBX"]) + def test_4_channel_tensor_to_pil_image(self, expected_mode): img_data = torch.Tensor(4, 4, 4).uniform_() expected_output = img_data.mul(255).int().float().div(255) - for mode in [None, 'RGBA', 'CMYK', 'RGBX']: - verify_img_data(img_data, expected_output, mode) - - with self.assertRaises(ValueError): - # should raise if we try a mode for 3 or 1 or 2 channel images - transforms.ToPILImage(mode='RGB')(img_data) - transforms.ToPILImage(mode='P')(img_data) - transforms.ToPILImage(mode='LA')(img_data) - - def test_4_channel_ndarray_to_pil_image(self): - def verify_img_data(img_data, mode): - if mode is None: - img = transforms.ToPILImage()(img_data) - self.assertEqual(img.mode, 'RGBA') # default should assume RGBA 
- else: - img = transforms.ToPILImage(mode=mode)(img_data) - self.assertEqual(img.mode, mode) - split = img.split() - for i in range(4): - self.assertTrue(np.allclose(img_data[:, :, i], split[i])) - img_data = torch.ByteTensor(4, 4, 4).random_(0, 255).numpy() - for mode in [None, 'RGBA', 'CMYK', 'RGBX']: - verify_img_data(img_data, mode) + if expected_mode is None: + img = transforms.ToPILImage()(img_data) + assert img.mode == "RGBA" # default should assume RGBA + else: + img = transforms.ToPILImage(mode=expected_mode)(img_data) + assert img.mode == expected_mode - with self.assertRaises(ValueError): - # should raise if we try a mode for 3 or 1 or 2 channel images - transforms.ToPILImage(mode='RGB')(img_data) - transforms.ToPILImage(mode='P')(img_data) - transforms.ToPILImage(mode='LA')(img_data) + split = img.split() + for i in range(4): + torch.testing.assert_close(expected_output[i].numpy(), F.to_tensor(split[i]).squeeze(0).numpy()) - def test_2d_tensor_to_pil_image(self): - to_tensor = transforms.ToTensor() + def test_4_channel_tensor_to_pil_image_error(self): + img_data = torch.Tensor(4, 4, 4).uniform_() - img_data_float = torch.Tensor(4, 4).uniform_() - img_data_byte = torch.ByteTensor(4, 4).random_(0, 255) - img_data_short = torch.ShortTensor(4, 4).random_() - img_data_int = torch.IntTensor(4, 4).random_() + error_message_4d = r"Only modes \['RGBA', 'CMYK', 'RGBX'\] are supported for 4D inputs" + # should raise if we try a mode for 3 or 1 or 2 channel images + with pytest.raises(ValueError, match=error_message_4d): + transforms.ToPILImage(mode="RGB")(img_data) + with pytest.raises(ValueError, match=error_message_4d): + transforms.ToPILImage(mode="P")(img_data) + with pytest.raises(ValueError, match=error_message_4d): + transforms.ToPILImage(mode="LA")(img_data) + + @pytest.mark.parametrize("expected_mode", [None, "RGBA", "CMYK", "RGBX"]) + def test_4_channel_ndarray_to_pil_image(self, expected_mode): + img_data = torch.ByteTensor(4, 4, 4).random_(0, 
255).numpy() - inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] - expected_outputs = [img_data_float.mul(255).int().float().div(255).numpy(), - img_data_byte.float().div(255.0).numpy(), - img_data_short.numpy(), - img_data_int.numpy()] - expected_modes = ['L', 'L', 'I;16', 'I'] - - for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes): - for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]: - img = transform(img_data) - self.assertEqual(img.mode, mode) - self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy())) - - def test_2d_ndarray_to_pil_image(self): - img_data_float = torch.Tensor(4, 4).uniform_().numpy() - img_data_byte = torch.ByteTensor(4, 4).random_(0, 255).numpy() - img_data_short = torch.ShortTensor(4, 4).random_().numpy() - img_data_int = torch.IntTensor(4, 4).random_().numpy() - - inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] - expected_modes = ['F', 'L', 'I;16', 'I'] - for img_data, mode in zip(inputs, expected_modes): - for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]: - img = transform(img_data) - self.assertEqual(img.mode, mode) - self.assertTrue(np.allclose(img_data, img)) + if expected_mode is None: + img = transforms.ToPILImage()(img_data) + assert img.mode == "RGBA" # default should assume RGBA + else: + img = transforms.ToPILImage(mode=expected_mode)(img_data) + assert img.mode == expected_mode + split = img.split() + for i in range(4): + torch.testing.assert_close(img_data[:, :, i], np.asarray(split[i])) + + def test_4_channel_ndarray_to_pil_image_error(self): + img_data = torch.ByteTensor(4, 4, 4).random_(0, 255).numpy() - def test_tensor_bad_types_to_pil_image(self): - with self.assertRaises(ValueError): - transforms.ToPILImage()(torch.ones(1, 3, 4, 4)) + error_message_4d = r"Only modes \['RGBA', 'CMYK', 'RGBX'\] are supported for 4D inputs" + # should raise if we try a mode for 3 or 1 or 2 channel 
images + with pytest.raises(ValueError, match=error_message_4d): + transforms.ToPILImage(mode="RGB")(img_data) + with pytest.raises(ValueError, match=error_message_4d): + transforms.ToPILImage(mode="P")(img_data) + with pytest.raises(ValueError, match=error_message_4d): + transforms.ToPILImage(mode="LA")(img_data) def test_ndarray_bad_types_to_pil_image(self): trans = transforms.ToPILImage() - with self.assertRaises(TypeError): + reg_msg = r"Input type \w+ is not supported" + with pytest.raises(TypeError, match=reg_msg): trans(np.ones([4, 4, 1], np.int64)) + with pytest.raises(TypeError, match=reg_msg): trans(np.ones([4, 4, 1], np.uint16)) + with pytest.raises(TypeError, match=reg_msg): trans(np.ones([4, 4, 1], np.uint32)) - trans(np.ones([4, 4, 1], np.float64)) - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match=r"pic should be 2/3 dimensional. Got \d+ dimensions."): transforms.ToPILImage()(np.ones([1, 4, 4, 3])) + with pytest.raises(ValueError, match=r"pic should not have > 4 channels. Got \d+ channels."): + transforms.ToPILImage()(np.ones([4, 4, 6])) + + def test_tensor_bad_types_to_pil_image(self): + with pytest.raises(ValueError, match=r"pic should be 2/3 dimensional. Got \d+ dimensions."): + transforms.ToPILImage()(torch.ones(1, 3, 4, 4)) + with pytest.raises(ValueError, match=r"pic should not have > 4 channels. 
Got \d+ channels."): + transforms.ToPILImage()(torch.ones(6, 4, 4)) + + +def test_adjust_brightness(): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + + # test 0 + y_pil = F.adjust_brightness(x_pil, 1) + y_np = np.array(y_pil) + torch.testing.assert_close(y_np, x_np) + + # test 1 + y_pil = F.adjust_brightness(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [0, 2, 6, 27, 67, 113, 18, 4, 117, 45, 127, 0] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + # test 2 + y_pil = F.adjust_brightness(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [0, 10, 26, 108, 255, 255, 74, 16, 255, 180, 255, 2] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + +def test_adjust_contrast(): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + + # test 0 + y_pil = F.adjust_contrast(x_pil, 1) + y_np = np.array(y_pil) + torch.testing.assert_close(y_np, x_np) + + # test 1 + y_pil = F.adjust_contrast(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [43, 45, 49, 70, 110, 156, 61, 47, 160, 88, 170, 43] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + # test 2 + y_pil = F.adjust_contrast(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [0, 0, 0, 22, 184, 255, 0, 0, 255, 94, 255, 0] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + +def test_adjust_hue(): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + + with pytest.raises(ValueError): + F.adjust_hue(x_pil, -0.7) + F.adjust_hue(x_pil, 1) 
+ + # test 0: almost same as x_data but not exact. + # probably because hsv <-> rgb floating point ops + y_pil = F.adjust_hue(x_pil, 0) + y_np = np.array(y_pil) + y_ans = [0, 5, 13, 54, 139, 226, 35, 8, 234, 91, 255, 1] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + # test 1 + y_pil = F.adjust_hue(x_pil, 0.25) + y_np = np.array(y_pil) + y_ans = [13, 0, 12, 224, 54, 226, 234, 8, 99, 1, 222, 255] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + # test 2 + y_pil = F.adjust_hue(x_pil, -0.25) + y_np = np.array(y_pil) + y_ans = [0, 13, 2, 54, 226, 58, 8, 234, 152, 255, 43, 1] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + +def test_adjust_sharpness(): + x_shape = [4, 4, 3] + x_data = [ + 75, + 121, + 114, + 105, + 97, + 107, + 105, + 32, + 66, + 111, + 117, + 114, + 99, + 104, + 97, + 0, + 0, + 65, + 108, + 101, + 120, + 97, + 110, + 100, + 101, + 114, + 32, + 86, + 114, + 121, + 110, + 105, + 111, + 116, + 105, + 115, + 0, + 0, + 73, + 32, + 108, + 111, + 118, + 101, + 32, + 121, + 111, + 117, + ] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + + # test 0 + y_pil = F.adjust_sharpness(x_pil, 1) + y_np = np.array(y_pil) + torch.testing.assert_close(y_np, x_np) + + # test 1 + y_pil = F.adjust_sharpness(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [ + 75, + 121, + 114, + 105, + 97, + 107, + 105, + 32, + 66, + 111, + 117, + 114, + 99, + 104, + 97, + 30, + 30, + 74, + 103, + 96, + 114, + 97, + 110, + 100, + 101, + 114, + 32, + 81, + 103, + 108, + 102, + 101, + 107, + 116, + 105, + 115, + 0, + 0, + 73, + 32, + 108, + 111, + 118, + 101, + 32, + 121, + 111, + 117, + ] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + # test 2 + y_pil = F.adjust_sharpness(x_pil, 2) + y_np = np.array(y_pil) + y_ans = 
[ + 75, + 121, + 114, + 105, + 97, + 107, + 105, + 32, + 66, + 111, + 117, + 114, + 99, + 104, + 97, + 0, + 0, + 46, + 118, + 111, + 132, + 97, + 110, + 100, + 101, + 114, + 32, + 95, + 135, + 146, + 126, + 112, + 119, + 116, + 105, + 115, + 0, + 0, + 73, + 32, + 108, + 111, + 118, + 101, + 32, + 121, + 111, + 117, + ] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + # test 3 + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + x_th = torch.tensor(x_np.transpose(2, 0, 1)) + y_pil = F.adjust_sharpness(x_pil, 2) + y_np = np.array(y_pil).transpose(2, 0, 1) + y_th = F.adjust_sharpness(x_th, 2) + torch.testing.assert_close(y_np, y_th.numpy()) + + +def test_adjust_gamma(): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + + # test 0 + y_pil = F.adjust_gamma(x_pil, 1) + y_np = np.array(y_pil) + torch.testing.assert_close(y_np, x_np) + + # test 1 + y_pil = F.adjust_gamma(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [0, 35, 57, 117, 186, 241, 97, 45, 245, 152, 255, 16] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + # test 2 + y_pil = F.adjust_gamma(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [0, 0, 0, 11, 71, 201, 5, 0, 215, 31, 255, 0] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + torch.testing.assert_close(y_np, y_ans) + + +def test_adjusts_L_mode(): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_rgb = Image.fromarray(x_np, mode="RGB") + + x_l = x_rgb.convert("L") + assert F.adjust_brightness(x_l, 2).mode == "L" + assert F.adjust_saturation(x_l, 2).mode == "L" + assert 
F.adjust_contrast(x_l, 2).mode == "L" + assert F.adjust_hue(x_l, 0.4).mode == "L" + assert F.adjust_sharpness(x_l, 2).mode == "L" + assert F.adjust_gamma(x_l, 0.5).mode == "L" + + +def test_rotate(): + x = np.zeros((100, 100, 3), dtype=np.uint8) + x[40, 40] = [255, 255, 255] + + with pytest.raises(TypeError, match=r"img should be PIL Image"): + F.rotate(x, 10) + + img = F.to_pil_image(x) + + result = F.rotate(img, 45) + assert result.size == (100, 100) + r, c, ch = np.where(result) + assert all(x in r for x in [49, 50]) + assert all(x in c for x in [36]) + assert all(x in ch for x in [0, 1, 2]) + + result = F.rotate(img, 45, expand=True) + assert result.size == (142, 142) + r, c, ch = np.where(result) + assert all(x in r for x in [70, 71]) + assert all(x in c for x in [57]) + assert all(x in ch for x in [0, 1, 2]) + + result = F.rotate(img, 45, center=(40, 40)) + assert result.size == (100, 100) + r, c, ch = np.where(result) + assert all(x in r for x in [40]) + assert all(x in c for x in [40]) + assert all(x in ch for x in [0, 1, 2]) + + result_a = F.rotate(img, 90) + result_b = F.rotate(img, -270) + + assert_equal(np.array(result_a), np.array(result_b)) + + +@pytest.mark.parametrize("mode", ["L", "RGB", "F"]) +def test_rotate_fill(mode): + img = F.to_pil_image(np.ones((100, 100, 3), dtype=np.uint8) * 255, "RGB") + + num_bands = len(mode) + wrong_num_bands = num_bands + 1 + fill = 127 + + img_conv = img.convert(mode) + img_rot = F.rotate(img_conv, 45.0, fill=fill) + pixel = img_rot.getpixel((0, 0)) + + if not isinstance(pixel, tuple): + pixel = (pixel,) + assert pixel == tuple([fill] * num_bands) + + with pytest.raises(ValueError): + F.rotate(img_conv, 45.0, fill=tuple([fill] * wrong_num_bands)) + + +def test_gaussian_blur_asserts(): + np_img = np.ones((100, 100, 3), dtype=np.uint8) * 255 + img = F.to_pil_image(np_img, "RGB") + + with pytest.raises(ValueError, match=r"If kernel_size is a sequence its length should be 2"): + F.gaussian_blur(img, [3]) + with 
pytest.raises(ValueError, match=r"If kernel_size is a sequence its length should be 2"): + F.gaussian_blur(img, [3, 3, 3]) + with pytest.raises(ValueError, match=r"Kernel size should be a tuple/list of two integers"): + transforms.GaussianBlur([3, 3, 3]) + + with pytest.raises(ValueError, match=r"kernel_size should have odd and positive integers"): + F.gaussian_blur(img, [4, 4]) + with pytest.raises(ValueError, match=r"Kernel size value should be an odd and positive number"): + transforms.GaussianBlur([4, 4]) + + with pytest.raises(ValueError, match=r"kernel_size should have odd and positive integers"): + F.gaussian_blur(img, [-3, -3]) + with pytest.raises(ValueError, match=r"Kernel size value should be an odd and positive number"): + transforms.GaussianBlur([-3, -3]) + + with pytest.raises(ValueError, match=r"If sigma is a sequence, its length should be 2"): + F.gaussian_blur(img, 3, [1, 1, 1]) + with pytest.raises(ValueError, match=r"sigma should be a single number or a list/tuple with length 2"): + transforms.GaussianBlur(3, [1, 1, 1]) + + with pytest.raises(ValueError, match=r"sigma should have positive values"): + F.gaussian_blur(img, 3, -1.0) + with pytest.raises(ValueError, match=r"If sigma is a single number, it must be positive"): + transforms.GaussianBlur(3, -1.0) + + with pytest.raises(TypeError, match=r"kernel_size should be int or a sequence of integers"): + F.gaussian_blur(img, "kernel_size_string") + with pytest.raises(ValueError, match=r"Kernel size should be a tuple/list of two integers"): + transforms.GaussianBlur("kernel_size_string") + + with pytest.raises(TypeError, match=r"sigma should be either float or sequence of floats"): + F.gaussian_blur(img, 3, "sigma_string") + with pytest.raises(ValueError, match=r"sigma should be a single number or a list/tuple with length 2"): + transforms.GaussianBlur(3, "sigma_string") + + +def test_lambda(): + trans = transforms.Lambda(lambda x: x.add(10)) + x = torch.randn(10) + y = trans(x) + assert_equal(y, 
torch.add(x, 10)) + + trans = transforms.Lambda(lambda x: x.add_(10)) + x = torch.randn(10) + y = trans(x) + assert_equal(y, x) + + # Checking if Lambda can be printed as string + trans.__repr__() + + +def test_to_grayscale(): + """Unit tests for grayscale transform""" + + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + x_pil_2 = x_pil.convert("L") + gray_np = np.array(x_pil_2) + + # Test Set: Grayscale an image with desired number of output channels + # Case 1: RGB -> 1 channel grayscale + trans1 = transforms.Grayscale(num_output_channels=1) + gray_pil_1 = trans1(x_pil) + gray_np_1 = np.array(gray_pil_1) + assert gray_pil_1.mode == "L", "mode should be L" + assert gray_np_1.shape == tuple(x_shape[0:2]), "should be 1 channel" + assert_equal(gray_np, gray_np_1) + + # Case 2: RGB -> 3 channel grayscale + trans2 = transforms.Grayscale(num_output_channels=3) + gray_pil_2 = trans2(x_pil) + gray_np_2 = np.array(gray_pil_2) + assert gray_pil_2.mode == "RGB", "mode should be RGB" + assert gray_np_2.shape == tuple(x_shape), "should be 3 channel" + assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) + assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) + assert_equal(gray_np, gray_np_2[:, :, 0]) + + # Case 3: 1 channel grayscale -> 1 channel grayscale + trans3 = transforms.Grayscale(num_output_channels=1) + gray_pil_3 = trans3(x_pil_2) + gray_np_3 = np.array(gray_pil_3) + assert gray_pil_3.mode == "L", "mode should be L" + assert gray_np_3.shape == tuple(x_shape[0:2]), "should be 1 channel" + assert_equal(gray_np, gray_np_3) + + # Case 4: 1 channel grayscale -> 3 channel grayscale + trans4 = transforms.Grayscale(num_output_channels=3) + gray_pil_4 = trans4(x_pil_2) + gray_np_4 = np.array(gray_pil_4) + assert gray_pil_4.mode == "RGB", "mode should be RGB" + assert gray_np_4.shape == tuple(x_shape), "should be 3 channel" + 
assert_equal(gray_np_4[:, :, 0], gray_np_4[:, :, 1]) + assert_equal(gray_np_4[:, :, 1], gray_np_4[:, :, 2]) + assert_equal(gray_np, gray_np_4[:, :, 0]) + + # Checking if Grayscale can be printed as string + trans4.__repr__() + + +@pytest.mark.parametrize("seed", range(10)) +@pytest.mark.parametrize("p", (0, 1)) +def test_random_apply(p, seed): + torch.manual_seed(seed) + random_apply_transform = transforms.RandomApply([transforms.RandomRotation((45, 50))], p=p) + img = transforms.ToPILImage()(torch.rand(3, 30, 40)) + out = random_apply_transform(img) + if p == 0: + assert out == img + elif p == 1: + assert out != img + + # Checking if RandomApply can be printed as string + random_apply_transform.__repr__() + + +@pytest.mark.parametrize("seed", range(10)) +@pytest.mark.parametrize("proba_passthrough", (0, 1)) +def test_random_choice(proba_passthrough, seed): + random.seed(seed) # RandomChoice relies on python builtin random.choice, not pytorch + + random_choice_transform = transforms.RandomChoice( + [ + lambda x: x, # passthrough + transforms.RandomRotation((45, 50)), + ], + p=[proba_passthrough, 1 - proba_passthrough], + ) + + img = transforms.ToPILImage()(torch.rand(3, 30, 40)) + out = random_choice_transform(img) + if proba_passthrough == 1: + assert out == img + elif proba_passthrough == 0: + assert out != img + + # Checking if RandomChoice can be printed as string + random_choice_transform.__repr__() + + +@pytest.mark.skipif(stats is None, reason="scipy.stats not available") +def test_random_order(): + random_state = random.getstate() + random.seed(42) + random_order_transform = transforms.RandomOrder([transforms.Resize(20, antialias=True), transforms.CenterCrop(10)]) + img = transforms.ToPILImage()(torch.rand(3, 25, 25)) + num_samples = 250 + num_normal_order = 0 + resize_crop_out = transforms.CenterCrop(10)(transforms.Resize(20, antialias=True)(img)) + for _ in range(num_samples): + out = random_order_transform(img) + if out == resize_crop_out: + 
num_normal_order += 1 + + p_value = stats.binomtest(num_normal_order, num_samples, p=0.5).pvalue + random.setstate(random_state) + assert p_value > 0.0001 + + # Checking if RandomOrder can be printed as string + random_order_transform.__repr__() + + +def test_linear_transformation(): + num_samples = 1000 + x = torch.randn(num_samples, 3, 10, 10) + flat_x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3)) + # compute principal components + sigma = torch.mm(flat_x.t(), flat_x) / flat_x.size(0) + u, s, _ = np.linalg.svd(sigma.numpy()) + zca_epsilon = 1e-10 # avoid division by 0 + d = torch.Tensor(np.diag(1.0 / np.sqrt(s + zca_epsilon))) + u = torch.Tensor(u) + principal_components = torch.mm(torch.mm(u, d), u.t()) + mean_vector = torch.sum(flat_x, dim=0) / flat_x.size(0) + # initialize whitening matrix + whitening = transforms.LinearTransformation(principal_components, mean_vector) + # estimate covariance and mean using weak law of large number + num_features = flat_x.size(1) + cov = 0.0 + mean = 0.0 + for i in x: + xwhite = whitening(i) + xwhite = xwhite.view(1, -1).numpy() + cov += np.dot(xwhite, xwhite.T) / num_features + mean += np.sum(xwhite) / num_features + # if rtol for std = 1e-3 then rtol for cov = 2e-3 as std**2 = cov + torch.testing.assert_close( + cov / num_samples, np.identity(1), rtol=2e-3, atol=1e-8, check_dtype=False, msg="cov not close to 1" + ) + torch.testing.assert_close( + mean / num_samples, 0, rtol=1e-3, atol=1e-8, check_dtype=False, msg="mean not close to 0" + ) + + # Checking if LinearTransformation can be printed as string + whitening.__repr__() + + +@pytest.mark.parametrize("dtype", int_dtypes()) +def test_max_value(dtype): + + assert F_t._max_value(dtype) == torch.iinfo(dtype).max + # remove float testing as it can lead to errors such as + # runtime error: 5.7896e+76 is outside the range of representable values of type 'float' + # for dtype in float_dtypes(): + # self.assertGreater(F_t._max_value(dtype), torch.finfo(dtype).max) + + 
+@pytest.mark.xfail( + reason="torch.iinfo() is not supported by torchscript. See https://github.com/pytorch/pytorch/issues/41492." +) +def test_max_value_iinfo(): + @torch.jit.script + def max_value(image: torch.Tensor) -> int: + return 1 if image.is_floating_point() else torch.iinfo(image.dtype).max + + +@pytest.mark.parametrize("should_vflip", [True, False]) +@pytest.mark.parametrize("single_dim", [True, False]) +def test_ten_crop(should_vflip, single_dim): + to_pil_image = transforms.ToPILImage() + h = random.randint(5, 25) + w = random.randint(5, 25) + crop_h = random.randint(1, h) + crop_w = random.randint(1, w) + if single_dim: + crop_h = min(crop_h, crop_w) + crop_w = crop_h + transform = transforms.TenCrop(crop_h, vertical_flip=should_vflip) + five_crop = transforms.FiveCrop(crop_h) + else: + transform = transforms.TenCrop((crop_h, crop_w), vertical_flip=should_vflip) + five_crop = transforms.FiveCrop((crop_h, crop_w)) + + img = to_pil_image(torch.FloatTensor(3, h, w).uniform_()) + results = transform(img) + expected_output = five_crop(img) + + # Checking if FiveCrop and TenCrop can be printed as string + transform.__repr__() + five_crop.__repr__() + + if should_vflip: + vflipped_img = img.transpose(Image.FLIP_TOP_BOTTOM) + expected_output += five_crop(vflipped_img) + else: + hflipped_img = img.transpose(Image.FLIP_LEFT_RIGHT) + expected_output += five_crop(hflipped_img) + + assert len(results) == 10 + assert results == expected_output + + +@pytest.mark.parametrize("single_dim", [True, False]) +def test_five_crop(single_dim): + to_pil_image = transforms.ToPILImage() + h = random.randint(5, 25) + w = random.randint(5, 25) + crop_h = random.randint(1, h) + crop_w = random.randint(1, w) + if single_dim: + crop_h = min(crop_h, crop_w) + crop_w = crop_h + transform = transforms.FiveCrop(crop_h) + else: + transform = transforms.FiveCrop((crop_h, crop_w)) + + img = torch.FloatTensor(3, h, w).uniform_() + + results = transform(to_pil_image(img)) + + assert 
len(results) == 5 + for crop in results: + assert crop.size == (crop_w, crop_h) + + to_pil_image = transforms.ToPILImage() + tl = to_pil_image(img[:, 0:crop_h, 0:crop_w]) + tr = to_pil_image(img[:, 0:crop_h, w - crop_w :]) + bl = to_pil_image(img[:, h - crop_h :, 0:crop_w]) + br = to_pil_image(img[:, h - crop_h :, w - crop_w :]) + center = transforms.CenterCrop((crop_h, crop_w))(to_pil_image(img)) + expected_output = (tl, tr, bl, br, center) + assert results == expected_output + + +@pytest.mark.parametrize("policy", transforms.AutoAugmentPolicy) +@pytest.mark.parametrize("fill", [None, 85, (128, 128, 128)]) +@pytest.mark.parametrize("grayscale", [True, False]) +def test_autoaugment(policy, fill, grayscale): + random.seed(42) + img = Image.open(GRACE_HOPPER) + if grayscale: + img, fill = _get_grayscale_test_image(img, fill) + transform = transforms.AutoAugment(policy=policy, fill=fill) + for _ in range(100): + img = transform(img) + transform.__repr__() + + +@pytest.mark.parametrize("num_ops", [1, 2, 3]) +@pytest.mark.parametrize("magnitude", [7, 9, 11]) +@pytest.mark.parametrize("fill", [None, 85, (128, 128, 128)]) +@pytest.mark.parametrize("grayscale", [True, False]) +def test_randaugment(num_ops, magnitude, fill, grayscale): + random.seed(42) + img = Image.open(GRACE_HOPPER) + if grayscale: + img, fill = _get_grayscale_test_image(img, fill) + transform = transforms.RandAugment(num_ops=num_ops, magnitude=magnitude, fill=fill) + for _ in range(100): + img = transform(img) + transform.__repr__() + + +@pytest.mark.parametrize("fill", [None, 85, (128, 128, 128)]) +@pytest.mark.parametrize("num_magnitude_bins", [10, 13, 30]) +@pytest.mark.parametrize("grayscale", [True, False]) +def test_trivialaugmentwide(fill, num_magnitude_bins, grayscale): + random.seed(42) + img = Image.open(GRACE_HOPPER) + if grayscale: + img, fill = _get_grayscale_test_image(img, fill) + transform = transforms.TrivialAugmentWide(fill=fill, num_magnitude_bins=num_magnitude_bins) + for _ in 
range(100): + img = transform(img) + transform.__repr__() + + +@pytest.mark.parametrize("fill", [None, 85, (128, 128, 128)]) +@pytest.mark.parametrize("severity", [1, 10]) +@pytest.mark.parametrize("mixture_width", [1, 2]) +@pytest.mark.parametrize("chain_depth", [-1, 2]) +@pytest.mark.parametrize("all_ops", [True, False]) +@pytest.mark.parametrize("grayscale", [True, False]) +def test_augmix(fill, severity, mixture_width, chain_depth, all_ops, grayscale): + random.seed(42) + img = Image.open(GRACE_HOPPER) + if grayscale: + img, fill = _get_grayscale_test_image(img, fill) + transform = transforms.AugMix( + fill=fill, severity=severity, mixture_width=mixture_width, chain_depth=chain_depth, all_ops=all_ops + ) + for _ in range(100): + img = transform(img) + transform.__repr__() + + +def test_random_crop(): + height = random.randint(10, 32) * 2 + width = random.randint(10, 32) * 2 + oheight = random.randint(5, (height - 2) // 2) * 2 + owidth = random.randint(5, (width - 2) // 2) * 2 + img = torch.ones(3, height, width, dtype=torch.uint8) + result = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.RandomCrop((oheight, owidth)), + transforms.PILToTensor(), + ] + )(img) + assert result.size(1) == oheight + assert result.size(2) == owidth + + padding = random.randint(1, 20) + result = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.RandomCrop((oheight, owidth), padding=padding), + transforms.PILToTensor(), + ] + )(img) + assert result.size(1) == oheight + assert result.size(2) == owidth + + result = transforms.Compose( + [transforms.ToPILImage(), transforms.RandomCrop((height, width)), transforms.PILToTensor()] + )(img) + assert result.size(1) == height + assert result.size(2) == width + torch.testing.assert_close(result, img) + + result = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.RandomCrop((height + 1, width + 1), pad_if_needed=True), + transforms.PILToTensor(), + ] + )(img) + assert result.size(1) == height + 1 + 
assert result.size(2) == width + 1 + + t = transforms.RandomCrop(33) + img = torch.ones(3, 32, 32) + with pytest.raises(ValueError, match=r"Required crop size .+ is larger than input image size .+"): + t(img) + + +def test_center_crop(): + height = random.randint(10, 32) * 2 + width = random.randint(10, 32) * 2 + oheight = random.randint(5, (height - 2) // 2) * 2 + owidth = random.randint(5, (width - 2) // 2) * 2 + + img = torch.ones(3, height, width, dtype=torch.uint8) + oh1 = (height - oheight) // 2 + ow1 = (width - owidth) // 2 + imgnarrow = img[:, oh1 : oh1 + oheight, ow1 : ow1 + owidth] + imgnarrow.fill_(0) + result = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.CenterCrop((oheight, owidth)), + transforms.PILToTensor(), + ] + )(img) + assert result.sum() == 0 + oheight += 1 + owidth += 1 + result = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.CenterCrop((oheight, owidth)), + transforms.PILToTensor(), + ] + )(img) + sum1 = result.sum() + assert sum1 > 1 + oheight += 1 + owidth += 1 + result = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.CenterCrop((oheight, owidth)), + transforms.PILToTensor(), + ] + )(img) + sum2 = result.sum() + assert sum2 > 0 + assert sum2 > sum1 + + +@pytest.mark.parametrize("odd_image_size", (True, False)) +@pytest.mark.parametrize("delta", (1, 3, 5)) +@pytest.mark.parametrize("delta_width", (-2, -1, 0, 1, 2)) +@pytest.mark.parametrize("delta_height", (-2, -1, 0, 1, 2)) +def test_center_crop_2(odd_image_size, delta, delta_width, delta_height): + """Tests when center crop size is larger than image size, along any dimension""" + + # Since height is independent of width, we can ignore images with odd height and even width and vice-versa. 
+ input_image_size = (random.randint(10, 32) * 2, random.randint(10, 32) * 2) + if odd_image_size: + input_image_size = (input_image_size[0] + 1, input_image_size[1] + 1) + + delta_height *= delta + delta_width *= delta + + img = torch.ones(3, *input_image_size, dtype=torch.uint8) + crop_size = (input_image_size[0] + delta_height, input_image_size[1] + delta_width) + + # Test both transforms, one with PIL input and one with tensor + output_pil = transforms.Compose( + [transforms.ToPILImage(), transforms.CenterCrop(crop_size), transforms.PILToTensor()], + )(img) + assert output_pil.size()[1:3] == crop_size + + output_tensor = transforms.CenterCrop(crop_size)(img) + assert output_tensor.size()[1:3] == crop_size + + # Ensure output for PIL and Tensor are equal + assert_equal( + output_tensor, + output_pil, + msg=f"image_size: {input_image_size} crop_size: {crop_size}", + ) + + # Check if content in center of both image and cropped output is same. + center_size = (min(crop_size[0], input_image_size[0]), min(crop_size[1], input_image_size[1])) + crop_center_tl, input_center_tl = [0, 0], [0, 0] + for index in range(2): + if crop_size[index] > input_image_size[index]: + crop_center_tl[index] = (crop_size[index] - input_image_size[index]) // 2 + else: + input_center_tl[index] = (input_image_size[index] - crop_size[index]) // 2 + + output_center = output_pil[ + :, + crop_center_tl[0] : crop_center_tl[0] + center_size[0], + crop_center_tl[1] : crop_center_tl[1] + center_size[1], + ] + + img_center = img[ + :, + input_center_tl[0] : input_center_tl[0] + center_size[0], + input_center_tl[1] : input_center_tl[1] + center_size[1], + ] + + assert_equal(output_center, img_center) + + +def test_color_jitter(): + color_jitter = transforms.ColorJitter(2, 2, 2, 0.1) + + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode="RGB") + x_pil_2 = x_pil.convert("L") + + 
for _ in range(10): + y_pil = color_jitter(x_pil) + assert y_pil.mode == x_pil.mode + + y_pil_2 = color_jitter(x_pil_2) + assert y_pil_2.mode == x_pil_2.mode + + # Checking if ColorJitter can be printed as string + color_jitter.__repr__() + + +@pytest.mark.parametrize("hue", [1, (-1, 1)]) +def test_color_jitter_hue_out_of_bounds(hue): + with pytest.raises(ValueError, match=re.escape("hue values should be between (-0.5, 0.5)")): + transforms.ColorJitter(hue=hue) + + +@pytest.mark.parametrize("seed", range(10)) +@pytest.mark.skipif(stats is None, reason="scipy.stats not available") +def test_random_erasing(seed): + torch.random.manual_seed(seed) + img = torch.ones(3, 128, 128) + + t = transforms.RandomErasing(scale=(0.1, 0.1), ratio=(1 / 3, 3.0)) + y, x, h, w, v = t.get_params( + img, + t.scale, + t.ratio, + [ + t.value, + ], + ) + aspect_ratio = h / w + # Add some tolerance due to the rounding and int conversion used in the transform + tol = 0.05 + assert 1 / 3 - tol <= aspect_ratio <= 3 + tol + + # Make sure that h > w and h < w are equally likely (log-scale sampling) + aspect_ratios = [] + random.seed(42) + trial = 1000 + for _ in range(trial): + y, x, h, w, v = t.get_params( + img, + t.scale, + t.ratio, + [ + t.value, + ], + ) + aspect_ratios.append(h / w) + + count_bigger_then_ones = len([1 for aspect_ratio in aspect_ratios if aspect_ratio > 1]) + p_value = stats.binomtest(count_bigger_then_ones, trial, p=0.5).pvalue + assert p_value > 0.0001 + + # Checking if RandomErasing can be printed as string + t.__repr__() + + +def test_random_rotation(): + + with pytest.raises(ValueError): + transforms.RandomRotation(-0.7) + + with pytest.raises(ValueError): + transforms.RandomRotation([-0.7]) + + with pytest.raises(ValueError): + transforms.RandomRotation([-0.7, 0, 0.7]) + + t = transforms.RandomRotation(0, fill=None) + assert t.fill == 0 + + t = transforms.RandomRotation(10) + angle = t.get_params(t.degrees) + assert angle > -10 and angle < 10 + + t = 
transforms.RandomRotation((-10, 10)) + angle = t.get_params(t.degrees) + assert -10 < angle < 10 - @unittest.skipIf(stats is None, 'scipy.stats not available') - def test_random_vertical_flip(self): - random_state = random.getstate() - random.seed(42) - img = transforms.ToPILImage()(torch.rand(3, 10, 10)) - vimg = img.transpose(Image.FLIP_TOP_BOTTOM) - - num_samples = 250 - num_vertical = 0 - for _ in range(num_samples): - out = transforms.RandomVerticalFlip()(img) - if out == vimg: - num_vertical += 1 - - p_value = stats.binom_test(num_vertical, num_samples, p=0.5) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - num_samples = 250 - num_vertical = 0 - for _ in range(num_samples): - out = transforms.RandomVerticalFlip(p=0.7)(img) - if out == vimg: - num_vertical += 1 - - p_value = stats.binom_test(num_vertical, num_samples, p=0.7) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - # Checking if RandomVerticalFlip can be printed as string - transforms.RandomVerticalFlip().__repr__() - - @unittest.skipIf(stats is None, 'scipy.stats not available') - def test_random_horizontal_flip(self): - random_state = random.getstate() - random.seed(42) - img = transforms.ToPILImage()(torch.rand(3, 10, 10)) - himg = img.transpose(Image.FLIP_LEFT_RIGHT) - - num_samples = 250 - num_horizontal = 0 - for _ in range(num_samples): - out = transforms.RandomHorizontalFlip()(img) - if out == himg: - num_horizontal += 1 - - p_value = stats.binom_test(num_horizontal, num_samples, p=0.5) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - num_samples = 250 - num_horizontal = 0 - for _ in range(num_samples): - out = transforms.RandomHorizontalFlip(p=0.7)(img) - if out == himg: - num_horizontal += 1 - - p_value = stats.binom_test(num_horizontal, num_samples, p=0.7) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - # Checking if RandomHorizontalFlip can be printed as string - 
transforms.RandomHorizontalFlip().__repr__() - - @unittest.skipIf(stats is None, 'scipy.stats is not available') - def test_normalize(self): - def samples_from_standard_normal(tensor): - p_value = stats.kstest(list(tensor.view(-1)), 'norm', args=(0, 1)).pvalue - return p_value > 0.0001 - - random_state = random.getstate() - random.seed(42) - for channels in [1, 3]: - img = torch.rand(channels, 10, 10) - mean = [img[c].mean() for c in range(channels)] - std = [img[c].std() for c in range(channels)] - normalized = transforms.Normalize(mean, std)(img) - self.assertTrue(samples_from_standard_normal(normalized)) - random.setstate(random_state) - - # Checking if Normalize can be printed as string - transforms.Normalize(mean, std).__repr__() - - # Checking the optional in-place behaviour - tensor = torch.rand((1, 16, 16)) - tensor_inplace = transforms.Normalize((0.5,), (0.5,), inplace=True)(tensor) - self.assertTrue(torch.equal(tensor, tensor_inplace)) - - def test_normalize_different_dtype(self): - for dtype1 in [torch.float32, torch.float64]: - img = torch.rand(3, 10, 10, dtype=dtype1) - for dtype2 in [torch.int64, torch.float32, torch.float64]: - mean = torch.tensor([1, 2, 3], dtype=dtype2) - std = torch.tensor([1, 2, 1], dtype=dtype2) - # checks that it doesn't crash - transforms.functional.normalize(img, mean, std) - - def test_adjust_brightness(self): - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = Image.fromarray(x_np, mode='RGB') - - # test 0 - y_pil = F.adjust_brightness(x_pil, 1) - y_np = np.array(y_pil) - self.assertTrue(np.allclose(y_np, x_np)) - - # test 1 - y_pil = F.adjust_brightness(x_pil, 0.5) - y_np = np.array(y_pil) - y_ans = [0, 2, 6, 27, 67, 113, 18, 4, 117, 45, 127, 0] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - # test 2 - y_pil = F.adjust_brightness(x_pil, 2) - y_np = np.array(y_pil) - 
y_ans = [0, 10, 26, 108, 255, 255, 74, 16, 255, 180, 255, 2] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - def test_adjust_contrast(self): - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = Image.fromarray(x_np, mode='RGB') - - # test 0 - y_pil = F.adjust_contrast(x_pil, 1) - y_np = np.array(y_pil) - self.assertTrue(np.allclose(y_np, x_np)) - - # test 1 - y_pil = F.adjust_contrast(x_pil, 0.5) - y_np = np.array(y_pil) - y_ans = [43, 45, 49, 70, 110, 156, 61, 47, 160, 88, 170, 43] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - # test 2 - y_pil = F.adjust_contrast(x_pil, 2) - y_np = np.array(y_pil) - y_ans = [0, 0, 0, 22, 184, 255, 0, 0, 255, 94, 255, 0] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - def test_adjust_saturation(self): - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = Image.fromarray(x_np, mode='RGB') - - # test 0 - y_pil = F.adjust_saturation(x_pil, 1) - y_np = np.array(y_pil) - self.assertTrue(np.allclose(y_np, x_np)) - - # test 1 - y_pil = F.adjust_saturation(x_pil, 0.5) - y_np = np.array(y_pil) - y_ans = [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 215, 88] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - # test 2 - y_pil = F.adjust_saturation(x_pil, 2) - y_np = np.array(y_pil) - y_ans = [0, 6, 22, 0, 149, 255, 32, 0, 255, 4, 255, 0] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - def test_adjust_hue(self): - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = 
Image.fromarray(x_np, mode='RGB') - - with self.assertRaises(ValueError): - F.adjust_hue(x_pil, -0.7) - F.adjust_hue(x_pil, 1) - - # test 0: almost same as x_data but not exact. - # probably because hsv <-> rgb floating point ops - y_pil = F.adjust_hue(x_pil, 0) - y_np = np.array(y_pil) - y_ans = [0, 5, 13, 54, 139, 226, 35, 8, 234, 91, 255, 1] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - # test 1 - y_pil = F.adjust_hue(x_pil, 0.25) - y_np = np.array(y_pil) - y_ans = [13, 0, 12, 224, 54, 226, 234, 8, 99, 1, 222, 255] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - # test 2 - y_pil = F.adjust_hue(x_pil, -0.25) - y_np = np.array(y_pil) - y_ans = [0, 13, 2, 54, 226, 58, 8, 234, 152, 255, 43, 1] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - def test_adjust_gamma(self): - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = Image.fromarray(x_np, mode='RGB') - - # test 0 - y_pil = F.adjust_gamma(x_pil, 1) - y_np = np.array(y_pil) - self.assertTrue(np.allclose(y_np, x_np)) - - # test 1 - y_pil = F.adjust_gamma(x_pil, 0.5) - y_np = np.array(y_pil) - y_ans = [0, 35, 57, 117, 185, 240, 97, 45, 244, 151, 255, 15] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - # test 2 - y_pil = F.adjust_gamma(x_pil, 2) - y_np = np.array(y_pil) - y_ans = [0, 0, 0, 11, 71, 200, 5, 0, 214, 31, 255, 0] - y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) - self.assertTrue(np.allclose(y_np, y_ans)) - - def test_adjusts_L_mode(self): - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_rgb = Image.fromarray(x_np, mode='RGB') - - x_l = x_rgb.convert('L') - 
self.assertEqual(F.adjust_brightness(x_l, 2).mode, 'L') - self.assertEqual(F.adjust_saturation(x_l, 2).mode, 'L') - self.assertEqual(F.adjust_contrast(x_l, 2).mode, 'L') - self.assertEqual(F.adjust_hue(x_l, 0.4).mode, 'L') - self.assertEqual(F.adjust_gamma(x_l, 0.5).mode, 'L') - - def test_color_jitter(self): - color_jitter = transforms.ColorJitter(2, 2, 2, 0.1) - - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = Image.fromarray(x_np, mode='RGB') - x_pil_2 = x_pil.convert('L') - - for i in range(10): - y_pil = color_jitter(x_pil) - self.assertEqual(y_pil.mode, x_pil.mode) - - y_pil_2 = color_jitter(x_pil_2) - self.assertEqual(y_pil_2.mode, x_pil_2.mode) - - # Checking if ColorJitter can be printed as string - color_jitter.__repr__() - - def test_linear_transformation(self): - num_samples = 1000 - x = torch.randn(num_samples, 3, 10, 10) - flat_x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3)) - # compute principal components - sigma = torch.mm(flat_x.t(), flat_x) / flat_x.size(0) - u, s, _ = np.linalg.svd(sigma.numpy()) - zca_epsilon = 1e-10 # avoid division by 0 - d = torch.Tensor(np.diag(1. 
/ np.sqrt(s + zca_epsilon))) - u = torch.Tensor(u) - principal_components = torch.mm(torch.mm(u, d), u.t()) - mean_vector = (torch.sum(flat_x, dim=0) / flat_x.size(0)) - # initialize whitening matrix - whitening = transforms.LinearTransformation(principal_components, mean_vector) - # estimate covariance and mean using weak law of large number - num_features = flat_x.size(1) - cov = 0.0 - mean = 0.0 - for i in x: - xwhite = whitening(i) - xwhite = xwhite.view(1, -1).numpy() - cov += np.dot(xwhite, xwhite.T) / num_features - mean += np.sum(xwhite) / num_features - # if rtol for std = 1e-3 then rtol for cov = 2e-3 as std**2 = cov - self.assertTrue(np.allclose(cov / num_samples, np.identity(1), rtol=2e-3), - "cov not close to 1") - self.assertTrue(np.allclose(mean / num_samples, 0, rtol=1e-3), - "mean not close to 0") - - # Checking if LinearTransformation can be printed as string - whitening.__repr__() - - def test_rotate(self): - x = np.zeros((100, 100, 3), dtype=np.uint8) - x[40, 40] = [255, 255, 255] - - with self.assertRaises(TypeError): - F.rotate(x, 10) - - img = F.to_pil_image(x) - - result = F.rotate(img, 45) - self.assertEqual(result.size, (100, 100)) - r, c, ch = np.where(result) - self.assertTrue(all(x in r for x in [49, 50])) - self.assertTrue(all(x in c for x in [36])) - self.assertTrue(all(x in ch for x in [0, 1, 2])) - - result = F.rotate(img, 45, expand=True) - self.assertEqual(result.size, (142, 142)) - r, c, ch = np.where(result) - self.assertTrue(all(x in r for x in [70, 71])) - self.assertTrue(all(x in c for x in [57])) - self.assertTrue(all(x in ch for x in [0, 1, 2])) - - result = F.rotate(img, 45, center=(40, 40)) - self.assertEqual(result.size, (100, 100)) - r, c, ch = np.where(result) - self.assertTrue(all(x in r for x in [40])) - self.assertTrue(all(x in c for x in [40])) - self.assertTrue(all(x in ch for x in [0, 1, 2])) - - result_a = F.rotate(img, 90) - result_b = F.rotate(img, -270) - - self.assertTrue(np.all(np.array(result_a) == 
np.array(result_b))) - - def test_affine(self): + # Checking if RandomRotation can be printed as string + t.__repr__() + + t = transforms.RandomRotation((-10, 10), interpolation=Image.BILINEAR) + assert t.interpolation == transforms.InterpolationMode.BILINEAR + + +def test_random_rotation_error(): + # assert fill being either a Sequence or a Number + with pytest.raises(TypeError): + transforms.RandomRotation(0, fill={}) + + +def test_randomperspective(): + for _ in range(10): + height = random.randint(24, 32) * 2 + width = random.randint(24, 32) * 2 + img = torch.ones(3, height, width) + to_pil_image = transforms.ToPILImage() + img = to_pil_image(img) + perp = transforms.RandomPerspective() + startpoints, endpoints = perp.get_params(width, height, 0.5) + tr_img = F.perspective(img, startpoints, endpoints) + tr_img2 = F.convert_image_dtype(F.pil_to_tensor(F.perspective(tr_img, endpoints, startpoints))) + tr_img = F.convert_image_dtype(F.pil_to_tensor(tr_img)) + assert img.size[0] == width + assert img.size[1] == height + assert torch.nn.functional.mse_loss( + tr_img, F.convert_image_dtype(F.pil_to_tensor(img)) + ) + 0.3 > torch.nn.functional.mse_loss(tr_img2, F.convert_image_dtype(F.pil_to_tensor(img))) + + +@pytest.mark.parametrize("seed", range(10)) +@pytest.mark.parametrize("mode", ["L", "RGB", "F"]) +def test_randomperspective_fill(mode, seed): + torch.random.manual_seed(seed) + + # assert fill being either a Sequence or a Number + with pytest.raises(TypeError): + transforms.RandomPerspective(fill={}) + + t = transforms.RandomPerspective(fill=None) + assert t.fill == 0 + + height = 100 + width = 100 + img = torch.ones(3, height, width) + to_pil_image = transforms.ToPILImage() + img = to_pil_image(img) + fill = 127 + num_bands = len(mode) + + img_conv = img.convert(mode) + perspective = transforms.RandomPerspective(p=1, fill=fill) + tr_img = perspective(img_conv) + pixel = tr_img.getpixel((0, 0)) + + if not isinstance(pixel, tuple): + pixel = (pixel,) + assert 
pixel == tuple([fill] * num_bands) + + startpoints, endpoints = transforms.RandomPerspective.get_params(width, height, 0.5) + tr_img = F.perspective(img_conv, startpoints, endpoints, fill=fill) + pixel = tr_img.getpixel((0, 0)) + + if not isinstance(pixel, tuple): + pixel = (pixel,) + assert pixel == tuple([fill] * num_bands) + + wrong_num_bands = num_bands + 1 + with pytest.raises(ValueError): + F.perspective(img_conv, startpoints, endpoints, fill=tuple([fill] * wrong_num_bands)) + + +@pytest.mark.skipif(stats is None, reason="scipy.stats not available") +def test_normalize(): + def samples_from_standard_normal(tensor): + p_value = stats.kstest(list(tensor.view(-1)), "norm", args=(0, 1)).pvalue + return p_value > 0.0001 + + random_state = random.getstate() + random.seed(42) + for channels in [1, 3]: + img = torch.rand(channels, 10, 10) + mean = [img[c].mean() for c in range(channels)] + std = [img[c].std() for c in range(channels)] + normalized = transforms.Normalize(mean, std)(img) + assert samples_from_standard_normal(normalized) + random.setstate(random_state) + + # Checking if Normalize can be printed as string + transforms.Normalize(mean, std).__repr__() + + # Checking the optional in-place behaviour + tensor = torch.rand((1, 16, 16)) + tensor_inplace = transforms.Normalize((0.5,), (0.5,), inplace=True)(tensor) + assert_equal(tensor, tensor_inplace) + + +@pytest.mark.parametrize("dtype1", [torch.float32, torch.float64]) +@pytest.mark.parametrize("dtype2", [torch.int64, torch.float32, torch.float64]) +def test_normalize_different_dtype(dtype1, dtype2): + img = torch.rand(3, 10, 10, dtype=dtype1) + mean = torch.tensor([1, 2, 3], dtype=dtype2) + std = torch.tensor([1, 2, 1], dtype=dtype2) + # checks that it doesn't crash + transforms.functional.normalize(img, mean, std) + + +def test_normalize_3d_tensor(): + torch.manual_seed(28) + n_channels = 3 + img_size = 10 + mean = torch.rand(n_channels) + std = torch.rand(n_channels) + img = torch.rand(n_channels, 
img_size, img_size) + target = F.normalize(img, mean, std) + + mean_unsqueezed = mean.view(-1, 1, 1) + std_unsqueezed = std.view(-1, 1, 1) + result1 = F.normalize(img, mean_unsqueezed, std_unsqueezed) + result2 = F.normalize( + img, mean_unsqueezed.repeat(1, img_size, img_size), std_unsqueezed.repeat(1, img_size, img_size) + ) + torch.testing.assert_close(target, result1) + torch.testing.assert_close(target, result2) + + +class TestAffine: + @pytest.fixture(scope="class") + def input_img(self): input_img = np.zeros((40, 40, 3), dtype=np.uint8) - pts = [] - cnt = [20, 20] for pt in [(16, 16), (20, 16), (20, 20)]: for i in range(-5, 5): for j in range(-5, 5): input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55] - pts.append((pt[0] + i, pt[1] + j)) - pts = list(set(pts)) - - with self.assertRaises(TypeError): - F.affine(input_img, 10) - - pil_img = F.to_pil_image(input_img) - - def _to_3x3_inv(inv_result_matrix): - result_matrix = np.zeros((3, 3)) - result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3)) - result_matrix[2, 2] = 1 - return np.linalg.inv(result_matrix) - - def _test_transformation(a, t, s, sh): - a_rad = math.radians(a) - s_rad = [math.radians(sh_) for sh_ in sh] - cx, cy = cnt - tx, ty = t - sx, sy = s_rad - rot = a_rad - - # 1) Check transformation matrix: - C = np.array([[1, 0, cx], - [0, 1, cy], - [0, 0, 1]]) - T = np.array([[1, 0, tx], - [0, 1, ty], - [0, 0, 1]]) - Cinv = np.linalg.inv(C) - - RS = np.array( - [[s * math.cos(rot), -s * math.sin(rot), 0], - [s * math.sin(rot), s * math.cos(rot), 0], - [0, 0, 1]]) - - SHx = np.array([[1, -math.tan(sx), 0], - [0, 1, 0], - [0, 0, 1]]) - - SHy = np.array([[1, 0, 0], - [-math.tan(sy), 1, 0], - [0, 0, 1]]) - - RSS = np.matmul(RS, np.matmul(SHy, SHx)) - - true_matrix = np.matmul(T, np.matmul(C, np.matmul(RSS, Cinv))) - - result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(center=cnt, angle=a, - translate=t, scale=s, shear=sh)) - self.assertLess(np.sum(np.abs(true_matrix - result_matrix)), 
1e-10) - # 2) Perform inverse mapping: - true_result = np.zeros((40, 40, 3), dtype=np.uint8) - inv_true_matrix = np.linalg.inv(true_matrix) - for y in range(true_result.shape[0]): - for x in range(true_result.shape[1]): - res = np.dot(inv_true_matrix, [x, y, 1]) - _x = int(res[0] + 0.5) - _y = int(res[1] + 0.5) - if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]: - true_result[y, x, :] = input_img[_y, _x, :] - - result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh) - self.assertEqual(result.size, pil_img.size) - # Compute number of different pixels: - np_result = np.array(result) - n_diff_pixels = np.sum(np_result != true_result) / 3 - # Accept 3 wrong pixels - self.assertLess(n_diff_pixels, 3, - "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) + - "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))) + return input_img + + def test_affine_translate_seq(self, input_img): + with pytest.raises(TypeError, match=r"Argument translate should be a sequence"): + F.affine(input_img, 10, translate=0, scale=1, shear=1) + + @pytest.fixture(scope="class") + def pil_image(self, input_img): + return F.to_pil_image(input_img) + + def _to_3x3_inv(self, inv_result_matrix): + result_matrix = np.zeros((3, 3)) + result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3)) + result_matrix[2, 2] = 1 + return np.linalg.inv(result_matrix) + + def _test_transformation(self, angle, translate, scale, shear, pil_image, input_img, center=None): + + a_rad = math.radians(angle) + s_rad = [math.radians(sh_) for sh_ in shear] + cnt = [20, 20] if center is None else center + cx, cy = cnt + tx, ty = translate + sx, sy = s_rad + rot = a_rad + + # 1) Check transformation matrix: + C = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]]) + T = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) + Cinv = np.linalg.inv(C) + + RS = np.array( + [ + [scale * math.cos(rot), -scale * math.sin(rot), 0], + [scale * math.sin(rot), scale * math.cos(rot), 0], + 
[0, 0, 1], + ] + ) + + SHx = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]]) + + SHy = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]]) + + RSS = np.matmul(RS, np.matmul(SHy, SHx)) + + true_matrix = np.matmul(T, np.matmul(C, np.matmul(RSS, Cinv))) + + result_matrix = self._to_3x3_inv( + F._get_inverse_affine_matrix(center=cnt, angle=angle, translate=translate, scale=scale, shear=shear) + ) + assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10 + # 2) Perform inverse mapping: + true_result = np.zeros((40, 40, 3), dtype=np.uint8) + inv_true_matrix = np.linalg.inv(true_matrix) + for y in range(true_result.shape[0]): + for x in range(true_result.shape[1]): + # Same as for PIL: + # https://github.com/python-pillow/Pillow/blob/71f8ec6a0cfc1008076a023c0756542539d057ab/ + # src/libImaging/Geometry.c#L1060 + input_pt = np.array([x + 0.5, y + 0.5, 1.0]) + res = np.floor(np.dot(inv_true_matrix, input_pt)).astype(int) + _x, _y = res[:2] + if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]: + true_result[y, x, :] = input_img[_y, _x, :] + + result = F.affine(pil_image, angle=angle, translate=translate, scale=scale, shear=shear, center=center) + assert result.size == pil_image.size + # Compute number of different pixels: + np_result = np.array(result) + n_diff_pixels = np.sum(np_result != true_result) / 3 + # Accept 3 wrong pixels + error_msg = ( + f"angle={angle}, translate={translate}, scale={scale}, shear={shear}\nn diff pixels={n_diff_pixels}\n" + ) + assert n_diff_pixels < 3, error_msg + + def test_transformation_discrete(self, pil_image, input_img): + # Test rotation + angle = 45 + self._test_transformation( + angle=angle, translate=(0, 0), scale=1.0, shear=(0.0, 0.0), pil_image=pil_image, input_img=input_img + ) # Test rotation - a = 45 - _test_transformation(a=a, t=(0, 0), s=1.0, sh=(0.0, 0.0)) + angle = 45 + self._test_transformation( + angle=angle, + translate=(0, 0), + scale=1.0, + shear=(0.0, 0.0), + pil_image=pil_image, + 
input_img=input_img, + center=[0, 0], + ) # Test translation - t = [10, 15] - _test_transformation(a=0.0, t=t, s=1.0, sh=(0.0, 0.0)) + translate = [10, 15] + self._test_transformation( + angle=0.0, translate=translate, scale=1.0, shear=(0.0, 0.0), pil_image=pil_image, input_img=input_img + ) # Test scale - s = 1.2 - _test_transformation(a=0.0, t=(0.0, 0.0), s=s, sh=(0.0, 0.0)) + scale = 1.2 + self._test_transformation( + angle=0.0, translate=(0.0, 0.0), scale=scale, shear=(0.0, 0.0), pil_image=pil_image, input_img=input_img + ) # Test shear - sh = [45.0, 25.0] - _test_transformation(a=0.0, t=(0.0, 0.0), s=1.0, sh=sh) - - # Test rotation, scale, translation, shear - for a in range(-90, 90, 25): - for t1 in range(-10, 10, 5): - for s in [0.75, 0.98, 1.0, 1.1, 1.2]: - for sh in range(-15, 15, 5): - _test_transformation(a=a, t=(t1, t1), s=s, sh=(sh, sh)) - - def test_random_rotation(self): - - with self.assertRaises(ValueError): - transforms.RandomRotation(-0.7) - transforms.RandomRotation([-0.7]) - transforms.RandomRotation([-0.7, 0, 0.7]) - - t = transforms.RandomRotation(10) - angle = t.get_params(t.degrees) - self.assertTrue(angle > -10 and angle < 10) - - t = transforms.RandomRotation((-10, 10)) - angle = t.get_params(t.degrees) - self.assertTrue(angle > -10 and angle < 10) - - # Checking if RandomRotation can be printed as string - t.__repr__() - - def test_random_affine(self): - - with self.assertRaises(ValueError): - transforms.RandomAffine(-0.7) - transforms.RandomAffine([-0.7]) - transforms.RandomAffine([-0.7, 0, 0.7]) - - transforms.RandomAffine([-90, 90], translate=2.0) - transforms.RandomAffine([-90, 90], translate=[-1.0, 1.0]) - transforms.RandomAffine([-90, 90], translate=[-1.0, 0.0, 1.0]) - - transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.0]) - transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[-1.0, 1.0]) - transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, -0.5]) - transforms.RandomAffine([-90, 90], 
translate=[0.2, 0.2], scale=[0.5, 3.0, -0.5]) - - transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=-7) - transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10]) - transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10]) - transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10, 0, 10]) - - x = np.zeros((100, 100, 3), dtype=np.uint8) - img = F.to_pil_image(x) - - t = transforms.RandomAffine(10, translate=[0.5, 0.3], scale=[0.7, 1.3], shear=[-10, 10, 20, 40]) - for _ in range(100): - angle, translations, scale, shear = t.get_params(t.degrees, t.translate, t.scale, t.shear, - img_size=img.size) - self.assertTrue(-10 < angle < 10) - self.assertTrue(-img.size[0] * 0.5 <= translations[0] <= img.size[0] * 0.5, - "{} vs {}".format(translations[0], img.size[0] * 0.5)) - self.assertTrue(-img.size[1] * 0.5 <= translations[1] <= img.size[1] * 0.5, - "{} vs {}".format(translations[1], img.size[1] * 0.5)) - self.assertTrue(0.7 < scale < 1.3) - self.assertTrue(-10 < shear[0] < 10) - self.assertTrue(-20 < shear[1] < 40) - - # Checking if RandomAffine can be printed as string - t.__repr__() - - t = transforms.RandomAffine(10, resample=Image.BILINEAR) - self.assertIn("Image.BILINEAR", t.__repr__()) - - def test_to_grayscale(self): - """Unit tests for grayscale transform""" - - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = Image.fromarray(x_np, mode='RGB') - x_pil_2 = x_pil.convert('L') - gray_np = np.array(x_pil_2) - - # Test Set: Grayscale an image with desired number of output channels - # Case 1: RGB -> 1 channel grayscale - trans1 = transforms.Grayscale(num_output_channels=1) - gray_pil_1 = trans1(x_pil) - gray_np_1 = np.array(gray_pil_1) - self.assertEqual(gray_pil_1.mode, 'L', 'mode should be L') - self.assertEqual(gray_np_1.shape, 
tuple(x_shape[0:2]), 'should be 1 channel') - np.testing.assert_equal(gray_np, gray_np_1) - - # Case 2: RGB -> 3 channel grayscale - trans2 = transforms.Grayscale(num_output_channels=3) - gray_pil_2 = trans2(x_pil) - gray_np_2 = np.array(gray_pil_2) - self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB') - self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel') - np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) - np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) - np.testing.assert_equal(gray_np, gray_np_2[:, :, 0]) - - # Case 3: 1 channel grayscale -> 1 channel grayscale - trans3 = transforms.Grayscale(num_output_channels=1) - gray_pil_3 = trans3(x_pil_2) - gray_np_3 = np.array(gray_pil_3) - self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L') - self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel') - np.testing.assert_equal(gray_np, gray_np_3) - - # Case 4: 1 channel grayscale -> 3 channel grayscale - trans4 = transforms.Grayscale(num_output_channels=3) - gray_pil_4 = trans4(x_pil_2) - gray_np_4 = np.array(gray_pil_4) - self.assertEqual(gray_pil_4.mode, 'RGB', 'mode should be RGB') - self.assertEqual(gray_np_4.shape, tuple(x_shape), 'should be 3 channel') - np.testing.assert_equal(gray_np_4[:, :, 0], gray_np_4[:, :, 1]) - np.testing.assert_equal(gray_np_4[:, :, 1], gray_np_4[:, :, 2]) - np.testing.assert_equal(gray_np, gray_np_4[:, :, 0]) - - # Checking if Grayscale can be printed as string - trans4.__repr__() - - @unittest.skipIf(stats is None, 'scipy.stats not available') - def test_random_grayscale(self): - """Unit tests for random grayscale transform""" - - # Test Set 1: RGB -> 3 channel grayscale - random_state = random.getstate() - random.seed(42) - x_shape = [2, 2, 3] - x_np = np.random.randint(0, 256, x_shape, np.uint8) - x_pil = Image.fromarray(x_np, mode='RGB') - x_pil_2 = x_pil.convert('L') - gray_np = np.array(x_pil_2) - - num_samples = 250 - num_gray = 0 - for _ in 
range(num_samples): - gray_pil_2 = transforms.RandomGrayscale(p=0.5)(x_pil) - gray_np_2 = np.array(gray_pil_2) - if np.array_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) and \ - np.array_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) and \ - np.array_equal(gray_np, gray_np_2[:, :, 0]): - num_gray = num_gray + 1 - - p_value = stats.binom_test(num_gray, num_samples, p=0.5) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - # Test Set 2: grayscale -> 1 channel grayscale - random_state = random.getstate() - random.seed(42) - x_shape = [2, 2, 3] - x_np = np.random.randint(0, 256, x_shape, np.uint8) - x_pil = Image.fromarray(x_np, mode='RGB') - x_pil_2 = x_pil.convert('L') - gray_np = np.array(x_pil_2) - - num_samples = 250 - num_gray = 0 - for _ in range(num_samples): - gray_pil_3 = transforms.RandomGrayscale(p=0.5)(x_pil_2) - gray_np_3 = np.array(gray_pil_3) - if np.array_equal(gray_np, gray_np_3): - num_gray = num_gray + 1 - - p_value = stats.binom_test(num_gray, num_samples, p=1.0) # Note: grayscale is always unchanged - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - # Test set 3: Explicit tests - x_shape = [2, 2, 3] - x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] - x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) - x_pil = Image.fromarray(x_np, mode='RGB') - x_pil_2 = x_pil.convert('L') - gray_np = np.array(x_pil_2) - - # Case 3a: RGB -> 3 channel grayscale (grayscaled) - trans2 = transforms.RandomGrayscale(p=1.0) - gray_pil_2 = trans2(x_pil) - gray_np_2 = np.array(gray_pil_2) - self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB') - self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel') - np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) - np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) - np.testing.assert_equal(gray_np, gray_np_2[:, :, 0]) - - # Case 3b: RGB -> 3 channel grayscale (unchanged) - trans2 = transforms.RandomGrayscale(p=0.0) - 
gray_pil_2 = trans2(x_pil) - gray_np_2 = np.array(gray_pil_2) - self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB') - self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel') - np.testing.assert_equal(x_np, gray_np_2) - - # Case 3c: 1 channel grayscale -> 1 channel grayscale (grayscaled) - trans3 = transforms.RandomGrayscale(p=1.0) - gray_pil_3 = trans3(x_pil_2) - gray_np_3 = np.array(gray_pil_3) - self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L') - self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel') - np.testing.assert_equal(gray_np, gray_np_3) - - # Case 3d: 1 channel grayscale -> 1 channel grayscale (unchanged) - trans3 = transforms.RandomGrayscale(p=0.0) - gray_pil_3 = trans3(x_pil_2) - gray_np_3 = np.array(gray_pil_3) - self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L') - self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel') - np.testing.assert_equal(gray_np, gray_np_3) - - # Checking if RandomGrayscale can be printed as string - trans3.__repr__() - - def test_random_erasing(self): - """Unit tests for random erasing transform""" - - img = torch.rand([3, 60, 60]) - - # Test Set 1: Erasing with int value - img_re = transforms.RandomErasing(value=0.2) - i, j, h, w, v = img_re.get_params(img, scale=img_re.scale, ratio=img_re.ratio, value=img_re.value) - img_output = F.erase(img, i, j, h, w, v) - self.assertEqual(img_output.size(0), 3) - - # Test Set 2: Check if the unerased region is preserved - orig_unerased = img.clone() - orig_unerased[:, i:i + h, j:j + w] = 0 - output_unerased = img_output.clone() - output_unerased[:, i:i + h, j:j + w] = 0 - self.assertTrue(torch.equal(orig_unerased, output_unerased)) - - # Test Set 3: Erasing with random value - img_re = transforms.RandomErasing(value='random')(img) - self.assertEqual(img_re.size(0), 3) - - # Test Set 4: Erasing with tuple value - img_re = transforms.RandomErasing(value=(0.2, 0.2, 0.2))(img) - 
self.assertEqual(img_re.size(0), 3) - - # Test Set 5: Testing the inplace behaviour - img_re = transforms.RandomErasing(value=(0.2), inplace=True)(img) - self.assertTrue(torch.equal(img_re, img)) - - # Test Set 6: Checking when no erased region is selected - img = torch.rand([3, 300, 1]) - img_re = transforms.RandomErasing(ratio=(0.1, 0.2), value='random')(img) - self.assertTrue(torch.equal(img_re, img)) - - -if __name__ == '__main__': - unittest.main() + shear = [45.0, 25.0] + self._test_transformation( + angle=0.0, translate=(0.0, 0.0), scale=1.0, shear=shear, pil_image=pil_image, input_img=input_img + ) + + # Test shear with top-left as center + shear = [45.0, 25.0] + self._test_transformation( + angle=0.0, + translate=(0.0, 0.0), + scale=1.0, + shear=shear, + pil_image=pil_image, + input_img=input_img, + center=[0, 0], + ) + + @pytest.mark.parametrize("angle", range(-90, 90, 36)) + @pytest.mark.parametrize("translate", range(-10, 10, 5)) + @pytest.mark.parametrize("scale", [0.77, 1.0, 1.27]) + @pytest.mark.parametrize("shear", range(-15, 15, 5)) + def test_transformation_range(self, angle, translate, scale, shear, pil_image, input_img): + self._test_transformation( + angle=angle, + translate=(translate, translate), + scale=scale, + shear=(shear, shear), + pil_image=pil_image, + input_img=input_img, + ) + + +def test_random_affine(): + + with pytest.raises(ValueError): + transforms.RandomAffine(-0.7) + with pytest.raises(ValueError): + transforms.RandomAffine([-0.7]) + with pytest.raises(ValueError): + transforms.RandomAffine([-0.7, 0, 0.7]) + with pytest.raises(TypeError): + transforms.RandomAffine([-90, 90], translate=2.0) + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[-1.0, 1.0]) + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[-1.0, 0.0, 1.0]) + + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.0]) + with pytest.raises(ValueError): + 
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[-1.0, 1.0]) + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, -0.5]) + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 3.0, -0.5]) + + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=-7) + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10]) + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10]) + with pytest.raises(ValueError): + transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10, 0, 10]) + + # assert fill being either a Sequence or a Number + with pytest.raises(TypeError): + transforms.RandomAffine(0, fill={}) + + t = transforms.RandomAffine(0, fill=None) + assert t.fill == 0 + + x = np.zeros((100, 100, 3), dtype=np.uint8) + img = F.to_pil_image(x) + + t = transforms.RandomAffine(10, translate=[0.5, 0.3], scale=[0.7, 1.3], shear=[-10, 10, 20, 40]) + for _ in range(100): + angle, translations, scale, shear = t.get_params(t.degrees, t.translate, t.scale, t.shear, img_size=img.size) + assert -10 < angle < 10 + assert -img.size[0] * 0.5 <= translations[0] <= img.size[0] * 0.5 + assert -img.size[1] * 0.5 <= translations[1] <= img.size[1] * 0.5 + assert 0.7 < scale < 1.3 + assert -10 < shear[0] < 10 + assert -20 < shear[1] < 40 + + # Checking if RandomAffine can be printed as string + t.__repr__() + + t = transforms.RandomAffine(10, interpolation=transforms.InterpolationMode.BILINEAR) + assert "bilinear" in t.__repr__() + + t = transforms.RandomAffine(10, interpolation=Image.BILINEAR) + assert t.interpolation == transforms.InterpolationMode.BILINEAR + + +def test_elastic_transformation(): + with pytest.raises(TypeError, match=r"alpha should be float 
or a sequence of floats"): + transforms.ElasticTransform(alpha=True, sigma=2.0) + with pytest.raises(TypeError, match=r"alpha should be a sequence of floats"): + transforms.ElasticTransform(alpha=[1.0, True], sigma=2.0) + with pytest.raises(ValueError, match=r"alpha is a sequence its length should be 2"): + transforms.ElasticTransform(alpha=[1.0, 0.0, 1.0], sigma=2.0) + + with pytest.raises(TypeError, match=r"sigma should be float or a sequence of floats"): + transforms.ElasticTransform(alpha=2.0, sigma=True) + with pytest.raises(TypeError, match=r"sigma should be a sequence of floats"): + transforms.ElasticTransform(alpha=2.0, sigma=[1.0, True]) + with pytest.raises(ValueError, match=r"sigma is a sequence its length should be 2"): + transforms.ElasticTransform(alpha=2.0, sigma=[1.0, 0.0, 1.0]) + + t = transforms.transforms.ElasticTransform(alpha=2.0, sigma=2.0, interpolation=Image.BILINEAR) + assert t.interpolation == transforms.InterpolationMode.BILINEAR + + with pytest.raises(TypeError, match=r"fill should be int or float"): + transforms.ElasticTransform(alpha=1.0, sigma=1.0, fill={}) + + x = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8) + img = F.to_pil_image(x) + t = transforms.ElasticTransform(alpha=0.0, sigma=0.0) + transformed_img = t(img) + assert transformed_img == img + + # Smoke test on PIL images + t = transforms.ElasticTransform(alpha=0.5, sigma=0.23) + transformed_img = t(img) + assert isinstance(transformed_img, Image.Image) + + # Checking if ElasticTransform can be printed as string + t.__repr__() + + +def test_random_grayscale_with_grayscale_input(): + transform = transforms.RandomGrayscale(p=1.0) + + image_tensor = torch.randint(0, 256, (1, 16, 16), dtype=torch.uint8) + output_tensor = transform(image_tensor) + torch.testing.assert_close(output_tensor, image_tensor) + + image_pil = F.to_pil_image(image_tensor) + output_pil = transform(image_pil) + torch.testing.assert_close(F.pil_to_tensor(output_pil), image_tensor) + + +if __name__ == 
"__main__": + pytest.main([__file__]) diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py new file mode 100644 index 00000000000..eac52dafc17 --- /dev/null +++ b/test/test_transforms_tensor.py @@ -0,0 +1,892 @@ +import os +import sys + +import numpy as np +import PIL.Image +import pytest +import torch +from common_utils import ( + _assert_approx_equal_tensor_to_pil, + _assert_equal_tensor_to_pil, + _create_data, + _create_data_batch, + assert_equal, + cpu_and_cuda, + float_dtypes, + get_tmp_dir, + int_dtypes, +) +from torchvision import transforms as T +from torchvision.transforms import functional as F, InterpolationMode +from torchvision.transforms.autoaugment import _apply_op + +NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC = ( + InterpolationMode.NEAREST, + InterpolationMode.NEAREST_EXACT, + InterpolationMode.BILINEAR, + InterpolationMode.BICUBIC, +) + + +def _test_transform_vs_scripted(transform, s_transform, tensor, msg=None): + torch.manual_seed(12) + out1 = transform(tensor) + torch.manual_seed(12) + out2 = s_transform(tensor) + assert_equal(out1, out2, msg=msg) + + +def _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors, msg=None): + torch.manual_seed(12) + transformed_batch = transform(batch_tensors) + + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] 
+ torch.manual_seed(12) + transformed_img = transform(img_tensor) + assert_equal(transformed_img, transformed_batch[i, ...], msg=msg) + + torch.manual_seed(12) + s_transformed_batch = s_transform(batch_tensors) + assert_equal(transformed_batch, s_transformed_batch, msg=msg) + + +def _test_functional_op(f, device, channels=3, fn_kwargs=None, test_exact_match=True, **match_kwargs): + fn_kwargs = fn_kwargs or {} + + tensor, pil_img = _create_data(height=10, width=10, channels=channels, device=device) + transformed_tensor = f(tensor, **fn_kwargs) + transformed_pil_img = f(pil_img, **fn_kwargs) + if test_exact_match: + _assert_equal_tensor_to_pil(transformed_tensor, transformed_pil_img, **match_kwargs) + else: + _assert_approx_equal_tensor_to_pil(transformed_tensor, transformed_pil_img, **match_kwargs) + + +def _test_class_op(transform_cls, device, channels=3, meth_kwargs=None, test_exact_match=True, **match_kwargs): + meth_kwargs = meth_kwargs or {} + + # test for class interface + f = transform_cls(**meth_kwargs) + scripted_fn = torch.jit.script(f) + + tensor, pil_img = _create_data(26, 34, channels, device=device) + # set seed to reproduce the same transformation for tensor and PIL image + torch.manual_seed(12) + transformed_tensor = f(tensor) + torch.manual_seed(12) + transformed_pil_img = f(pil_img) + if test_exact_match: + _assert_equal_tensor_to_pil(transformed_tensor, transformed_pil_img, **match_kwargs) + else: + _assert_approx_equal_tensor_to_pil(transformed_tensor.float(), transformed_pil_img, **match_kwargs) + + torch.manual_seed(12) + transformed_tensor_script = scripted_fn(tensor) + assert_equal(transformed_tensor, transformed_tensor_script) + + batch_tensors = _create_data_batch(height=23, width=34, channels=channels, num_samples=4, device=device) + _test_transform_vs_scripted_on_batch(f, scripted_fn, batch_tensors) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, f"t_{transform_cls.__name__}.pt")) + + +def _test_op(func, 
method, device, channels=3, fn_kwargs=None, meth_kwargs=None, test_exact_match=True, **match_kwargs): + _test_functional_op(func, device, channels, fn_kwargs, test_exact_match=test_exact_match, **match_kwargs) + _test_class_op(method, device, channels, meth_kwargs, test_exact_match=test_exact_match, **match_kwargs) + + +def _test_fn_save_load(fn, tmpdir): + scripted_fn = torch.jit.script(fn) + p = os.path.join(tmpdir, f"t_op_list_{getattr(fn, '__name__', fn.__class__.__name__)}.pt") + scripted_fn.save(p) + _ = torch.jit.load(p) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "func,method,fn_kwargs,match_kwargs", + [ + (F.hflip, T.RandomHorizontalFlip, None, {}), + (F.vflip, T.RandomVerticalFlip, None, {}), + (F.invert, T.RandomInvert, None, {}), + (F.posterize, T.RandomPosterize, {"bits": 4}, {}), + (F.solarize, T.RandomSolarize, {"threshold": 192.0}, {}), + (F.adjust_sharpness, T.RandomAdjustSharpness, {"sharpness_factor": 2.0}, {}), + ( + F.autocontrast, + T.RandomAutocontrast, + None, + {"test_exact_match": False, "agg_method": "max", "tol": (1 + 1e-5), "allowed_percentage_diff": 0.05}, + ), + (F.equalize, T.RandomEqualize, None, {}), + ], +) +@pytest.mark.parametrize("channels", [1, 3]) +def test_random(func, method, device, channels, fn_kwargs, match_kwargs): + _test_op(func, method, device, channels, fn_kwargs, fn_kwargs, **match_kwargs) + + +@pytest.mark.parametrize("seed", range(10)) +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("channels", [1, 3]) +class TestColorJitter: + @pytest.fixture(autouse=True) + def set_random_seed(self, seed): + torch.random.manual_seed(seed) + + @pytest.mark.parametrize("brightness", [0.1, 0.5, 1.0, 1.34, (0.3, 0.7), [0.4, 0.5]]) + def test_color_jitter_brightness(self, brightness, device, channels): + tol = 1.0 + 1e-10 + meth_kwargs = {"brightness": brightness} + _test_class_op( + T.ColorJitter, + meth_kwargs=meth_kwargs, + test_exact_match=False, + 
device=device, + tol=tol, + agg_method="max", + channels=channels, + ) + + @pytest.mark.parametrize("contrast", [0.2, 0.5, 1.0, 1.5, (0.3, 0.7), [0.4, 0.5]]) + def test_color_jitter_contrast(self, contrast, device, channels): + tol = 1.0 + 1e-10 + meth_kwargs = {"contrast": contrast} + _test_class_op( + T.ColorJitter, + meth_kwargs=meth_kwargs, + test_exact_match=False, + device=device, + tol=tol, + agg_method="max", + channels=channels, + ) + + @pytest.mark.parametrize("saturation", [0.5, 0.75, 1.0, 1.25, (0.3, 0.7), [0.3, 0.4]]) + def test_color_jitter_saturation(self, saturation, device, channels): + tol = 1.0 + 1e-10 + meth_kwargs = {"saturation": saturation} + _test_class_op( + T.ColorJitter, + meth_kwargs=meth_kwargs, + test_exact_match=False, + device=device, + tol=tol, + agg_method="max", + channels=channels, + ) + + @pytest.mark.parametrize("hue", [0.2, 0.5, (-0.2, 0.3), [-0.4, 0.5]]) + def test_color_jitter_hue(self, hue, device, channels): + meth_kwargs = {"hue": hue} + _test_class_op( + T.ColorJitter, + meth_kwargs=meth_kwargs, + test_exact_match=False, + device=device, + tol=16.1, + agg_method="max", + channels=channels, + ) + + def test_color_jitter_all(self, device, channels): + # All 4 parameters together + meth_kwargs = {"brightness": 0.2, "contrast": 0.2, "saturation": 0.2, "hue": 0.2} + _test_class_op( + T.ColorJitter, + meth_kwargs=meth_kwargs, + test_exact_match=False, + device=device, + tol=12.1, + agg_method="max", + channels=channels, + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("m", ["constant", "edge", "reflect", "symmetric"]) +@pytest.mark.parametrize("mul", [1, -1]) +def test_pad(m, mul, device): + fill = 127 if m == "constant" else 0 + + # Test functional.pad (PIL and Tensor) with padding as single int + _test_functional_op(F.pad, fn_kwargs={"padding": mul * 2, "fill": fill, "padding_mode": m}, device=device) + # Test functional.pad and transforms.Pad with padding as [int, ] + fn_kwargs = 
meth_kwargs = { + "padding": [mul * 2], + "fill": fill, + "padding_mode": m, + } + _test_op(F.pad, T.Pad, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs) + # Test functional.pad and transforms.Pad with padding as list + fn_kwargs = meth_kwargs = {"padding": [mul * 4, 4], "fill": fill, "padding_mode": m} + _test_op(F.pad, T.Pad, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs) + # Test functional.pad and transforms.Pad with padding as tuple + fn_kwargs = meth_kwargs = {"padding": (mul * 2, 2, 2, mul * 2), "fill": fill, "padding_mode": m} + _test_op(F.pad, T.Pad, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_crop(device): + fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5} + # Test transforms.RandomCrop with size and padding as tuple + meth_kwargs = { + "size": (4, 5), + "padding": (4, 4), + "pad_if_needed": True, + } + _test_op(F.crop, T.RandomCrop, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs) + + # Test transforms.functional.crop including outside the image area + fn_kwargs = {"top": -2, "left": 3, "height": 4, "width": 5} # top + _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device) + + fn_kwargs = {"top": 1, "left": -3, "height": 4, "width": 5} # left + _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device) + + fn_kwargs = {"top": 7, "left": 3, "height": 4, "width": 5} # bottom + _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device) + + fn_kwargs = {"top": 3, "left": 8, "height": 4, "width": 5} # right + _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device) + + fn_kwargs = {"top": -3, "left": -3, "height": 15, "width": 15} # all + _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "padding_config", + [ + {"padding_mode": "constant", "fill": 0}, + {"padding_mode": "constant", "fill": 10}, + 
{"padding_mode": "edge"}, + {"padding_mode": "reflect"}, + ], +) +@pytest.mark.parametrize("pad_if_needed", [True, False]) +@pytest.mark.parametrize("padding", [[5], [5, 4], [1, 2, 3, 4]]) +@pytest.mark.parametrize("size", [5, [5], [6, 6]]) +def test_random_crop(size, padding, pad_if_needed, padding_config, device): + config = dict(padding_config) + config["size"] = size + config["padding"] = padding + config["pad_if_needed"] = pad_if_needed + _test_class_op(T.RandomCrop, device, meth_kwargs=config) + + +def test_random_crop_save_load(tmpdir): + fn = T.RandomCrop(32, [4], pad_if_needed=True) + _test_fn_save_load(fn, tmpdir) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_center_crop(device, tmpdir): + fn_kwargs = {"output_size": (4, 5)} + meth_kwargs = {"size": (4, 5)} + _test_op(F.center_crop, T.CenterCrop, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs) + fn_kwargs = {"output_size": (5,)} + meth_kwargs = {"size": (5,)} + _test_op(F.center_crop, T.CenterCrop, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs) + tensor = torch.randint(0, 256, (3, 10, 10), dtype=torch.uint8, device=device) + # Test torchscript of transforms.CenterCrop with size as int + f = T.CenterCrop(size=5) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + # Test torchscript of transforms.CenterCrop with size as [int, ] + f = T.CenterCrop(size=[5]) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + # Test torchscript of transforms.CenterCrop with size as tuple + f = T.CenterCrop(size=(6, 6)) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + +def test_center_crop_save_load(tmpdir): + fn = T.CenterCrop(size=[5]) + _test_fn_save_load(fn, tmpdir) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "fn, method, out_length", + [ + # test_five_crop + (F.five_crop, T.FiveCrop, 5), + # test_ten_crop + (F.ten_crop, T.TenCrop, 10), + ], +) +@pytest.mark.parametrize("size", [(5,), [5], (4, 5), [4, 
5]]) +def test_x_crop(fn, method, out_length, size, device): + meth_kwargs = fn_kwargs = {"size": size} + scripted_fn = torch.jit.script(fn) + + tensor, pil_img = _create_data(height=20, width=20, device=device) + transformed_t_list = fn(tensor, **fn_kwargs) + transformed_p_list = fn(pil_img, **fn_kwargs) + assert len(transformed_t_list) == len(transformed_p_list) + assert len(transformed_t_list) == out_length + for transformed_tensor, transformed_pil_img in zip(transformed_t_list, transformed_p_list): + _assert_equal_tensor_to_pil(transformed_tensor, transformed_pil_img) + + transformed_t_list_script = scripted_fn(tensor.detach().clone(), **fn_kwargs) + assert len(transformed_t_list) == len(transformed_t_list_script) + assert len(transformed_t_list_script) == out_length + for transformed_tensor, transformed_tensor_script in zip(transformed_t_list, transformed_t_list_script): + assert_equal(transformed_tensor, transformed_tensor_script) + + # test for class interface + fn = method(**meth_kwargs) + scripted_fn = torch.jit.script(fn) + output = scripted_fn(tensor) + assert len(output) == len(transformed_t_list_script) + + # test on batch of tensors + batch_tensors = _create_data_batch(height=23, width=34, channels=3, num_samples=4, device=device) + torch.manual_seed(12) + transformed_batch_list = fn(batch_tensors) + + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] 
+ torch.manual_seed(12) + transformed_img_list = fn(img_tensor) + for transformed_img, transformed_batch in zip(transformed_img_list, transformed_batch_list): + assert_equal(transformed_img, transformed_batch[i, ...]) + + +@pytest.mark.parametrize("method", ["FiveCrop", "TenCrop"]) +def test_x_crop_save_load(method, tmpdir): + fn = getattr(T, method)(size=[5]) + _test_fn_save_load(fn, tmpdir) + + +class TestResize: + @pytest.mark.parametrize("size", [32, 34, 35, 36, 38]) + def test_resize_int(self, size): + # TODO: Minimal check for bug-fix, improve this later + x = torch.rand(3, 32, 46) + t = T.Resize(size=size, antialias=True) + y = t(x) + # If size is an int, smaller edge of the image will be matched to this number. + # i.e, if height > width, then image will be rescaled to (size * height / width, size). + assert isinstance(y, torch.Tensor) + assert y.shape[1] == size + assert y.shape[2] == int(size * 46 / 32) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64]) + @pytest.mark.parametrize("size", [[32], [32, 32], (32, 32), [34, 35]]) + @pytest.mark.parametrize("max_size", [None, 35, 1000]) + @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST, NEAREST_EXACT]) + def test_resize_scripted(self, dt, size, max_size, interpolation, device): + tensor, _ = _create_data(height=34, width=36, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + if max_size is not None and len(size) != 1: + pytest.skip("Size should be an int or a sequence of length 1 if max_size is specified") + + transform = T.Resize(size=size, interpolation=interpolation, max_size=max_size, antialias=True) + s_transform = torch.jit.script(transform) + _test_transform_vs_scripted(transform, s_transform, tensor) + 
_test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + def test_resize_save_load(self, tmpdir): + fn = T.Resize(size=[32], antialias=True) + _test_fn_save_load(fn, tmpdir) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]]) + @pytest.mark.parametrize("ratio", [(0.75, 1.333), [0.75, 1.333]]) + @pytest.mark.parametrize("size", [(32,), [44], [32], [32, 32], (32, 32), [44, 55]]) + @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC, NEAREST_EXACT]) + @pytest.mark.parametrize("antialias", [None, True, False]) + def test_resized_crop(self, scale, ratio, size, interpolation, antialias, device): + + if antialias and interpolation in {NEAREST, NEAREST_EXACT}: + pytest.skip(f"Can not resize if interpolation mode is {interpolation} and antialias=True") + + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + transform = T.RandomResizedCrop( + size=size, scale=scale, ratio=ratio, interpolation=interpolation, antialias=antialias + ) + s_transform = torch.jit.script(transform) + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + def test_resized_crop_save_load(self, tmpdir): + fn = T.RandomResizedCrop(size=[32], antialias=True) + _test_fn_save_load(fn, tmpdir) + + +def _test_random_affine_helper(device, **kwargs): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + transform = T.RandomAffine(**kwargs) + s_transform = torch.jit.script(transform) + + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + +def test_random_affine_save_load(tmpdir): + 
fn = T.RandomAffine(degrees=45.0) + _test_fn_save_load(fn, tmpdir) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) +@pytest.mark.parametrize("shear", [15, 10.0, (5.0, 10.0), [-15, 15], [-10.0, 10.0, -11.0, 11.0]]) +def test_random_affine_shear(device, interpolation, shear): + _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, shear=shear) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) +@pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]]) +def test_random_affine_scale(device, interpolation, scale): + _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, scale=scale) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) +@pytest.mark.parametrize("translate", [(0.1, 0.2), [0.2, 0.1]]) +def test_random_affine_translate(device, interpolation, translate): + _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, translate=translate) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) +@pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]]) +def test_random_affine_degrees(device, interpolation, degrees): + _test_random_affine_helper(device, degrees=degrees, interpolation=interpolation) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) +def test_random_affine_fill(device, interpolation, fill): + _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, fill=fill) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("center", [(0, 0), [10, 10], None, (56, 44)]) +@pytest.mark.parametrize("expand", [True, 
False]) +@pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]]) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) +def test_random_rotate(device, center, expand, degrees, interpolation, fill): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + + transform = T.RandomRotation(degrees=degrees, interpolation=interpolation, expand=expand, center=center, fill=fill) + s_transform = torch.jit.script(transform) + + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + +def test_random_rotate_save_load(tmpdir): + fn = T.RandomRotation(degrees=45.0) + _test_fn_save_load(fn, tmpdir) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("distortion_scale", np.linspace(0.1, 1.0, num=20)) +@pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) +def test_random_perspective(device, distortion_scale, interpolation, fill): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + + transform = T.RandomPerspective(distortion_scale=distortion_scale, interpolation=interpolation, fill=fill) + s_transform = torch.jit.script(transform) + + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + +def test_random_perspective_save_load(tmpdir): + fn = T.RandomPerspective() + _test_fn_save_load(fn, tmpdir) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "Klass, meth_kwargs", + [(T.Grayscale, 
{"num_output_channels": 1}), (T.Grayscale, {"num_output_channels": 3}), (T.RandomGrayscale, {})], +) +def test_to_grayscale(device, Klass, meth_kwargs): + tol = 1.0 + 1e-10 + _test_class_op(Klass, meth_kwargs=meth_kwargs, test_exact_match=False, device=device, tol=tol, agg_method="max") + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("in_dtype", int_dtypes() + float_dtypes()) +@pytest.mark.parametrize("out_dtype", int_dtypes() + float_dtypes()) +def test_convert_image_dtype(device, in_dtype, out_dtype): + tensor, _ = _create_data(26, 34, device=device) + batch_tensors = torch.rand(4, 3, 44, 56, device=device) + + in_tensor = tensor.to(in_dtype) + in_batch_tensors = batch_tensors.to(in_dtype) + + fn = T.ConvertImageDtype(dtype=out_dtype) + scripted_fn = torch.jit.script(fn) + + if (in_dtype == torch.float32 and out_dtype in (torch.int32, torch.int64)) or ( + in_dtype == torch.float64 and out_dtype == torch.int64 + ): + with pytest.raises(RuntimeError, match=r"cannot be performed safely"): + _test_transform_vs_scripted(fn, scripted_fn, in_tensor) + with pytest.raises(RuntimeError, match=r"cannot be performed safely"): + _test_transform_vs_scripted_on_batch(fn, scripted_fn, in_batch_tensors) + return + + _test_transform_vs_scripted(fn, scripted_fn, in_tensor) + _test_transform_vs_scripted_on_batch(fn, scripted_fn, in_batch_tensors) + + +def test_convert_image_dtype_save_load(tmpdir): + fn = T.ConvertImageDtype(dtype=torch.uint8) + _test_fn_save_load(fn, tmpdir) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("policy", [policy for policy in T.AutoAugmentPolicy]) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) +def test_autoaugment(device, policy, fill): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + + transform = 
T.AutoAugment(policy=policy, fill=fill) + s_transform = torch.jit.script(transform) + for _ in range(25): + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("num_ops", [1, 2, 3]) +@pytest.mark.parametrize("magnitude", [7, 9, 11]) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) +def test_randaugment(device, num_ops, magnitude, fill): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + + transform = T.RandAugment(num_ops=num_ops, magnitude=magnitude, fill=fill) + s_transform = torch.jit.script(transform) + for _ in range(25): + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) +def test_trivialaugmentwide(device, fill): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + + transform = T.TrivialAugmentWide(fill=fill) + s_transform = torch.jit.script(transform) + for _ in range(25): + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) +def test_augmix(device, fill): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) + 
+ class DeterministicAugMix(T.AugMix): + def _sample_dirichlet(self, params: torch.Tensor) -> torch.Tensor: + # patch the method to ensure that the order of rand calls doesn't affect the outcome + return params.softmax(dim=-1) + + transform = DeterministicAugMix(fill=fill) + s_transform = torch.jit.script(transform) + for _ in range(25): + _test_transform_vs_scripted(transform, s_transform, tensor) + _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + +@pytest.mark.parametrize("augmentation", [T.AutoAugment, T.RandAugment, T.TrivialAugmentWide, T.AugMix]) +def test_autoaugment_save_load(augmentation, tmpdir): + fn = augmentation() + _test_fn_save_load(fn, tmpdir) + + +@pytest.mark.parametrize("interpolation", [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR]) +@pytest.mark.parametrize("mode", ["X", "Y"]) +def test_autoaugment__op_apply_shear(interpolation, mode): + # We check that torchvision's implementation of shear is equivalent + # to official CIFAR10 autoaugment implementation: + # https://github.com/tensorflow/models/blob/885fda091c46c59d6c7bb5c7e760935eacc229da/research/autoaugment/augmentation_transforms.py#L273-L290 + image_size = 32 + + def shear(pil_img, level, mode, resample): + if mode == "X": + matrix = (1, level, 0, 0, 1, 0) + elif mode == "Y": + matrix = (1, 0, 0, level, 1, 0) + return pil_img.transform((image_size, image_size), PIL.Image.AFFINE, matrix, resample=resample) + + t_img, pil_img = _create_data(image_size, image_size) + + resample_pil = { + F.InterpolationMode.NEAREST: PIL.Image.NEAREST, + F.InterpolationMode.BILINEAR: PIL.Image.BILINEAR, + }[interpolation] + + level = 0.3 + expected_out = shear(pil_img, level, mode=mode, resample=resample_pil) + + # Check pil output vs expected pil + out = _apply_op(pil_img, op_name=f"Shear{mode}", magnitude=level, interpolation=interpolation, fill=0) + assert out == expected_out + + if interpolation == F.InterpolationMode.BILINEAR: + # We skip bilinear mode for 
tensors as + # affine transformation results are not exactly the same + # between tensors and pil images + # MAE as around 1.40 + # Max Abs error can be 163 or 170 + return + + # Check tensor output vs expected pil + out = _apply_op(t_img, op_name=f"Shear{mode}", magnitude=level, interpolation=interpolation, fill=0) + _assert_approx_equal_tensor_to_pil(out, expected_out) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "config", + [ + {}, + {"value": 1}, + {"value": 0.2}, + {"value": "random"}, + {"value": (1, 1, 1)}, + {"value": (0.2, 0.2, 0.2)}, + {"value": [1, 1, 1]}, + {"value": [0.2, 0.2, 0.2]}, + {"value": "random", "ratio": (0.1, 0.2)}, + ], +) +def test_random_erasing(device, config): + tensor, _ = _create_data(24, 32, channels=3, device=device) + batch_tensors = torch.rand(4, 3, 44, 56, device=device) + + fn = T.RandomErasing(**config) + scripted_fn = torch.jit.script(fn) + _test_transform_vs_scripted(fn, scripted_fn, tensor) + _test_transform_vs_scripted_on_batch(fn, scripted_fn, batch_tensors) + + +def test_random_erasing_save_load(tmpdir): + fn = T.RandomErasing(value=0.2) + _test_fn_save_load(fn, tmpdir) + + +def test_random_erasing_with_invalid_data(): + img = torch.rand(3, 60, 60) + # Test Set 0: invalid value + random_erasing = T.RandomErasing(value=(0.1, 0.2, 0.3, 0.4), p=1.0) + with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value or 3"): + random_erasing(img) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_normalize(device, tmpdir): + fn = T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + tensor, _ = _create_data(26, 34, device=device) + + with pytest.raises(TypeError, match="Input tensor should be a float tensor"): + fn(tensor) + + batch_tensors = torch.rand(4, 3, 44, 56, device=device) + tensor = tensor.to(dtype=torch.float32) / 255.0 + # test for class interface + scripted_fn = torch.jit.script(fn) + + _test_transform_vs_scripted(fn, scripted_fn, 
tensor) + _test_transform_vs_scripted_on_batch(fn, scripted_fn, batch_tensors) + + scripted_fn.save(os.path.join(tmpdir, "t_norm.pt")) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_linear_transformation(device, tmpdir): + c, h, w = 3, 24, 32 + + tensor, _ = _create_data(h, w, channels=c, device=device) + + matrix = torch.rand(c * h * w, c * h * w, device=device) + mean_vector = torch.rand(c * h * w, device=device) + + fn = T.LinearTransformation(matrix, mean_vector) + scripted_fn = torch.jit.script(fn) + + _test_transform_vs_scripted(fn, scripted_fn, tensor) + + batch_tensors = torch.rand(4, c, h, w, device=device) + # We skip some tests from _test_transform_vs_scripted_on_batch as + # results for scripted and non-scripted transformations are not exactly the same + torch.manual_seed(12) + transformed_batch = fn(batch_tensors) + torch.manual_seed(12) + s_transformed_batch = scripted_fn(batch_tensors) + assert_equal(transformed_batch, s_transformed_batch) + + scripted_fn.save(os.path.join(tmpdir, "t_norm.pt")) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_compose(device): + tensor, _ = _create_data(26, 34, device=device) + tensor = tensor.to(dtype=torch.float32) / 255.0 + transforms = T.Compose( + [ + T.CenterCrop(10), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ] + ) + s_transforms = torch.nn.Sequential(*transforms.transforms) + + scripted_fn = torch.jit.script(s_transforms) + torch.manual_seed(12) + transformed_tensor = transforms(tensor) + torch.manual_seed(12) + transformed_tensor_script = scripted_fn(tensor) + assert_equal(transformed_tensor, transformed_tensor_script, msg=f"{transforms}") + + t = T.Compose( + [ + lambda x: x, + ] + ) + with pytest.raises(RuntimeError, match="cannot call a value of type 'Tensor'"): + torch.jit.script(t) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_random_apply(device): + tensor, _ = _create_data(26, 34, device=device) + tensor = 
tensor.to(dtype=torch.float32) / 255.0 + + transforms = T.RandomApply( + [ + T.RandomHorizontalFlip(), + T.ColorJitter(), + ], + p=0.4, + ) + s_transforms = T.RandomApply( + torch.nn.ModuleList( + [ + T.RandomHorizontalFlip(), + T.ColorJitter(), + ] + ), + p=0.4, + ) + + scripted_fn = torch.jit.script(s_transforms) + torch.manual_seed(12) + transformed_tensor = transforms(tensor) + torch.manual_seed(12) + transformed_tensor_script = scripted_fn(tensor) + assert_equal(transformed_tensor, transformed_tensor_script, msg=f"{transforms}") + + if device == "cpu": + # Can't check this twice, otherwise + # "Can't redefine method: forward on class: __torch__.torchvision.transforms.transforms.RandomApply" + transforms = T.RandomApply( + [ + T.ColorJitter(), + ], + p=0.3, + ) + with pytest.raises(RuntimeError, match="Module 'RandomApply' has no attribute 'transforms'"): + torch.jit.script(transforms) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "meth_kwargs", + [ + {"kernel_size": 3, "sigma": 0.75}, + {"kernel_size": 23, "sigma": [0.1, 2.0]}, + {"kernel_size": 23, "sigma": (0.1, 2.0)}, + {"kernel_size": [3, 3], "sigma": (1.0, 1.0)}, + {"kernel_size": (3, 3), "sigma": (0.1, 2.0)}, + {"kernel_size": [23], "sigma": 0.75}, + ], +) +@pytest.mark.parametrize("channels", [1, 3]) +def test_gaussian_blur(device, channels, meth_kwargs): + if all( + [ + device == "cuda", + channels == 1, + meth_kwargs["kernel_size"] in [23, [23]], + torch.version.cuda == "11.3", + sys.platform in ("win32", "cygwin"), + ] + ): + pytest.skip("Fails on Windows, see https://github.com/pytorch/vision/issues/5464") + + tol = 1.0 + 1e-10 + torch.manual_seed(12) + _test_class_op( + T.GaussianBlur, + meth_kwargs=meth_kwargs, + channels=channels, + test_exact_match=False, + device=device, + agg_method="max", + tol=tol, + ) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +@pytest.mark.parametrize( + "fill", + [ + 1, + 1.0, + [1], + [1.0], + (1,), + (1.0,), + [1, 2, 
3], + [1.0, 2.0, 3.0], + (1, 2, 3), + (1.0, 2.0, 3.0), + ], +) +@pytest.mark.parametrize("channels", [1, 3]) +def test_elastic_transform(device, channels, fill): + if isinstance(fill, (list, tuple)) and len(fill) > 1 and channels == 1: + # For this the test would correctly fail, since the number of channels in the image does not match `fill`. + # Thus, this is not an issue in the transform, but rather a problem of parametrization that just gives the + # product of `fill` and `channels`. + return + + _test_class_op( + T.ElasticTransform, + meth_kwargs=dict(fill=fill), + channels=channels, + device=device, + ) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py new file mode 100644 index 00000000000..fb49525ecfe --- /dev/null +++ b/test/test_transforms_v2.py @@ -0,0 +1,6218 @@ +import contextlib +import decimal +import functools +import inspect +import itertools +import math +import pickle +import random +import re +import sys +from copy import deepcopy +from pathlib import Path +from unittest import mock + +import numpy as np +import PIL.Image +import pytest + +import torch +import torchvision.ops +import torchvision.transforms.v2 as transforms + +from common_utils import ( + assert_equal, + cache, + cpu_and_cuda, + freeze_rng_state, + ignore_jit_no_profile_information_warning, + make_bounding_boxes, + make_detection_masks, + make_image, + make_image_pil, + make_image_tensor, + make_segmentation_mask, + make_video, + make_video_tensor, + needs_cuda, + set_rng_seed, +) + +from torch import nn +from torch.testing import assert_close +from torch.utils._pytree import tree_flatten, tree_map +from torch.utils.data import DataLoader, default_collate +from torchvision import tv_tensors +from torchvision.ops.boxes import box_iou + +from torchvision.transforms._functional_tensor import _max_value as get_max_value +from torchvision.transforms.functional import pil_modes_mapping, to_pil_image +from torchvision.transforms.v2 import functional as F +from 
torchvision.transforms.v2._utils import check_type, is_pure_tensor +from torchvision.transforms.v2.functional._geometry import _get_perspective_coeffs +from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal + + +# turns all warnings into errors for this module +pytestmark = [pytest.mark.filterwarnings("error")] + +if sys.version_info[:2] >= (3, 12): + # torchscript relies on some AST stuff that got deprecated in 3.12, + # so we have to explicitly ignore those otherwise we'd error on warnings due to the pytestmark filter above. + pytestmark.append(pytest.mark.filterwarnings("ignore::DeprecationWarning")) + + +@pytest.fixture(autouse=True) +def fix_rng_seed(): + set_rng_seed(0) + yield + + +def _to_tolerances(maybe_tolerance_dict): + if not isinstance(maybe_tolerance_dict, dict): + return dict(rtol=None, atol=None) + + tolerances = dict(rtol=0, atol=0) + tolerances.update(maybe_tolerance_dict) + return tolerances + + +def _check_kernel_cuda_vs_cpu(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel produces closes results for inputs on GPU and CPU.""" + if input.device.type != "cuda": + return + + input_cuda = input.as_subclass(torch.Tensor) + input_cpu = input_cuda.to("cpu") + + with freeze_rng_state(): + actual = kernel(input_cuda, *args, **kwargs) + with freeze_rng_state(): + expected = kernel(input_cpu, *args, **kwargs) + + assert_close(actual, expected, check_device=False, rtol=rtol, atol=atol) + + +@cache +def _script(obj): + try: + return torch.jit.script(obj) + except Exception as error: + name = getattr(obj, "__name__", obj.__class__.__name__) + raise AssertionError(f"Trying to `torch.jit.script` `{name}` raised the error above.") from error + + +def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel is scriptable and if the scripted output is close to the eager one.""" + if input.device.type != "cpu": + return + + kernel_scripted = _script(kernel) 
+ + input = input.as_subclass(torch.Tensor) + with ignore_jit_no_profile_information_warning(): + with freeze_rng_state(): + actual = kernel_scripted(input, *args, **kwargs) + with freeze_rng_state(): + expected = kernel(input, *args, **kwargs) + + assert_close(actual, expected, rtol=rtol, atol=atol) + + +def _check_kernel_batched_vs_unbatched(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel produces close results for batched and unbatched inputs.""" + unbatched_input = input.as_subclass(torch.Tensor) + + for batch_dims in [(2,), (2, 1)]: + repeats = [*batch_dims, *[1] * input.ndim] + + actual = kernel(unbatched_input.repeat(repeats), *args, **kwargs) + + expected = kernel(unbatched_input, *args, **kwargs) + # We can't directly call `.repeat()` on the output, since some kernel also return some additional metadata + if isinstance(expected, torch.Tensor): + expected = expected.repeat(repeats) + else: + tensor, *metadata = expected + expected = (tensor.repeat(repeats), *metadata) + + assert_close(actual, expected, rtol=rtol, atol=atol) + + for degenerate_batch_dims in [(0,), (5, 0), (0, 5)]: + degenerate_batched_input = torch.empty( + degenerate_batch_dims + input.shape, dtype=input.dtype, device=input.device + ) + + output = kernel(degenerate_batched_input, *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + assert output.shape[: -input.ndim] == degenerate_batch_dims + + +def check_kernel( + kernel, + input, + *args, + check_cuda_vs_cpu=True, + check_scripted_vs_eager=True, + check_batched_vs_unbatched=True, + **kwargs, +): + initial_input_version = input._version + + output = kernel(input.as_subclass(torch.Tensor), *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + # check that no inplace operation happened + 
assert input._version == initial_input_version + + if kernel not in {F.to_dtype_image, F.to_dtype_video}: + assert output.dtype == input.dtype + assert output.device == input.device + + if check_cuda_vs_cpu: + _check_kernel_cuda_vs_cpu(kernel, input, *args, **kwargs, **_to_tolerances(check_cuda_vs_cpu)) + + if check_scripted_vs_eager: + _check_kernel_scripted_vs_eager(kernel, input, *args, **kwargs, **_to_tolerances(check_scripted_vs_eager)) + + if check_batched_vs_unbatched: + _check_kernel_batched_vs_unbatched(kernel, input, *args, **kwargs, **_to_tolerances(check_batched_vs_unbatched)) + + +def _check_functional_scripted_smoke(functional, input, *args, **kwargs): + """Checks if the functional can be scripted and the scripted version can be called without error.""" + if not isinstance(input, tv_tensors.Image): + return + + functional_scripted = _script(functional) + with ignore_jit_no_profile_information_warning(): + functional_scripted(input.as_subclass(torch.Tensor), *args, **kwargs) + + +def check_functional(functional, input, *args, check_scripted_smoke=True, **kwargs): + unknown_input = object() + with pytest.raises(TypeError, match=re.escape(str(type(unknown_input)))): + functional(unknown_input, *args, **kwargs) + + with mock.patch("torch._C._log_api_usage_once", wraps=torch._C._log_api_usage_once) as spy: + output = functional(input, *args, **kwargs) + + spy.assert_any_call(f"{functional.__module__}.{functional.__name__}") + + assert isinstance(output, type(input)) + + if isinstance(input, tv_tensors.BoundingBoxes) and functional is not F.convert_bounding_box_format: + assert output.format == input.format + + if check_scripted_smoke: + _check_functional_scripted_smoke(functional, input, *args, **kwargs) + + +def check_functional_kernel_signature_match(functional, *, kernel, input_type): + """Checks if the signature of the functional matches the kernel signature.""" + functional_params = list(inspect.signature(functional).parameters.values())[1:] + 
kernel_params = list(inspect.signature(kernel).parameters.values())[1:] + + if issubclass(input_type, tv_tensors.TVTensor): + # We filter out metadata that is implicitly passed to the functional through the input tv_tensor, but has to be + # explicitly passed to the kernel. + explicit_metadata = { + tv_tensors.BoundingBoxes: {"format", "canvas_size"}, + } + kernel_params = [param for param in kernel_params if param.name not in explicit_metadata.get(input_type, set())] + + functional_params = iter(functional_params) + for functional_param, kernel_param in zip(functional_params, kernel_params): + try: + # In general, the functional parameters are a superset of the kernel parameters. Thus, we filter out + # functional parameters that have no kernel equivalent while keeping the order intact. + while functional_param.name != kernel_param.name: + functional_param = next(functional_params) + except StopIteration: + raise AssertionError( + f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` " + f"has no corresponding parameter on the functional `{functional.__name__}`." + ) from None + + if issubclass(input_type, PIL.Image.Image): + # PIL kernels often have more correct annotations, since they are not limited by JIT. Thus, we don't check + # them in the first place. 
+ functional_param._annotation = kernel_param._annotation = inspect.Parameter.empty + + assert functional_param == kernel_param + + +def _check_transform_v1_compatibility(transform, input, *, rtol, atol): + """If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static + ``get_params`` method that is the v1 equivalent, the output is close to v1, is scriptable, and the scripted version + can be called without error.""" + if not (type(input) is torch.Tensor or isinstance(input, PIL.Image.Image)): + return + + v1_transform_cls = transform._v1_transform_cls + if v1_transform_cls is None: + return + + if hasattr(v1_transform_cls, "get_params"): + assert type(transform).get_params is v1_transform_cls.get_params + + v1_transform = v1_transform_cls(**transform._extract_params_for_v1_transform()) + + with freeze_rng_state(): + output_v2 = transform(input) + + with freeze_rng_state(): + output_v1 = v1_transform(input) + + assert_close(F.to_image(output_v2), F.to_image(output_v1), rtol=rtol, atol=atol) + + if isinstance(input, PIL.Image.Image): + return + + _script(v1_transform)(input) + + +def _make_transform_sample(transform, *, image_or_video, adapter): + device = image_or_video.device if isinstance(image_or_video, torch.Tensor) else "cpu" + size = F.get_size(image_or_video) + input = dict( + image_or_video=image_or_video, + image_tv_tensor=make_image(size, device=device), + video_tv_tensor=make_video(size, device=device), + image_pil=make_image_pil(size), + bounding_boxes_xyxy=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.XYXY, device=device), + bounding_boxes_xywh=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.XYWH, device=device), + bounding_boxes_cxcywh=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.CXCYWH, device=device), + bounding_boxes_degenerate_xyxy=tv_tensors.BoundingBoxes( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + 
[2, 0, 1, 1], # x1 > x2, y1 < y2 + [0, 2, 1, 1], # x1 < x2, y1 > y2 + [2, 2, 1, 1], # x1 > x2, y1 > y2 + ], + format=tv_tensors.BoundingBoxFormat.XYXY, + canvas_size=size, + device=device, + ), + bounding_boxes_degenerate_xywh=tv_tensors.BoundingBoxes( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + [0, 0, 1, -1], # negative height + [0, 0, -1, 1], # negative width + [0, 0, -1, -1], # negative height and width + ], + format=tv_tensors.BoundingBoxFormat.XYWH, + canvas_size=size, + device=device, + ), + bounding_boxes_degenerate_cxcywh=tv_tensors.BoundingBoxes( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + [0, 0, 1, -1], # negative height + [0, 0, -1, 1], # negative width + [0, 0, -1, -1], # negative height and width + ], + format=tv_tensors.BoundingBoxFormat.CXCYWH, + canvas_size=size, + device=device, + ), + detection_mask=make_detection_masks(size, device=device), + segmentation_mask=make_segmentation_mask(size, device=device), + int=0, + float=0.0, + bool=True, + none=None, + str="str", + path=Path.cwd(), + object=object(), + tensor=torch.empty(5), + array=np.empty(5), + ) + if adapter is not None: + input = adapter(transform, input, device) + return input + + +def _check_transform_sample_input_smoke(transform, input, *, adapter): + # This is a bunch of input / output convention checks, using a big sample with different parts as input. 
+ + if not check_type(input, (is_pure_tensor, PIL.Image.Image, tv_tensors.Image, tv_tensors.Video)): + return + + sample = _make_transform_sample( + # adapter might change transform inplace + transform=transform if adapter is None else deepcopy(transform), + image_or_video=input, + adapter=adapter, + ) + for container_type in [dict, list, tuple]: + if container_type is dict: + input = sample + else: + input = container_type(sample.values()) + + input_flat, input_spec = tree_flatten(input) + + with freeze_rng_state(): + torch.manual_seed(0) + output = transform(input) + output_flat, output_spec = tree_flatten(output) + + assert output_spec == input_spec + + for output_item, input_item, should_be_transformed in zip( + output_flat, input_flat, transforms.Transform()._needs_transform_list(input_flat) + ): + if should_be_transformed: + assert type(output_item) is type(input_item) + else: + assert output_item is input_item + + # Enforce that the transform does not turn a degenerate bounding box, e.g. marked by RandomIoUCrop (or any other + # future transform that does this), back into a valid one. 
+ for degenerate_bounding_boxes in ( + bounding_box + for name, bounding_box in sample.items() + if "degenerate" in name and isinstance(bounding_box, tv_tensors.BoundingBoxes) + ): + sample = dict( + boxes=degenerate_bounding_boxes, + labels=torch.randint(10, (degenerate_bounding_boxes.shape[0],), device=degenerate_bounding_boxes.device), + ) + assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4) + + +def check_transform(transform, input, check_v1_compatibility=True, check_sample_input=True): + pickle.loads(pickle.dumps(transform)) + + output = transform(input) + assert isinstance(output, type(input)) + + if isinstance(input, tv_tensors.BoundingBoxes) and not isinstance(transform, transforms.ConvertBoundingBoxFormat): + assert output.format == input.format + + if check_sample_input: + _check_transform_sample_input_smoke( + transform, input, adapter=check_sample_input if callable(check_sample_input) else None + ) + + if check_v1_compatibility: + _check_transform_v1_compatibility(transform, input, **_to_tolerances(check_v1_compatibility)) + + return output + + +def transform_cls_to_functional(transform_cls, **transform_specific_kwargs): + def wrapper(input, *args, **kwargs): + transform = transform_cls(*args, **transform_specific_kwargs, **kwargs) + return transform(input) + + wrapper.__name__ = transform_cls.__name__ + + return wrapper + + +def param_value_parametrization(**kwargs): + """Helper function to turn + + @pytest.mark.parametrize( + ("param", "value"), + ("a", 1), + ("a", 2), + ("a", 3), + ("b", -1.0) + ("b", 1.0) + ) + + into + + @param_value_parametrization(a=[1, 2, 3], b=[-1.0, 1.0]) + """ + return pytest.mark.parametrize( + ("param", "value"), + [(param, value) for param, values in kwargs.items() for value in values], + ) + + +def adapt_fill(value, *, dtype): + """Adapt fill values in the range [0.0, 1.0] to the value range of the dtype""" + if value is None: + return value + + max_value = get_max_value(dtype) + value_type = float 
if dtype.is_floating_point else int + + if isinstance(value, (int, float)): + return value_type(value * max_value) + elif isinstance(value, (list, tuple)): + return type(value)(value_type(v * max_value) for v in value) + else: + raise ValueError(f"fill should be an int or float, or a list or tuple of the former, but got '{value}'.") + + +EXHAUSTIVE_TYPE_FILLS = [ + None, + 1, + 0.5, + [1], + [0.2], + (0,), + (0.7,), + [1, 0, 1], + [0.1, 0.2, 0.3], + (0, 1, 0), + (0.9, 0.234, 0.314), +] +CORRECTNESS_FILLS = [ + v for v in EXHAUSTIVE_TYPE_FILLS if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1) +] + + +# We cannot use `list(transforms.InterpolationMode)` here, since it includes some PIL-only ones as well +INTERPOLATION_MODES = [ + transforms.InterpolationMode.NEAREST, + transforms.InterpolationMode.NEAREST_EXACT, + transforms.InterpolationMode.BILINEAR, + transforms.InterpolationMode.BICUBIC, +] + + +def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True): + format = bounding_boxes.format + canvas_size = new_canvas_size or bounding_boxes.canvas_size + + def affine_bounding_boxes(bounding_boxes): + dtype = bounding_boxes.dtype + device = bounding_boxes.device + + # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 + input_xyxy = F.convert_bounding_box_format( + bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True), + old_format=format, + new_format=tv_tensors.BoundingBoxFormat.XYXY, + inplace=True, + ) + x1, y1, x2, y2 = input_xyxy.squeeze(0).tolist() + + points = np.array( + [ + [x1, y1, 1.0], + [x2, y1, 1.0], + [x1, y2, 1.0], + [x2, y2, 1.0], + ] + ) + transformed_points = np.matmul(points, affine_matrix.astype(points.dtype).T) + + output_xyxy = torch.Tensor( + [ + float(np.min(transformed_points[:, 0])), + float(np.min(transformed_points[:, 1])), + float(np.max(transformed_points[:, 0])), + float(np.max(transformed_points[:, 
1])), + ] + ) + + output = F.convert_bounding_box_format( + output_xyxy, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format + ) + + if clamp: + # It is important to clamp before casting, especially for CXCYWH format, dtype=int64 + output = F.clamp_bounding_boxes( + output, + format=format, + canvas_size=canvas_size, + ) + else: + # We leave the bounding box as float so the caller gets the full precision to perform any additional + # operation + dtype = output.dtype + + return output.to(dtype=dtype, device=device) + + return tv_tensors.BoundingBoxes( + torch.cat([affine_bounding_boxes(b) for b in bounding_boxes.reshape(-1, 4).unbind()], dim=0).reshape( + bounding_boxes.shape + ), + format=format, + canvas_size=canvas_size, + ) + + +class TestResize: + INPUT_SIZE = (17, 11) + OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)] + + def _make_max_size_kwarg(self, *, use_max_size, size): + if size is None: + max_size = min(list(self.INPUT_SIZE)) + elif use_max_size: + if not (isinstance(size, int) or len(size) == 1): + # This would result in a `ValueError` + return None + + max_size = (size if isinstance(size, int) else size[0]) + 1 + else: + max_size = None + + return dict(max_size=max_size) + + def _compute_output_size(self, *, input_size, size, max_size): + if size is None: + size = max_size + + elif not (isinstance(size, int) or len(size) == 1): + return tuple(size) + + elif not isinstance(size, int): + size = size[0] + + old_height, old_width = input_size + ratio = old_width / old_height + if ratio > 1: + new_height = size + new_width = int(ratio * new_height) + else: + new_width = size + new_height = int(new_width / ratio) + + if max_size is not None and max(new_height, new_width) > max_size: + # Need to recompute the aspect ratio, since it might have changed due to rounding + ratio = new_width / new_height + if ratio > 1: + new_width = max_size + new_height = int(new_width / ratio) + else: + new_height = max_size + new_width =
int(new_height * ratio) + + return new_height, new_width + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("antialias", [True, False]) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype, device): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + # In contrast to CPU, there is no native `InterpolationMode.BICUBIC` implementation for uint8 images on CUDA. + # Internally, it uses the float path. Thus, we need to test with an enormous tolerance here to account for that. + atol = 30 if (interpolation is transforms.InterpolationMode.BICUBIC and dtype is torch.uint8) else 1 + check_cuda_vs_cpu_tolerances = dict(rtol=0, atol=atol / 255 if dtype.is_floating_point else atol) + + check_kernel( + F.resize_image, + make_image(self.INPUT_SIZE, dtype=dtype, device=device), + size=size, + interpolation=interpolation, + **max_size_kwarg, + antialias=antialias, + check_cuda_vs_cpu=check_cuda_vs_cpu_tolerances, + check_scripted_vs_eager=not isinstance(size, int), + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + bounding_boxes = make_bounding_boxes( + format=format, + canvas_size=self.INPUT_SIZE, + dtype=dtype, + device=device, + ) + check_kernel( + F.resize_bounding_boxes, + bounding_boxes, + 
canvas_size=bounding_boxes.canvas_size, + size=size, + **max_size_kwarg, + check_scripted_vs_eager=not isinstance(size, int), + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.resize_mask, make_mask(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1]) + + def test_kernel_video(self): + check_kernel(F.resize_video, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1], antialias=True) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, size, make_input): + max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size) + + check_functional( + F.resize, + make_input(self.INPUT_SIZE), + size=size, + **max_size_kwarg, + antialias=True, + check_scripted_smoke=not isinstance(size, int), + ) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.resize_image, torch.Tensor), + (F._geometry._resize_image_pil, PIL.Image.Image), + (F.resize_image, tv_tensors.Image), + (F.resize_bounding_boxes, tv_tensors.BoundingBoxes), + (F.resize_mask, tv_tensors.Mask), + (F.resize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.resize, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + def test_transform(self, size, device, make_input): + max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size) + + check_transform( + transforms.Resize(size=size, **max_size_kwarg, antialias=True), + 
make_input(self.INPUT_SIZE, device=device), + # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes + check_v1_compatibility=dict(rtol=0, atol=1) if size is not None else False, + ) + + def _check_output_size(self, input, output, *, size, max_size): + assert tuple(F.get_size(output)) == self._compute_output_size( + input_size=F.get_size(input), size=size, max_size=max_size + ) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2. + # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT` + @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST}) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_image_correctness(self, size, interpolation, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + image = make_image(self.INPUT_SIZE, dtype=torch.uint8) + + actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True) + expected = F.to_image(F.resize(F.to_pil_image(image), size=size, interpolation=interpolation, **max_size_kwarg)) + + self._check_output_size(image, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected, atol=1, rtol=0) + + def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=None): + old_height, old_width = bounding_boxes.canvas_size + new_height, new_width = self._compute_output_size( + input_size=bounding_boxes.canvas_size, size=size, max_size=max_size + ) + + if (old_height, old_width) == (new_height, new_width): + return bounding_boxes + + affine_matrix = np.array( + [ + [new_width / old_width, 0, 0], + [0, new_height / old_height, 0], + ], + ) + + return 
reference_affine_bounding_boxes_helper( + bounding_boxes, + affine_matrix=affine_matrix, + new_canvas_size=(new_height, new_width), + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_bounding_boxes_correctness(self, format, size, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + bounding_boxes = make_bounding_boxes(format=format, canvas_size=self.INPUT_SIZE) + + actual = fn(bounding_boxes, size=size, **max_size_kwarg) + expected = self._reference_resize_bounding_boxes(bounding_boxes, size=size, **max_size_kwarg) + + self._check_output_size(bounding_boxes, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES)) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_pil_interpolation_compat_smoke(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + + with ( + contextlib.nullcontext() + if isinstance(input, PIL.Image.Image) + # This error is triggered in PyTorch core + else pytest.raises(NotImplementedError, match=f"got {interpolation.value.lower()}") + ): + F.resize( + input, + size=self.OUTPUT_SIZES[0], + interpolation=interpolation, + ) + + def test_functional_pil_antialias_warning(self): + with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"): + F.resize(make_image_pil(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + 
make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + def test_max_size_error(self, size, make_input): + if size is None: + # value can be anything other than an integer + max_size = None + match = "max_size must be an integer when size is None" + elif isinstance(size, int) or len(size) == 1: + max_size = (size if isinstance(size, int) else size[0]) - 1 + match = "must be strictly greater than the requested size" + else: + # value can be anything other than None + max_size = -1 + match = "size should be an int or a sequence of length 1" + + with pytest.raises(ValueError, match=match): + F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + + if isinstance(size, list) and len(size) != 1: + with pytest.raises(ValueError, match="max_size should only be passed if size is None or specifies"): + F.resize(make_input(self.INPUT_SIZE), size=size, max_size=500) + + @pytest.mark.parametrize( + "input_size, max_size, expected_size", + [ + ((10, 10), 10, (10, 10)), + ((10, 20), 40, (20, 40)), + ((20, 10), 40, (40, 20)), + ((10, 20), 10, (5, 10)), + ((20, 10), 10, (10, 5)), + ], + ) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + def test_resize_size_none(self, input_size, max_size, expected_size, make_input): + img = make_input(input_size) + out = F.resize(img, size=None, max_size=max_size) + assert F.get_size(out)[-2:] == list(expected_size) + + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_interpolation_int(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + + # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. 
Internally, we map it to + # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a + # difference and thus we don't test it here. + if isinstance(input, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: + return + + expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True) + actual = F.resize( + input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True + ) + + assert_equal(actual, expected) + + def test_transform_unknown_size_error(self): + with pytest.raises(ValueError, match="size can be an integer, a sequence of one or two integers, or None"): + transforms.Resize(size=object()) + + @pytest.mark.parametrize( + "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)] + ) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + def test_noop(self, size, make_input): + input = make_input(self.INPUT_SIZE) + + output = F.resize(input, size=F.get_size(input), antialias=True) + + # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there + # is a good reason to break this, feel free to downgrade to an equality check. + if isinstance(input, tv_tensors.TVTensor): + # We can't test identity directly, since that checks for the identity of the Python object. Since all + # tv_tensors unwrap before a kernel and wrap again afterwards, the Python object changes. 
Thus, we check + # that the underlying storage is the same + assert output.data_ptr() == input.data_ptr() + else: + assert output is input + + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + def test_no_regression_5405(self, make_input): + # Checks that `max_size` is not ignored if `size == small_edge_size` + # See https://github.com/pytorch/vision/issues/5405 + + input = make_input(self.INPUT_SIZE) + + size = min(F.get_size(input)) + max_size = size + 1 + output = F.resize(input, size=size, max_size=max_size, antialias=True) + + assert max(F.get_size(output)) == max_size + + def _make_image(self, *args, batch_dims=(), memory_format=torch.contiguous_format, **kwargs): + # torch.channels_last memory_format is only available for 4D tensors, i.e. (B, C, H, W). However, images coming + # from PIL or our own I/O functions do not have a batch dimension and are thus 3D, i.e. (C, H, W). Still, the + # layout of the data in memory is channels last. To emulate this when a 3D input is requested here, we create + # the image as 4D and create a view with the right shape afterwards. With this the layout in memory is channels + # last although PyTorch doesn't recognize it as such.
+ emulate_channels_last = memory_format is torch.channels_last and len(batch_dims) != 1 + + image = make_image( + *args, + batch_dims=(math.prod(batch_dims),) if emulate_channels_last else batch_dims, + memory_format=memory_format, + **kwargs, + ) + + if emulate_channels_last: + image = tv_tensors.wrap(image.view(*batch_dims, *image.shape[-3:]), like=image) + + return image + + def _check_stride(self, image, *, memory_format): + C, H, W = F.get_dimensions(image) + if memory_format is torch.contiguous_format: + expected_stride = (H * W, W, 1) + elif memory_format is torch.channels_last: + expected_stride = (1, W * C, C) + else: + raise ValueError(f"Unknown memory_format: {memory_format}") + + assert image.stride() == expected_stride + + # TODO: We can remove this test and related torchvision workaround + # once we fixed related pytorch issue: https://github.com/pytorch/pytorch/issues/68430 + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize("antialias", [True, False]) + @pytest.mark.parametrize("memory_format", [torch.contiguous_format, torch.channels_last]) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_memory_format_consistency(self, interpolation, antialias, memory_format, dtype, device): + size = self.OUTPUT_SIZES[0] + + input = self._make_image(self.INPUT_SIZE, dtype=dtype, device=device, memory_format=memory_format) + + # Smoke test to make sure we aren't starting with wrong assumptions + self._check_stride(input, memory_format=memory_format) + + output = F.resize_image(input, size=size, interpolation=interpolation, antialias=antialias) + + self._check_stride(output, memory_format=memory_format) + + def test_float16_no_rounding(self): + # Make sure Resize() doesn't round float16 images + # Non-regression test for https://github.com/pytorch/vision/issues/7667 + + input = make_image_tensor(self.INPUT_SIZE, dtype=torch.float16) + 
output = F.resize_image(input, size=self.OUTPUT_SIZES[0], antialias=True) + + assert output.dtype is torch.float16 + assert (output.round() - output).abs().sum() > 0 + + +class TestHorizontalFlip: + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_boxes(self, format, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + check_kernel( + F.horizontal_flip_bounding_boxes, + bounding_boxes, + format=format, + canvas_size=bounding_boxes.canvas_size, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.horizontal_flip_mask, make_mask()) + + def test_kernel_video(self): + check_kernel(F.horizontal_flip_video, make_video()) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.horizontal_flip, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.horizontal_flip_image, torch.Tensor), + (F._geometry._horizontal_flip_image_pil, PIL.Image.Image), + (F.horizontal_flip_image, tv_tensors.Image), + (F.horizontal_flip_bounding_boxes, tv_tensors.BoundingBoxes), + (F.horizontal_flip_mask, tv_tensors.Mask), + (F.horizontal_flip_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.horizontal_flip, kernel=kernel, input_type=input_type) + + 
@pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform(transforms.RandomHorizontalFlip(p=1), make_input(device=device)) + + @pytest.mark.parametrize( + "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] + ) + def test_image_correctness(self, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image(F.horizontal_flip(F.to_pil_image(image))) + + torch.testing.assert_close(actual, expected) + + def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes): + affine_matrix = np.array( + [ + [-1, 0, bounding_boxes.canvas_size[1]], + [0, 1, 0], + ], + ) + + return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize( + "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] + ) + def test_bounding_boxes_correctness(self, format, fn): + bounding_boxes = make_bounding_boxes(format=format) + + actual = fn(bounding_boxes) + expected = self._reference_horizontal_flip_bounding_boxes(bounding_boxes) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) + + transform = transforms.RandomHorizontalFlip(p=0) + + output = transform(input) + + assert_equal(output, input) + + +class TestAffine: + _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict( + # float, int + angle=[-10.9, 18], + # two-list of float, two-list of int, two-tuple 
of float, two-tuple of int + translate=[[6.3, -0.6], [1, -3], (16.6, -6.6), (-2, 4)], + # float + scale=[0.5], + # float, int, + # one-list of float, one-list of int, one-tuple of float, one-tuple of int + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + shear=[35.6, 38, [-37.7], [-23], (5.3,), (-52,), [5.4, 21.8], [-47, 51], (-11.2, 36.7), (8, -53)], + # None + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)], + ) + # The special case for shear makes sure we pick a value that is supported while JIT scripting + _MINIMAL_AFFINE_KWARGS = { + k: vs[0] if k != "shear" else next(v for v in vs if isinstance(v, list)) + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + _CORRECTNESS_AFFINE_KWARGS = { + k: [v for v in vs if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1)] + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + + _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict( + degrees=[30, (-15, 20)], + translate=[None, (0.5, 0.5)], + scale=[None, (0.75, 1.25)], + shear=[None, (12, 30, -17, 5), 10, (-5, 12)], + ) + _CORRECTNESS_TRANSFORM_AFFINE_RANGES = { + k: next(v for v in vs if v is not None) for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items() + } + + def _check_kernel(self, kernel, input, *args, **kwargs): + kwargs_ = self._MINIMAL_AFFINE_KWARGS.copy() + kwargs_.update(kwargs) + check_kernel(kernel, input, *args, **kwargs_) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"], + shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR], + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def 
test_kernel_image(self, param, value, dtype, device): + if param == "fill": + value = adapt_fill(value, dtype=dtype) + self._check_kernel( + F.affine_image, + make_image(dtype=dtype, device=device), + **{param: value}, + check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))), + check_cuda_vs_cpu=dict(atol=1, rtol=0) + if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR + else True, + ) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"], + shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + ) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_boxes(self, param, value, format, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + self._check_kernel( + F.affine_bounding_boxes, + bounding_boxes, + format=format, + canvas_size=bounding_boxes.canvas_size, + **{param: value}, + check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))), + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + self._check_kernel(F.affine_mask, make_mask()) + + def test_kernel_video(self): + self._check_kernel(F.affine_video, make_video()) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.affine, make_input(), **self._MINIMAL_AFFINE_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.affine_image, torch.Tensor), + (F._geometry._affine_image_pil, PIL.Image.Image), + 
(F.affine_image, tv_tensors.Image), + (F.affine_bounding_boxes, tv_tensors.BoundingBoxes), + (F.affine_mask, tv_tensors.Mask), + (F.affine_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.affine, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + input = make_input(device=device) + + check_transform(transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES), input) + + @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) + @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"]) + @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"]) + @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) + def test_functional_image_correctness(self, angle, translate, scale, shear, center, interpolation, fill): + image = make_image(dtype=torch.uint8, device="cpu") + + fill = adapt_fill(fill, dtype=torch.uint8) + + actual = F.affine( + image, + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + interpolation=interpolation, + fill=fill, + ) + expected = F.to_image( + F.affine( + F.to_pil_image(image), + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + interpolation=interpolation, + fill=fill, + ) + ) + + mae = (actual.float() - expected.float()).abs().mean() + assert mae < 2 if interpolation is transforms.InterpolationMode.NEAREST else 8 + + 
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_image_correctness(self, center, interpolation, fill, seed): + image = make_image(dtype=torch.uint8, device="cpu") + + fill = adapt_fill(fill, dtype=torch.uint8) + + transform = transforms.RandomAffine( + **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center, interpolation=interpolation, fill=fill + ) + + torch.manual_seed(seed) + actual = transform(image) + + torch.manual_seed(seed) + expected = F.to_image(transform(F.to_pil_image(image))) + + mae = (actual.float() - expected.float()).abs().mean() + assert mae < 2 if interpolation is transforms.InterpolationMode.NEAREST else 8 + + def _compute_affine_matrix(self, *, angle, translate, scale, shear, center): + rot = math.radians(angle) + cx, cy = center + tx, ty = translate + sx, sy = [math.radians(s) for s in ([shear, 0.0] if isinstance(shear, (int, float)) else shear)] + + c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]]) + t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) + c_matrix_inv = np.linalg.inv(c_matrix) + rs_matrix = np.array( + [ + [scale * math.cos(rot), -scale * math.sin(rot), 0], + [scale * math.sin(rot), scale * math.cos(rot), 0], + [0, 0, 1], + ] + ) + shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]]) + shear_y_matrix = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]]) + rss_matrix = np.matmul(rs_matrix, np.matmul(shear_y_matrix, shear_x_matrix)) + true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv))) + return true_matrix[:2, :] + + def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate, scale, shear, center): + if center is None: + center = [s * 0.5 for s in 
bounding_boxes.canvas_size[::-1]] + + return reference_affine_bounding_boxes_helper( + bounding_boxes, + affine_matrix=self._compute_affine_matrix( + angle=angle, translate=translate, scale=scale, shear=shear, center=center + ), + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) + @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"]) + @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"]) + @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + def test_functional_bounding_boxes_correctness(self, format, angle, translate, scale, shear, center): + bounding_boxes = make_bounding_boxes(format=format) + + actual = F.affine( + bounding_boxes, + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + ) + expected = self._reference_affine_bounding_boxes( + bounding_boxes, + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + ) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_bounding_boxes_correctness(self, format, center, seed): + bounding_boxes = make_bounding_boxes(format=format) + + transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center) + + torch.manual_seed(seed) + params = transform.make_params([bounding_boxes]) + + torch.manual_seed(seed) + actual = transform(bounding_boxes) + + expected = self._reference_affine_bounding_boxes(bounding_boxes, **params, center=center) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"]) + 
@pytest.mark.parametrize("translate", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["translate"]) + @pytest.mark.parametrize("scale", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["scale"]) + @pytest.mark.parametrize("shear", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["shear"]) + @pytest.mark.parametrize("seed", list(range(10))) + def test_transformmake_params_bounds(self, degrees, translate, scale, shear, seed): + image = make_image() + height, width = F.get_size(image) + + transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, shear=shear) + + torch.manual_seed(seed) + params = transform.make_params([image]) + + if isinstance(degrees, (int, float)): + assert -degrees <= params["angle"] <= degrees + else: + assert degrees[0] <= params["angle"] <= degrees[1] + + if translate is not None: + width_max = int(round(translate[0] * width)) + height_max = int(round(translate[1] * height)) + assert -width_max <= params["translate"][0] <= width_max + assert -height_max <= params["translate"][1] <= height_max + else: + assert params["translate"] == (0, 0) + + if scale is not None: + assert scale[0] <= params["scale"] <= scale[1] + else: + assert params["scale"] == 1.0 + + if shear is not None: + if isinstance(shear, (int, float)): + assert -shear <= params["shear"][0] <= shear + assert params["shear"][1] == 0.0 + elif len(shear) == 2: + assert shear[0] <= params["shear"][0] <= shear[1] + assert params["shear"][1] == 0.0 + elif len(shear) == 4: + assert shear[0] <= params["shear"][0] <= shear[1] + assert shear[2] <= params["shear"][1] <= shear[3] + else: + assert params["shear"] == (0, 0) + + @pytest.mark.parametrize("param", ["degrees", "translate", "scale", "shear", "center"]) + @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]]) + def test_transform_sequence_len_errors(self, param, value): + if param in {"degrees", "shear"} and not isinstance(value, list): + return + + kwargs = {param: value} + if param != "degrees": + kwargs["degrees"] = 0 + + with 
pytest.raises( + ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2" + ): + transforms.RandomAffine(**kwargs) + + def test_transform_negative_degrees_error(self): + with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"): + transforms.RandomAffine(degrees=-1) + + @pytest.mark.parametrize("translate", [[-1, 0], [2, 0], [-1, 2]]) + def test_transform_translate_range_error(self, translate): + with pytest.raises(ValueError, match="translation values should be between 0 and 1"): + transforms.RandomAffine(degrees=0, translate=translate) + + @pytest.mark.parametrize("scale", [[-1, 0], [0, -1], [-1, -1]]) + def test_transform_scale_range_error(self, scale): + with pytest.raises(ValueError, match="scale values should be positive"): + transforms.RandomAffine(degrees=0, scale=scale) + + def test_transform_negative_shear_error(self): + with pytest.raises(ValueError, match="If shear is a single number, it must be positive"): + transforms.RandomAffine(degrees=0, shear=-1) + + def test_transform_unknown_fill_error(self): + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomAffine(degrees=0, fill="fill") + + +class TestVerticalFlip: + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_boxes(self, format, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + check_kernel( + F.vertical_flip_bounding_boxes, + bounding_boxes, + format=format, + canvas_size=bounding_boxes.canvas_size, + ) + + 
@pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.vertical_flip_mask, make_mask()) + + def test_kernel_video(self): + check_kernel(F.vertical_flip_video, make_video()) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.vertical_flip, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.vertical_flip_image, torch.Tensor), + (F._geometry._vertical_flip_image_pil, PIL.Image.Image), + (F.vertical_flip_image, tv_tensors.Image), + (F.vertical_flip_bounding_boxes, tv_tensors.BoundingBoxes), + (F.vertical_flip_mask, tv_tensors.Mask), + (F.vertical_flip_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.vertical_flip, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform(transforms.RandomVerticalFlip(p=1), make_input(device=device)) + + @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) + def test_image_correctness(self, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image(F.vertical_flip(F.to_pil_image(image))) + + torch.testing.assert_close(actual, expected) + + def _reference_vertical_flip_bounding_boxes(self, bounding_boxes): + affine_matrix = np.array( + [ + [1, 0, 0], + [0, -1, bounding_boxes.canvas_size[0]], + ], + ) + + return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix) + + 
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) + def test_bounding_boxes_correctness(self, format, fn): + bounding_boxes = make_bounding_boxes(format=format) + + actual = fn(bounding_boxes) + expected = self._reference_vertical_flip_bounding_boxes(bounding_boxes) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) + + transform = transforms.RandomVerticalFlip(p=0) + + output = transform(input) + + assert_equal(output, input) + + +class TestRotate: + _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict( + # float, int + angle=[-10.9, 18], + # None + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)], + ) + _MINIMAL_AFFINE_KWARGS = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items()} + _CORRECTNESS_AFFINE_KWARGS = { + k: [v for v in vs if v is None or isinstance(v, float) or isinstance(v, list)] + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + + _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict( + degrees=[30, (-15, 20)], + ) + _CORRECTNESS_TRANSFORM_AFFINE_RANGES = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items()} + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR], + expand=[False, True], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def 
test_kernel_image(self, param, value, dtype, device): + kwargs = {param: value} + if param != "angle": + kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] + check_kernel( + F.rotate_image, + make_image(dtype=dtype, device=device), + **kwargs, + check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))), + ) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + expand=[False, True], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + ) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_boxes(self, param, value, format, dtype, device): + kwargs = {param: value} + if param != "angle": + kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] + + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + + check_kernel( + F.rotate_bounding_boxes, + bounding_boxes, + format=format, + canvas_size=bounding_boxes.canvas_size, + **kwargs, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.rotate_mask, make_mask(), **self._MINIMAL_AFFINE_KWARGS) + + def test_kernel_video(self): + check_kernel(F.rotate_video, make_video(), **self._MINIMAL_AFFINE_KWARGS) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.rotate, make_input(), **self._MINIMAL_AFFINE_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.rotate_image, torch.Tensor), + (F._geometry._rotate_image_pil, PIL.Image.Image), + (F.rotate_image, tv_tensors.Image), + (F.rotate_bounding_boxes, tv_tensors.BoundingBoxes), + (F.rotate_mask, tv_tensors.Mask), + (F.rotate_video, tv_tensors.Video), + 
], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.rotate, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform( + transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES), make_input(device=device) + ) + + @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + @pytest.mark.parametrize("expand", [False, True]) + @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) + def test_functional_image_correctness(self, angle, center, interpolation, expand, fill): + image = make_image(dtype=torch.uint8, device="cpu") + + fill = adapt_fill(fill, dtype=torch.uint8) + + actual = F.rotate(image, angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill) + expected = F.to_image( + F.rotate( + F.to_pil_image(image), angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill + ) + ) + + mae = (actual.float() - expected.float()).abs().mean() + assert mae < 1 if interpolation is transforms.InterpolationMode.NEAREST else 6 + + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + @pytest.mark.parametrize("expand", [False, True]) + @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_image_correctness(self, center, interpolation, expand, fill, seed): + image = 
make_image(dtype=torch.uint8, device="cpu") + + fill = adapt_fill(fill, dtype=torch.uint8) + + transform = transforms.RandomRotation( + **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, + center=center, + interpolation=interpolation, + expand=expand, + fill=fill, + ) + + torch.manual_seed(seed) + actual = transform(image) + + torch.manual_seed(seed) + expected = F.to_image(transform(F.to_pil_image(image))) + + mae = (actual.float() - expected.float()).abs().mean() + assert mae < 1 if interpolation is transforms.InterpolationMode.NEAREST else 6 + + def _compute_output_canvas_size(self, *, expand, canvas_size, affine_matrix): + if not expand: + return canvas_size, (0.0, 0.0) + + input_height, input_width = canvas_size + + input_image_frame = np.array( + [ + [0.0, 0.0, 1.0], + [0.0, input_height, 1.0], + [input_width, input_height, 1.0], + [input_width, 0.0, 1.0], + ], + dtype=np.float64, + ) + output_image_frame = np.matmul(input_image_frame, affine_matrix.astype(input_image_frame.dtype).T) + + recenter_x = float(np.min(output_image_frame[:, 0])) + recenter_y = float(np.min(output_image_frame[:, 1])) + + output_width = int(np.max(output_image_frame[:, 0]) - recenter_x) + output_height = int(np.max(output_image_frame[:, 1]) - recenter_y) + + return (output_height, output_width), (recenter_x, recenter_y) + + def _recenter_bounding_boxes_after_expand(self, bounding_boxes, *, recenter_xy): + x, y = recenter_xy + if bounding_boxes.format is tv_tensors.BoundingBoxFormat.XYXY: + translate = [x, y, x, y] + else: + translate = [x, y, 0.0, 0.0] + return tv_tensors.wrap( + (bounding_boxes.to(torch.float64) - torch.tensor(translate)).to(bounding_boxes.dtype), like=bounding_boxes + ) + + def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, center): + if center is None: + center = [s * 0.5 for s in bounding_boxes.canvas_size[::-1]] + cx, cy = center + + a = np.cos(angle * np.pi / 180.0) + b = np.sin(angle * np.pi / 180.0) + affine_matrix = np.array( + [ + [a, b, 
cx - cx * a - b * cy], + [-b, a, cy + cx * b - a * cy], + ], + ) + + new_canvas_size, recenter_xy = self._compute_output_canvas_size( + expand=expand, canvas_size=bounding_boxes.canvas_size, affine_matrix=affine_matrix + ) + + output = reference_affine_bounding_boxes_helper( + bounding_boxes, + affine_matrix=affine_matrix, + new_canvas_size=new_canvas_size, + clamp=False, + ) + + return F.clamp_bounding_boxes(self._recenter_bounding_boxes_after_expand(output, recenter_xy=recenter_xy)).to( + bounding_boxes + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) + @pytest.mark.parametrize("expand", [False, True]) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + def test_functional_bounding_boxes_correctness(self, format, angle, expand, center): + bounding_boxes = make_bounding_boxes(format=format) + + actual = F.rotate(bounding_boxes, angle=angle, expand=expand, center=center) + expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center) + + torch.testing.assert_close(actual, expected) + torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("expand", [False, True]) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_bounding_boxes_correctness(self, format, expand, center, seed): + bounding_boxes = make_bounding_boxes(format=format) + + transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center) + + torch.manual_seed(seed) + params = transform.make_params([bounding_boxes]) + + torch.manual_seed(seed) + actual = transform(bounding_boxes) + + expected = self._reference_rotate_bounding_boxes(bounding_boxes, 
**params, expand=expand, center=center) + + torch.testing.assert_close(actual, expected) + torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) + + @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"]) + @pytest.mark.parametrize("seed", list(range(10))) + def test_transformmake_params_bounds(self, degrees, seed): + transform = transforms.RandomRotation(degrees=degrees) + + torch.manual_seed(seed) + params = transform.make_params([]) + + if isinstance(degrees, (int, float)): + assert -degrees <= params["angle"] <= degrees + else: + assert degrees[0] <= params["angle"] <= degrees[1] + + @pytest.mark.parametrize("param", ["degrees", "center"]) + @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]]) + def test_transform_sequence_len_errors(self, param, value): + if param == "degrees" and not isinstance(value, list): + return + + kwargs = {param: value} + if param != "degrees": + kwargs["degrees"] = 0 + + with pytest.raises( + ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2" + ): + transforms.RandomRotation(**kwargs) + + def test_transform_negative_degrees_error(self): + with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"): + transforms.RandomAffine(degrees=-1) + + def test_transform_unknown_fill_error(self): + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomAffine(degrees=0, fill="fill") + + @pytest.mark.parametrize("size", [(11, 17), (16, 16)]) + @pytest.mark.parametrize("angle", [0, 90, 180, 270]) + @pytest.mark.parametrize("expand", [False, True]) + def test_functional_image_fast_path_correctness(self, size, angle, expand): + image = make_image(size, dtype=torch.uint8, device="cpu") + + actual = F.rotate(image, angle=angle, expand=expand) + expected = F.to_image(F.rotate(F.to_pil_image(image), angle=angle, expand=expand)) + + 
torch.testing.assert_close(actual, expected) + + +class TestContainerTransforms: + class BuiltinTransform(transforms.Transform): + def transform(self, inpt, params): + return inpt + + class PackedInputTransform(nn.Module): + def forward(self, sample): + assert len(sample) == 2 + return sample + + class UnpackedInputTransform(nn.Module): + def forward(self, image, label): + return image, label + + @pytest.mark.parametrize( + "transform_cls", [transforms.Compose, functools.partial(transforms.RandomApply, p=1), transforms.RandomOrder] + ) + @pytest.mark.parametrize( + "wrapped_transform_clss", + [ + [BuiltinTransform], + [PackedInputTransform], + [UnpackedInputTransform], + [BuiltinTransform, BuiltinTransform], + [PackedInputTransform, PackedInputTransform], + [UnpackedInputTransform, UnpackedInputTransform], + [BuiltinTransform, PackedInputTransform, BuiltinTransform], + [BuiltinTransform, UnpackedInputTransform, BuiltinTransform], + [PackedInputTransform, BuiltinTransform, PackedInputTransform], + [UnpackedInputTransform, BuiltinTransform, UnpackedInputTransform], + ], + ) + @pytest.mark.parametrize("unpack", [True, False]) + def test_packed_unpacked(self, transform_cls, wrapped_transform_clss, unpack): + needs_packed_inputs = any(issubclass(cls, self.PackedInputTransform) for cls in wrapped_transform_clss) + needs_unpacked_inputs = any(issubclass(cls, self.UnpackedInputTransform) for cls in wrapped_transform_clss) + assert not (needs_packed_inputs and needs_unpacked_inputs) + + transform = transform_cls([cls() for cls in wrapped_transform_clss]) + + image = make_image() + label = 3 + packed_input = (image, label) + + def call_transform(): + if unpack: + return transform(*packed_input) + else: + return transform(packed_input) + + if needs_unpacked_inputs and not unpack: + with pytest.raises(TypeError, match="missing 1 required positional argument"): + call_transform() + elif needs_packed_inputs and unpack: + with pytest.raises(TypeError, match="takes 2 positional 
arguments but 3 were given"): + call_transform() + else: + output = call_transform() + + assert isinstance(output, tuple) and len(output) == 2 + assert output[0] is image + assert output[1] is label + + def test_compose(self): + transform = transforms.Compose( + [ + transforms.RandomHorizontalFlip(p=1), + transforms.RandomVerticalFlip(p=1), + ] + ) + + input = make_image() + + actual = check_transform(transform, input) + expected = F.vertical_flip(F.horizontal_flip(input)) + + assert_equal(actual, expected) + + @pytest.mark.parametrize("p", [0.0, 1.0]) + @pytest.mark.parametrize("sequence_type", [list, nn.ModuleList]) + def test_random_apply(self, p, sequence_type): + transform = transforms.RandomApply( + sequence_type( + [ + transforms.RandomHorizontalFlip(p=1), + transforms.RandomVerticalFlip(p=1), + ] + ), + p=p, + ) + + # This needs to be a pure tensor (or a PIL image), because otherwise check_transforms skips the v1 compatibility + # check + input = make_image_tensor() + output = check_transform(transform, input, check_v1_compatibility=issubclass(sequence_type, nn.ModuleList)) + + if p == 1: + assert_equal(output, F.vertical_flip(F.horizontal_flip(input))) + else: + assert output is input + + @pytest.mark.parametrize("p", [(0, 1), (1, 0)]) + def test_random_choice(self, p): + transform = transforms.RandomChoice( + [ + transforms.RandomHorizontalFlip(p=1), + transforms.RandomVerticalFlip(p=1), + ], + p=p, + ) + + input = make_image() + output = check_transform(transform, input) + + p_horz, p_vert = p + if p_horz: + assert_equal(output, F.horizontal_flip(input)) + else: + assert_equal(output, F.vertical_flip(input)) + + def test_random_order(self): + transform = transforms.Compose( + [ + transforms.RandomHorizontalFlip(p=1), + transforms.RandomVerticalFlip(p=1), + ] + ) + + input = make_image() + + actual = check_transform(transform, input) + # We can't really check whether the transforms are actually applied in random order. 
However, horizontal and + # vertical flip are commutative. Meaning, even under the assumption that the transform applies them in random + # order, we can use a fixed order to compute the expected value. + expected = F.vertical_flip(F.horizontal_flip(input)) + + assert_equal(actual, expected) + + def test_errors(self): + for cls in [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder]: + with pytest.raises(TypeError, match="Argument transforms should be a sequence of callables"): + cls(lambda x: x) + + with pytest.raises(ValueError, match="at least one transform"): + transforms.Compose([]) + + for p in [-1, 2]: + with pytest.raises(ValueError, match=re.escape("value in the interval [0.0, 1.0]")): + transforms.RandomApply([lambda x: x], p=p) + + for transforms_, p in [([lambda x: x], []), ([], [1.0])]: + with pytest.raises(ValueError, match="Length of p doesn't match the number of transforms"): + transforms.RandomChoice(transforms_, p=p) + + +class TestToDtype: + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.to_dtype_image, make_image_tensor), + (F.to_dtype_image, make_image), + (F.to_dtype_video, make_video), + ], + ) + @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8]) + @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("scale", (True, False)) + def test_kernel(self, kernel, make_input, input_dtype, output_dtype, device, scale): + check_kernel( + kernel, + make_input(dtype=input_dtype, device=device), + dtype=output_dtype, + scale=scale, + ) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) + @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8]) + @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + 
@pytest.mark.parametrize("scale", (True, False)) + def test_functional(self, make_input, input_dtype, output_dtype, device, scale): + check_functional( + F.to_dtype, + make_input(dtype=input_dtype, device=device), + dtype=output_dtype, + scale=scale, + ) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8]) + @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("scale", (True, False)) + @pytest.mark.parametrize("as_dict", (True, False)) + def test_transform(self, make_input, input_dtype, output_dtype, device, scale, as_dict): + input = make_input(dtype=input_dtype, device=device) + if as_dict: + output_dtype = {type(input): output_dtype} + check_transform(transforms.ToDtype(dtype=output_dtype, scale=scale), input, check_sample_input=not as_dict) + + def reference_convert_dtype_image_tensor(self, image, dtype=torch.float, scale=False): + input_dtype = image.dtype + output_dtype = dtype + + if not scale: + return image.to(dtype) + + if output_dtype == input_dtype: + return image + + def fn(value): + if input_dtype.is_floating_point: + if output_dtype.is_floating_point: + return value + else: + return round(decimal.Decimal(value) * torch.iinfo(output_dtype).max) + else: + input_max_value = torch.iinfo(input_dtype).max + + if output_dtype.is_floating_point: + return float(decimal.Decimal(value) / input_max_value) + else: + output_max_value = torch.iinfo(output_dtype).max + + if input_max_value > output_max_value: + factor = (input_max_value + 1) // (output_max_value + 1) + return value / factor + else: + factor = (output_max_value + 1) // (input_max_value + 1) + return value * factor + + return torch.tensor(tree_map(fn, image.tolist())).to(dtype=output_dtype, device=image.device) + + 
@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16]) + @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("scale", (True, False)) + def test_image_correctness(self, input_dtype, output_dtype, device, scale): + if input_dtype.is_floating_point and output_dtype == torch.int64: + pytest.xfail("float to int64 conversion is not supported") + if input_dtype == torch.uint8 and output_dtype == torch.uint16 and device == "cuda": + pytest.xfail("uint8 to uint16 conversion is not supported on cuda") + + input = make_image(dtype=input_dtype, device=device) + + out = F.to_dtype(input, dtype=output_dtype, scale=scale) + expected = self.reference_convert_dtype_image_tensor(input, dtype=output_dtype, scale=scale) + + if input_dtype.is_floating_point and not output_dtype.is_floating_point and scale: + torch.testing.assert_close(out, expected, atol=1, rtol=0) + else: + torch.testing.assert_close(out, expected) + + def was_scaled(self, inpt): + # this assumes the target dtype is float + return inpt.max() <= 1 + + def make_inpt_with_bbox_and_mask(self, make_input): + H, W = 10, 10 + inpt_dtype = torch.uint8 + bbox_dtype = torch.float32 + mask_dtype = torch.bool + sample = { + "inpt": make_input(size=(H, W), dtype=inpt_dtype), + "bbox": make_bounding_boxes(canvas_size=(H, W), dtype=bbox_dtype), + "mask": make_detection_masks(size=(H, W), dtype=mask_dtype), + } + + return sample, inpt_dtype, bbox_dtype, mask_dtype + + @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video)) + @pytest.mark.parametrize("scale", (True, False)) + def test_dtype_not_a_dict(self, make_input, scale): + # assert only inpt gets transformed when dtype isn't a dict + + sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input) + out = transforms.ToDtype(dtype=torch.float32, 
scale=scale)(sample) + + assert out["inpt"].dtype != inpt_dtype + assert out["inpt"].dtype == torch.float32 + if scale: + assert self.was_scaled(out["inpt"]) + else: + assert not self.was_scaled(out["inpt"]) + assert out["bbox"].dtype == bbox_dtype + assert out["mask"].dtype == mask_dtype + + @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video)) + def test_others_catch_all_and_none(self, make_input): + # make sure "others" works as a catch-all and that None means no conversion + + sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input) + out = transforms.ToDtype(dtype={tv_tensors.Mask: torch.int64, "others": None})(sample) + assert out["inpt"].dtype == inpt_dtype + assert out["bbox"].dtype == bbox_dtype + assert out["mask"].dtype != mask_dtype + assert out["mask"].dtype == torch.int64 + + @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video)) + def test_typical_use_case(self, make_input): + # Typical use-case: want to convert dtype and scale for inpt and just dtype for masks. 
+ # This just makes sure we now have a decent API for this + + sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input) + out = transforms.ToDtype( + dtype={type(sample["inpt"]): torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True + )(sample) + assert out["inpt"].dtype != inpt_dtype + assert out["inpt"].dtype == torch.float32 + assert self.was_scaled(out["inpt"]) + assert out["bbox"].dtype == bbox_dtype + assert out["mask"].dtype != mask_dtype + assert out["mask"].dtype == torch.int64 + + @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video)) + def test_errors_warnings(self, make_input): + sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input) + + with pytest.raises(ValueError, match="No dtype was specified for"): + out = transforms.ToDtype(dtype={tv_tensors.Mask: torch.float32})(sample) + with pytest.warns(UserWarning, match=re.escape("plain `torch.Tensor` will *not* be transformed")): + transforms.ToDtype(dtype={torch.Tensor: torch.float32, tv_tensors.Image: torch.float32}) + with pytest.warns(UserWarning, match="no scaling will be done"): + out = transforms.ToDtype(dtype={"others": None}, scale=True)(sample) + assert out["inpt"].dtype == inpt_dtype + assert out["bbox"].dtype == bbox_dtype + assert out["mask"].dtype == mask_dtype + + def test_uint16(self): + # These checks are probably already covered above but since uint16 is a + # newly supported dtype, we want to be extra careful, hence this + # explicit test + img_uint16 = torch.randint(0, 65535, (256, 512), dtype=torch.uint16) + + img_uint8 = F.to_dtype(img_uint16, torch.uint8, scale=True) + img_float32 = F.to_dtype(img_uint16, torch.float32, scale=True) + img_int32 = F.to_dtype(img_uint16, torch.int32, scale=True) + + assert_equal(img_uint8, (img_uint16 / 256).to(torch.uint8)) + assert_close(img_float32, (img_uint16 / 65535)) + + assert_close(F.to_dtype(img_float32, torch.uint16, 
scale=True), img_uint16, rtol=0, atol=1) + # Ideally we'd check against (img_uint16 & 0xFF00) but bitwise and isn't supported for it yet + # so we simulate it by scaling down and up again. + assert_equal(F.to_dtype(img_uint8, torch.uint16, scale=True), ((img_uint16 / 256).to(torch.uint16) * 256)) + assert_equal(F.to_dtype(img_int32, torch.uint16, scale=True), img_uint16) + + assert_equal(F.to_dtype(img_float32, torch.uint8, scale=True), img_uint8) + assert_close(F.to_dtype(img_uint8, torch.float32, scale=True), img_float32, rtol=0, atol=1e-2) + + +class TestAdjustBrightness: + _CORRECTNESS_BRIGHTNESS_FACTORS = [0.5, 0.0, 1.0, 5.0] + _DEFAULT_BRIGHTNESS_FACTOR = _CORRECTNESS_BRIGHTNESS_FACTORS[0] + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.adjust_brightness_image, make_image), + (F.adjust_brightness_video, make_video), + ], + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel(self, kernel, make_input, dtype, device): + check_kernel(kernel, make_input(dtype=dtype, device=device), brightness_factor=self._DEFAULT_BRIGHTNESS_FACTOR) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_brightness, make_input(), brightness_factor=self._DEFAULT_BRIGHTNESS_FACTOR) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_brightness_image, torch.Tensor), + (F._color._adjust_brightness_image_pil, PIL.Image.Image), + (F.adjust_brightness_image, tv_tensors.Image), + (F.adjust_brightness_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_brightness, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("brightness_factor", _CORRECTNESS_BRIGHTNESS_FACTORS) + def test_image_correctness(self, brightness_factor): + image = 
make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_brightness(image, brightness_factor=brightness_factor) + expected = F.to_image(F.adjust_brightness(F.to_pil_image(image), brightness_factor=brightness_factor)) + + torch.testing.assert_close(actual, expected) + + +class TestCutMixMixUp: + class DummyDataset: + def __init__(self, size, num_classes, one_hot_labels): + self.size = size + self.num_classes = num_classes + self.one_hot_labels = one_hot_labels + assert size < num_classes + + def __getitem__(self, idx): + img = torch.rand(3, 100, 100) + label = idx # This ensures all labels in a batch are unique and makes testing easier + if self.one_hot_labels: + label = torch.nn.functional.one_hot(torch.tensor(label), num_classes=self.num_classes) + return img, label + + def __len__(self): + return self.size + + @pytest.mark.parametrize("T", [transforms.CutMix, transforms.MixUp]) + @pytest.mark.parametrize("one_hot_labels", (True, False)) + def test_supported_input_structure(self, T, one_hot_labels): + + batch_size = 32 + num_classes = 100 + + dataset = self.DummyDataset(size=batch_size, num_classes=num_classes, one_hot_labels=one_hot_labels) + + cutmix_mixup = T(num_classes=num_classes) + + dl = DataLoader(dataset, batch_size=batch_size) + + # Input sanity checks + img, target = next(iter(dl)) + input_img_size = img.shape[-3:] + assert isinstance(img, torch.Tensor) and isinstance(target, torch.Tensor) + assert target.shape == (batch_size, num_classes) if one_hot_labels else (batch_size,) + + def check_output(img, target): + assert img.shape == (batch_size, *input_img_size) + assert target.shape == (batch_size, num_classes) + torch.testing.assert_close(target.sum(axis=-1), torch.ones(batch_size)) + num_non_zero_labels = (target != 0).sum(axis=-1) + assert (num_non_zero_labels == 2).all() + + # After Dataloader, as unpacked input + img, target = next(iter(dl)) + assert target.shape == (batch_size, num_classes) if one_hot_labels else (batch_size,) + img, 
target = cutmix_mixup(img, target) + check_output(img, target) + + # After Dataloader, as packed input + packed_from_dl = next(iter(dl)) + assert isinstance(packed_from_dl, list) + img, target = cutmix_mixup(packed_from_dl) + check_output(img, target) + + # As collation function. We expect default_collate to be used by users. + def collate_fn_1(batch): + return cutmix_mixup(default_collate(batch)) + + def collate_fn_2(batch): + return cutmix_mixup(*default_collate(batch)) + + for collate_fn in (collate_fn_1, collate_fn_2): + dl = DataLoader(dataset, batch_size=batch_size, collate_fn=collate_fn) + img, target = next(iter(dl)) + check_output(img, target) + + @needs_cuda + @pytest.mark.parametrize("T", [transforms.CutMix, transforms.MixUp]) + def test_cpu_vs_gpu(self, T): + num_classes = 10 + batch_size = 3 + H, W = 12, 12 + + imgs = torch.rand(batch_size, 3, H, W) + labels = torch.randint(0, num_classes, (batch_size,)) + cutmix_mixup = T(alpha=0.5, num_classes=num_classes) + + _check_kernel_cuda_vs_cpu(cutmix_mixup, imgs, labels, rtol=None, atol=None) + + @pytest.mark.parametrize("T", [transforms.CutMix, transforms.MixUp]) + def test_error(self, T): + + num_classes = 10 + batch_size = 9 + + imgs = torch.rand(batch_size, 3, 12, 12) + cutmix_mixup = T(alpha=0.5, num_classes=num_classes) + + for input_with_bad_type in ( + F.to_pil_image(imgs[0]), + tv_tensors.Mask(torch.rand(12, 12)), + tv_tensors.BoundingBoxes(torch.rand(2, 4), format="XYXY", canvas_size=12), + ): + with pytest.raises(ValueError, match="does not support PIL images, "): + cutmix_mixup(input_with_bad_type) + + with pytest.raises(ValueError, match="Could not infer where the labels are"): + cutmix_mixup({"img": imgs, "Nothing_else": 3}) + + with pytest.raises(ValueError, match="labels should be index based"): + # Note: the error message isn't ideal, but that's because the label heuristic found the img as the label + # It's OK, it's an edge-case. 
The important thing is that this fails loudly instead of passing silently + cutmix_mixup(imgs) + + with pytest.raises(ValueError, match="When using the default labels_getter"): + cutmix_mixup(imgs, "not_a_tensor") + + with pytest.raises(ValueError, match="Expected a batched input with 4 dims"): + cutmix_mixup(imgs[None, None], torch.randint(0, num_classes, size=(batch_size,))) + + with pytest.raises(ValueError, match="does not match the batch size of the labels"): + cutmix_mixup(imgs, torch.randint(0, num_classes, size=(batch_size + 1,))) + + with pytest.raises(ValueError, match="When passing 2D labels"): + wrong_num_classes = num_classes + 1 + T(alpha=0.5, num_classes=num_classes)(imgs, torch.randint(0, 2, size=(batch_size, wrong_num_classes))) + + with pytest.raises(ValueError, match="but got a tensor of shape"): + cutmix_mixup(imgs, torch.randint(0, 2, size=(2, 3, 4))) + + with pytest.raises(ValueError, match="num_classes must be passed"): + T(alpha=0.5)(imgs, torch.randint(0, num_classes, size=(batch_size,))) + + +@pytest.mark.parametrize("key", ("labels", "LABELS", "LaBeL", "SOME_WEIRD_KEY_THAT_HAS_LABeL_IN_IT")) +@pytest.mark.parametrize("sample_type", (tuple, list, dict)) +def test_labels_getter_default_heuristic(key, sample_type): + labels = torch.arange(10) + sample = {key: labels, "another_key": "whatever"} + if sample_type is not dict: + sample = sample_type((None, sample, "whatever_again")) + assert transforms._utils._find_labels_default_heuristic(sample) is labels + + if key.lower() != "labels": + # If "labels" is in the dict (case-insensitive), + # it takes precedence over other keys which would otherwise be a match + d = {key: "something_else", "labels": labels} + assert transforms._utils._find_labels_default_heuristic(d) is labels + + +class TestShapeGetters: + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.get_dimensions_image, make_image_tensor), + (F._meta._get_dimensions_image_pil, make_image_pil), + (F.get_dimensions_image, 
make_image), + (F.get_dimensions_video, make_video), + ], + ) + def test_get_dimensions(self, kernel, make_input): + size = (10, 10) + color_space, num_channels = "RGB", 3 + + input = make_input(size, color_space=color_space) + + assert kernel(input) == F.get_dimensions(input) == [num_channels, *size] + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.get_num_channels_image, make_image_tensor), + (F._meta._get_num_channels_image_pil, make_image_pil), + (F.get_num_channels_image, make_image), + (F.get_num_channels_video, make_video), + ], + ) + def test_get_num_channels(self, kernel, make_input): + color_space, num_channels = "RGB", 3 + + input = make_input(color_space=color_space) + + assert kernel(input) == F.get_num_channels(input) == num_channels + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.get_size_image, make_image_tensor), + (F._meta._get_size_image_pil, make_image_pil), + (F.get_size_image, make_image), + (F.get_size_bounding_boxes, make_bounding_boxes), + (F.get_size_mask, make_detection_masks), + (F.get_size_mask, make_segmentation_mask), + (F.get_size_video, make_video), + ], + ) + def test_get_size(self, kernel, make_input): + size = (10, 10) + + input = make_input(size) + + assert kernel(input) == F.get_size(input) == list(size) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.get_num_frames_video, make_video_tensor), + (F.get_num_frames_video, make_video), + ], + ) + def test_get_num_frames(self, kernel, make_input): + num_frames = 4 + + input = make_input(num_frames=num_frames) + + assert kernel(input) == F.get_num_frames(input) == num_frames + + @pytest.mark.parametrize( + ("functional", "make_input"), + [ + (F.get_dimensions, make_bounding_boxes), + (F.get_dimensions, make_detection_masks), + (F.get_dimensions, make_segmentation_mask), + (F.get_num_channels, make_bounding_boxes), + (F.get_num_channels, make_detection_masks), + (F.get_num_channels, make_segmentation_mask), + (F.get_num_frames, 
make_image_pil), + (F.get_num_frames, make_image), + (F.get_num_frames, make_bounding_boxes), + (F.get_num_frames, make_detection_masks), + (F.get_num_frames, make_segmentation_mask), + ], + ) + def test_unsupported_types(self, functional, make_input): + input = make_input() + + with pytest.raises(TypeError, match=re.escape(str(type(input)))): + functional(input) + + +class TestRegisterKernel: + @pytest.mark.parametrize("functional", (F.resize, "resize")) + def test_register_kernel(self, functional): + class CustomTVTensor(tv_tensors.TVTensor): + pass + + kernel_was_called = False + + @F.register_kernel(functional, CustomTVTensor) + def new_resize(dp, *args, **kwargs): + nonlocal kernel_was_called + kernel_was_called = True + return dp + + t = transforms.Resize(size=(224, 224), antialias=True) + + my_dp = CustomTVTensor(torch.rand(3, 10, 10)) + out = t(my_dp) + assert out is my_dp + assert kernel_was_called + + # Sanity check to make sure we didn't override the kernel of other types + t(torch.rand(3, 10, 10)).shape == (3, 224, 224) + t(tv_tensors.Image(torch.rand(3, 10, 10))).shape == (3, 224, 224) + + def test_errors(self): + with pytest.raises(ValueError, match="Could not find functional with name"): + F.register_kernel("bad_name", tv_tensors.Image) + + with pytest.raises(ValueError, match="Kernels can only be registered on functionals"): + F.register_kernel(tv_tensors.Image, F.resize) + + with pytest.raises(ValueError, match="Kernels can only be registered for subclasses"): + F.register_kernel(F.resize, object) + + with pytest.raises(ValueError, match="cannot be registered for the builtin tv_tensor classes"): + F.register_kernel(F.resize, tv_tensors.Image)(F.resize_image) + + class CustomTVTensor(tv_tensors.TVTensor): + pass + + def resize_custom_tv_tensor(): + pass + + F.register_kernel(F.resize, CustomTVTensor)(resize_custom_tv_tensor) + + with pytest.raises(ValueError, match="already has a kernel registered for type"): + F.register_kernel(F.resize, 
CustomTVTensor)(resize_custom_tv_tensor) + + +class TestGetKernel: + # We are using F.resize as functional and the kernels below as proxy. Any other functional / kernels combination + # would also be fine + KERNELS = { + torch.Tensor: F.resize_image, + PIL.Image.Image: F._geometry._resize_image_pil, + tv_tensors.Image: F.resize_image, + tv_tensors.BoundingBoxes: F.resize_bounding_boxes, + tv_tensors.Mask: F.resize_mask, + tv_tensors.Video: F.resize_video, + } + + @pytest.mark.parametrize("input_type", [str, int, object]) + def test_unsupported_types(self, input_type): + with pytest.raises(TypeError, match="supports inputs of type"): + _get_kernel(F.resize, input_type) + + def test_exact_match(self): + # We cannot use F.resize together with self.KERNELS mapping here directly here, since this is only the + # ideal wrapping. Practically, we have an intermediate wrapper layer. Thus, we create a new resize functional + # here, register the kernels without wrapper, and check the exact matching afterwards. + def resize_with_pure_kernels(): + pass + + for input_type, kernel in self.KERNELS.items(): + _register_kernel_internal(resize_with_pure_kernels, input_type, tv_tensor_wrapper=False)(kernel) + + assert _get_kernel(resize_with_pure_kernels, input_type) is kernel + + def test_builtin_tv_tensor_subclass(self): + # We cannot use F.resize together with self.KERNELS mapping here directly here, since this is only the + # ideal wrapping. Practically, we have an intermediate wrapper layer. 
Thus, we create a new resize functional + # here, register the kernels without wrapper, and check if subclasses of our builtin tv_tensors get dispatched + # to the kernel of the corresponding superclass + def resize_with_pure_kernels(): + pass + + class MyImage(tv_tensors.Image): + pass + + class MyBoundingBoxes(tv_tensors.BoundingBoxes): + pass + + class MyMask(tv_tensors.Mask): + pass + + class MyVideo(tv_tensors.Video): + pass + + for custom_tv_tensor_subclass in [ + MyImage, + MyBoundingBoxes, + MyMask, + MyVideo, + ]: + builtin_tv_tensor_class = custom_tv_tensor_subclass.__mro__[1] + builtin_tv_tensor_kernel = self.KERNELS[builtin_tv_tensor_class] + _register_kernel_internal(resize_with_pure_kernels, builtin_tv_tensor_class, tv_tensor_wrapper=False)( + builtin_tv_tensor_kernel + ) + + assert _get_kernel(resize_with_pure_kernels, custom_tv_tensor_subclass) is builtin_tv_tensor_kernel + + def test_tv_tensor_subclass(self): + class MyTVTensor(tv_tensors.TVTensor): + pass + + with pytest.raises(TypeError, match="supports inputs of type"): + _get_kernel(F.resize, MyTVTensor) + + def resize_my_tv_tensor(): + pass + + _register_kernel_internal(F.resize, MyTVTensor, tv_tensor_wrapper=False)(resize_my_tv_tensor) + + assert _get_kernel(F.resize, MyTVTensor) is resize_my_tv_tensor + + def test_pil_image_subclass(self): + opened_image = PIL.Image.open(Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg") + loaded_image = opened_image.convert("RGB") + + # check the assumptions + assert isinstance(opened_image, PIL.Image.Image) + assert type(opened_image) is not PIL.Image.Image + + assert type(loaded_image) is PIL.Image.Image + + size = [17, 11] + for image in [opened_image, loaded_image]: + kernel = _get_kernel(F.resize, type(image)) + + output = kernel(image, size=size) + + assert F.get_size(output) == size + + +class TestPermuteChannels: + _DEFAULT_PERMUTATION = [2, 0, 1] + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + 
(F.permute_channels_image, make_image_tensor), + # FIXME + # check_kernel does not support PIL kernel, but it should + (F.permute_channels_image, make_image), + (F.permute_channels_video, make_video), + ], + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel(self, kernel, make_input, dtype, device): + check_kernel(kernel, make_input(dtype=dtype, device=device), permutation=self._DEFAULT_PERMUTATION) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_functional(self, make_input): + check_functional(F.permute_channels, make_input(), permutation=self._DEFAULT_PERMUTATION) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.permute_channels_image, torch.Tensor), + (F._color._permute_channels_image_pil, PIL.Image.Image), + (F.permute_channels_image, tv_tensors.Image), + (F.permute_channels_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.permute_channels, kernel=kernel, input_type=input_type) + + def reference_image_correctness(self, image, permutation): + channel_images = image.split(1, dim=-3) + permuted_channel_images = [channel_images[channel_idx] for channel_idx in permutation] + return tv_tensors.Image(torch.concat(permuted_channel_images, dim=-3)) + + @pytest.mark.parametrize("permutation", [[2, 0, 1], [1, 2, 0], [2, 0, 1], [0, 1, 2]]) + @pytest.mark.parametrize("batch_dims", [(), (2,), (2, 1)]) + def test_image_correctness(self, permutation, batch_dims): + image = make_image(batch_dims=batch_dims) + + actual = F.permute_channels(image, permutation=permutation) + expected = self.reference_image_correctness(image, permutation=permutation) + + torch.testing.assert_close(actual, expected) + + +class TestElastic: + def _make_displacement(self, inpt): + return torch.rand( + 1, + *F.get_size(inpt), + 2, + 
dtype=torch.float32, + device=inpt.device if isinstance(inpt, torch.Tensor) else "cpu", + ) + + @param_value_parametrization( + interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR], + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8, torch.float16]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, param, value, dtype, device): + image = make_image_tensor(dtype=dtype, device=device) + + check_kernel( + F.elastic_image, + image, + displacement=self._make_displacement(image), + **{param: value}, + check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))), + check_cuda_vs_cpu=dtype is not torch.float16, + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_boxes(self, format, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + + check_kernel( + F.elastic_bounding_boxes, + bounding_boxes, + format=bounding_boxes.format, + canvas_size=bounding_boxes.canvas_size, + displacement=self._make_displacement(bounding_boxes), + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + mask = make_mask() + check_kernel(F.elastic_mask, mask, displacement=self._make_displacement(mask)) + + def test_kernel_video(self): + video = make_video() + check_kernel(F.elastic_video, video, displacement=self._make_displacement(video)) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + input = make_input() + check_functional(F.elastic, input, displacement=self._make_displacement(input)) + + @pytest.mark.parametrize( 
+ ("kernel", "input_type"), + [ + (F.elastic_image, torch.Tensor), + (F._geometry._elastic_image_pil, PIL.Image.Image), + (F.elastic_image, tv_tensors.Image), + (F.elastic_bounding_boxes, tv_tensors.BoundingBoxes), + (F.elastic_mask, tv_tensors.Mask), + (F.elastic_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.elastic, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_displacement_error(self, make_input): + input = make_input() + + with pytest.raises(TypeError, match="displacement should be a Tensor"): + F.elastic(input, displacement=None) + + with pytest.raises(ValueError, match="displacement shape should be"): + F.elastic(input, displacement=torch.rand(F.get_size(input))) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + # ElasticTransform needs larger images to avoid the needed internal padding being larger than the actual image + @pytest.mark.parametrize("size", [(163, 163), (72, 333), (313, 95)]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, size, device): + # We have to skip that test on M1 because it's flaky: Mismatched elements: 35 / 89205 (0.0%) + # See https://github.com/pytorch/vision/issues/8154 + # All other platforms are fine, so the differences do not come from something we own in torchvision + check_v1_compatibility = False if sys.platform == "darwin" else dict(rtol=0, atol=1) + + check_transform( + transforms.ElasticTransform(), + make_input(size, device=device), + check_v1_compatibility=check_v1_compatibility, + ) + + +class TestToPureTensor: + def test_correctness(self): + input = { + "img": make_image(), + "img_tensor": make_image_tensor(), + 
"img_pil": make_image_pil(), + "mask": make_detection_masks(), + "video": make_video(), + "bbox": make_bounding_boxes(), + "str": "str", + } + + out = transforms.ToPureTensor()(input) + + for input_value, out_value in zip(input.values(), out.values()): + if isinstance(input_value, tv_tensors.TVTensor): + assert isinstance(out_value, torch.Tensor) and not isinstance(out_value, tv_tensors.TVTensor) + else: + assert isinstance(out_value, type(input_value)) + + +class TestCrop: + INPUT_SIZE = (21, 11) + + CORRECTNESS_CROP_KWARGS = [ + # center + dict(top=5, left=5, height=10, width=5), + # larger than input, i.e. pad + dict(top=-5, left=-5, height=30, width=20), + # sides: left, right, top, bottom + dict(top=-5, left=-5, height=30, width=10), + dict(top=-5, left=5, height=30, width=10), + dict(top=-5, left=-5, height=20, width=20), + dict(top=5, left=-5, height=20, width=20), + # corners: top-left, top-right, bottom-left, bottom-right + dict(top=-5, left=-5, height=20, width=10), + dict(top=-5, left=5, height=20, width=10), + dict(top=5, left=-5, height=20, width=10), + dict(top=5, left=5, height=20, width=10), + ] + MINIMAL_CROP_KWARGS = CORRECTNESS_CROP_KWARGS[0] + + @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, kwargs, dtype, device): + check_kernel(F.crop_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **kwargs) + + @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, kwargs, format, dtype, device): + bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device) + check_kernel(F.crop_bounding_boxes, bounding_boxes, 
format=format, **kwargs) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.crop_mask, make_mask(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS) + + def test_kernel_video(self): + check_kernel(F.crop_video, make_video(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.crop, make_input(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.crop_image, torch.Tensor), + (F._geometry._crop_image_pil, PIL.Image.Image), + (F.crop_image, tv_tensors.Image), + (F.crop_bounding_boxes, tv_tensors.BoundingBoxes), + (F.crop_mask, tv_tensors.Mask), + (F.crop_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.crop, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS) + def test_functional_image_correctness(self, kwargs): + image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu") + + actual = F.crop(image, **kwargs) + expected = F.to_image(F.crop(F.to_pil_image(image), **kwargs)) + + assert_equal(actual, expected) + + @param_value_parametrization( + size=[(10, 5), (25, 15), (25, 5), (10, 15)], + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_transform(self, param, value, make_input): + input = make_input(self.INPUT_SIZE) + + check_sample_input = True + if param == "fill": + if isinstance(value, (tuple, list)): + if isinstance(input, tv_tensors.Mask): + pytest.skip("F.pad_mask doesn't support non-scalar fill.") + else: + 
check_sample_input = False + + kwargs = dict( + # 1. size is required + # 2. the fill parameter only has an affect if we need padding + size=[s + 4 for s in self.INPUT_SIZE], + fill=adapt_fill(value, dtype=input.dtype if isinstance(input, torch.Tensor) else torch.uint8), + ) + else: + kwargs = {param: value} + + check_transform( + transforms.RandomCrop(**kwargs, pad_if_needed=True), + input, + check_v1_compatibility=param != "fill" or isinstance(value, (int, float)), + check_sample_input=check_sample_input, + ) + + @pytest.mark.parametrize("padding", [1, (1, 1), (1, 1, 1, 1)]) + def test_transform_padding(self, padding): + inpt = make_image(self.INPUT_SIZE) + + output_size = [s + 2 for s in F.get_size(inpt)] + transform = transforms.RandomCrop(output_size, padding=padding) + + output = transform(inpt) + + assert F.get_size(output) == output_size + + @pytest.mark.parametrize("padding", [None, 1, (1, 1), (1, 1, 1, 1)]) + def test_transform_insufficient_padding(self, padding): + inpt = make_image(self.INPUT_SIZE) + + output_size = [s + 3 for s in F.get_size(inpt)] + transform = transforms.RandomCrop(output_size, padding=padding) + + with pytest.raises(ValueError, match="larger than (padded )?input image size"): + transform(inpt) + + def test_transform_pad_if_needed(self): + inpt = make_image(self.INPUT_SIZE) + + output_size = [s * 2 for s in F.get_size(inpt)] + transform = transforms.RandomCrop(output_size, pad_if_needed=True) + + output = transform(inpt) + + assert F.get_size(output) == output_size + + @param_value_parametrization( + size=[(10, 5), (25, 15), (25, 5), (10, 15)], + fill=CORRECTNESS_FILLS, + padding_mode=["constant", "edge", "reflect", "symmetric"], + ) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_image_correctness(self, param, value, seed): + kwargs = {param: value} + if param != "size": + # 1. size is required + # 2. 
the fill / padding_mode parameters only have an affect if we need padding + kwargs["size"] = [s + 4 for s in self.INPUT_SIZE] + if param == "fill": + kwargs["fill"] = adapt_fill(kwargs["fill"], dtype=torch.uint8) + + transform = transforms.RandomCrop(pad_if_needed=True, **kwargs) + + image = make_image(self.INPUT_SIZE) + + with freeze_rng_state(): + torch.manual_seed(seed) + actual = transform(image) + + torch.manual_seed(seed) + expected = F.to_image(transform(F.to_pil_image(image))) + + assert_equal(actual, expected) + + def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width): + affine_matrix = np.array( + [ + [1, 0, -left], + [0, 1, -top], + ], + ) + return reference_affine_bounding_boxes_helper( + bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=(height, width) + ) + + @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device): + bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device) + + actual = F.crop(bounding_boxes, **kwargs) + expected = self._reference_crop_bounding_boxes(bounding_boxes, **kwargs) + + assert_equal(actual, expected, atol=1, rtol=0) + assert_equal(F.get_size(actual), F.get_size(expected)) + + @pytest.mark.parametrize("output_size", [(17, 11), (11, 17), (11, 11)]) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_bounding_boxes_correctness(self, output_size, format, dtype, device, seed): + input_size = [s * 2 for s in output_size] + bounding_boxes = 
make_bounding_boxes(input_size, format=format, dtype=dtype, device=device) + + transform = transforms.RandomCrop(output_size) + + with freeze_rng_state(): + torch.manual_seed(seed) + params = transform.make_params([bounding_boxes]) + assert not params.pop("needs_pad") + del params["padding"] + assert params.pop("needs_crop") + + torch.manual_seed(seed) + actual = transform(bounding_boxes) + + expected = self._reference_crop_bounding_boxes(bounding_boxes, **params) + + assert_equal(actual, expected) + assert_equal(F.get_size(actual), F.get_size(expected)) + + def test_errors(self): + with pytest.raises(ValueError, match="Please provide only two dimensions"): + transforms.RandomCrop([10, 12, 14]) + + with pytest.raises(TypeError, match="Got inappropriate padding arg"): + transforms.RandomCrop([10, 12], padding="abc") + + with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"): + transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7]) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomCrop([10, 12], padding=1, fill="abc") + + with pytest.raises(ValueError, match="Padding mode should be either"): + transforms.RandomCrop([10, 12], padding=1, padding_mode="abc") + + +class TestErase: + INPUT_SIZE = (17, 11) + FUNCTIONAL_KWARGS = dict( + zip("ijhwv", [2, 2, 10, 8, torch.tensor(0.0, dtype=torch.float32, device="cpu").reshape(-1, 1, 1)]) + ) + + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.erase_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **self.FUNCTIONAL_KWARGS) + + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_inplace(self, dtype, device): + input = make_image(self.INPUT_SIZE, dtype=dtype, device=device) + input_version = input._version + + output_out_of_place = 
F.erase_image(input, **self.FUNCTIONAL_KWARGS) + assert output_out_of_place.data_ptr() != input.data_ptr() + assert output_out_of_place is not input + + output_inplace = F.erase_image(input, **self.FUNCTIONAL_KWARGS, inplace=True) + assert output_inplace.data_ptr() == input.data_ptr() + assert output_inplace._version > input_version + assert output_inplace is input + + assert_equal(output_inplace, output_out_of_place) + + def test_kernel_video(self): + check_kernel(F.erase_video, make_video(self.INPUT_SIZE), **self.FUNCTIONAL_KWARGS) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_functional(self, make_input): + check_functional(F.erase, make_input(), **self.FUNCTIONAL_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.erase_image, torch.Tensor), + (F._augment._erase_image_pil, PIL.Image.Image), + (F.erase_image, tv_tensors.Image), + (F.erase_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.erase, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + input = make_input(device=device) + + with pytest.warns(UserWarning, match="currently passing through inputs of type"): + check_transform( + transforms.RandomErasing(p=1), + input, + check_v1_compatibility=not isinstance(input, PIL.Image.Image), + ) + + def _reference_erase_image(self, image, *, i, j, h, w, v): + mask = torch.zeros_like(image, dtype=torch.bool) + mask[..., i : i + h, j : j + w] = True + + # The broadcasting and type casting logic is handled automagically in the kernel through indexing + value = torch.broadcast_to(v, (*image.shape[:-2], h, w)).to(image) + + erased_image = torch.empty_like(image) + erased_image[mask] = 
value.flatten() + erased_image[~mask] = image[~mask] + + return erased_image + + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_functional_image_correctness(self, dtype, device): + image = make_image(dtype=dtype, device=device) + + actual = F.erase(image, **self.FUNCTIONAL_KWARGS) + expected = self._reference_erase_image(image, **self.FUNCTIONAL_KWARGS) + + assert_equal(actual, expected) + + @param_value_parametrization( + scale=[(0.1, 0.2), [0.0, 1.0]], + ratio=[(0.3, 0.7), [0.1, 5.0]], + value=[0, 0.5, (0, 1, 0), [-0.2, 0.0, 1.3], "random"], + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_image_correctness(self, param, value, dtype, device, seed): + transform = transforms.RandomErasing(**{param: value}, p=1) + + image = make_image(dtype=dtype, device=device) + + with freeze_rng_state(): + torch.manual_seed(seed) + # This emulates the random apply check that happens before make_params is called + torch.rand(1) + params = transform.make_params([image]) + + torch.manual_seed(seed) + actual = transform(image) + + expected = self._reference_erase_image(image, **params) + + assert_equal(actual, expected) + + def test_transform_errors(self): + with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"): + transforms.RandomErasing(value={}) + + with pytest.raises(ValueError, match="If value is str, it should be 'random'"): + transforms.RandomErasing(value="abc") + + with pytest.raises(TypeError, match="Scale should be a sequence"): + transforms.RandomErasing(scale=123) + + with pytest.raises(TypeError, match="Ratio should be a sequence"): + transforms.RandomErasing(ratio=123) + + with pytest.raises(ValueError, match="Scale should be between 0 and 1"): + transforms.RandomErasing(scale=[-1, 2]) + + transform 
= transforms.RandomErasing(value=[1, 2, 3, 4]) + + with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"): + transform.make_params([make_image()]) + + +class TestGaussianBlur: + @pytest.mark.parametrize("kernel_size", [1, 3, (3, 1), [3, 5]]) + @pytest.mark.parametrize("sigma", [None, 1.0, 1, (0.5,), [0.3], (0.3, 0.7), [0.9, 0.2]]) + def test_kernel_image(self, kernel_size, sigma): + check_kernel( + F.gaussian_blur_image, + make_image(), + kernel_size=kernel_size, + sigma=sigma, + check_scripted_vs_eager=not (isinstance(kernel_size, int) or isinstance(sigma, (float, int))), + ) + + def test_kernel_image_errors(self): + image = make_image_tensor() + + with pytest.raises(ValueError, match="kernel_size is a sequence its length should be 2"): + F.gaussian_blur_image(image, kernel_size=[1, 2, 3]) + + for kernel_size in [2, -1]: + with pytest.raises(ValueError, match="kernel_size should have odd and positive integers"): + F.gaussian_blur_image(image, kernel_size=kernel_size) + + with pytest.raises(ValueError, match="sigma is a sequence, its length should be 2"): + F.gaussian_blur_image(image, kernel_size=1, sigma=[1, 2, 3]) + + with pytest.raises(TypeError, match="sigma should be either float or sequence of floats"): + F.gaussian_blur_image(image, kernel_size=1, sigma=object()) + + with pytest.raises(ValueError, match="sigma should have positive values"): + F.gaussian_blur_image(image, kernel_size=1, sigma=-1) + + def test_kernel_video(self): + check_kernel(F.gaussian_blur_video, make_video(), kernel_size=(3, 3)) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_functional(self, make_input): + check_functional(F.gaussian_blur, make_input(), kernel_size=(3, 3)) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.gaussian_blur_image, torch.Tensor), + (F._misc._gaussian_blur_image_pil, PIL.Image.Image), + (F.gaussian_blur_image, 
tv_tensors.Image), + (F.gaussian_blur_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.gaussian_blur, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("sigma", [5, 2.0, (0.5, 2), [1.3, 2.7]]) + def test_transform(self, make_input, device, sigma): + check_transform(transforms.GaussianBlur(kernel_size=3, sigma=sigma), make_input(device=device)) + + def test_assertions(self): + with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"): + transforms.GaussianBlur([10, 12, 14]) + + with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"): + transforms.GaussianBlur(4) + + with pytest.raises(ValueError, match="If sigma is a sequence its length should be 1 or 2. 
Got 3"): + transforms.GaussianBlur(3, sigma=[1, 2, 3]) + + with pytest.raises(ValueError, match="sigma values should be positive and of the form"): + transforms.GaussianBlur(3, sigma=-1.0) + + with pytest.raises(ValueError, match="sigma values should be positive and of the form"): + transforms.GaussianBlur(3, sigma=[2.0, 1.0]) + + with pytest.raises(TypeError, match="sigma should be a number or a sequence of numbers"): + transforms.GaussianBlur(3, sigma={}) + + @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0], (10, 12.0), [10]]) + def test_make_params(self, sigma): + transform = transforms.GaussianBlur(3, sigma=sigma) + params = transform.make_params([]) + + if isinstance(sigma, float): + assert params["sigma"][0] == params["sigma"][1] == sigma + elif isinstance(sigma, list) and len(sigma) == 1: + assert params["sigma"][0] == params["sigma"][1] == sigma[0] + else: + assert sigma[0] <= params["sigma"][0] <= sigma[1] + assert sigma[0] <= params["sigma"][1] <= sigma[1] + + # np_img = np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3)) + # np_img2 = np.arange(26 * 28, dtype="uint8").reshape((26, 28)) + # { + # "10_12_3__3_3_0.8": cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.8), + # "10_12_3__3_3_0.5": cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.5), + # "10_12_3__3_5_0.8": cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.8), + # "10_12_3__3_5_0.5": cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.5), + # "26_28_1__23_23_1.7": cv2.GaussianBlur(np_img2, ksize=(23, 23), sigmaX=1.7), + # } + REFERENCE_GAUSSIAN_BLUR_IMAGE_RESULTS = torch.load( + Path(__file__).parent / "assets" / "gaussian_blur_opencv_results.pt", + weights_only=False, + ) + + @pytest.mark.parametrize( + ("dimensions", "kernel_size", "sigma"), + [ + ((3, 10, 12), (3, 3), 0.8), + ((3, 10, 12), (3, 3), 0.5), + ((3, 10, 12), (3, 5), 0.8), + ((3, 10, 12), (3, 5), 0.5), + ((1, 26, 28), (23, 23), 1.7), + ], + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.float64, torch.float16]) + 
@pytest.mark.parametrize("device", cpu_and_cuda()) + def test_functional_image_correctness(self, dimensions, kernel_size, sigma, dtype, device): + if dtype is torch.float16 and device == "cpu": + pytest.skip("The CPU implementation of float16 on CPU differs from opencv") + + num_channels, height, width = dimensions + + reference_results_key = f"{height}_{width}_{num_channels}__{kernel_size[0]}_{kernel_size[1]}_{sigma}" + expected = ( + torch.tensor(self.REFERENCE_GAUSSIAN_BLUR_IMAGE_RESULTS[reference_results_key]) + .reshape(height, width, num_channels) + .permute(2, 0, 1) + .to(dtype=dtype, device=device) + ) + + image = tv_tensors.Image( + torch.arange(num_channels * height * width, dtype=torch.uint8) + .reshape(height, width, num_channels) + .permute(2, 0, 1), + dtype=dtype, + device=device, + ) + + actual = F.gaussian_blur_image(image, kernel_size=kernel_size, sigma=sigma) + + torch.testing.assert_close(actual, expected, rtol=0, atol=1) + + +class TestGaussianNoise: + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image, make_video], + ) + def test_kernel(self, make_input): + check_kernel( + F.gaussian_noise, + make_input(dtype=torch.float32), + # This cannot pass because the noise on a batch in not per-image + check_batched_vs_unbatched=False, + ) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image, make_video], + ) + def test_functional(self, make_input): + check_functional(F.gaussian_noise, make_input(dtype=torch.float32)) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.gaussian_noise, torch.Tensor), + (F.gaussian_noise_image, tv_tensors.Image), + (F.gaussian_noise_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.gaussian_noise, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image, make_video], + ) + def test_transform(self, make_input): + 
def adapter(_, input, __): + # This transform doesn't support uint8 so we have to convert the auto-generated uint8 tensors to float32 + # Same for PIL images + for key, value in input.items(): + if isinstance(value, torch.Tensor) and not value.is_floating_point(): + input[key] = value.to(torch.float32) + if isinstance(value, PIL.Image.Image): + input[key] = F.pil_to_tensor(value).to(torch.float32) + return input + + check_transform(transforms.GaussianNoise(), make_input(dtype=torch.float32), check_sample_input=adapter) + + def test_bad_input(self): + with pytest.raises(ValueError, match="Gaussian Noise is not implemented for PIL images."): + F.gaussian_noise(make_image_pil()) + with pytest.raises(ValueError, match="Input tensor is expected to be in float dtype"): + F.gaussian_noise(make_image(dtype=torch.uint8)) + with pytest.raises(ValueError, match="sigma shouldn't be negative"): + F.gaussian_noise(make_image(dtype=torch.float32), sigma=-1) + + def test_clip(self): + img = make_image(dtype=torch.float32) + + out = F.gaussian_noise(img, mean=100, clip=False) + assert out.min() > 50 + + out = F.gaussian_noise(img, mean=100, clip=True) + assert (out == 1).all() + + out = F.gaussian_noise(img, mean=-100, clip=False) + assert out.min() < -50 + + out = F.gaussian_noise(img, mean=-100, clip=True) + assert (out == 0).all() + + +class TestAutoAugmentTransforms: + # These transforms have a lot of branches in their `forward()` passes which are conditioned on random sampling. + # It's typically very hard to test the effect on some parameters without heavy mocking logic. + # This class adds correctness tests for the kernels that are specific to those transforms. The rest of kernels, e.g. + # rotate, are tested in their respective classes. The rest of the tests here are mostly smoke tests. 
+ + def _reference_shear_translate(self, image, *, transform_id, magnitude, interpolation, fill): + if isinstance(image, PIL.Image.Image): + input = image + else: + input = F.to_pil_image(image) + + matrix = { + "ShearX": (1, magnitude, 0, 0, 1, 0), + "ShearY": (1, 0, 0, magnitude, 1, 0), + "TranslateX": (1, 0, -int(magnitude), 0, 1, 0), + "TranslateY": (1, 0, 0, 0, 1, -int(magnitude)), + }[transform_id] + + output = input.transform( + input.size, PIL.Image.AFFINE, matrix, resample=pil_modes_mapping[interpolation], fill=fill + ) + + if isinstance(image, PIL.Image.Image): + return output + else: + return F.to_image(output) + + @pytest.mark.parametrize("transform_id", ["ShearX", "ShearY", "TranslateX", "TranslateY"]) + @pytest.mark.parametrize("magnitude", [0.3, -0.2, 0.0]) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) + @pytest.mark.parametrize("input_type", ["Tensor", "PIL"]) + def test_correctness_shear_translate(self, transform_id, magnitude, interpolation, fill, input_type): + # ShearX/Y and TranslateX/Y are the only ops that are native to the AA transforms. They are modeled after the + # reference implementation: + # https://github.com/tensorflow/models/blob/885fda091c46c59d6c7bb5c7e760935eacc229da/research/autoaugment/augmentation_transforms.py#L273-L362 + # All other ops are checked in their respective dedicated tests. 
+ + image = make_image(dtype=torch.uint8, device="cpu") + if input_type == "PIL": + image = F.to_pil_image(image) + + if "Translate" in transform_id: + # For TranslateX/Y magnitude is a value in pixels + magnitude *= min(F.get_size(image)) + + actual = transforms.AutoAugment()._apply_image_or_video_transform( + image, + transform_id=transform_id, + magnitude=magnitude, + interpolation=interpolation, + fill={type(image): fill}, + ) + expected = self._reference_shear_translate( + image, transform_id=transform_id, magnitude=magnitude, interpolation=interpolation, fill=fill + ) + + if input_type == "PIL": + actual, expected = F.to_image(actual), F.to_image(expected) + + if "Shear" in transform_id and input_type == "Tensor": + mae = (actual.float() - expected.float()).abs().mean() + assert mae < (12 if interpolation is transforms.InterpolationMode.NEAREST else 5) + else: + assert_close(actual, expected, rtol=0, atol=1) + + def _sample_input_adapter(self, transform, input, device): + adapted_input = {} + image_or_video_found = False + for key, value in input.items(): + if isinstance(value, (tv_tensors.BoundingBoxes, tv_tensors.Mask)): + # AA transforms don't support bounding boxes or masks + continue + elif check_type(value, (tv_tensors.Image, tv_tensors.Video, is_pure_tensor, PIL.Image.Image)): + if image_or_video_found: + # AA transforms only support a single image or video + continue + image_or_video_found = True + adapted_input[key] = value + return adapted_input + + @pytest.mark.parametrize( + "transform", + [transforms.AutoAugment(), transforms.RandAugment(), transforms.TrivialAugmentWide(), transforms.AugMix()], + ) + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_smoke(self, transform, make_input, dtype, device): + if make_input is make_image_pil and not (dtype is 
torch.uint8 and device == "cpu"): + pytest.skip( + "PIL image tests with parametrization other than dtype=torch.uint8 and device='cpu' " + "will degenerate to that anyway." + ) + input = make_input(dtype=dtype, device=device) + + with freeze_rng_state(): + # By default every test starts from the same random seed. This leads to minimal coverage of the sampling + # that happens inside forward(). To avoid calling the transform multiple times to achieve higher coverage, + # we build a reproducible random seed from the input type, dtype, and device. + torch.manual_seed(hash((make_input, dtype, device))) + + # For v2, we changed the random sampling of the AA transforms. This makes it impossible to compare the v1 + # and v2 outputs without complicated mocking and monkeypatching. Thus, we skip the v1 compatibility checks + # here and only check if we can script the v2 transform and subsequently call the result. + check_transform( + transform, input, check_v1_compatibility=False, check_sample_input=self._sample_input_adapter + ) + + if type(input) is torch.Tensor and dtype is torch.uint8: + _script(transform)(input) + + def test_auto_augment_policy_error(self): + with pytest.raises(ValueError, match="provided policy"): + transforms.AutoAugment(policy=None) + + @pytest.mark.parametrize("severity", [0, 11]) + def test_aug_mix_severity_error(self, severity): + with pytest.raises(ValueError, match="severity must be between"): + transforms.AugMix(severity=severity) + + +class TestConvertBoundingBoxFormat: + old_new_formats = list(itertools.permutations(iter(tv_tensors.BoundingBoxFormat), 2)) + + @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats) + def test_kernel(self, old_format, new_format): + check_kernel( + F.convert_bounding_box_format, + make_bounding_boxes(format=old_format), + new_format=new_format, + old_format=old_format, + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("inplace", [False, 
True]) + def test_kernel_noop(self, format, inplace): + input = make_bounding_boxes(format=format).as_subclass(torch.Tensor) + input_version = input._version + + output = F.convert_bounding_box_format(input, old_format=format, new_format=format, inplace=inplace) + + assert output is input + assert output.data_ptr() == input.data_ptr() + assert output._version == input_version + + @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats) + def test_kernel_inplace(self, old_format, new_format): + input = make_bounding_boxes(format=old_format).as_subclass(torch.Tensor) + input_version = input._version + + output_out_of_place = F.convert_bounding_box_format(input, old_format=old_format, new_format=new_format) + assert output_out_of_place.data_ptr() != input.data_ptr() + assert output_out_of_place is not input + + output_inplace = F.convert_bounding_box_format( + input, old_format=old_format, new_format=new_format, inplace=True + ) + assert output_inplace.data_ptr() == input.data_ptr() + assert output_inplace._version > input_version + assert output_inplace is input + + assert_equal(output_inplace, output_out_of_place) + + @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats) + def test_functional(self, old_format, new_format): + check_functional(F.convert_bounding_box_format, make_bounding_boxes(format=old_format), new_format=new_format) + + @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats) + @pytest.mark.parametrize("format_type", ["enum", "str"]) + def test_transform(self, old_format, new_format, format_type): + check_transform( + transforms.ConvertBoundingBoxFormat(new_format.name if format_type == "str" else new_format), + make_bounding_boxes(format=old_format), + ) + + @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats) + def test_strings(self, old_format, new_format): + # Non-regression test for https://github.com/pytorch/vision/issues/8258 + input = 
tv_tensors.BoundingBoxes(torch.tensor([[10, 10, 20, 20]]), format=old_format, canvas_size=(50, 50)) + expected = self._reference_convert_bounding_box_format(input, new_format) + + old_format = old_format.name + new_format = new_format.name + + out_functional = F.convert_bounding_box_format(input, new_format=new_format) + out_functional_tensor = F.convert_bounding_box_format( + input.as_subclass(torch.Tensor), old_format=old_format, new_format=new_format + ) + out_transform = transforms.ConvertBoundingBoxFormat(new_format)(input) + for out in (out_functional, out_functional_tensor, out_transform): + assert_equal(out, expected) + + def _reference_convert_bounding_box_format(self, bounding_boxes, new_format): + return tv_tensors.wrap( + torchvision.ops.box_convert( + bounding_boxes.as_subclass(torch.Tensor), + in_fmt=bounding_boxes.format.name.lower(), + out_fmt=new_format.name.lower(), + ).to(bounding_boxes.dtype), + like=bounding_boxes, + format=new_format, + ) + + @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats) + @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("fn_type", ["functional", "transform"]) + def test_correctness(self, old_format, new_format, dtype, device, fn_type): + bounding_boxes = make_bounding_boxes(format=old_format, dtype=dtype, device=device) + + if fn_type == "functional": + fn = functools.partial(F.convert_bounding_box_format, new_format=new_format) + else: + fn = transforms.ConvertBoundingBoxFormat(format=new_format) + + actual = fn(bounding_boxes) + expected = self._reference_convert_bounding_box_format(bounding_boxes, new_format) + + assert_equal(actual, expected) + + def test_errors(self): + input_tv_tensor = make_bounding_boxes() + input_pure_tensor = input_tv_tensor.as_subclass(torch.Tensor) + + for input in [input_tv_tensor, input_pure_tensor]: + with pytest.raises(TypeError, match="missing 1 required argument: 
'new_format'"): + F.convert_bounding_box_format(input) + + with pytest.raises(ValueError, match="`old_format` has to be passed"): + F.convert_bounding_box_format(input_pure_tensor, new_format=input_tv_tensor.format) + + with pytest.raises(ValueError, match="`old_format` must not be passed"): + F.convert_bounding_box_format( + input_tv_tensor, old_format=input_tv_tensor.format, new_format=input_tv_tensor.format + ) + + +class TestResizedCrop: + INPUT_SIZE = (17, 11) + CROP_KWARGS = dict(top=2, left=2, height=5, width=7) + OUTPUT_SIZE = (19, 32) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.resized_crop_image, make_image), + (F.resized_crop_bounding_boxes, make_bounding_boxes), + (F.resized_crop_mask, make_segmentation_mask), + (F.resized_crop_mask, make_detection_masks), + (F.resized_crop_video, make_video), + ], + ) + def test_kernel(self, kernel, make_input): + input = make_input(self.INPUT_SIZE) + if isinstance(input, tv_tensors.BoundingBoxes): + extra_kwargs = dict(format=input.format) + elif isinstance(input, tv_tensors.Mask): + extra_kwargs = dict() + else: + extra_kwargs = dict(antialias=True) + + check_kernel(kernel, input, **self.CROP_KWARGS, size=self.OUTPUT_SIZE, **extra_kwargs) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional( + F.resized_crop, make_input(self.INPUT_SIZE), **self.CROP_KWARGS, size=self.OUTPUT_SIZE, antialias=True + ) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.resized_crop_image, torch.Tensor), + (F._geometry._resized_crop_image_pil, PIL.Image.Image), + (F.resized_crop_image, tv_tensors.Image), + (F.resized_crop_bounding_boxes, tv_tensors.BoundingBoxes), + (F.resized_crop_mask, tv_tensors.Mask), + (F.resized_crop_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + 
check_functional_kernel_signature_match(F.resized_crop, kernel=kernel, input_type=input_type) + + @param_value_parametrization( + scale=[(0.1, 0.2), [0.0, 1.0]], + ratio=[(0.3, 0.7), [0.1, 5.0]], + ) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_transform(self, param, value, make_input): + check_transform( + transforms.RandomResizedCrop(size=self.OUTPUT_SIZE, **{param: value}, antialias=True), + make_input(self.INPUT_SIZE), + check_v1_compatibility=dict(rtol=0, atol=1), + ) + + # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2. + # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT` + @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST}) + def test_functional_image_correctness(self, interpolation): + image = make_image(self.INPUT_SIZE, dtype=torch.uint8) + + actual = F.resized_crop( + image, **self.CROP_KWARGS, size=self.OUTPUT_SIZE, interpolation=interpolation, antialias=True + ) + expected = F.to_image( + F.resized_crop( + F.to_pil_image(image), **self.CROP_KWARGS, size=self.OUTPUT_SIZE, interpolation=interpolation + ) + ) + + torch.testing.assert_close(actual, expected, atol=1, rtol=0) + + def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width, size): + new_height, new_width = size + + crop_affine_matrix = np.array( + [ + [1, 0, -left], + [0, 1, -top], + [0, 0, 1], + ], + ) + resize_affine_matrix = np.array( + [ + [new_width / width, 0, 0], + [0, new_height / height, 0], + [0, 0, 1], + ], + ) + affine_matrix = (resize_affine_matrix @ crop_affine_matrix)[:2, :] + + return reference_affine_bounding_boxes_helper( + bounding_boxes, + affine_matrix=affine_matrix, + new_canvas_size=size, + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + def 
test_functional_bounding_boxes_correctness(self, format): + bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format) + + actual = F.resized_crop(bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE) + expected = self._reference_resized_crop_bounding_boxes( + bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE + ) + + assert_equal(actual, expected) + assert_equal(F.get_size(actual), F.get_size(expected)) + + def test_transform_errors_warnings(self): + with pytest.raises(ValueError, match="provide only two dimensions"): + transforms.RandomResizedCrop(size=(1, 2, 3)) + + with pytest.raises(TypeError, match="Scale should be a sequence"): + transforms.RandomResizedCrop(size=self.INPUT_SIZE, scale=123) + + with pytest.raises(TypeError, match="Ratio should be a sequence"): + transforms.RandomResizedCrop(size=self.INPUT_SIZE, ratio=123) + + for param in ["scale", "ratio"]: + with pytest.warns(match="Scale and ratio should be of kind"): + transforms.RandomResizedCrop(size=self.INPUT_SIZE, **{param: [1, 0]}) + + +class TestPad: + EXHAUSTIVE_TYPE_PADDINGS = [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]] + CORRECTNESS_PADDINGS = [ + padding + for padding in EXHAUSTIVE_TYPE_PADDINGS + if isinstance(padding, int) or isinstance(padding, list) and len(padding) > 1 + ] + PADDING_MODES = ["constant", "symmetric", "edge", "reflect"] + + @param_value_parametrization( + padding=EXHAUSTIVE_TYPE_PADDINGS, + fill=EXHAUSTIVE_TYPE_FILLS, + padding_mode=PADDING_MODES, + ) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, param, value, dtype, device): + if param == "fill": + value = adapt_fill(value, dtype=dtype) + kwargs = {param: value} + if param != "padding": + kwargs["padding"] = [1] + + image = make_image(dtype=dtype, device=device) + + check_kernel( + F.pad_image, + image, + **kwargs, + check_scripted_vs_eager=not ( + (param == "padding" and 
isinstance(value, int)) + # See https://github.com/pytorch/vision/pull/7252#issue-1585585521 for details + or ( + param == "fill" + and ( + isinstance(value, tuple) or (isinstance(value, list) and any(isinstance(v, int) for v in value)) + ) + ) + ), + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + def test_kernel_bounding_boxes(self, format): + bounding_boxes = make_bounding_boxes(format=format) + check_kernel( + F.pad_bounding_boxes, + bounding_boxes, + format=bounding_boxes.format, + canvas_size=bounding_boxes.canvas_size, + padding=[1], + ) + + @pytest.mark.parametrize("padding_mode", ["symmetric", "edge", "reflect"]) + def test_kernel_bounding_boxes_errors(self, padding_mode): + bounding_boxes = make_bounding_boxes() + with pytest.raises(ValueError, match=f"'{padding_mode}' is not supported"): + F.pad_bounding_boxes( + bounding_boxes, + format=bounding_boxes.format, + canvas_size=bounding_boxes.canvas_size, + padding=[1], + padding_mode=padding_mode, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.pad_mask, make_mask(), padding=[1]) + + @pytest.mark.parametrize("fill", [[1], (0,), [1, 0, 1], (0, 1, 0)]) + def test_kernel_mask_errors(self, fill): + with pytest.raises(ValueError, match="Non-scalar fill value is not supported"): + F.pad_mask(make_segmentation_mask(), padding=[1], fill=fill) + + def test_kernel_video(self): + check_kernel(F.pad_video, make_video(), padding=[1]) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.pad, make_input(), padding=[1]) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.pad_image, torch.Tensor), + # The PIL kernel uses fill=0 as default rather than fill=None as all others. 
+ # Since the whole fill story is already really inconsistent, we won't introduce yet another case to allow + # for this test to pass. + # See https://github.com/pytorch/vision/issues/6623 for a discussion. + # (F._geometry._pad_image_pil, PIL.Image.Image), + (F.pad_image, tv_tensors.Image), + (F.pad_bounding_boxes, tv_tensors.BoundingBoxes), + (F.pad_mask, tv_tensors.Mask), + (F.pad_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.pad, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_transform(self, make_input): + check_transform(transforms.Pad(padding=[1]), make_input()) + + def test_transform_errors(self): + with pytest.raises(TypeError, match="Got inappropriate padding arg"): + transforms.Pad("abc") + + with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"): + transforms.Pad([-0.7, 0, 0.7]) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.Pad(12, fill="abc") + + with pytest.raises(ValueError, match="Padding mode should be either"): + transforms.Pad(12, padding_mode="abc") + + @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS) + @pytest.mark.parametrize( + ("padding_mode", "fill"), + [ + *[("constant", fill) for fill in CORRECTNESS_FILLS], + *[(padding_mode, None) for padding_mode in ["symmetric", "edge", "reflect"]], + ], + ) + @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)]) + def test_image_correctness(self, padding, padding_mode, fill, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + fill = adapt_fill(fill, dtype=torch.uint8) + + actual = fn(image, padding=padding, padding_mode=padding_mode, fill=fill) + expected = F.to_image(F.pad(F.to_pil_image(image), padding=padding, padding_mode=padding_mode, 
fill=fill)) + + assert_equal(actual, expected) + + def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding): + if isinstance(padding, int): + padding = [padding] + left, top, right, bottom = padding * (4 // len(padding)) + + affine_matrix = np.array( + [ + [1, 0, left], + [0, 1, top], + ], + ) + + height = bounding_boxes.canvas_size[0] + top + bottom + width = bounding_boxes.canvas_size[1] + left + right + + return reference_affine_bounding_boxes_helper( + bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=(height, width) + ) + + @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)]) + def test_bounding_boxes_correctness(self, padding, format, dtype, device, fn): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + + actual = fn(bounding_boxes, padding=padding) + expected = self._reference_pad_bounding_boxes(bounding_boxes, padding=padding) + + assert_equal(actual, expected) + + +class TestCenterCrop: + INPUT_SIZE = (17, 11) + OUTPUT_SIZES = [(3, 5), (5, 3), (4, 4), (21, 9), (13, 15), (19, 14), 3, (4,), [5], INPUT_SIZE] + + @pytest.mark.parametrize("output_size", OUTPUT_SIZES) + @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, output_size, dtype, device): + check_kernel( + F.center_crop_image, + make_image(self.INPUT_SIZE, dtype=dtype, device=device), + output_size=output_size, + check_scripted_vs_eager=not isinstance(output_size, int), + ) + + @pytest.mark.parametrize("output_size", OUTPUT_SIZES) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + def test_kernel_bounding_boxes(self, output_size, format): + 
bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format) + check_kernel( + F.center_crop_bounding_boxes, + bounding_boxes, + format=bounding_boxes.format, + canvas_size=bounding_boxes.canvas_size, + output_size=output_size, + check_scripted_vs_eager=not isinstance(output_size, int), + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.center_crop_mask, make_mask(), output_size=self.OUTPUT_SIZES[0]) + + def test_kernel_video(self): + check_kernel(F.center_crop_video, make_video(self.INPUT_SIZE), output_size=self.OUTPUT_SIZES[0]) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.center_crop, make_input(self.INPUT_SIZE), output_size=self.OUTPUT_SIZES[0]) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.center_crop_image, torch.Tensor), + (F._geometry._center_crop_image_pil, PIL.Image.Image), + (F.center_crop_image, tv_tensors.Image), + (F.center_crop_bounding_boxes, tv_tensors.BoundingBoxes), + (F.center_crop_mask, tv_tensors.Mask), + (F.center_crop_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.center_crop, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_transform(self, make_input): + check_transform(transforms.CenterCrop(self.OUTPUT_SIZES[0]), make_input(self.INPUT_SIZE)) + + @pytest.mark.parametrize("output_size", OUTPUT_SIZES) + @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)]) + def test_image_correctness(self, output_size, fn): + image = make_image(self.INPUT_SIZE, 
dtype=torch.uint8, device="cpu") + + actual = fn(image, output_size) + expected = F.to_image(F.center_crop(F.to_pil_image(image), output_size=output_size)) + + assert_equal(actual, expected) + + def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size): + image_height, image_width = bounding_boxes.canvas_size + if isinstance(output_size, int): + output_size = (output_size, output_size) + elif len(output_size) == 1: + output_size *= 2 + crop_height, crop_width = output_size + + top = int(round((image_height - crop_height) / 2)) + left = int(round((image_width - crop_width) / 2)) + + affine_matrix = np.array( + [ + [1, 0, -left], + [0, 1, -top], + ], + ) + return reference_affine_bounding_boxes_helper( + bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=output_size + ) + + @pytest.mark.parametrize("output_size", OUTPUT_SIZES) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)]) + def test_bounding_boxes_correctness(self, output_size, format, dtype, device, fn): + bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device) + + actual = fn(bounding_boxes, output_size) + expected = self._reference_center_crop_bounding_boxes(bounding_boxes, output_size) + + assert_equal(actual, expected) + + +class TestPerspective: + COEFFICIENTS = [ + [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018], + [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063], + ] + START_END_POINTS = [ + ([[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]), + ([[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]), + ([[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]), + ] + MINIMAL_KWARGS = 
dict(startpoints=None, endpoints=None, coefficients=COEFFICIENTS[0]) + + @param_value_parametrization( + coefficients=COEFFICIENTS, + start_end_points=START_END_POINTS, + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, param, value, dtype, device): + if param == "start_end_points": + kwargs = dict(zip(["startpoints", "endpoints"], value)) + else: + kwargs = {"startpoints": None, "endpoints": None, param: value} + if param == "fill": + kwargs["coefficients"] = self.COEFFICIENTS[0] + + check_kernel( + F.perspective_image, + make_image(dtype=dtype, device=device), + **kwargs, + check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))), + ) + + def test_kernel_image_error(self): + image = make_image_tensor() + + with pytest.raises(ValueError, match="startpoints/endpoints or the coefficients must have non `None` values"): + F.perspective_image(image, startpoints=None, endpoints=None) + + with pytest.raises( + ValueError, match="startpoints/endpoints and the coefficients shouldn't be defined concurrently" + ): + startpoints, endpoints = self.START_END_POINTS[0] + coefficients = self.COEFFICIENTS[0] + F.perspective_image(image, startpoints=startpoints, endpoints=endpoints, coefficients=coefficients) + + with pytest.raises(ValueError, match="coefficients should have 8 float values"): + F.perspective_image(image, startpoints=None, endpoints=None, coefficients=list(range(7))) + + @param_value_parametrization( + coefficients=COEFFICIENTS, + start_end_points=START_END_POINTS, + ) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + def test_kernel_bounding_boxes(self, param, value, format): + if param == "start_end_points": + kwargs = dict(zip(["startpoints", "endpoints"], value)) + else: + kwargs = {"startpoints": None, "endpoints": None, param: value} + + bounding_boxes = 
make_bounding_boxes(format=format) + + check_kernel( + F.perspective_bounding_boxes, + bounding_boxes, + format=bounding_boxes.format, + canvas_size=bounding_boxes.canvas_size, + **kwargs, + ) + + def test_kernel_bounding_boxes_error(self): + bounding_boxes = make_bounding_boxes() + format, canvas_size = bounding_boxes.format, bounding_boxes.canvas_size + bounding_boxes = bounding_boxes.as_subclass(torch.Tensor) + + with pytest.raises(RuntimeError, match="Denominator is zero"): + F.perspective_bounding_boxes( + bounding_boxes, + format=format, + canvas_size=canvas_size, + startpoints=None, + endpoints=None, + coefficients=[0.0] * 8, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks]) + def test_kernel_mask(self, make_mask): + check_kernel(F.perspective_mask, make_mask(), **self.MINIMAL_KWARGS) + + def test_kernel_video(self): + check_kernel(F.perspective_video, make_video(), **self.MINIMAL_KWARGS) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.perspective, make_input(), **self.MINIMAL_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.perspective_image, torch.Tensor), + (F._geometry._perspective_image_pil, PIL.Image.Image), + (F.perspective_image, tv_tensors.Image), + (F.perspective_bounding_boxes, tv_tensors.BoundingBoxes), + (F.perspective_mask, tv_tensors.Mask), + (F.perspective_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.perspective, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("distortion_scale", [0.5, 0.0, 1.0]) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_transform(self, distortion_scale, 
make_input): + check_transform(transforms.RandomPerspective(distortion_scale=distortion_scale, p=1), make_input()) + + @pytest.mark.parametrize("distortion_scale", [-1, 2]) + def test_transform_error(self, distortion_scale): + with pytest.raises(ValueError, match="distortion_scale value should be between 0 and 1"): + transforms.RandomPerspective(distortion_scale=distortion_scale) + + @pytest.mark.parametrize("coefficients", COEFFICIENTS) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) + def test_image_functional_correctness(self, coefficients, interpolation, fill): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.perspective( + image, startpoints=None, endpoints=None, coefficients=coefficients, interpolation=interpolation, fill=fill + ) + expected = F.to_image( + F.perspective( + F.to_pil_image(image), + startpoints=None, + endpoints=None, + coefficients=coefficients, + interpolation=interpolation, + fill=fill, + ) + ) + + if interpolation is transforms.InterpolationMode.BILINEAR: + abs_diff = (actual.float() - expected.float()).abs() + assert (abs_diff > 1).float().mean() < 7e-2 + mae = abs_diff.mean() + assert mae < 3 + else: + assert_equal(actual, expected) + + def _reference_perspective_bounding_boxes(self, bounding_boxes, *, startpoints, endpoints): + format = bounding_boxes.format + canvas_size = bounding_boxes.canvas_size + dtype = bounding_boxes.dtype + device = bounding_boxes.device + + coefficients = _get_perspective_coeffs(endpoints, startpoints) + + def perspective_bounding_boxes(bounding_boxes): + m1 = np.array( + [ + [coefficients[0], coefficients[1], coefficients[2]], + [coefficients[3], coefficients[4], coefficients[5]], + ] + ) + m2 = np.array( + [ + [coefficients[6], coefficients[7], 1.0], + [coefficients[6], coefficients[7], 1.0], + ] + ) + + # Go to float before converting to prevent precision 
loss in case of CXCYWH -> XYXY and W or H is 1 + input_xyxy = F.convert_bounding_box_format( + bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True), + old_format=format, + new_format=tv_tensors.BoundingBoxFormat.XYXY, + inplace=True, + ) + x1, y1, x2, y2 = input_xyxy.squeeze(0).tolist() + + points = np.array( + [ + [x1, y1, 1.0], + [x2, y1, 1.0], + [x1, y2, 1.0], + [x2, y2, 1.0], + ] + ) + + numerator = points @ m1.T + denominator = points @ m2.T + transformed_points = numerator / denominator + + output_xyxy = torch.Tensor( + [ + float(np.min(transformed_points[:, 0])), + float(np.min(transformed_points[:, 1])), + float(np.max(transformed_points[:, 0])), + float(np.max(transformed_points[:, 1])), + ] + ) + + output = F.convert_bounding_box_format( + output_xyxy, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format + ) + + # It is important to clamp before casting, especially for CXCYWH format, dtype=int64 + return F.clamp_bounding_boxes( + output, + format=format, + canvas_size=canvas_size, + ).to(dtype=dtype, device=device) + + return tv_tensors.BoundingBoxes( + torch.cat([perspective_bounding_boxes(b) for b in bounding_boxes.reshape(-1, 4).unbind()], dim=0).reshape( + bounding_boxes.shape + ), + format=format, + canvas_size=canvas_size, + ) + + @pytest.mark.parametrize(("startpoints", "endpoints"), START_END_POINTS) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + + actual = F.perspective(bounding_boxes, startpoints=startpoints, endpoints=endpoints) + expected = self._reference_perspective_bounding_boxes( + bounding_boxes, startpoints=startpoints, endpoints=endpoints + ) + + assert_close(actual, expected, rtol=0, 
atol=1) + + +class TestEqualize: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.equalize_image, make_image(dtype=dtype, device=device)) + + def test_kernel_video(self): + check_kernel(F.equalize_image, make_video()) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_functional(self, make_input): + check_functional(F.equalize, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.equalize_image, torch.Tensor), + (F._color._equalize_image_pil, PIL.Image.Image), + (F.equalize_image, tv_tensors.Image), + (F.equalize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.equalize, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_transform(self, make_input): + check_transform(transforms.RandomEqualize(p=1), make_input()) + + @pytest.mark.parametrize(("low", "high"), [(0, 64), (64, 192), (192, 256), (0, 1), (127, 128), (255, 256)]) + @pytest.mark.parametrize("fn", [F.equalize, transform_cls_to_functional(transforms.RandomEqualize, p=1)]) + def test_image_correctness(self, low, high, fn): + # We are not using the default `make_image` here since that uniformly samples the values over the whole value + # range. Since the whole point of F.equalize is to transform an arbitrary distribution of values into a uniform + # one over the full range, the information gain is low if we already provide something really close to the + # expected value. 
+ image = tv_tensors.Image( + torch.testing.make_tensor((3, 117, 253), dtype=torch.uint8, device="cpu", low=low, high=high) + ) + + actual = fn(image) + expected = F.to_image(F.equalize(F.to_pil_image(image))) + + assert_equal(actual, expected) + + +class TestUniformTemporalSubsample: + def test_kernel_video(self): + check_kernel(F.uniform_temporal_subsample_video, make_video(), num_samples=2) + + @pytest.mark.parametrize("make_input", [make_video_tensor, make_video]) + def test_functional(self, make_input): + check_functional(F.uniform_temporal_subsample, make_input(), num_samples=2) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.uniform_temporal_subsample_video, torch.Tensor), + (F.uniform_temporal_subsample_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.uniform_temporal_subsample, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_video_tensor, make_video]) + def test_transform(self, make_input): + check_transform(transforms.UniformTemporalSubsample(num_samples=2), make_input()) + + def _reference_uniform_temporal_subsample_video(self, video, *, num_samples): + # Adapted from + # https://github.com/facebookresearch/pytorchvideo/blob/c8d23d8b7e597586a9e2d18f6ed31ad8aa379a7a/pytorchvideo/transforms/functional.py#L19 + t = video.shape[-4] + assert num_samples > 0 and t > 0 + # Sample by nearest neighbor interpolation if num_samples > t. 
+ indices = torch.linspace(0, t - 1, num_samples, device=video.device) + indices = torch.clamp(indices, 0, t - 1).long() + return tv_tensors.Video(torch.index_select(video, -4, indices)) + + CORRECTNESS_NUM_FRAMES = 5 + + @pytest.mark.parametrize("num_samples", list(range(1, CORRECTNESS_NUM_FRAMES + 1))) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize( + "fn", [F.uniform_temporal_subsample, transform_cls_to_functional(transforms.UniformTemporalSubsample)] + ) + def test_video_correctness(self, num_samples, dtype, device, fn): + video = make_video(num_frames=self.CORRECTNESS_NUM_FRAMES, dtype=dtype, device=device) + + actual = fn(video, num_samples=num_samples) + expected = self._reference_uniform_temporal_subsample_video(video, num_samples=num_samples) + + assert_equal(actual, expected) + + +class TestNormalize: + MEANS_STDS = [ + ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]), + ] + MEAN, STD = MEANS_STDS[0] + + @pytest.mark.parametrize(("mean", "std"), [*MEANS_STDS, (0.5, 2.0)]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, mean, std, device): + check_kernel(F.normalize_image, make_image(dtype=torch.float32, device=device), mean=self.MEAN, std=self.STD) + + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_inplace(self, device): + input = make_image_tensor(dtype=torch.float32, device=device) + input_version = input._version + + output_out_of_place = F.normalize_image(input, mean=self.MEAN, std=self.STD) + assert output_out_of_place.data_ptr() != input.data_ptr() + assert output_out_of_place is not input + + output_inplace = F.normalize_image(input, mean=self.MEAN, std=self.STD, inplace=True) + assert output_inplace.data_ptr() == input.data_ptr() + assert output_inplace._version > input_version + assert output_inplace is input + + assert_equal(output_inplace, 
output_out_of_place) + + def test_kernel_video(self): + check_kernel(F.normalize_video, make_video(dtype=torch.float32), mean=self.MEAN, std=self.STD) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) + def test_functional(self, make_input): + check_functional(F.normalize, make_input(dtype=torch.float32), mean=self.MEAN, std=self.STD) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.normalize_image, torch.Tensor), + (F.normalize_image, tv_tensors.Image), + (F.normalize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.normalize, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(TypeError, match="should be a float tensor"): + F.normalize_image(make_image(dtype=torch.uint8), mean=self.MEAN, std=self.STD) + + with pytest.raises(ValueError, match="tensor image of size"): + F.normalize_image(torch.rand(16, 16, dtype=torch.float32), mean=self.MEAN, std=self.STD) + + for std in [0, [0, 0, 0], [0, 1, 1]]: + with pytest.raises(ValueError, match="std evaluated to zero, leading to division by zero"): + F.normalize_image(make_image(dtype=torch.float32), mean=self.MEAN, std=std) + + def _sample_input_adapter(self, transform, input, device): + adapted_input = {} + for key, value in input.items(): + if isinstance(value, PIL.Image.Image): + # normalize doesn't support PIL images + continue + elif check_type(value, (is_pure_tensor, tv_tensors.Image, tv_tensors.Video)): + # normalize doesn't support integer images + value = F.to_dtype(value, torch.float32, scale=True) + adapted_input[key] = value + return adapted_input + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) + def test_transform(self, make_input): + check_transform( + transforms.Normalize(mean=self.MEAN, std=self.STD), + make_input(dtype=torch.float32), + 
check_sample_input=self._sample_input_adapter, + ) + + def _reference_normalize_image(self, image, *, mean, std): + image = image.numpy() + mean, std = [np.array(stat, dtype=image.dtype).reshape((-1, 1, 1)) for stat in [mean, std]] + return tv_tensors.Image((image - mean) / std) + + @pytest.mark.parametrize(("mean", "std"), MEANS_STDS) + @pytest.mark.parametrize("dtype", [torch.float16, torch.float32, torch.float64]) + @pytest.mark.parametrize("fn", [F.normalize, transform_cls_to_functional(transforms.Normalize)]) + def test_correctness_image(self, mean, std, dtype, fn): + image = make_image(dtype=dtype) + + actual = fn(image, mean=mean, std=std) + expected = self._reference_normalize_image(image, mean=mean, std=std) + + assert_equal(actual, expected) + + +class TestClampBoundingBoxes: + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel(self, format, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + check_kernel( + F.clamp_bounding_boxes, + bounding_boxes, + format=bounding_boxes.format, + canvas_size=bounding_boxes.canvas_size, + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + def test_functional(self, format): + check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format)) + + def test_errors(self): + input_tv_tensor = make_bounding_boxes() + input_pure_tensor = input_tv_tensor.as_subclass(torch.Tensor) + format, canvas_size = input_tv_tensor.format, input_tv_tensor.canvas_size + + for format_, canvas_size_ in [(None, None), (format, None), (None, canvas_size)]: + with pytest.raises( + ValueError, match="For pure tensor inputs, `format` and `canvas_size` have to be passed." 
+ ): + F.clamp_bounding_boxes(input_pure_tensor, format=format_, canvas_size=canvas_size_) + + for format_, canvas_size_ in [(format, canvas_size), (format, None), (None, canvas_size)]: + with pytest.raises( + ValueError, match="For bounding box tv_tensor inputs, `format` and `canvas_size` must not be passed." + ): + F.clamp_bounding_boxes(input_tv_tensor, format=format_, canvas_size=canvas_size_) + + def test_transform(self): + check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes()) + + +class TestInvert: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.int16, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.invert_image, make_image(dtype=dtype, device=device)) + + def test_kernel_video(self): + check_kernel(F.invert_video, make_video()) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.invert, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.invert_image, torch.Tensor), + (F._color._invert_image_pil, PIL.Image.Image), + (F.invert_image, tv_tensors.Image), + (F.invert_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.invert, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomInvert(p=1), make_input()) + + @pytest.mark.parametrize("fn", [F.invert, transform_cls_to_functional(transforms.RandomInvert, p=1)]) + def test_correctness_image(self, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image(F.invert(F.to_pil_image(image))) + + assert_equal(actual, expected) + + +class TestPosterize: + 
@pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.posterize_image, make_image(dtype=dtype, device=device), bits=1) + + def test_kernel_video(self): + check_kernel(F.posterize_video, make_video(), bits=1) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.posterize, make_input(), bits=1) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.posterize_image, torch.Tensor), + (F._color._posterize_image_pil, PIL.Image.Image), + (F.posterize_image, tv_tensors.Image), + (F.posterize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.posterize, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomPosterize(bits=1, p=1), make_input()) + + @pytest.mark.parametrize("bits", [1, 4, 8]) + @pytest.mark.parametrize("fn", [F.posterize, transform_cls_to_functional(transforms.RandomPosterize, p=1)]) + def test_correctness_image(self, bits, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image, bits=bits) + expected = F.to_image(F.posterize(F.to_pil_image(image), bits=bits)) + + assert_equal(actual, expected) + + +class TestSolarize: + def _make_threshold(self, input, *, factor=0.5): + dtype = input.dtype if isinstance(input, torch.Tensor) else torch.uint8 + return (float if dtype.is_floating_point else int)(get_max_value(dtype) * factor) + + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + image = make_image(dtype=dtype, device=device) + 
check_kernel(F.solarize_image, image, threshold=self._make_threshold(image)) + + def test_kernel_video(self): + video = make_video() + check_kernel(F.solarize_video, video, threshold=self._make_threshold(video)) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + input = make_input() + check_functional(F.solarize, input, threshold=self._make_threshold(input)) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.solarize_image, torch.Tensor), + (F._color._solarize_image_pil, PIL.Image.Image), + (F.solarize_image, tv_tensors.Image), + (F.solarize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.solarize, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize(("dtype", "threshold"), [(torch.uint8, 256), (torch.float, 1.5)]) + def test_functional_error(self, dtype, threshold): + with pytest.raises(TypeError, match="Threshold should be less or equal the maximum value of the dtype"): + F.solarize(make_image(dtype=dtype), threshold=threshold) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + input = make_input() + check_transform(transforms.RandomSolarize(threshold=self._make_threshold(input), p=1), input) + + @pytest.mark.parametrize("threshold_factor", [0.0, 0.1, 0.5, 0.9, 1.0]) + @pytest.mark.parametrize("fn", [F.solarize, transform_cls_to_functional(transforms.RandomSolarize, p=1)]) + def test_correctness_image(self, threshold_factor, fn): + image = make_image(dtype=torch.uint8, device="cpu") + threshold = self._make_threshold(image, factor=threshold_factor) + + actual = fn(image, threshold=threshold) + expected = F.to_image(F.solarize(F.to_pil_image(image), threshold=threshold)) + + assert_equal(actual, expected) + + +class TestAutocontrast: + 
@pytest.mark.parametrize("dtype", [torch.uint8, torch.int16, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.autocontrast_image, make_image(dtype=dtype, device=device)) + + def test_kernel_video(self): + check_kernel(F.autocontrast_video, make_video()) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.autocontrast, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.autocontrast_image, torch.Tensor), + (F._color._autocontrast_image_pil, PIL.Image.Image), + (F.autocontrast_image, tv_tensors.Image), + (F.autocontrast_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.autocontrast, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomAutocontrast(p=1), make_input(), check_v1_compatibility=dict(rtol=0, atol=1)) + + @pytest.mark.parametrize("fn", [F.autocontrast, transform_cls_to_functional(transforms.RandomAutocontrast, p=1)]) + def test_correctness_image(self, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image(F.autocontrast(F.to_pil_image(image))) + + assert_close(actual, expected, rtol=0, atol=1) + + +class TestAdjustSharpness: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_sharpness_image, make_image(dtype=dtype, device=device), sharpness_factor=0.5) + + def test_kernel_video(self): + check_kernel(F.adjust_sharpness_video, make_video(), sharpness_factor=0.5) + + @pytest.mark.parametrize("make_input", 
[make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_sharpness, make_input(), sharpness_factor=0.5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_sharpness_image, torch.Tensor), + (F._color._adjust_sharpness_image_pil, PIL.Image.Image), + (F.adjust_sharpness_image, tv_tensors.Image), + (F.adjust_sharpness_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_sharpness, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1), make_input()) + + def test_functional_error(self): + with pytest.raises(TypeError, match="can have 1 or 3 channels"): + F.adjust_sharpness(make_image(color_space="RGBA"), sharpness_factor=0.5) + + with pytest.raises(ValueError, match="is not non-negative"): + F.adjust_sharpness(make_image(), sharpness_factor=-1) + + @pytest.mark.parametrize("sharpness_factor", [0.1, 0.5, 1.0]) + @pytest.mark.parametrize( + "fn", [F.adjust_sharpness, transform_cls_to_functional(transforms.RandomAdjustSharpness, p=1)] + ) + def test_correctness_image(self, sharpness_factor, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image, sharpness_factor=sharpness_factor) + expected = F.to_image(F.adjust_sharpness(F.to_pil_image(image), sharpness_factor=sharpness_factor)) + + assert_equal(actual, expected) + + +class TestAdjustContrast: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_contrast_image, make_image(dtype=dtype, device=device), contrast_factor=0.5) + + def test_kernel_video(self): + 
check_kernel(F.adjust_contrast_video, make_video(), contrast_factor=0.5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_contrast, make_input(), contrast_factor=0.5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_contrast_image, torch.Tensor), + (F._color._adjust_contrast_image_pil, PIL.Image.Image), + (F.adjust_contrast_image, tv_tensors.Image), + (F.adjust_contrast_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_contrast, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(TypeError, match="permitted channel values are 1 or 3"): + F.adjust_contrast(make_image(color_space="RGBA"), contrast_factor=0.5) + + with pytest.raises(ValueError, match="is not non-negative"): + F.adjust_contrast(make_image(), contrast_factor=-1) + + @pytest.mark.parametrize("contrast_factor", [0.1, 0.5, 1.0]) + def test_correctness_image(self, contrast_factor): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_contrast(image, contrast_factor=contrast_factor) + expected = F.to_image(F.adjust_contrast(F.to_pil_image(image), contrast_factor=contrast_factor)) + + assert_close(actual, expected, rtol=0, atol=1) + + +class TestAdjustGamma: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_gamma_image, make_image(dtype=dtype, device=device), gamma=0.5) + + def test_kernel_video(self): + check_kernel(F.adjust_gamma_video, make_video(), gamma=0.5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_gamma, make_input(), gamma=0.5) + + 
@pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_gamma_image, torch.Tensor), + (F._color._adjust_gamma_image_pil, PIL.Image.Image), + (F.adjust_gamma_image, tv_tensors.Image), + (F.adjust_gamma_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_gamma, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(ValueError, match="Gamma should be a non-negative real number"): + F.adjust_gamma(make_image(), gamma=-1) + + @pytest.mark.parametrize("gamma", [0.1, 0.5, 1.0]) + @pytest.mark.parametrize("gain", [0.1, 1.0, 2.0]) + def test_correctness_image(self, gamma, gain): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_gamma(image, gamma=gamma, gain=gain) + expected = F.to_image(F.adjust_gamma(F.to_pil_image(image), gamma=gamma, gain=gain)) + + assert_equal(actual, expected) + + +class TestAdjustHue: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_hue_image, make_image(dtype=dtype, device=device), hue_factor=0.25) + + def test_kernel_video(self): + check_kernel(F.adjust_hue_video, make_video(), hue_factor=0.25) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_hue, make_input(), hue_factor=0.25) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_hue_image, torch.Tensor), + (F._color._adjust_hue_image_pil, PIL.Image.Image), + (F.adjust_hue_image, tv_tensors.Image), + (F.adjust_hue_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_hue, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with 
pytest.raises(TypeError, match="permitted channel values are 1 or 3"): + F.adjust_hue(make_image(color_space="RGBA"), hue_factor=0.25) + + for hue_factor in [-1, 1]: + with pytest.raises(ValueError, match=re.escape("is not in [-0.5, 0.5]")): + F.adjust_hue(make_image(), hue_factor=hue_factor) + + @pytest.mark.parametrize("hue_factor", [-0.5, -0.3, 0.0, 0.2, 0.5]) + def test_correctness_image(self, hue_factor): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_hue(image, hue_factor=hue_factor) + expected = F.to_image(F.adjust_hue(F.to_pil_image(image), hue_factor=hue_factor)) + + mae = (actual.float() - expected.float()).abs().mean() + assert mae < 2 + + +class TestAdjustSaturation: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_saturation_image, make_image(dtype=dtype, device=device), saturation_factor=0.5) + + def test_kernel_video(self): + check_kernel(F.adjust_saturation_video, make_video(), saturation_factor=0.5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_saturation, make_input(), saturation_factor=0.5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_saturation_image, torch.Tensor), + (F._color._adjust_saturation_image_pil, PIL.Image.Image), + (F.adjust_saturation_image, tv_tensors.Image), + (F.adjust_saturation_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_saturation, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(TypeError, match="permitted channel values are 1 or 3"): + F.adjust_saturation(make_image(color_space="RGBA"), saturation_factor=0.5) + + with pytest.raises(ValueError, match="is not 
non-negative"): + F.adjust_saturation(make_image(), saturation_factor=-1) + + @pytest.mark.parametrize("saturation_factor", [0.1, 0.5, 1.0]) + def test_correctness_image(self, saturation_factor): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_saturation(image, saturation_factor=saturation_factor) + expected = F.to_image(F.adjust_saturation(F.to_pil_image(image), saturation_factor=saturation_factor)) + + assert_close(actual, expected, rtol=0, atol=1) + + +class TestFiveTenCrop: + INPUT_SIZE = (17, 11) + OUTPUT_SIZE = (3, 5) + + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("kernel", [F.five_crop_image, F.ten_crop_image]) + def test_kernel_image(self, dtype, device, kernel): + check_kernel( + kernel, + make_image(self.INPUT_SIZE, dtype=dtype, device=device), + size=self.OUTPUT_SIZE, + check_batched_vs_unbatched=False, + ) + + @pytest.mark.parametrize("kernel", [F.five_crop_video, F.ten_crop_video]) + def test_kernel_video(self, kernel): + check_kernel(kernel, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZE, check_batched_vs_unbatched=False) + + def _functional_wrapper(self, fn): + # This wrapper is needed to make five_crop / ten_crop compatible with check_functional, since that requires a + # single output rather than a sequence. 
+ @functools.wraps(fn) + def wrapper(*args, **kwargs): + outputs = fn(*args, **kwargs) + return outputs[0] + + return wrapper + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("functional", [F.five_crop, F.ten_crop]) + def test_functional(self, make_input, functional): + check_functional( + self._functional_wrapper(functional), + make_input(self.INPUT_SIZE), + size=self.OUTPUT_SIZE, + check_scripted_smoke=False, + ) + + @pytest.mark.parametrize( + ("functional", "kernel", "input_type"), + [ + (F.five_crop, F.five_crop_image, torch.Tensor), + (F.five_crop, F._geometry._five_crop_image_pil, PIL.Image.Image), + (F.five_crop, F.five_crop_image, tv_tensors.Image), + (F.five_crop, F.five_crop_video, tv_tensors.Video), + (F.ten_crop, F.ten_crop_image, torch.Tensor), + (F.ten_crop, F._geometry._ten_crop_image_pil, PIL.Image.Image), + (F.ten_crop, F.ten_crop_image, tv_tensors.Image), + (F.ten_crop, F.ten_crop_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, functional, kernel, input_type): + check_functional_kernel_signature_match(functional, kernel=kernel, input_type=input_type) + + class _TransformWrapper(nn.Module): + # This wrapper is needed to make FiveCrop / TenCrop compatible with check_transform, since that requires a + # single output rather than a sequence. 
+ _v1_transform_cls = None + + def _extract_params_for_v1_transform(self): + return dict(five_ten_crop_transform=self.five_ten_crop_transform) + + def __init__(self, five_ten_crop_transform): + super().__init__() + type(self)._v1_transform_cls = type(self) + self.five_ten_crop_transform = five_ten_crop_transform + + def forward(self, input: torch.Tensor) -> torch.Tensor: + outputs = self.five_ten_crop_transform(input) + return outputs[0] + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop]) + def test_transform(self, make_input, transform_cls): + check_transform( + self._TransformWrapper(transform_cls(size=self.OUTPUT_SIZE)), + make_input(self.INPUT_SIZE), + check_sample_input=False, + ) + + @pytest.mark.parametrize("make_input", [make_bounding_boxes, make_detection_masks]) + @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop]) + def test_transform_error(self, make_input, transform_cls): + transform = transform_cls(size=self.OUTPUT_SIZE) + + with pytest.raises(TypeError, match="not supported"): + transform(make_input(self.INPUT_SIZE)) + + @pytest.mark.parametrize("fn", [F.five_crop, transform_cls_to_functional(transforms.FiveCrop)]) + def test_correctness_image_five_crop(self, fn): + image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu") + + actual = fn(image, size=self.OUTPUT_SIZE) + expected = F.five_crop(F.to_pil_image(image), size=self.OUTPUT_SIZE) + + assert isinstance(actual, tuple) + assert_equal(actual, [F.to_image(e) for e in expected]) + + @pytest.mark.parametrize("fn_or_class", [F.ten_crop, transforms.TenCrop]) + @pytest.mark.parametrize("vertical_flip", [False, True]) + def test_correctness_image_ten_crop(self, fn_or_class, vertical_flip): + if fn_or_class is transforms.TenCrop: + fn = transform_cls_to_functional(fn_or_class, size=self.OUTPUT_SIZE, 
vertical_flip=vertical_flip) + kwargs = dict() + else: + fn = fn_or_class + kwargs = dict(size=self.OUTPUT_SIZE, vertical_flip=vertical_flip) + + image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu") + + actual = fn(image, **kwargs) + expected = F.ten_crop(F.to_pil_image(image), size=self.OUTPUT_SIZE, vertical_flip=vertical_flip) + + assert isinstance(actual, tuple) + assert_equal(actual, [F.to_image(e) for e in expected]) + + +class TestColorJitter: + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, dtype, device): + if make_input is make_image_pil and not (dtype is torch.uint8 and device == "cpu"): + pytest.skip( + "PIL image tests with parametrization other than dtype=torch.uint8 and device='cpu' " + "will degenerate to that anyway." + ) + + check_transform( + transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.25), + make_input(dtype=dtype, device=device), + ) + + def test_transform_noop(self): + input = make_image() + input_version = input._version + + transform = transforms.ColorJitter() + output = transform(input) + + assert output is input + assert output.data_ptr() == input.data_ptr() + assert output._version == input_version + + def test_transform_error(self): + with pytest.raises(ValueError, match="must be non negative"): + transforms.ColorJitter(brightness=-1) + + for brightness in [object(), [1, 2, 3]]: + with pytest.raises(TypeError, match="single number or a sequence with length 2"): + transforms.ColorJitter(brightness=brightness) + + with pytest.raises(ValueError, match="values should be between"): + transforms.ColorJitter(brightness=(-1, 0.5)) + + with pytest.raises(ValueError, match="values should be between"): + transforms.ColorJitter(hue=1) + + @pytest.mark.parametrize("brightness", [None, 0.1, 
(0.2, 0.3)]) + @pytest.mark.parametrize("contrast", [None, 0.4, (0.5, 0.6)]) + @pytest.mark.parametrize("saturation", [None, 0.7, (0.8, 0.9)]) + @pytest.mark.parametrize("hue", [None, 0.3, (-0.1, 0.2)]) + def test_transform_correctness(self, brightness, contrast, saturation, hue): + image = make_image(dtype=torch.uint8, device="cpu") + + transform = transforms.ColorJitter(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue) + + with freeze_rng_state(): + torch.manual_seed(0) + actual = transform(image) + + torch.manual_seed(0) + expected = F.to_image(transform(F.to_pil_image(image))) + + mae = (actual.float() - expected.float()).abs().mean() + assert mae < 2 + + +class TestRgbToGrayscale: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.rgb_to_grayscale_image, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image]) + def test_functional(self, make_input): + check_functional(F.rgb_to_grayscale, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.rgb_to_grayscale_image, torch.Tensor), + (F._color._rgb_to_grayscale_image_pil, PIL.Image.Image), + (F.rgb_to_grayscale_image, tv_tensors.Image), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.rgb_to_grayscale, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("transform", [transforms.Grayscale(), transforms.RandomGrayscale(p=1)]) + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image]) + def test_transform(self, transform, make_input): + check_transform(transform, make_input()) + + @pytest.mark.parametrize("num_output_channels", [1, 3]) + @pytest.mark.parametrize("color_space", ["RGB", "GRAY"]) + @pytest.mark.parametrize("fn", [F.rgb_to_grayscale, 
transform_cls_to_functional(transforms.Grayscale)]) + def test_image_correctness(self, num_output_channels, color_space, fn): + image = make_image(dtype=torch.uint8, device="cpu", color_space=color_space) + + actual = fn(image, num_output_channels=num_output_channels) + expected = F.to_image(F.rgb_to_grayscale(F.to_pil_image(image), num_output_channels=num_output_channels)) + + assert_equal(actual, expected, rtol=0, atol=1) + + def test_expanded_channels_are_not_views_into_the_same_underlying_tensor(self): + image = make_image(dtype=torch.uint8, device="cpu", color_space="GRAY") + + output_image = F.rgb_to_grayscale(image, num_output_channels=3) + assert_equal(output_image[0][0][0], output_image[1][0][0]) + output_image[0][0][0] = output_image[0][0][0] + 1 + assert output_image[0][0][0] != output_image[1][0][0] + + @pytest.mark.parametrize("num_input_channels", [1, 3]) + def test_random_transform_correctness(self, num_input_channels): + image = make_image( + color_space={ + 1: "GRAY", + 3: "RGB", + }[num_input_channels], + dtype=torch.uint8, + device="cpu", + ) + + transform = transforms.RandomGrayscale(p=1) + + actual = transform(image) + expected = F.to_image(F.rgb_to_grayscale(F.to_pil_image(image), num_output_channels=num_input_channels)) + + assert_equal(actual, expected, rtol=0, atol=1) + + +class TestGrayscaleToRgb: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.grayscale_to_rgb_image, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image]) + def test_functional(self, make_input): + check_functional(F.grayscale_to_rgb, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.rgb_to_grayscale_image, torch.Tensor), + (F._color._rgb_to_grayscale_image_pil, PIL.Image.Image), + (F.rgb_to_grayscale_image, tv_tensors.Image), + ], + ) 
+ def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.grayscale_to_rgb, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image]) + def test_transform(self, make_input): + check_transform(transforms.RGB(), make_input(color_space="GRAY")) + + @pytest.mark.parametrize("fn", [F.grayscale_to_rgb, transform_cls_to_functional(transforms.RGB)]) + def test_image_correctness(self, fn): + image = make_image(dtype=torch.uint8, device="cpu", color_space="GRAY") + + actual = fn(image) + expected = F.to_image(F.grayscale_to_rgb(F.to_pil_image(image))) + + assert_equal(actual, expected, rtol=0, atol=1) + + def test_expanded_channels_are_not_views_into_the_same_underlying_tensor(self): + image = make_image(dtype=torch.uint8, device="cpu", color_space="GRAY") + + output_image = F.grayscale_to_rgb(image) + assert_equal(output_image[0][0][0], output_image[1][0][0]) + output_image[0][0][0] = output_image[0][0][0] + 1 + assert output_image[0][0][0] != output_image[1][0][0] + + def test_rgb_image_is_unchanged(self): + image = make_image(dtype=torch.uint8, device="cpu", color_space="RGB") + assert_equal(image.shape[-3], 3) + assert_equal(F.grayscale_to_rgb(image), image) + + +class TestRandomZoomOut: + # Tests are light because this largely relies on the already tested `pad` kernels. 
+ + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + def test_transform(self, make_input): + check_transform(transforms.RandomZoomOut(p=1), make_input()) + + def test_transform_error(self): + for side_range in [None, 1, [1, 2, 3]]: + with pytest.raises( + ValueError if isinstance(side_range, list) else TypeError, match="should be a sequence of length 2" + ): + transforms.RandomZoomOut(side_range=side_range) + + for side_range in [[0.5, 1.5], [2.0, 1.0]]: + with pytest.raises(ValueError, match="Invalid side range"): + transforms.RandomZoomOut(side_range=side_range) + + @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]]) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_params_correctness(self, side_range, make_input, device): + if make_input is make_image_pil and device != "cpu": + pytest.skip("PIL image tests with parametrization device!='cpu' will degenerate to that anyway.") + + transform = transforms.RandomZoomOut(side_range=side_range) + + input = make_input() + height, width = F.get_size(input) + + params = transform.make_params([input]) + assert "padding" in params + + padding = params["padding"] + assert len(padding) == 4 + + assert 0 <= padding[0] <= (side_range[1] - 1) * width + assert 0 <= padding[1] <= (side_range[1] - 1) * height + assert 0 <= padding[2] <= (side_range[1] - 1) * width + assert 0 <= padding[3] <= (side_range[1] - 1) * height + + +class TestRandomPhotometricDistort: + # Tests are light because this largely relies on the already tested + # `adjust_{brightness,contrast,saturation,hue}` and `permute_channels` kernels. 
+ + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, dtype, device): + if make_input is make_image_pil and not (dtype is torch.uint8 and device == "cpu"): + pytest.skip( + "PIL image tests with parametrization other than dtype=torch.uint8 and device='cpu' " + "will degenerate to that anyway." + ) + + check_transform( + transforms.RandomPhotometricDistort( + brightness=(0.3, 0.4), contrast=(0.5, 0.6), saturation=(0.7, 0.8), hue=(-0.1, 0.2), p=1 + ), + make_input(dtype=dtype, device=device), + ) + + +class TestScaleJitter: + # Tests are light because this largely relies on the already tested `resize` kernels. + + INPUT_SIZE = (17, 11) + TARGET_SIZE = (12, 13) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + if make_input is make_image_pil and device != "cpu": + pytest.skip("PIL image tests with parametrization device!='cpu' will degenerate to that anyway.") + + check_transform(transforms.ScaleJitter(self.TARGET_SIZE), make_input(self.INPUT_SIZE, device=device)) + + def test_make_params(self): + input_size = self.INPUT_SIZE + target_size = self.TARGET_SIZE + scale_range = (0.5, 1.5) + + transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range) + params = transform.make_params([make_image(input_size)]) + + assert "size" in params + size = params["size"] + + assert isinstance(size, tuple) and len(size) == 2 + height, width = size + + r_min = min(target_size[1] / input_size[0], target_size[0] / input_size[1]) * scale_range[0] + r_max = min(target_size[1] / input_size[0], target_size[0] / input_size[1]) * scale_range[1] + + 
assert int(input_size[0] * r_min) <= height <= int(input_size[0] * r_max) + assert int(input_size[1] * r_min) <= width <= int(input_size[1] * r_max) + + +class TestLinearTransform: + def _make_matrix_and_vector(self, input, *, device=None): + device = device or input.device + numel = math.prod(F.get_dimensions(input)) + transformation_matrix = torch.randn((numel, numel), device=device) + mean_vector = torch.randn((numel,), device=device) + return transformation_matrix, mean_vector + + def _sample_input_adapter(self, transform, input, device): + return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)} + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, dtype, device): + input = make_input(dtype=dtype, device=device) + check_transform( + transforms.LinearTransformation(*self._make_matrix_and_vector(input)), + input, + check_sample_input=self._sample_input_adapter, + # Compat check is failing on M1 with: + # AssertionError: Tensor-likes are not close! 
+ # Mismatched elements: 1 / 561 (0.2%) + # See https://github.com/pytorch/vision/issues/8453 + check_v1_compatibility=(sys.platform != "darwin"), + ) + + def test_transform_error(self): + with pytest.raises(ValueError, match="transformation_matrix should be square"): + transforms.LinearTransformation(transformation_matrix=torch.rand(2, 3), mean_vector=torch.rand(2)) + + with pytest.raises(ValueError, match="mean_vector should have the same length"): + transforms.LinearTransformation(transformation_matrix=torch.rand(2, 2), mean_vector=torch.rand(1)) + + for matrix_dtype, vector_dtype in [(torch.float32, torch.float64), (torch.float64, torch.float32)]: + with pytest.raises(ValueError, match="Input tensors should have the same dtype"): + transforms.LinearTransformation( + transformation_matrix=torch.rand(2, 2, dtype=matrix_dtype), + mean_vector=torch.rand(2, dtype=vector_dtype), + ) + + image = make_image() + transform = transforms.LinearTransformation(transformation_matrix=torch.rand(2, 2), mean_vector=torch.rand(2)) + with pytest.raises(ValueError, match="Input tensor and transformation matrix have incompatible shape"): + transform(image) + + transform = transforms.LinearTransformation(*self._make_matrix_and_vector(image)) + with pytest.raises(TypeError, match="does not support PIL images"): + transform(F.to_pil_image(image)) + + @needs_cuda + def test_transform_error_cuda(self): + for matrix_device, vector_device in [("cuda", "cpu"), ("cpu", "cuda")]: + with pytest.raises(ValueError, match="Input tensors should be on the same device"): + transforms.LinearTransformation( + transformation_matrix=torch.rand(2, 2, device=matrix_device), + mean_vector=torch.rand(2, device=vector_device), + ) + + for input_device, param_device in [("cuda", "cpu"), ("cpu", "cuda")]: + input = make_image(device=input_device) + transform = transforms.LinearTransformation(*self._make_matrix_and_vector(input, device=param_device)) + with pytest.raises( + ValueError, match="Input tensor 
should be on the same device as transformation matrix and mean vector" + ): + transform(input) + + +def make_image_numpy(*args, **kwargs): + image = make_image_tensor(*args, **kwargs) + return image.permute((1, 2, 0)).numpy() + + +class TestToImage: + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_image_numpy]) + @pytest.mark.parametrize("fn", [F.to_image, transform_cls_to_functional(transforms.ToImage)]) + def test_functional_and_transform(self, make_input, fn): + input = make_input() + output = fn(input) + + assert isinstance(output, tv_tensors.Image) + + input_size = list(input.shape[:2]) if isinstance(input, np.ndarray) else F.get_size(input) + assert F.get_size(output) == input_size + + if isinstance(input, torch.Tensor): + assert output.data_ptr() == input.data_ptr() + + def test_2d_np_array(self): + # Non-regression test for https://github.com/pytorch/vision/issues/8255 + input = np.random.rand(10, 10) + assert F.to_image(input).shape == (1, 10, 10) + + def test_functional_error(self): + with pytest.raises(TypeError, match="Input can either be a pure Tensor, a numpy array, or a PIL image"): + F.to_image(object()) + + +class TestToPILImage: + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_numpy]) + @pytest.mark.parametrize("color_space", ["RGB", "GRAY"]) + @pytest.mark.parametrize("fn", [F.to_pil_image, transform_cls_to_functional(transforms.ToPILImage)]) + def test_functional_and_transform(self, make_input, color_space, fn): + input = make_input(color_space=color_space) + output = fn(input) + + assert isinstance(output, PIL.Image.Image) + + input_size = list(input.shape[:2]) if isinstance(input, np.ndarray) else F.get_size(input) + assert F.get_size(output) == input_size + + def test_functional_error(self): + with pytest.raises(TypeError, match="pic should be Tensor or ndarray"): + F.to_pil_image(object()) + + for ndim in [1, 4]: + with pytest.raises(ValueError, match="pic should 
be 2/3 dimensional"): + F.to_pil_image(torch.empty(*[1] * ndim)) + + with pytest.raises(ValueError, match="pic should not have > 4 channels"): + num_channels = 5 + F.to_pil_image(torch.empty(num_channels, 1, 1)) + + +class TestToTensor: + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_image_numpy]) + def test_smoke(self, make_input): + with pytest.warns(UserWarning, match="deprecated and will be removed"): + transform = transforms.ToTensor() + + input = make_input() + output = transform(input) + + input_size = list(input.shape[:2]) if isinstance(input, np.ndarray) else F.get_size(input) + assert F.get_size(output) == input_size + + +class TestPILToTensor: + @pytest.mark.parametrize("color_space", ["RGB", "GRAY"]) + @pytest.mark.parametrize("fn", [F.pil_to_tensor, transform_cls_to_functional(transforms.PILToTensor)]) + def test_functional_and_transform(self, color_space, fn): + input = make_image_pil(color_space=color_space) + output = fn(input) + + assert isinstance(output, torch.Tensor) and not isinstance(output, tv_tensors.TVTensor) + assert F.get_size(output) == F.get_size(input) + + def test_functional_error(self): + with pytest.raises(TypeError, match="pic should be PIL Image"): + F.pil_to_tensor(object()) + + +class TestLambda: + @pytest.mark.parametrize("input", [object(), torch.empty(()), np.empty(()), "string", 1, 0.0]) + @pytest.mark.parametrize("types", [(), (torch.Tensor, np.ndarray)]) + def test_transform(self, input, types): + was_applied = False + + def was_applied_fn(input): + nonlocal was_applied + was_applied = True + return input + + transform = transforms.Lambda(was_applied_fn, *types) + output = transform(input) + + assert output is input + assert was_applied is (not types or isinstance(input, types)) + + +@pytest.mark.parametrize( + ("alias", "target"), + [ + pytest.param(alias, target, id=alias.__name__) + for alias, target in [ + (F.hflip, F.horizontal_flip), + (F.vflip, F.vertical_flip), + 
(F.get_image_num_channels, F.get_num_channels), + (F.to_pil_image, F.to_pil_image), + (F.elastic_transform, F.elastic), + (F.to_grayscale, F.rgb_to_grayscale), + ] + ], +) +def test_alias(alias, target): + assert alias is target + + +@pytest.mark.parametrize( + "make_inputs", + itertools.permutations( + [ + make_image_tensor, + make_image_tensor, + make_image_pil, + make_image, + make_video, + ], + 3, + ), +) +def test_pure_tensor_heuristic(make_inputs): + flat_inputs = [make_input() for make_input in make_inputs] + + def split_on_pure_tensor(to_split): + # This takes a sequence that is structurally aligned with `flat_inputs` and splits its items into three parts: + # 1. The first pure tensor. If none is present, this will be `None` + # 2. A list of the remaining pure tensors + # 3. A list of all other items + pure_tensors = [] + others = [] + # Splitting always happens on the original `flat_inputs` to avoid any erroneous type changes by the transform to + # affect the splitting. + for item, inpt in zip(to_split, flat_inputs): + (pure_tensors if is_pure_tensor(inpt) else others).append(item) + return pure_tensors[0] if pure_tensors else None, pure_tensors[1:], others + + class CopyCloneTransform(transforms.Transform): + def transform(self, inpt, params): + return inpt.clone() if isinstance(inpt, torch.Tensor) else inpt.copy() + + @staticmethod + def was_applied(output, inpt): + identity = output is inpt + if identity: + return False + + # Make sure nothing fishy is going on + assert_equal(output, inpt) + return True + + first_pure_tensor_input, other_pure_tensor_inputs, other_inputs = split_on_pure_tensor(flat_inputs) + + transform = CopyCloneTransform() + transformed_sample = transform(flat_inputs) + + first_pure_tensor_output, other_pure_tensor_outputs, other_outputs = split_on_pure_tensor(transformed_sample) + + if first_pure_tensor_input is not None: + if other_inputs: + assert not transform.was_applied(first_pure_tensor_output, first_pure_tensor_input) + else: 
+ assert transform.was_applied(first_pure_tensor_output, first_pure_tensor_input) + + for output, inpt in zip(other_pure_tensor_outputs, other_pure_tensor_inputs): + assert not transform.was_applied(output, inpt) + + for input, output in zip(other_inputs, other_outputs): + assert transform.was_applied(output, input) + + +class TestRandomIoUCrop: + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]]) + def test_make_params(self, device, options): + orig_h, orig_w = size = (24, 32) + image = make_image(size) + bboxes = tv_tensors.BoundingBoxes( + torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]), + format="XYXY", + canvas_size=size, + device=device, + ) + sample = [image, bboxes] + + transform = transforms.RandomIoUCrop(sampler_options=options) + + n_samples = 5 + for _ in range(n_samples): + + params = transform.make_params(sample) + + if options == [2.0]: + assert len(params) == 0 + return + + assert len(params["is_within_crop_area"]) > 0 + assert params["is_within_crop_area"].dtype == torch.bool + + assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h) + assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w) + + left, top = params["left"], params["top"] + new_h, new_w = params["height"], params["width"] + ious = box_iou( + bboxes, + torch.tensor([[left, top, left + new_w, top + new_h]], dtype=bboxes.dtype, device=bboxes.device), + ) + assert ious.max() >= options[0] or ious.max() >= options[1], f"{ious} vs {options}" + + def test__transform_empty_params(self, mocker): + transform = transforms.RandomIoUCrop(sampler_options=[2.0]) + image = tv_tensors.Image(torch.rand(1, 3, 4, 4)) + bboxes = tv_tensors.BoundingBoxes(torch.tensor([[1, 1, 2, 2]]), format="XYXY", canvas_size=(4, 4)) + label = torch.tensor([1]) + sample = [image, bboxes, label] + # Let's mock transform.make_params to control the 
output: + transform.make_params = mocker.MagicMock(return_value={}) + output = transform(sample) + torch.testing.assert_close(output, sample) + + def test_forward_assertion(self): + transform = transforms.RandomIoUCrop() + with pytest.raises( + TypeError, + match="requires input sample to contain tensor or PIL images and bounding boxes", + ): + transform(torch.tensor(0)) + + def test__transform(self, mocker): + transform = transforms.RandomIoUCrop() + + size = (32, 24) + image = make_image(size) + bboxes = make_bounding_boxes(format="XYXY", canvas_size=size, num_boxes=6) + masks = make_detection_masks(size, num_masks=6) + + sample = [image, bboxes, masks] + + is_within_crop_area = torch.tensor([0, 1, 0, 1, 0, 1], dtype=torch.bool) + + params = dict(top=1, left=2, height=12, width=12, is_within_crop_area=is_within_crop_area) + transform.make_params = mocker.MagicMock(return_value=params) + output = transform(sample) + + # check number of bboxes vs number of labels: + output_bboxes = output[1] + assert isinstance(output_bboxes, tv_tensors.BoundingBoxes) + assert (output_bboxes[~is_within_crop_area] == 0).all() + + output_masks = output[2] + assert isinstance(output_masks, tv_tensors.Mask) + + +class TestRandomShortestSize: + @pytest.mark.parametrize("min_size,max_size", [([5, 9], 20), ([5, 9], None)]) + def test_make_params(self, min_size, max_size): + canvas_size = (3, 10) + + transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size, antialias=True) + + sample = make_image(canvas_size) + params = transform.make_params([sample]) + + assert "size" in params + size = params["size"] + + assert isinstance(size, tuple) and len(size) == 2 + + longer = max(size) + shorter = min(size) + if max_size is not None: + assert longer <= max_size + assert shorter <= max_size + else: + assert shorter in min_size + + +class TestRandomResize: + def test_make_params(self): + min_size = 3 + max_size = 6 + + transform = transforms.RandomResize(min_size=min_size, 
max_size=max_size, antialias=True) + + for _ in range(10): + params = transform.make_params([]) + + assert isinstance(params["size"], list) and len(params["size"]) == 1 + size = params["size"][0] + + assert min_size <= size < max_size + + +@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, tv_tensors.Image)) +@pytest.mark.parametrize("label_type", (torch.Tensor, int)) +@pytest.mark.parametrize("dataset_return_type", (dict, tuple)) +@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImage)) +def test_classification_preset(image_type, label_type, dataset_return_type, to_tensor): + + image = tv_tensors.Image(torch.randint(0, 256, size=(1, 3, 250, 250), dtype=torch.uint8)) + if image_type is PIL.Image: + image = to_pil_image(image[0]) + elif image_type is torch.Tensor: + image = image.as_subclass(torch.Tensor) + assert is_pure_tensor(image) + + label = 1 if label_type is int else torch.tensor([1]) + + if dataset_return_type is dict: + sample = { + "image": image, + "label": label, + } + else: + sample = image, label + + if to_tensor is transforms.ToTensor: + with pytest.warns(UserWarning, match="deprecated and will be removed"): + to_tensor = to_tensor() + else: + to_tensor = to_tensor() + + t = transforms.Compose( + [ + transforms.RandomResizedCrop((224, 224), antialias=True), + transforms.RandomHorizontalFlip(p=1), + transforms.RandAugment(), + transforms.TrivialAugmentWide(), + transforms.AugMix(), + transforms.AutoAugment(), + to_tensor, + # TODO: ConvertImageDtype is a pass-through on PIL images, is that + # intended? This results in a failure if we convert to tensor after + # it, because the image would still be uint8 which make Normalize + # fail. 
+ transforms.ConvertImageDtype(torch.float), + transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1]), + transforms.RandomErasing(p=1), + ] + ) + + out = t(sample) + + assert type(out) == type(sample) + + if dataset_return_type is tuple: + out_image, out_label = out + else: + assert out.keys() == sample.keys() + out_image, out_label = out.values() + + assert out_image.shape[-2:] == (224, 224) + assert out_label == label + + +@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, tv_tensors.Image)) +@pytest.mark.parametrize("data_augmentation", ("hflip", "lsj", "multiscale", "ssd", "ssdlite")) +@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImage)) +@pytest.mark.parametrize("sanitize", (True, False)) +def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize): + torch.manual_seed(0) + + if to_tensor is transforms.ToTensor: + with pytest.warns(UserWarning, match="deprecated and will be removed"): + to_tensor = to_tensor() + else: + to_tensor = to_tensor() + + if data_augmentation == "hflip": + t = [ + transforms.RandomHorizontalFlip(p=1), + to_tensor, + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "lsj": + t = [ + transforms.ScaleJitter(target_size=(1024, 1024), antialias=True), + # Note: replaced FixedSizeCrop with RandomCrop, becuase we're + # leaving FixedSizeCrop in prototype for now, and it expects Label + # classes which we won't release yet. 
+ # transforms.FixedSizeCrop( + # size=(1024, 1024), fill=defaultdict(lambda: (123.0, 117.0, 104.0), {tv_tensors.Mask: 0}) + # ), + transforms.RandomCrop((1024, 1024), pad_if_needed=True), + transforms.RandomHorizontalFlip(p=1), + to_tensor, + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "multiscale": + t = [ + transforms.RandomShortestSize( + min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333, antialias=True + ), + transforms.RandomHorizontalFlip(p=1), + to_tensor, + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "ssd": + t = [ + transforms.RandomPhotometricDistort(p=1), + transforms.RandomZoomOut(fill={"others": (123.0, 117.0, 104.0), tv_tensors.Mask: 0}, p=1), + transforms.RandomIoUCrop(), + transforms.RandomHorizontalFlip(p=1), + to_tensor, + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "ssdlite": + t = [ + transforms.RandomIoUCrop(), + transforms.RandomHorizontalFlip(p=1), + to_tensor, + transforms.ConvertImageDtype(torch.float), + ] + if sanitize: + t += [transforms.SanitizeBoundingBoxes()] + t = transforms.Compose(t) + + num_boxes = 5 + H = W = 250 + + image = tv_tensors.Image(torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8)) + if image_type is PIL.Image: + image = to_pil_image(image[0]) + elif image_type is torch.Tensor: + image = image.as_subclass(torch.Tensor) + assert is_pure_tensor(image) + + label = torch.randint(0, 10, size=(num_boxes,)) + + boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4)) + boxes[:, 2:] += boxes[:, :2] + boxes = boxes.clamp(min=0, max=min(H, W)) + boxes = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=(H, W)) + + masks = tv_tensors.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8)) + + sample = { + "image": image, + "label": label, + "boxes": boxes, + "masks": masks, + } + + out = t(sample) + + if isinstance(to_tensor, transforms.ToTensor) and image_type is not 
tv_tensors.Image: + assert is_pure_tensor(out["image"]) + else: + assert isinstance(out["image"], tv_tensors.Image) + assert isinstance(out["label"], type(sample["label"])) + + num_boxes_expected = { + # ssd and ssdlite contain RandomIoUCrop which may "remove" some bbox. It + # doesn't remove them strictly speaking, it just marks some boxes as + # degenerate and those boxes will be later removed by + # SanitizeBoundingBoxes(), which we add to the pipelines if the sanitize + # param is True. + # Note that the values below are probably specific to the random seed + # set above (which is fine). + (True, "ssd"): 5, + (True, "ssdlite"): 4, + }.get((sanitize, data_augmentation), num_boxes) + + assert out["boxes"].shape[0] == out["masks"].shape[0] == out["label"].shape[0] == num_boxes_expected + + +class TestSanitizeBoundingBoxes: + def _get_boxes_and_valid_mask(self, H=256, W=128, min_size=10, min_area=10): + boxes_and_validity = [ + ([0, 1, 10, 1], False), # Y1 == Y2 + ([0, 1, 0, 20], False), # X1 == X2 + ([0, 0, min_size - 1, 10], False), # H < min_size + ([0, 0, 10, min_size - 1], False), # W < min_size + ([0, 0, 10, H + 1], False), # Y2 > H + ([0, 0, W + 1, 10], False), # X2 > W + ([-1, 1, 10, 20], False), # any < 0 + ([0, 0, -1, 20], False), # any < 0 + ([0, 0, -10, -1], False), # any < 0 + ([0, 0, min_size, 10], min_size * 10 >= min_area), # H < min_size + ([0, 0, 10, min_size], min_size * 10 >= min_area), # W < min_size + ([0, 0, W, H], W * H >= min_area), + ([1, 1, 30, 20], 29 * 19 >= min_area), + ([0, 0, 10, 10], 9 * 9 >= min_area), + ([1, 1, 30, 20], 29 * 19 >= min_area), + ] + + random.shuffle(boxes_and_validity) # For test robustness: mix order of wrong and correct cases + boxes, expected_valid_mask = zip(*boxes_and_validity) + boxes = tv_tensors.BoundingBoxes( + boxes, + format=tv_tensors.BoundingBoxFormat.XYXY, + canvas_size=(H, W), + ) + + return boxes, expected_valid_mask + + @pytest.mark.parametrize("min_size, min_area", ((1, 1), (10, 1), (10, 101))) + 
@pytest.mark.parametrize( + "labels_getter", + ( + "default", + lambda inputs: inputs["labels"], + lambda inputs: (inputs["labels"], inputs["other_labels"]), + lambda inputs: [inputs["labels"], inputs["other_labels"]], + None, + lambda inputs: None, + ), + ) + @pytest.mark.parametrize("sample_type", (tuple, dict)) + def test_transform(self, min_size, min_area, labels_getter, sample_type): + + if sample_type is tuple and not isinstance(labels_getter, str): + # The "lambda inputs: inputs["labels"]" labels_getter used in this test + # doesn't work if the input is a tuple. + return + + H, W = 256, 128 + boxes, expected_valid_mask = self._get_boxes_and_valid_mask(H=H, W=W, min_size=min_size, min_area=min_area) + valid_indices = [i for (i, is_valid) in enumerate(expected_valid_mask) if is_valid] + + labels = torch.arange(boxes.shape[0]) + masks = tv_tensors.Mask(torch.randint(0, 2, size=(boxes.shape[0], H, W))) + # other_labels corresponds to properties from COCO like iscrowd, area... + # We only sanitize it when labels_getter returns a tuple + other_labels = torch.arange(boxes.shape[0]) + whatever = torch.rand(10) + input_img = torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8) + sample = { + "image": input_img, + "labels": labels, + "boxes": boxes, + "other_labels": other_labels, + "whatever": whatever, + "None": None, + "masks": masks, + } + + if sample_type is tuple: + img = sample.pop("image") + sample = (img, sample) + + out = transforms.SanitizeBoundingBoxes(min_size=min_size, min_area=min_area, labels_getter=labels_getter)( + sample + ) + + if sample_type is tuple: + out_image = out[0] + out_labels = out[1]["labels"] + out_other_labels = out[1]["other_labels"] + out_boxes = out[1]["boxes"] + out_masks = out[1]["masks"] + out_whatever = out[1]["whatever"] + else: + out_image = out["image"] + out_labels = out["labels"] + out_other_labels = out["other_labels"] + out_boxes = out["boxes"] + out_masks = out["masks"] + out_whatever = out["whatever"] + + assert 
out_image is input_img + assert out_whatever is whatever + + assert isinstance(out_boxes, tv_tensors.BoundingBoxes) + assert isinstance(out_masks, tv_tensors.Mask) + + if labels_getter is None or (callable(labels_getter) and labels_getter(sample) is None): + assert out_labels is labels + assert out_other_labels is other_labels + else: + assert isinstance(out_labels, torch.Tensor) + assert out_boxes.shape[0] == out_labels.shape[0] == out_masks.shape[0] + # This works because we conveniently set labels to arange(num_boxes) + assert out_labels.tolist() == valid_indices + + if callable(labels_getter) and isinstance(labels_getter(sample), (tuple, list)): + assert_equal(out_other_labels, out_labels) + else: + assert_equal(out_other_labels, other_labels) + + @pytest.mark.parametrize("input_type", (torch.Tensor, tv_tensors.BoundingBoxes)) + def test_functional(self, input_type): + # Note: the "functional" F.sanitize_bounding_boxes was added after the class, so there is some + # redundancy with test_transform() in terms of correctness checks. But that's OK. 
+ + H, W, min_size = 256, 128, 10 + + boxes, expected_valid_mask = self._get_boxes_and_valid_mask(H=H, W=W, min_size=min_size) + + if input_type is tv_tensors.BoundingBoxes: + format = canvas_size = None + else: + # just passing "XYXY" explicitly to make sure we support strings + format, canvas_size = "XYXY", boxes.canvas_size + boxes = boxes.as_subclass(torch.Tensor) + + boxes, valid = F.sanitize_bounding_boxes(boxes, format=format, canvas_size=canvas_size, min_size=min_size) + + assert_equal(valid, torch.tensor(expected_valid_mask)) + assert type(valid) == torch.Tensor + assert boxes.shape[0] == sum(valid) + assert isinstance(boxes, input_type) + + def test_kernel(self): + H, W, min_size = 256, 128, 10 + boxes, _ = self._get_boxes_and_valid_mask(H=H, W=W, min_size=min_size) + + format, canvas_size = boxes.format, boxes.canvas_size + boxes = boxes.as_subclass(torch.Tensor) + + check_kernel( + F.sanitize_bounding_boxes, + input=boxes, + format=format, + canvas_size=canvas_size, + check_batched_vs_unbatched=False, + ) + + def test_no_label(self): + # Non-regression test for https://github.com/pytorch/vision/issues/7878 + + img = make_image() + boxes = make_bounding_boxes() + + with pytest.raises(ValueError, match="or a two-tuple whose second item is a dict"): + transforms.SanitizeBoundingBoxes()(img, boxes) + + out_img, out_boxes = transforms.SanitizeBoundingBoxes(labels_getter=None)(img, boxes) + assert isinstance(out_img, tv_tensors.Image) + assert isinstance(out_boxes, tv_tensors.BoundingBoxes) + + def test_errors_transform(self): + good_bbox = tv_tensors.BoundingBoxes( + [[0, 0, 10, 10]], + format=tv_tensors.BoundingBoxFormat.XYXY, + canvas_size=(20, 20), + ) + + with pytest.raises(ValueError, match="min_size must be >= 1"): + transforms.SanitizeBoundingBoxes(min_size=0) + with pytest.raises(ValueError, match="min_area must be >= 1"): + transforms.SanitizeBoundingBoxes(min_area=0) + with pytest.raises(ValueError, match="labels_getter should either be 
'default'"): + transforms.SanitizeBoundingBoxes(labels_getter=12) + + with pytest.raises(ValueError, match="Could not infer where the labels are"): + bad_labels_key = {"bbox": good_bbox, "BAD_KEY": torch.arange(good_bbox.shape[0])} + transforms.SanitizeBoundingBoxes()(bad_labels_key) + + with pytest.raises(ValueError, match="must be a tensor"): + not_a_tensor = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0]).tolist()} + transforms.SanitizeBoundingBoxes()(not_a_tensor) + + with pytest.raises(ValueError, match="Number of boxes"): + different_sizes = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0] + 3)} + transforms.SanitizeBoundingBoxes()(different_sizes) + + def test_errors_functional(self): + + good_bbox = tv_tensors.BoundingBoxes( + [[0, 0, 10, 10]], + format=tv_tensors.BoundingBoxFormat.XYXY, + canvas_size=(20, 20), + ) + + with pytest.raises(ValueError, match="canvas_size cannot be None if bounding_boxes is a pure tensor"): + F.sanitize_bounding_boxes(good_bbox.as_subclass(torch.Tensor), format="XYXY", canvas_size=None) + + with pytest.raises(ValueError, match="canvas_size cannot be None if bounding_boxes is a pure tensor"): + F.sanitize_bounding_boxes(good_bbox.as_subclass(torch.Tensor), format=None, canvas_size=(10, 10)) + + with pytest.raises(ValueError, match="canvas_size must be None when bounding_boxes is a tv_tensors"): + F.sanitize_bounding_boxes(good_bbox, format="XYXY", canvas_size=None) + + with pytest.raises(ValueError, match="canvas_size must be None when bounding_boxes is a tv_tensors"): + F.sanitize_bounding_boxes(good_bbox, format="XYXY", canvas_size=None) + + with pytest.raises(ValueError, match="bounding_boxes must be a tv_tensors.BoundingBoxes instance or a"): + F.sanitize_bounding_boxes(good_bbox.tolist()) + + +class TestJPEG: + @pytest.mark.parametrize("quality", [5, 75]) + @pytest.mark.parametrize("color_space", ["RGB", "GRAY"]) + def test_kernel_image(self, quality, color_space): + check_kernel(F.jpeg_image, 
make_image(color_space=color_space), quality=quality) + + def test_kernel_video(self): + check_kernel(F.jpeg_video, make_video(), quality=5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_functional(self, make_input): + check_functional(F.jpeg, make_input(), quality=5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.jpeg_image, torch.Tensor), + (F._augment._jpeg_image_pil, PIL.Image.Image), + (F.jpeg_image, tv_tensors.Image), + (F.jpeg_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.jpeg, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + @pytest.mark.parametrize("quality", [5, (10, 20)]) + @pytest.mark.parametrize("color_space", ["RGB", "GRAY"]) + def test_transform(self, make_input, quality, color_space): + check_transform(transforms.JPEG(quality=quality), make_input(color_space=color_space)) + + @pytest.mark.parametrize("quality", [5]) + def test_functional_image_correctness(self, quality): + image = make_image() + + actual = F.jpeg(image, quality=quality) + expected = F.to_image(F.jpeg(F.to_pil_image(image), quality=quality)) + + # NOTE: this will fail if torchvision and Pillow use different JPEG encoder/decoder + torch.testing.assert_close(actual, expected, rtol=0, atol=1) + + @pytest.mark.parametrize("quality", [5, (10, 20)]) + @pytest.mark.parametrize("color_space", ["RGB", "GRAY"]) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_image_correctness(self, quality, color_space, seed): + image = make_image(color_space=color_space) + + transform = transforms.JPEG(quality=quality) + + with freeze_rng_state(): + torch.manual_seed(seed) + actual = transform(image) + + torch.manual_seed(seed) + expected = F.to_image(transform(F.to_pil_image(image))) + + 
torch.testing.assert_close(actual, expected, rtol=0, atol=1) + + @pytest.mark.parametrize("quality", [5, (10, 20)]) + @pytest.mark.parametrize("seed", list(range(10))) + def test_transformmake_params_bounds(self, quality, seed): + transform = transforms.JPEG(quality=quality) + + with freeze_rng_state(): + torch.manual_seed(seed) + params = transform.make_params([]) + + if isinstance(quality, int): + assert params["quality"] == quality + else: + assert quality[0] <= params["quality"] <= quality[1] + + @pytest.mark.parametrize("quality", [[0], [0, 0, 0]]) + def test_transform_sequence_len_error(self, quality): + with pytest.raises(ValueError, match="quality should be a sequence of length 2"): + transforms.JPEG(quality=quality) + + @pytest.mark.parametrize("quality", [-1, 0, 150]) + def test_transform_invalid_quality_error(self, quality): + with pytest.raises(ValueError, match="quality must be an integer from 1 to 100"): + transforms.JPEG(quality=quality) + + +class TestUtils: + # TODO: Still need to test has_all, has_any, check_type and get_bouding_boxes + @pytest.mark.parametrize( + "make_input1", [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask] + ) + @pytest.mark.parametrize( + "make_input2", [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask] + ) + @pytest.mark.parametrize("query", [transforms.query_size, transforms.query_chw]) + def test_query_size_and_query_chw(self, make_input1, make_input2, query): + size = (32, 64) + input1 = make_input1(size) + input2 = make_input2(size) + + if query is transforms.query_chw and not any( + transforms.check_type(inpt, (is_pure_tensor, tv_tensors.Image, PIL.Image.Image, tv_tensors.Video)) + for inpt in (input1, input2) + ): + return + + expected = size if query is transforms.query_size else ((3,) + size) + assert query([input1, input2]) == expected + + @pytest.mark.parametrize( + "make_input1", [make_image_tensor, make_image_pil, make_image, 
make_bounding_boxes, make_segmentation_mask] + ) + @pytest.mark.parametrize( + "make_input2", [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask] + ) + @pytest.mark.parametrize("query", [transforms.query_size, transforms.query_chw]) + def test_different_sizes(self, make_input1, make_input2, query): + input1 = make_input1((10, 10)) + input2 = make_input2((20, 20)) + if query is transforms.query_chw and not all( + transforms.check_type(inpt, (is_pure_tensor, tv_tensors.Image, PIL.Image.Image, tv_tensors.Video)) + for inpt in (input1, input2) + ): + return + with pytest.raises(ValueError, match="Found multiple"): + query([input1, input2]) + + @pytest.mark.parametrize("query", [transforms.query_size, transforms.query_chw]) + def test_no_valid_input(self, query): + with pytest.raises(TypeError, match="No image"): + query(["blah"]) diff --git a/test/test_transforms_v2_utils.py b/test/test_transforms_v2_utils.py new file mode 100644 index 00000000000..53222c6a2c8 --- /dev/null +++ b/test/test_transforms_v2_utils.py @@ -0,0 +1,92 @@ +import PIL.Image +import pytest + +import torch + +import torchvision.transforms.v2._utils +from common_utils import DEFAULT_SIZE, make_bounding_boxes, make_detection_masks, make_image + +from torchvision import tv_tensors +from torchvision.transforms.v2._utils import has_all, has_any +from torchvision.transforms.v2.functional import to_pil_image + + +IMAGE = make_image(DEFAULT_SIZE, color_space="RGB") +BOUNDING_BOX = make_bounding_boxes(DEFAULT_SIZE, format=tv_tensors.BoundingBoxFormat.XYXY) +MASK = make_detection_masks(DEFAULT_SIZE) + + +@pytest.mark.parametrize( + ("sample", "types", "expected"), + [ + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.Image,), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.BoundingBoxes,), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.Mask,), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.Image, tv_tensors.BoundingBoxes), True), + ((IMAGE, BOUNDING_BOX, MASK), 
(tv_tensors.Image, tv_tensors.Mask), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.BoundingBoxes, tv_tensors.Mask), True), + ((MASK,), (tv_tensors.Image, tv_tensors.BoundingBoxes), False), + ((BOUNDING_BOX,), (tv_tensors.Image, tv_tensors.Mask), False), + ((IMAGE,), (tv_tensors.BoundingBoxes, tv_tensors.Mask), False), + ( + (IMAGE, BOUNDING_BOX, MASK), + (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask), + True, + ), + ((), (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask), False), + ((IMAGE, BOUNDING_BOX, MASK), (lambda obj: isinstance(obj, tv_tensors.Image),), True), + ((IMAGE, BOUNDING_BOX, MASK), (lambda _: False,), False), + ((IMAGE, BOUNDING_BOX, MASK), (lambda _: True,), True), + ((IMAGE,), (tv_tensors.Image, PIL.Image.Image, torchvision.transforms.v2._utils.is_pure_tensor), True), + ( + (torch.Tensor(IMAGE),), + (tv_tensors.Image, PIL.Image.Image, torchvision.transforms.v2._utils.is_pure_tensor), + True, + ), + ( + (to_pil_image(IMAGE),), + (tv_tensors.Image, PIL.Image.Image, torchvision.transforms.v2._utils.is_pure_tensor), + True, + ), + ], +) +def test_has_any(sample, types, expected): + assert has_any(sample, *types) is expected + + +@pytest.mark.parametrize( + ("sample", "types", "expected"), + [ + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.Image,), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.BoundingBoxes,), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.Mask,), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.Image, tv_tensors.BoundingBoxes), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.Image, tv_tensors.Mask), True), + ((IMAGE, BOUNDING_BOX, MASK), (tv_tensors.BoundingBoxes, tv_tensors.Mask), True), + ( + (IMAGE, BOUNDING_BOX, MASK), + (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask), + True, + ), + ((BOUNDING_BOX, MASK), (tv_tensors.Image, tv_tensors.BoundingBoxes), False), + ((BOUNDING_BOX, MASK), (tv_tensors.Image, tv_tensors.Mask), False), + ((IMAGE, MASK), (tv_tensors.BoundingBoxes, 
tv_tensors.Mask), False), + ( + (IMAGE, BOUNDING_BOX, MASK), + (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask), + True, + ), + ((BOUNDING_BOX, MASK), (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask), False), + ((IMAGE, MASK), (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask), False), + ((IMAGE, BOUNDING_BOX), (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask), False), + ( + (IMAGE, BOUNDING_BOX, MASK), + (lambda obj: isinstance(obj, (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask)),), + True, + ), + ((IMAGE, BOUNDING_BOX, MASK), (lambda _: False,), False), + ((IMAGE, BOUNDING_BOX, MASK), (lambda _: True,), True), + ], +) +def test_has_all(sample, types, expected): + assert has_all(sample, *types) is expected diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py index 296d519f5c4..4ad57e6a98e 100644 --- a/test/test_transforms_video.py +++ b/test/test_transforms_video.py @@ -1,10 +1,11 @@ -from __future__ import division -import torch -import torchvision.transforms._transforms_video as transforms -from torchvision.transforms import Compose -import unittest import random +import warnings + import numpy as np +import pytest +import torch +from common_utils import assert_equal +from torchvision.transforms import Compose try: from scipy import stats @@ -12,21 +13,27 @@ stats = None -class TestVideoTransforms(unittest.TestCase): +with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + import torchvision.transforms._transforms_video as transforms + +class TestVideoTransforms: def test_random_crop_video(self): numFrames = random.randint(4, 128) height = random.randint(10, 32) * 2 width = random.randint(10, 32) * 2 - oheight = random.randint(5, (height - 2) / 2) * 2 - owidth = random.randint(5, (width - 2) / 2) * 2 + oheight = random.randint(5, (height - 2) // 2) * 2 + owidth = random.randint(5, (width - 2) // 2) * 2 clip = torch.randint(0, 256, (numFrames, height, 
width, 3), dtype=torch.uint8) - result = Compose([ - transforms.ToTensorVideo(), - transforms.RandomCropVideo((oheight, owidth)), - ])(clip) - self.assertEqual(result.size(2), oheight) - self.assertEqual(result.size(3), owidth) + result = Compose( + [ + transforms.ToTensorVideo(), + transforms.RandomCropVideo((oheight, owidth)), + ] + )(clip) + assert result.size(2) == oheight + assert result.size(3) == owidth transforms.RandomCropVideo((oheight, owidth)).__repr__() @@ -34,15 +41,17 @@ def test_random_resized_crop_video(self): numFrames = random.randint(4, 128) height = random.randint(10, 32) * 2 width = random.randint(10, 32) * 2 - oheight = random.randint(5, (height - 2) / 2) * 2 - owidth = random.randint(5, (width - 2) / 2) * 2 + oheight = random.randint(5, (height - 2) // 2) * 2 + owidth = random.randint(5, (width - 2) // 2) * 2 clip = torch.randint(0, 256, (numFrames, height, width, 3), dtype=torch.uint8) - result = Compose([ - transforms.ToTensorVideo(), - transforms.RandomResizedCropVideo((oheight, owidth)), - ])(clip) - self.assertEqual(result.size(2), oheight) - self.assertEqual(result.size(3), owidth) + result = Compose( + [ + transforms.ToTensorVideo(), + transforms.RandomResizedCropVideo((oheight, owidth)), + ] + )(clip) + assert result.size(2) == oheight + assert result.size(3) == owidth transforms.RandomResizedCropVideo((oheight, owidth)).__repr__() @@ -50,73 +59,83 @@ def test_center_crop_video(self): numFrames = random.randint(4, 128) height = random.randint(10, 32) * 2 width = random.randint(10, 32) * 2 - oheight = random.randint(5, (height - 2) / 2) * 2 - owidth = random.randint(5, (width - 2) / 2) * 2 + oheight = random.randint(5, (height - 2) // 2) * 2 + owidth = random.randint(5, (width - 2) // 2) * 2 clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8) * 255 oh1 = (height - oheight) // 2 ow1 = (width - owidth) // 2 - clipNarrow = clip[:, oh1:oh1 + oheight, ow1:ow1 + owidth, :] + clipNarrow = clip[:, oh1 : oh1 + oheight, ow1 : 
ow1 + owidth, :] clipNarrow.fill_(0) - result = Compose([ - transforms.ToTensorVideo(), - transforms.CenterCropVideo((oheight, owidth)), - ])(clip) - - msg = "height: " + str(height) + " width: " \ - + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth) - self.assertEqual(result.sum().item(), 0, msg) + result = Compose( + [ + transforms.ToTensorVideo(), + transforms.CenterCropVideo((oheight, owidth)), + ] + )(clip) + + msg = ( + "height: " + str(height) + " width: " + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth) + ) + assert result.sum().item() == 0, msg oheight += 1 owidth += 1 - result = Compose([ - transforms.ToTensorVideo(), - transforms.CenterCropVideo((oheight, owidth)), - ])(clip) + result = Compose( + [ + transforms.ToTensorVideo(), + transforms.CenterCropVideo((oheight, owidth)), + ] + )(clip) sum1 = result.sum() - msg = "height: " + str(height) + " width: " \ - + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth) - self.assertEqual(sum1.item() > 1, True, msg) + msg = ( + "height: " + str(height) + " width: " + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth) + ) + assert sum1.item() > 1, msg oheight += 1 owidth += 1 - result = Compose([ - transforms.ToTensorVideo(), - transforms.CenterCropVideo((oheight, owidth)), - ])(clip) + result = Compose( + [ + transforms.ToTensorVideo(), + transforms.CenterCropVideo((oheight, owidth)), + ] + )(clip) sum2 = result.sum() - msg = "height: " + str(height) + " width: " \ - + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth) - self.assertTrue(sum2.item() > 1, msg) - self.assertTrue(sum2.item() > sum1.item(), msg) + msg = ( + "height: " + str(height) + " width: " + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth) + ) + assert sum2.item() > 1, msg + assert sum2.item() > sum1.item(), msg - @unittest.skipIf(stats is None, 'scipy.stats is not available') - def test_normalize_video(self): + 
@pytest.mark.skipif(stats is None, reason="scipy.stats is not available") + @pytest.mark.parametrize("channels", [1, 3]) + def test_normalize_video(self, channels): def samples_from_standard_normal(tensor): - p_value = stats.kstest(list(tensor.view(-1)), 'norm', args=(0, 1)).pvalue + p_value = stats.kstest(list(tensor.view(-1)), "norm", args=(0, 1)).pvalue return p_value > 0.0001 random_state = random.getstate() random.seed(42) - for channels in [1, 3]: - numFrames = random.randint(4, 128) - height = random.randint(32, 256) - width = random.randint(32, 256) - mean = random.random() - std = random.random() - clip = torch.normal(mean, std, size=(channels, numFrames, height, width)) - mean = [clip[c].mean().item() for c in range(channels)] - std = [clip[c].std().item() for c in range(channels)] - normalized = transforms.NormalizeVideo(mean, std)(clip) - self.assertTrue(samples_from_standard_normal(normalized)) + + numFrames = random.randint(4, 128) + height = random.randint(32, 256) + width = random.randint(32, 256) + mean = random.random() + std = random.random() + clip = torch.normal(mean, std, size=(channels, numFrames, height, width)) + mean = [clip[c].mean().item() for c in range(channels)] + std = [clip[c].std().item() for c in range(channels)] + normalized = transforms.NormalizeVideo(mean, std)(clip) + assert samples_from_standard_normal(normalized) random.setstate(random_state) # Checking the optional in-place behaviour tensor = torch.rand((3, 128, 16, 16)) tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)(tensor) - self.assertTrue(torch.equal(tensor, tensor_inplace)) + assert_equal(tensor, tensor_inplace) transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True).__repr__() @@ -124,49 +143,36 @@ def test_to_tensor_video(self): numFrames, height, width = 64, 4, 4 trans = transforms.ToTensorVideo() - with self.assertRaises(TypeError): - trans(np.random.rand(numFrames, height, width, 1).tolist()) + with 
pytest.raises(TypeError): + np_rng = np.random.RandomState(0) + trans(np_rng.rand(numFrames, height, width, 1).tolist()) + with pytest.raises(TypeError): trans(torch.rand((numFrames, height, width, 1), dtype=torch.float)) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): trans(torch.ones((3, numFrames, height, width, 3), dtype=torch.uint8)) + with pytest.raises(ValueError): trans(torch.ones((height, width, 3), dtype=torch.uint8)) + with pytest.raises(ValueError): trans(torch.ones((width, 3), dtype=torch.uint8)) + with pytest.raises(ValueError): trans(torch.ones((3), dtype=torch.uint8)) trans.__repr__() - @unittest.skipIf(stats is None, 'scipy.stats not available') - def test_random_horizontal_flip_video(self): - random_state = random.getstate() - random.seed(42) + @pytest.mark.parametrize("p", (0, 1)) + def test_random_horizontal_flip_video(self, p): clip = torch.rand((3, 4, 112, 112), dtype=torch.float) - hclip = clip.flip((-1)) - - num_samples = 250 - num_horizontal = 0 - for _ in range(num_samples): - out = transforms.RandomHorizontalFlipVideo()(clip) - if torch.all(torch.eq(out, hclip)): - num_horizontal += 1 + hclip = clip.flip(-1) - p_value = stats.binom_test(num_horizontal, num_samples, p=0.5) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) - - num_samples = 250 - num_horizontal = 0 - for _ in range(num_samples): - out = transforms.RandomHorizontalFlipVideo(p=0.7)(clip) - if torch.all(torch.eq(out, hclip)): - num_horizontal += 1 - - p_value = stats.binom_test(num_horizontal, num_samples, p=0.7) - random.setstate(random_state) - self.assertGreater(p_value, 0.0001) + out = transforms.RandomHorizontalFlipVideo(p=p)(clip) + if p == 0: + torch.testing.assert_close(out, clip) + elif p == 1: + torch.testing.assert_close(out, hclip) transforms.RandomHorizontalFlipVideo().__repr__() -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_tv_tensors.py 
b/test/test_tv_tensors.py new file mode 100644 index 00000000000..ed75ae35ecd --- /dev/null +++ b/test/test_tv_tensors.py @@ -0,0 +1,320 @@ +from copy import deepcopy + +import pytest +import torch +from common_utils import assert_equal, make_bounding_boxes, make_image, make_segmentation_mask, make_video +from PIL import Image + +from torchvision import tv_tensors + + +@pytest.fixture(autouse=True) +def restore_tensor_return_type(): + # This is for security, as we should already be restoring the default manually in each test anyway + # (at least at the time of writing...) + yield + tv_tensors.set_return_type("Tensor") + + +@pytest.mark.parametrize("data", [torch.rand(3, 32, 32), Image.new("RGB", (32, 32), color=123)]) +def test_image_instance(data): + image = tv_tensors.Image(data) + assert isinstance(image, torch.Tensor) + assert image.ndim == 3 and image.shape[0] == 3 + + +@pytest.mark.parametrize("data", [torch.randint(0, 10, size=(1, 32, 32)), Image.new("L", (32, 32), color=2)]) +def test_mask_instance(data): + mask = tv_tensors.Mask(data) + assert isinstance(mask, torch.Tensor) + assert mask.ndim == 3 and mask.shape[0] == 1 + + +@pytest.mark.parametrize("data", [torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]], [1, 2, 3, 4]]) +@pytest.mark.parametrize( + "format", ["XYXY", "CXCYWH", tv_tensors.BoundingBoxFormat.XYXY, tv_tensors.BoundingBoxFormat.XYWH] +) +def test_bbox_instance(data, format): + bboxes = tv_tensors.BoundingBoxes(data, format=format, canvas_size=(32, 32)) + assert isinstance(bboxes, torch.Tensor) + assert bboxes.ndim == 2 and bboxes.shape[1] == 4 + if isinstance(format, str): + format = tv_tensors.BoundingBoxFormat[(format.upper())] + assert bboxes.format == format + + +def test_bbox_dim_error(): + data_3d = [[[1, 2, 3, 4]]] + with pytest.raises(ValueError, match="Expected a 1D or 2D tensor, got 3D"): + tv_tensors.BoundingBoxes(data_3d, format="XYXY", canvas_size=(32, 32)) + + +@pytest.mark.parametrize( + ("data", 
"input_requires_grad", "expected_requires_grad"), + [ + ([[[0.0, 1.0], [0.0, 1.0]]], None, False), + ([[[0.0, 1.0], [0.0, 1.0]]], False, False), + ([[[0.0, 1.0], [0.0, 1.0]]], True, True), + (torch.rand(3, 16, 16, requires_grad=False), None, False), + (torch.rand(3, 16, 16, requires_grad=False), False, False), + (torch.rand(3, 16, 16, requires_grad=False), True, True), + (torch.rand(3, 16, 16, requires_grad=True), None, True), + (torch.rand(3, 16, 16, requires_grad=True), False, False), + (torch.rand(3, 16, 16, requires_grad=True), True, True), + ], +) +def test_new_requires_grad(data, input_requires_grad, expected_requires_grad): + tv_tensor = tv_tensors.Image(data, requires_grad=input_requires_grad) + assert tv_tensor.requires_grad is expected_requires_grad + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +def test_isinstance(make_input): + assert isinstance(make_input(), torch.Tensor) + + +def test_wrapping_no_copy(): + tensor = torch.rand(3, 16, 16) + image = tv_tensors.Image(tensor) + + assert image.data_ptr() == tensor.data_ptr() + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +def test_to_wrapping(make_input): + dp = make_input() + + dp_to = dp.to(torch.float64) + + assert type(dp_to) is type(dp) + assert dp_to.dtype is torch.float64 + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +def test_to_tv_tensor_reference(make_input, return_type): + tensor = torch.rand((3, 16, 16), dtype=torch.float64) + dp = make_input() + + with tv_tensors.set_return_type(return_type): + tensor_to = tensor.to(dp) + + assert type(tensor_to) is (type(dp) if return_type == "TVTensor" else torch.Tensor) + assert tensor_to.dtype is dp.dtype + assert type(tensor) is torch.Tensor + + +@pytest.mark.parametrize("make_input", 
[make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +def test_clone_wrapping(make_input, return_type): + dp = make_input() + + with tv_tensors.set_return_type(return_type): + dp_clone = dp.clone() + + assert type(dp_clone) is type(dp) + assert dp_clone.data_ptr() != dp.data_ptr() + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +def test_requires_grad__wrapping(make_input, return_type): + dp = make_input(dtype=torch.float) + + assert not dp.requires_grad + + with tv_tensors.set_return_type(return_type): + dp_requires_grad = dp.requires_grad_(True) + + assert type(dp_requires_grad) is type(dp) + assert dp.requires_grad + assert dp_requires_grad.requires_grad + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +def test_detach_wrapping(make_input, return_type): + dp = make_input(dtype=torch.float).requires_grad_(True) + + with tv_tensors.set_return_type(return_type): + dp_detached = dp.detach() + + assert type(dp_detached) is type(dp) + + +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +def test_force_subclass_with_metadata(return_type): + # Sanity checks for the ops in _FORCE_TORCHFUNCTION_SUBCLASS and tv_tensors with metadata + # Largely the same as above, we additionally check that the metadata is preserved + format, canvas_size = "XYXY", (32, 32) + bbox = tv_tensors.BoundingBoxes([[0, 0, 5, 5], [2, 2, 7, 7]], format=format, canvas_size=canvas_size) + + tv_tensors.set_return_type(return_type) + bbox = bbox.clone() + if return_type == "TVTensor": + assert bbox.format, bbox.canvas_size == (format, canvas_size) + + bbox = bbox.to(torch.float64) + if return_type == "TVTensor": + assert bbox.format, bbox.canvas_size 
== (format, canvas_size) + + bbox = bbox.detach() + if return_type == "TVTensor": + assert bbox.format, bbox.canvas_size == (format, canvas_size) + + assert not bbox.requires_grad + bbox.requires_grad_(True) + if return_type == "TVTensor": + assert bbox.format, bbox.canvas_size == (format, canvas_size) + assert bbox.requires_grad + tv_tensors.set_return_type("tensor") + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +def test_other_op_no_wrapping(make_input, return_type): + dp = make_input() + + with tv_tensors.set_return_type(return_type): + # any operation besides the ones listed in _FORCE_TORCHFUNCTION_SUBCLASS will do here + output = dp * 2 + + assert type(output) is (type(dp) if return_type == "TVTensor" else torch.Tensor) + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize( + "op", + [ + lambda t: t.numpy(), + lambda t: t.tolist(), + lambda t: t.max(dim=-1), + ], +) +def test_no_tensor_output_op_no_wrapping(make_input, op): + dp = make_input() + + output = op(dp) + + assert type(output) is not type(dp) + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +def test_inplace_op_no_wrapping(make_input, return_type): + dp = make_input() + original_type = type(dp) + + with tv_tensors.set_return_type(return_type): + output = dp.add_(0) + + assert type(output) is (type(dp) if return_type == "TVTensor" else torch.Tensor) + assert type(dp) is original_type + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +def test_wrap(make_input): + dp = make_input() + + # any operation besides the ones listed in _FORCE_TORCHFUNCTION_SUBCLASS will do here + output = dp * 2 + + 
dp_new = tv_tensors.wrap(output, like=dp) + + assert type(dp_new) is type(dp) + assert dp_new.data_ptr() == output.data_ptr() + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("requires_grad", [False, True]) +def test_deepcopy(make_input, requires_grad): + dp = make_input(dtype=torch.float) + + dp.requires_grad_(requires_grad) + + dp_deepcopied = deepcopy(dp) + + assert dp_deepcopied is not dp + assert dp_deepcopied.data_ptr() != dp.data_ptr() + assert_equal(dp_deepcopied, dp) + + assert type(dp_deepcopied) is type(dp) + assert dp_deepcopied.requires_grad is requires_grad + + +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("return_type", ["Tensor", "TVTensor"]) +@pytest.mark.parametrize( + "op", + ( + lambda dp: dp + torch.rand(*dp.shape), + lambda dp: torch.rand(*dp.shape) + dp, + lambda dp: dp * torch.rand(*dp.shape), + lambda dp: torch.rand(*dp.shape) * dp, + lambda dp: dp + 3, + lambda dp: 3 + dp, + lambda dp: dp + dp, + lambda dp: dp.sum(), + lambda dp: dp.reshape(-1), + lambda dp: dp.int(), + lambda dp: torch.stack([dp, dp]), + lambda dp: torch.chunk(dp, 2)[0], + lambda dp: torch.unbind(dp)[0], + ), +) +def test_usual_operations(make_input, return_type, op): + + dp = make_input() + with tv_tensors.set_return_type(return_type): + out = op(dp) + assert type(out) is (type(dp) if return_type == "TVTensor" else torch.Tensor) + if isinstance(dp, tv_tensors.BoundingBoxes) and return_type == "TVTensor": + assert hasattr(out, "format") + assert hasattr(out, "canvas_size") + + +def test_subclasses(): + img = make_image() + masks = make_segmentation_mask() + + with pytest.raises(TypeError, match="unsupported operand"): + img + masks + + +def test_set_return_type(): + img = make_image() + + assert type(img + 3) is torch.Tensor + + with tv_tensors.set_return_type("TVTensor"): + assert type(img + 
3) is tv_tensors.Image + assert type(img + 3) is torch.Tensor + + tv_tensors.set_return_type("TVTensor") + assert type(img + 3) is tv_tensors.Image + + with tv_tensors.set_return_type("tensor"): + assert type(img + 3) is torch.Tensor + with tv_tensors.set_return_type("TVTensor"): + assert type(img + 3) is tv_tensors.Image + tv_tensors.set_return_type("tensor") + assert type(img + 3) is torch.Tensor + assert type(img + 3) is torch.Tensor + # Exiting a context manager will restore the return type as it was prior to entering it, + # regardless of whether the "global" tv_tensors.set_return_type() was called within the context manager. + assert type(img + 3) is tv_tensors.Image + + tv_tensors.set_return_type("tensor") + + +def test_return_type_input(): + img = make_image() + + # Case-insensitive + with tv_tensors.set_return_type("tvtensor"): + assert type(img + 3) is tv_tensors.Image + + with pytest.raises(ValueError, match="return_type must be"): + tv_tensors.set_return_type("typo") + + tv_tensors.set_return_type("tensor") diff --git a/test/test_utils.py b/test/test_utils.py index f1982130f75..8dfe3a1080f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,84 +1,558 @@ import os +import re import sys import tempfile -import torch -import torchvision.utils as utils -import unittest from io import BytesIO + +import numpy as np +import pytest +import torch import torchvision.transforms.functional as F -from PIL import Image - - -class Tester(unittest.TestCase): - - def test_make_grid_not_inplace(self): - t = torch.rand(5, 3, 10, 10) - t_clone = t.clone() - - utils.make_grid(t, normalize=False) - self.assertTrue(torch.equal(t, t_clone), 'make_grid modified tensor in-place') - - utils.make_grid(t, normalize=True, scale_each=False) - self.assertTrue(torch.equal(t, t_clone), 'make_grid modified tensor in-place') - - utils.make_grid(t, normalize=True, scale_each=True) - self.assertTrue(torch.equal(t, t_clone), 'make_grid modified tensor in-place') - - def 
test_normalize_in_make_grid(self): - t = torch.rand(5, 3, 10, 10) * 255 - norm_max = torch.tensor(1.0) - norm_min = torch.tensor(0.0) - - grid = utils.make_grid(t, normalize=True) - grid_max = torch.max(grid) - grid_min = torch.min(grid) - - # Rounding the result to one decimal for comparison - n_digits = 1 - rounded_grid_max = torch.round(grid_max * 10 ** n_digits) / (10 ** n_digits) - rounded_grid_min = torch.round(grid_min * 10 ** n_digits) / (10 ** n_digits) - - self.assertTrue(torch.equal(norm_max, rounded_grid_max), 'Normalized max is not equal to 1') - self.assertTrue(torch.equal(norm_min, rounded_grid_min), 'Normalized min is not equal to 0') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_save_image(self): - with tempfile.NamedTemporaryFile(suffix='.png') as f: - t = torch.rand(2, 3, 64, 64) - utils.save_image(t, f.name) - self.assertTrue(os.path.exists(f.name), 'The image is not present after save') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_save_image_single_pixel(self): - with tempfile.NamedTemporaryFile(suffix='.png') as f: - t = torch.rand(1, 3, 1, 1) - utils.save_image(t, f.name) - self.assertTrue(os.path.exists(f.name), 'The pixel image is not present after save') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_save_image_file_object(self): - with tempfile.NamedTemporaryFile(suffix='.png') as f: - t = torch.rand(2, 3, 64, 64) - utils.save_image(t, f.name) - img_orig = Image.open(f.name) - fp = BytesIO() - utils.save_image(t, fp, format='png') - img_bytes = Image.open(fp) - self.assertTrue(torch.equal(F.to_tensor(img_orig), F.to_tensor(img_bytes)), - 'Image not stored in file object') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_save_image_single_pixel_file_object(self): - with tempfile.NamedTemporaryFile(suffix='.png') as f: - t = torch.rand(1, 3, 1, 1) - utils.save_image(t, 
f.name) - img_orig = Image.open(f.name) - fp = BytesIO() - utils.save_image(t, fp, format='png') - img_bytes = Image.open(fp) - self.assertTrue(torch.equal(F.to_tensor(img_orig), F.to_tensor(img_bytes)), - 'Pixel Image not stored in file object') - - -if __name__ == '__main__': - unittest.main() +import torchvision.utils as utils +from common_utils import assert_equal, cpu_and_cuda +from PIL import __version__ as PILLOW_VERSION, Image, ImageColor +from torchvision.transforms.v2.functional import to_dtype + + +PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split(".")) + +boxes = torch.tensor([[0, 0, 20, 20], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + +keypoints = torch.tensor([[[10, 10], [5, 5], [2, 2]], [[20, 20], [30, 30], [3, 3]]], dtype=torch.float) + + +def test_make_grid_not_inplace(): + t = torch.rand(5, 3, 10, 10) + t_clone = t.clone() + + utils.make_grid(t, normalize=False) + assert_equal(t, t_clone, msg="make_grid modified tensor in-place") + + utils.make_grid(t, normalize=True, scale_each=False) + assert_equal(t, t_clone, msg="make_grid modified tensor in-place") + + utils.make_grid(t, normalize=True, scale_each=True) + assert_equal(t, t_clone, msg="make_grid modified tensor in-place") + + +def test_normalize_in_make_grid(): + t = torch.rand(5, 3, 10, 10) * 255 + norm_max = torch.tensor(1.0) + norm_min = torch.tensor(0.0) + + grid = utils.make_grid(t, normalize=True) + grid_max = torch.max(grid) + grid_min = torch.min(grid) + + # Rounding the result to one decimal for comparison + n_digits = 1 + rounded_grid_max = torch.round(grid_max * 10**n_digits) / (10**n_digits) + rounded_grid_min = torch.round(grid_min * 10**n_digits) / (10**n_digits) + + assert_equal(norm_max, rounded_grid_max, msg="Normalized max is not equal to 1") + assert_equal(norm_min, rounded_grid_min, msg="Normalized min is not equal to 0") + + +@pytest.mark.skipif(sys.platform in ("win32", "cygwin"), reason="temporarily disabled on Windows") +def 
test_save_image(): + with tempfile.NamedTemporaryFile(suffix=".png") as f: + t = torch.rand(2, 3, 64, 64) + utils.save_image(t, f.name) + assert os.path.exists(f.name), "The image is not present after save" + + +@pytest.mark.skipif(sys.platform in ("win32", "cygwin"), reason="temporarily disabled on Windows") +def test_save_image_single_pixel(): + with tempfile.NamedTemporaryFile(suffix=".png") as f: + t = torch.rand(1, 3, 1, 1) + utils.save_image(t, f.name) + assert os.path.exists(f.name), "The pixel image is not present after save" + + +@pytest.mark.skipif(sys.platform in ("win32", "cygwin"), reason="temporarily disabled on Windows") +def test_save_image_file_object(): + with tempfile.NamedTemporaryFile(suffix=".png") as f: + t = torch.rand(2, 3, 64, 64) + utils.save_image(t, f.name) + img_orig = Image.open(f.name) + fp = BytesIO() + utils.save_image(t, fp, format="png") + img_bytes = Image.open(fp) + assert_equal(F.pil_to_tensor(img_orig), F.pil_to_tensor(img_bytes), msg="Image not stored in file object") + + +@pytest.mark.skipif(sys.platform in ("win32", "cygwin"), reason="temporarily disabled on Windows") +def test_save_image_single_pixel_file_object(): + with tempfile.NamedTemporaryFile(suffix=".png") as f: + t = torch.rand(1, 3, 1, 1) + utils.save_image(t, f.name) + img_orig = Image.open(f.name) + fp = BytesIO() + utils.save_image(t, fp, format="png") + img_bytes = Image.open(fp) + assert_equal(F.pil_to_tensor(img_orig), F.pil_to_tensor(img_bytes), msg="Image not stored in file object") + + +def test_draw_boxes(): + img = torch.full((3, 100, 100), 255, dtype=torch.uint8) + img_cp = img.clone() + boxes_cp = boxes.clone() + labels = ["a", "b", "c", "d"] + colors = ["green", "#FF00FF", (0, 255, 0), "red"] + result = utils.draw_bounding_boxes(img, boxes, labels=labels, colors=colors, fill=True) + + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_util.png") + if not os.path.exists(path): + res = 
Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + if PILLOW_VERSION >= (10, 1): + # The reference image is only valid for new PIL versions + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + assert_equal(result, expected) + + # Check if modification is not in place + assert_equal(boxes, boxes_cp) + assert_equal(img, img_cp) + + +@pytest.mark.skipif(PILLOW_VERSION < (10, 1), reason="The reference image is only valid for PIL >= 10.1") +def test_draw_boxes_with_coloured_labels(): + img = torch.full((3, 100, 100), 255, dtype=torch.uint8) + labels = ["a", "b", "c", "d"] + colors = ["green", "#FF00FF", (0, 255, 0), "red"] + label_colors = ["green", "red", (0, 255, 0), "#FF00FF"] + result = utils.draw_bounding_boxes(img, boxes, labels=labels, colors=colors, fill=True, label_colors=label_colors) + + path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_different_label_colors.png" + ) + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + assert_equal(result, expected) + + +@pytest.mark.parametrize("fill", [True, False]) +def test_draw_boxes_dtypes(fill): + img_uint8 = torch.full((3, 100, 100), 255, dtype=torch.uint8) + out_uint8 = utils.draw_bounding_boxes(img_uint8, boxes, fill=fill) + + assert img_uint8 is not out_uint8 + assert out_uint8.dtype == torch.uint8 + + img_float = to_dtype(img_uint8, torch.float, scale=True) + out_float = utils.draw_bounding_boxes(img_float, boxes, fill=fill) + + assert img_float is not out_float + assert out_float.is_floating_point() + + torch.testing.assert_close(out_uint8, to_dtype(out_float, torch.uint8, scale=True), rtol=0, atol=1) + + +@pytest.mark.parametrize("colors", [None, ["red", "blue", "#FF00FF", (1, 34, 122)], "red", "#FF00FF", (1, 34, 122)]) +def test_draw_boxes_colors(colors): + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + utils.draw_bounding_boxes(img, boxes, fill=False, width=7, 
colors=colors) + + with pytest.raises(ValueError, match="Number of colors must be equal or larger than the number of objects"): + utils.draw_bounding_boxes(image=img, boxes=boxes, colors=[]) + + +def test_draw_boxes_vanilla(): + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + img_cp = img.clone() + boxes_cp = boxes.clone() + result = utils.draw_bounding_boxes(img, boxes, fill=False, width=7, colors="white") + + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_vanilla.png") + if not os.path.exists(path): + res = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + assert_equal(result, expected) + # Check if modification is not in place + assert_equal(boxes, boxes_cp) + assert_equal(img, img_cp) + + +def test_draw_boxes_grayscale(): + img = torch.full((1, 4, 4), fill_value=255, dtype=torch.uint8) + boxes = torch.tensor([[0, 0, 3, 3]], dtype=torch.int64) + bboxed_img = utils.draw_bounding_boxes(image=img, boxes=boxes, colors=["#1BBC9B"]) + assert bboxed_img.size(0) == 3 + + +def test_draw_invalid_boxes(): + img_tp = ((1, 1, 1), (1, 2, 3)) + img_wrong2 = torch.full((1, 3, 5, 5), 255, dtype=torch.uint8) + img_correct = torch.zeros((3, 10, 10), dtype=torch.uint8) + boxes = torch.tensor([[0, 0, 20, 20], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + boxes_wrong = torch.tensor([[10, 10, 4, 5], [30, 20, 10, 5]], dtype=torch.float) + labels_wrong = ["one", "two"] + colors_wrong = ["pink", "blue"] + + with pytest.raises(TypeError, match="Tensor expected"): + utils.draw_bounding_boxes(img_tp, boxes) + with pytest.raises(ValueError, match="Pass individual images, not batches"): + utils.draw_bounding_boxes(img_wrong2, boxes) + with pytest.raises(ValueError, match="Only grayscale and RGB images are supported"): + utils.draw_bounding_boxes(img_wrong2[0][:2], boxes) + with pytest.raises(ValueError, 
match="Number of boxes"): + utils.draw_bounding_boxes(img_correct, boxes, labels_wrong) + with pytest.raises(ValueError, match="Number of colors"): + utils.draw_bounding_boxes(img_correct, boxes, colors=colors_wrong) + with pytest.raises(ValueError, match="Boxes need to be in"): + utils.draw_bounding_boxes(img_correct, boxes_wrong) + + +def test_draw_boxes_warning(): + img = torch.full((3, 100, 100), 255, dtype=torch.uint8) + + with pytest.warns(UserWarning, match=re.escape("Argument 'font_size' will be ignored since 'font' is not set.")): + utils.draw_bounding_boxes(img, boxes, font_size=11) + + +def test_draw_no_boxes(): + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + boxes = torch.full((0, 4), 0, dtype=torch.float) + with pytest.warns(UserWarning, match=re.escape("boxes doesn't contain any box. No box was drawn")): + res = utils.draw_bounding_boxes(img, boxes) + # Check that the function didn't change the image + assert res.eq(img).all() + + +@pytest.mark.parametrize( + "colors", + [ + None, + "blue", + "#FF00FF", + (1, 34, 122), + ["red", "blue"], + ["#FF00FF", (1, 34, 122)], + ], +) +@pytest.mark.parametrize("alpha", (0, 0.5, 0.7, 1)) +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_draw_segmentation_masks(colors, alpha, device): + """This test makes sure that masks draw their corresponding color where they should""" + num_masks, h, w = 2, 100, 100 + dtype = torch.uint8 + img = torch.randint(0, 256, size=(3, h, w), dtype=dtype, device=device) + masks = torch.zeros((num_masks, h, w), dtype=torch.bool, device=device) + masks[0, 10:20, 10:20] = True + masks[1, 15:25, 15:25] = True + + overlap = masks[0] & masks[1] + + out = utils.draw_segmentation_masks(img, masks, colors=colors, alpha=alpha) + assert out.dtype == dtype + assert out is not img + + # Make sure the image didn't change where there's no mask + masked_pixels = masks[0] | masks[1] + assert_equal(img[:, ~masked_pixels], out[:, ~masked_pixels]) + + if colors is None: + colors = 
utils._generate_color_palette(num_masks) + elif isinstance(colors, str) or isinstance(colors, tuple): + colors = [colors] + + # Make sure each mask draws with its own color + for mask, color in zip(masks, colors): + if isinstance(color, str): + color = ImageColor.getrgb(color) + color = torch.tensor(color, dtype=dtype, device=device) + + if alpha == 1: + assert (out[:, mask & ~overlap] == color[:, None]).all() + elif alpha == 0: + assert (out[:, mask & ~overlap] == img[:, mask & ~overlap]).all() + + interpolated_color = (img[:, mask & ~overlap] * (1 - alpha) + color[:, None] * alpha).to(dtype) + torch.testing.assert_close(out[:, mask & ~overlap], interpolated_color, rtol=0.0, atol=1.0) + + interpolated_overlap = (img[:, overlap] * (1 - alpha)).to(dtype) + torch.testing.assert_close(out[:, overlap], interpolated_overlap, rtol=0.0, atol=1.0) + + +def test_draw_segmentation_masks_dtypes(): + num_masks, h, w = 2, 100, 100 + + masks = torch.randint(0, 2, (num_masks, h, w), dtype=torch.bool) + + img_uint8 = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8) + out_uint8 = utils.draw_segmentation_masks(img_uint8, masks) + + assert img_uint8 is not out_uint8 + assert out_uint8.dtype == torch.uint8 + + img_float = to_dtype(img_uint8, torch.float, scale=True) + out_float = utils.draw_segmentation_masks(img_float, masks) + + assert img_float is not out_float + assert out_float.is_floating_point() + + torch.testing.assert_close(out_uint8, to_dtype(out_float, torch.uint8, scale=True), rtol=0, atol=1) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_draw_segmentation_masks_errors(device): + h, w = 10, 10 + + masks = torch.randint(0, 2, size=(h, w), dtype=torch.bool, device=device) + img = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8, device=device) + + with pytest.raises(TypeError, match="The image must be a tensor"): + utils.draw_segmentation_masks(image="Not A Tensor Image", masks=masks) + with pytest.raises(ValueError, match="The image dtype 
must be"): + img_bad_dtype = torch.randint(0, 256, size=(3, h, w), dtype=torch.int64) + utils.draw_segmentation_masks(image=img_bad_dtype, masks=masks) + with pytest.raises(ValueError, match="Pass individual images, not batches"): + batch = torch.randint(0, 256, size=(10, 3, h, w), dtype=torch.uint8) + utils.draw_segmentation_masks(image=batch, masks=masks) + with pytest.raises(ValueError, match="Pass an RGB image"): + one_channel = torch.randint(0, 256, size=(1, h, w), dtype=torch.uint8) + utils.draw_segmentation_masks(image=one_channel, masks=masks) + with pytest.raises(ValueError, match="The masks must be of dtype bool"): + masks_bad_dtype = torch.randint(0, 2, size=(h, w), dtype=torch.float) + utils.draw_segmentation_masks(image=img, masks=masks_bad_dtype) + with pytest.raises(ValueError, match="masks must be of shape"): + masks_bad_shape = torch.randint(0, 2, size=(3, 2, h, w), dtype=torch.bool) + utils.draw_segmentation_masks(image=img, masks=masks_bad_shape) + with pytest.raises(ValueError, match="must have the same height and width"): + masks_bad_shape = torch.randint(0, 2, size=(h + 4, w), dtype=torch.bool) + utils.draw_segmentation_masks(image=img, masks=masks_bad_shape) + with pytest.raises(ValueError, match="Number of colors must be equal or larger than the number of objects"): + utils.draw_segmentation_masks(image=img, masks=masks, colors=[]) + with pytest.raises(ValueError, match="`colors` must be a tuple or a string, or a list thereof"): + bad_colors = np.array(["red", "blue"]) # should be a list + utils.draw_segmentation_masks(image=img, masks=masks, colors=bad_colors) + with pytest.raises(ValueError, match="If passed as tuple, colors should be an RGB triplet"): + bad_colors = ("red", "blue") # should be a list + utils.draw_segmentation_masks(image=img, masks=masks, colors=bad_colors) + + +@pytest.mark.parametrize("device", cpu_and_cuda()) +def test_draw_no_segmention_mask(device): + img = torch.full((3, 100, 100), 0, dtype=torch.uint8, 
device=device) + masks = torch.full((0, 100, 100), 0, dtype=torch.bool, device=device) + with pytest.warns(UserWarning, match=re.escape("masks doesn't contain any mask. No mask was drawn")): + res = utils.draw_segmentation_masks(img, masks) + # Check that the function didn't change the image + assert res.eq(img).all() + + +def test_draw_keypoints_vanilla(): + # Keypoints is declared on top as global variable + keypoints_cp = keypoints.clone() + + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + img_cp = img.clone() + result = utils.draw_keypoints( + img, + keypoints, + colors="red", + connectivity=[ + (0, 1), + ], + ) + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_keypoint_vanilla.png") + if not os.path.exists(path): + res = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + assert_equal(result, expected) + # Check that keypoints are not modified inplace + assert_equal(keypoints, keypoints_cp) + # Check that image is not modified in place + assert_equal(img, img_cp) + + +def test_draw_keypoins_K_equals_one(): + # Non-regression test for https://github.com/pytorch/vision/pull/8439 + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + keypoints = torch.tensor([[[10, 10]]], dtype=torch.float) + utils.draw_keypoints(img, keypoints) + + +@pytest.mark.parametrize("colors", ["red", "#FF00FF", (1, 34, 122)]) +def test_draw_keypoints_colored(colors): + # Keypoints is declared on top as global variable + keypoints_cp = keypoints.clone() + + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + img_cp = img.clone() + result = utils.draw_keypoints( + img, + keypoints, + colors=colors, + connectivity=[ + (0, 1), + ], + ) + assert result.size(0) == 3 + assert_equal(keypoints, keypoints_cp) + assert_equal(img, img_cp) + + +@pytest.mark.parametrize("connectivity", [[(0, 1)], [(0, 1), (1, 2)]]) 
+@pytest.mark.parametrize( + "vis", + [ + torch.tensor([[1, 1, 0], [1, 1, 0]], dtype=torch.bool), + torch.tensor([[1, 1, 0], [1, 1, 0]], dtype=torch.float).unsqueeze_(-1), + ], +) +def test_draw_keypoints_visibility(connectivity, vis): + # Keypoints is declared on top as global variable + keypoints_cp = keypoints.clone() + + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + img_cp = img.clone() + + vis_cp = vis if vis is None else vis.clone() + + result = utils.draw_keypoints( + image=img, + keypoints=keypoints, + connectivity=connectivity, + colors="red", + visibility=vis, + ) + assert result.size(0) == 3 + assert_equal(keypoints, keypoints_cp) + assert_equal(img, img_cp) + + # compare with a fakedata image + # connect the key points 0 to 1 for both skeletons and do not show the other key points + path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_keypoints_visibility.png" + ) + if not os.path.exists(path): + res = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + assert_equal(result, expected) + + if vis_cp is None: + assert vis is None + else: + assert_equal(vis, vis_cp) + assert vis.dtype == vis_cp.dtype + + +def test_draw_keypoints_visibility_default(): + # Keypoints is declared on top as global variable + keypoints_cp = keypoints.clone() + + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + img_cp = img.clone() + + result = utils.draw_keypoints( + image=img, + keypoints=keypoints, + connectivity=[(0, 1)], + colors="red", + visibility=None, + ) + assert result.size(0) == 3 + assert_equal(keypoints, keypoints_cp) + assert_equal(img, img_cp) + + # compare against fakedata image, which connects 0->1 for both key-point skeletons + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_keypoint_vanilla.png") + expected = 
torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + assert_equal(result, expected) + + +def test_draw_keypoints_dtypes(): + image_uint8 = torch.randint(0, 256, size=(3, 100, 100), dtype=torch.uint8) + image_float = to_dtype(image_uint8, torch.float, scale=True) + + out_uint8 = utils.draw_keypoints(image_uint8, keypoints) + out_float = utils.draw_keypoints(image_float, keypoints) + + assert out_uint8.dtype == torch.uint8 + assert out_uint8 is not image_uint8 + + assert out_float.is_floating_point() + assert out_float is not image_float + + torch.testing.assert_close(out_uint8, to_dtype(out_float, torch.uint8, scale=True), rtol=0, atol=1) + + +def test_draw_keypoints_errors(): + h, w = 10, 10 + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + + with pytest.raises(TypeError, match="The image must be a tensor"): + utils.draw_keypoints(image="Not A Tensor Image", keypoints=keypoints) + with pytest.raises(ValueError, match="The image dtype must be"): + img_bad_dtype = torch.full((3, h, w), 0, dtype=torch.int64) + utils.draw_keypoints(image=img_bad_dtype, keypoints=keypoints) + with pytest.raises(ValueError, match="Pass individual images, not batches"): + batch = torch.randint(0, 256, size=(10, 3, h, w), dtype=torch.uint8) + utils.draw_keypoints(image=batch, keypoints=keypoints) + with pytest.raises(ValueError, match="Pass an RGB image"): + one_channel = torch.randint(0, 256, size=(1, h, w), dtype=torch.uint8) + utils.draw_keypoints(image=one_channel, keypoints=keypoints) + with pytest.raises(ValueError, match="keypoints must be of shape"): + invalid_keypoints = torch.tensor([[10, 10, 10, 10], [5, 6, 7, 8]], dtype=torch.float) + utils.draw_keypoints(image=img, keypoints=invalid_keypoints) + with pytest.raises(ValueError, match=re.escape("visibility must be of shape (num_instances, K)")): + one_dim_visibility = torch.tensor([True, True, True], dtype=torch.bool) + utils.draw_keypoints(image=img, keypoints=keypoints, visibility=one_dim_visibility) + with 
pytest.raises(ValueError, match=re.escape("visibility must be of shape (num_instances, K)")): + three_dim_visibility = torch.ones((2, 3, 4), dtype=torch.bool) + utils.draw_keypoints(image=img, keypoints=keypoints, visibility=three_dim_visibility) + with pytest.raises(ValueError, match="keypoints and visibility must have the same dimensionality"): + vis_wrong_n = torch.ones((3, 3), dtype=torch.bool) + utils.draw_keypoints(image=img, keypoints=keypoints, visibility=vis_wrong_n) + with pytest.raises(ValueError, match="keypoints and visibility must have the same dimensionality"): + vis_wrong_k = torch.ones((2, 4), dtype=torch.bool) + utils.draw_keypoints(image=img, keypoints=keypoints, visibility=vis_wrong_k) + + +@pytest.mark.parametrize("batch", (True, False)) +def test_flow_to_image(batch): + h, w = 100, 100 + flow = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij") + flow = torch.stack(flow[::-1], dim=0).float() + flow[0] -= h / 2 + flow[1] -= w / 2 + + if batch: + flow = torch.stack([flow, flow]) + + img = utils.flow_to_image(flow) + assert img.shape == ((2, 3, h, w) if batch else (3, h, w)) + + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "expected_flow.pt") + expected_img = torch.load(path, map_location="cpu", weights_only=True) + + if batch: + expected_img = torch.stack([expected_img, expected_img]) + + assert_equal(expected_img, img) + + +@pytest.mark.parametrize( + "input_flow, match", + ( + (torch.full((3, 10, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((5, 3, 10, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((2, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((5, 2, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((2, 10, 30), 0, dtype=torch.int), "Flow should be of dtype torch.float"), + ), +) +def test_flow_to_image_errors(input_flow, match): + with pytest.raises(ValueError, match=match): + 
utils.flow_to_image(flow=input_flow) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_video_gpu_decoder.py b/test/test_video_gpu_decoder.py new file mode 100644 index 00000000000..aa6d0aee9e0 --- /dev/null +++ b/test/test_video_gpu_decoder.py @@ -0,0 +1,97 @@ +import math +import os + +import pytest +import torch +import torchvision +from torchvision.io import _HAS_GPU_VIDEO_DECODER, VideoReader + +try: + import av +except ImportError: + av = None + +VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos") + + +@pytest.mark.skipif(_HAS_GPU_VIDEO_DECODER is False, reason="Didn't compile with support for gpu decoder") +class TestVideoGPUDecoder: + @pytest.mark.skipif(av is None, reason="PyAV unavailable") + @pytest.mark.parametrize( + "video_file", + [ + "RATRACE_wave_f_nm_np1_fr_goo_37.avi", + "TrumanShow_wave_f_nm_np1_fr_med_26.avi", + "v_SoccerJuggling_g23_c01.avi", + "v_SoccerJuggling_g24_c01.avi", + "R6llTwEh07w.mp4", + "SOX5yA1l24A.mp4", + "WUzgd7C1pWA.mp4", + ], + ) + def test_frame_reading(self, video_file): + torchvision.set_video_backend("cuda") + full_path = os.path.join(VIDEO_DIR, video_file) + decoder = VideoReader(full_path) + with av.open(full_path) as container: + for av_frame in container.decode(container.streams.video[0]): + av_frames = torch.tensor(av_frame.to_rgb(src_colorspace="ITU709").to_ndarray()) + vision_frames = next(decoder)["data"] + mean_delta = torch.mean(torch.abs(av_frames.float() - vision_frames.cpu().float())) + assert mean_delta < 0.75 + + @pytest.mark.skipif(av is None, reason="PyAV unavailable") + @pytest.mark.parametrize("keyframes", [True, False]) + @pytest.mark.parametrize( + "full_path, duration", + [ + (os.path.join(VIDEO_DIR, x), y) + for x, y in [ + ("v_SoccerJuggling_g23_c01.avi", 8.0), + ("v_SoccerJuggling_g24_c01.avi", 8.0), + ("R6llTwEh07w.mp4", 10.0), + ("SOX5yA1l24A.mp4", 11.0), + ("WUzgd7C1pWA.mp4", 11.0), + ] + ], + ) + def test_seek_reading(self, 
keyframes, full_path, duration): + torchvision.set_video_backend("cuda") + decoder = VideoReader(full_path) + time = duration / 2 + decoder.seek(time, keyframes_only=keyframes) + with av.open(full_path) as container: + container.seek(int(time * 1000000), any_frame=not keyframes, backward=False) + for av_frame in container.decode(container.streams.video[0]): + av_frames = torch.tensor(av_frame.to_rgb(src_colorspace="ITU709").to_ndarray()) + vision_frames = next(decoder)["data"] + mean_delta = torch.mean(torch.abs(av_frames.float() - vision_frames.cpu().float())) + assert mean_delta < 0.75 + + @pytest.mark.skipif(av is None, reason="PyAV unavailable") + @pytest.mark.parametrize( + "video_file", + [ + "RATRACE_wave_f_nm_np1_fr_goo_37.avi", + "TrumanShow_wave_f_nm_np1_fr_med_26.avi", + "v_SoccerJuggling_g23_c01.avi", + "v_SoccerJuggling_g24_c01.avi", + "R6llTwEh07w.mp4", + "SOX5yA1l24A.mp4", + "WUzgd7C1pWA.mp4", + ], + ) + def test_metadata(self, video_file): + torchvision.set_video_backend("cuda") + full_path = os.path.join(VIDEO_DIR, video_file) + decoder = VideoReader(full_path) + video_metadata = decoder.get_metadata()["video"] + with av.open(full_path) as container: + video = container.streams.video[0] + av_duration = float(video.duration * video.time_base) + assert math.isclose(video_metadata["duration"], av_duration, rel_tol=1e-2) + assert math.isclose(video_metadata["fps"], video.base_rate, rel_tol=1e-2) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_video_reader.py b/test/test_video_reader.py index bf59eb7dc4d..10995424982 100644 --- a/test/test_video_reader.py +++ b/test/test_video_reader.py @@ -1,33 +1,28 @@ import collections -from common_utils import get_tmp_dir -from fractions import Fraction import math -import numpy as np import os -import sys -import time +from fractions import Fraction + +import numpy as np +import pytest import torch import torchvision.io as io -import unittest +from common_utils import assert_equal 
from numpy.random import randint +from pytest import approx +from torchvision import set_video_backend +from torchvision.io import _HAS_CPU_VIDEO_DECODER + try: import av + # Do a version test too io.video._check_av_available() except ImportError: av = None -if sys.version_info < (3,): - from urllib2 import URLError -else: - from urllib.error import URLError - - -from torchvision.io import _HAS_VIDEO_OPT - - VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos") CheckerConfig = [ @@ -39,10 +34,7 @@ "check_aframes", "check_aframe_pts", ] -GroundTruth = collections.namedtuple( - "GroundTruth", - " ".join(CheckerConfig) -) +GroundTruth = collections.namedtuple("GroundTruth", " ".join(CheckerConfig)) all_check_config = GroundTruth( duration=0, @@ -115,18 +107,14 @@ } -DecoderResult = collections.namedtuple( - "DecoderResult", "vframes vframe_pts vtimebase aframes aframe_pts atimebase" -) +DecoderResult = collections.namedtuple("DecoderResult", "vframes vframe_pts vtimebase aframes aframe_pts atimebase") -"""av_seek_frame is imprecise so seek to a timestamp earlier by a margin -The unit of margin is second""" -seek_frame_margin = 0.25 +# av_seek_frame is imprecise so seek to a timestamp earlier by a margin +# The unit of margin is second +SEEK_FRAME_MARGIN = 0.25 -def _read_from_stream( - container, start_pts, end_pts, stream, stream_name, buffer_size=4 -): +def _read_from_stream(container, start_pts, end_pts, stream, stream_name, buffer_size=4): """ Args: container: pyav container @@ -139,7 +127,7 @@ def _read_from_stream( ascending order. We need to decode more frames even when we meet end pts """ - # seeking in the stream is imprecise. Thus, seek to an ealier PTS by a margin + # seeking in the stream is imprecise. 
Thus, seek to an earlier PTS by a margin margin = 1 seek_offset = max(start_pts - margin, 0) @@ -193,9 +181,9 @@ def _decode_frames_by_av_module( frames are read """ if video_end_pts is None: - video_end_pts = float('inf') + video_end_pts = float("inf") if audio_end_pts is None: - audio_end_pts = float('inf') + audio_end_pts = float("inf") container = av.open(full_path) video_frames = [] @@ -238,9 +226,7 @@ def _decode_frames_by_av_module( else: aframes = torch.empty((1, 0), dtype=torch.float32) - aframe_pts = torch.tensor( - [audio_frame.pts for audio_frame in audio_frames], dtype=torch.int64 - ) + aframe_pts = torch.tensor([audio_frame.pts for audio_frame in audio_frames], dtype=torch.int64) return DecoderResult( vframes=vframes, @@ -271,55 +257,64 @@ def _get_video_tensor(video_dir, video_file): assert os.path.exists(full_path), "File not found: %s" % full_path with open(full_path, "rb") as fp: - video_tensor = torch.from_numpy(np.frombuffer(fp.read(), dtype=np.uint8)) + video_tensor = torch.frombuffer(fp.read(), dtype=torch.uint8) return full_path, video_tensor -@unittest.skipIf(av is None, "PyAV unavailable") -@unittest.skipIf(_HAS_VIDEO_OPT is False, "Didn't compile with ffmpeg") -class TestVideoReader(unittest.TestCase): +@pytest.mark.skipif(av is None, reason="PyAV unavailable") +@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg") +class TestVideoReader: def check_separate_decoding_result(self, tv_result, config): - """check the decoding results from TorchVision decoder - """ - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result - - video_duration = vduration.item() * Fraction( - vtimebase[0].item(), vtimebase[1].item() - ) - self.assertAlmostEqual(video_duration, config.duration, delta=0.5) + """check the decoding results from TorchVision decoder""" + ( + vframes, + vframe_pts, + vtimebase, + vfps, + vduration, + aframes, + aframe_pts, + atimebase, + 
asample_rate, + aduration, + ) = tv_result + + video_duration = vduration.item() * Fraction(vtimebase[0].item(), vtimebase[1].item()) + assert video_duration == approx(config.duration, abs=0.5) + + assert vfps.item() == approx(config.video_fps, abs=0.5) - self.assertAlmostEqual(vfps.item(), config.video_fps, delta=0.5) if asample_rate.numel() > 0: - self.assertEqual(asample_rate.item(), config.audio_sample_rate) - audio_duration = aduration.item() * Fraction( - atimebase[0].item(), atimebase[1].item() - ) - self.assertAlmostEqual(audio_duration, config.duration, delta=0.5) + assert asample_rate.item() == config.audio_sample_rate + audio_duration = aduration.item() * Fraction(atimebase[0].item(), atimebase[1].item()) + assert audio_duration == approx(config.duration, abs=0.5) # check if pts of video frames are sorted in ascending order for i in range(len(vframe_pts) - 1): - self.assertEqual(vframe_pts[i] < vframe_pts[i + 1], True) + assert vframe_pts[i] < vframe_pts[i + 1] if len(aframe_pts) > 1: # check if pts of audio frames are sorted in ascending order for i in range(len(aframe_pts) - 1): - self.assertEqual(aframe_pts[i] < aframe_pts[i + 1], True) + assert aframe_pts[i] < aframe_pts[i + 1] def check_probe_result(self, result, config): vtimebase, vfps, vduration, atimebase, asample_rate, aduration = result - video_duration = vduration.item() * Fraction( - vtimebase[0].item(), vtimebase[1].item() - ) - self.assertAlmostEqual(video_duration, config.duration, delta=0.5) - self.assertAlmostEqual(vfps.item(), config.video_fps, delta=0.5) + video_duration = vduration.item() * Fraction(vtimebase[0].item(), vtimebase[1].item()) + assert video_duration == approx(config.duration, abs=0.5) + assert vfps.item() == approx(config.video_fps, abs=0.5) if asample_rate.numel() > 0: - self.assertEqual(asample_rate.item(), config.audio_sample_rate) - audio_duration = aduration.item() * Fraction( - atimebase[0].item(), atimebase[1].item() - ) - self.assertAlmostEqual(audio_duration, 
config.duration, delta=0.5) + assert asample_rate.item() == config.audio_sample_rate + audio_duration = aduration.item() * Fraction(atimebase[0].item(), atimebase[1].item()) + assert audio_duration == approx(config.duration, abs=0.5) + + def check_meta_result(self, result, config): + assert result.video_duration == approx(config.duration, abs=0.5) + assert result.video_fps == approx(config.video_fps, abs=0.5) + if result.has_audio > 0: + assert result.audio_sample_rate == config.audio_sample_rate + assert result.audio_duration == approx(config.duration, abs=0.5) def compare_decoding_result(self, tv_result, ref_result, config=all_check_config): """ @@ -330,8 +325,18 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config decoder or TorchVision decoder with getPtsOnly = 1 config: config of decoding results checker """ - vframes, vframe_pts, vtimebase, _vfps, _vduration, aframes, aframe_pts, \ - atimebase, _asample_rate, _aduration = tv_result + ( + vframes, + vframe_pts, + vtimebase, + _vfps, + _vduration, + aframes, + aframe_pts, + atimebase, + _asample_rate, + _aduration, + ) = tv_result if isinstance(ref_result, list): # the ref_result is from new video_reader decoder ref_result = DecoderResult( @@ -345,37 +350,34 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config if vframes.numel() > 0 and ref_result.vframes.numel() > 0: mean_delta = torch.mean(torch.abs(vframes.float() - ref_result.vframes.float())) - self.assertAlmostEqual(mean_delta, 0, delta=8.0) + assert mean_delta == approx(0.0, abs=8.0) mean_delta = torch.mean(torch.abs(vframe_pts.float() - ref_result.vframe_pts.float())) - self.assertAlmostEqual(mean_delta, 0, delta=1.0) + assert mean_delta == approx(0.0, abs=1.0) - is_same = torch.all(torch.eq(vtimebase, ref_result.vtimebase)).item() - self.assertEqual(is_same, True) + assert_equal(vtimebase, ref_result.vtimebase) if config.check_aframes and aframes.numel() > 0 and ref_result.aframes.numel() > 
0: """Audio stream is available and audio frame is required to return from decoder""" - is_same = torch.all(torch.eq(aframes, ref_result.aframes)).item() - self.assertEqual(is_same, True) + assert_equal(aframes, ref_result.aframes) if config.check_aframe_pts and aframe_pts.numel() > 0 and ref_result.aframe_pts.numel() > 0: """Audio stream is available""" - is_same = torch.all(torch.eq(aframe_pts, ref_result.aframe_pts)).item() - self.assertEqual(is_same, True) + assert_equal(aframe_pts, ref_result.aframe_pts) - is_same = torch.all(torch.eq(atimebase, ref_result.atimebase)).item() - self.assertEqual(is_same, True) + assert_equal(atimebase, ref_result.atimebase) - @unittest.skip( - "This stress test will iteratively decode the same set of videos." - "It helps to detect memory leak but it takes lots of time to run." - "By default, it is disabled" - ) - def test_stress_test_read_video_from_file(self): + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_stress_test_read_video_from_file(self, test_video): + pytest.skip( + "This stress test will iteratively decode the same set of videos. " + "It helps to detect memory leak but it takes lots of time to run. " 
+ "By default, it is disabled" + ) num_iter = 10000 # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -384,56 +386,18 @@ def test_stress_test_read_video_from_file(self): audio_timebase_num, audio_timebase_den = 0, 1 for _i in range(num_iter): - for test_video, _config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) - - # pass 1: decode all frames using new decoder - torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - - def test_read_video_from_file(self): - """ - Test the case when decoder starts with a video file to decode frames. 
- """ - # video related - width, height, min_dimension = 0, 0, 0 - video_start_pts, video_end_pts = 0, -1 - video_timebase_num, video_timebase_den = 0, 1 - # audio related - samples, channels = 0, 0 - audio_start_pts, audio_end_pts = 0, -1 - audio_timebase_num, audio_timebase_den = 0, 1 - - for test_video, config in test_videos.items(): full_path = os.path.join(VIDEO_DIR, test_video) # pass 1: decode all frames using new decoder - tv_result = torch.ops.video_reader.read_video_from_file( + torch.ops.video_reader.read_video_from_file( full_path, - seek_frame_margin, + SEEK_FRAME_MARGIN, 0, # getPtsOnly 1, # readVideoStream width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -446,20 +410,63 @@ def test_read_video_from_file(self): audio_timebase_num, audio_timebase_den, ) - # pass 2: decode all frames using av - pyav_result = _decode_frames_by_av_module(full_path) - # check results from TorchVision decoder - self.check_separate_decoding_result(tv_result, config) - # compare decoding results - self.compare_decoding_result(tv_result, pyav_result, config) - def test_read_video_from_file_read_single_stream_only(self): + @pytest.mark.parametrize("test_video,config", test_videos.items()) + def test_read_video_from_file(self, test_video, config): + """ + Test the case when decoder starts with a video file to decode frames. 
+ """ + # video related + width, height, min_dimension, max_dimension = 0, 0, 0, 0 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related + samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 + + full_path = os.path.join(VIDEO_DIR, test_video) + + # pass 1: decode all frames using new decoder + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + # pass 2: decode all frames using av + pyav_result = _decode_frames_by_av_module(full_path) + # check results from TorchVision decoder + self.check_separate_decoding_result(tv_result, config) + # compare decoding results + self.compare_decoding_result(tv_result, pyav_result, config) + + @pytest.mark.parametrize("test_video,config", test_videos.items()) + @pytest.mark.parametrize("read_video_stream,read_audio_stream", [(1, 0), (0, 1)]) + def test_read_video_from_file_read_single_stream_only( + self, test_video, config, read_video_stream, read_audio_stream + ): """ Test the case when decoder starts with a video file to decode frames, and only reads video stream and ignores audio stream """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -467,52 +474,62 @@ def test_read_video_from_file_read_single_stream_only(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) - for 
readVideoStream, readAudioStream in [(1, 0), (0, 1)]: - # decode all frames using new decoder - tv_result = torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - readVideoStream, - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - readAudioStream, - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result - - self.assertEqual(vframes.numel() > 0, readVideoStream) - self.assertEqual(vframe_pts.numel() > 0, readVideoStream) - self.assertEqual(vtimebase.numel() > 0, readVideoStream) - self.assertEqual(vfps.numel() > 0, readVideoStream) - - expect_audio_data = readAudioStream == 1 and config.audio_sample_rate is not None - self.assertEqual(aframes.numel() > 0, expect_audio_data) - self.assertEqual(aframe_pts.numel() > 0, expect_audio_data) - self.assertEqual(atimebase.numel() > 0, expect_audio_data) - self.assertEqual(asample_rate.numel() > 0, expect_audio_data) - - def test_read_video_from_file_rescale_min_dimension(self): + full_path = os.path.join(VIDEO_DIR, test_video) + # decode all frames using new decoder + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + read_video_stream, + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + read_audio_stream, + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + + ( + vframes, + vframe_pts, + vtimebase, + vfps, + vduration, + aframes, + aframe_pts, + atimebase, + asample_rate, + aduration, + ) = tv_result + + assert (vframes.numel() > 0) is bool(read_video_stream) + assert (vframe_pts.numel() > 0) is bool(read_video_stream) + assert (vtimebase.numel() > 
0) is bool(read_video_stream) + assert (vfps.numel() > 0) is bool(read_video_stream) + + expect_audio_data = read_audio_stream == 1 and config.audio_sample_rate is not None + assert (aframes.numel() > 0) is bool(expect_audio_data) + assert (aframe_pts.numel() > 0) is bool(expect_audio_data) + assert (atimebase.numel() > 0) is bool(expect_audio_data) + assert (asample_rate.numel() > 0) is bool(expect_audio_data) + + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_read_video_from_file_rescale_min_dimension(self, test_video): """ Test the case when decoder starts with a video file to decode frames, and video min dimension between height and width is set. """ # video related - width, height, min_dimension = 0, 0, 128 + width, height, min_dimension, max_dimension = 0, 0, 128, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -520,38 +537,79 @@ def test_read_video_from_file_rescale_min_dimension(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, _config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2)) - tv_result = torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - 
audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - self.assertEqual(min_dimension, min(tv_result[0].size(1), tv_result[0].size(2))) + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_read_video_from_file_rescale_max_dimension(self, test_video): + """ + Test the case when decoder starts with a video file to decode frames, and + video max dimension between height and width is set. + """ + # video related + width, height, min_dimension, max_dimension = 0, 0, 0, 85 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related + samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 - def test_read_video_from_file_rescale_width(self): + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2)) + + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_read_video_from_file_rescale_both_min_max_dimension(self, test_video): """ Test the case when decoder starts with a video file to decode frames, and - video width is set. + both video min and max dimensions between height and width are set. 
""" # video related - width, height, min_dimension = 256, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 64, 85 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -559,38 +617,80 @@ def test_read_video_from_file_rescale_width(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, _config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2)) + assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2)) - tv_result = torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - self.assertEqual(tv_result[0].size(2), width) + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_read_video_from_file_rescale_width(self, test_video): + """ + Test the case when decoder starts with a video file to decode frames, and + video width is set. 
+ """ + # video related + width, height, min_dimension, max_dimension = 256, 0, 0, 0 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related + samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 + + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + assert tv_result[0].size(2) == width - def test_read_video_from_file_rescale_height(self): + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_read_video_from_file_rescale_height(self, test_video): """ Test the case when decoder starts with a video file to decode frames, and video height is set. 
""" # video related - width, height, min_dimension = 0, 224, 0 + width, height, min_dimension, max_dimension = 0, 224, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -598,38 +698,39 @@ def test_read_video_from_file_rescale_height(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, _config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) - - tv_result = torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - self.assertEqual(tv_result[0].size(1), height) + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + assert tv_result[0].size(1) == height - def test_read_video_from_file_rescale_width_and_height(self): + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_read_video_from_file_rescale_width_and_height(self, test_video): """ Test the case when decoder starts with a video file to decode frames, and both video height and width are set. 
""" # video related - width, height, min_dimension = 320, 240, 0 + width, height, min_dimension, max_dimension = 320, 240, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -637,93 +738,97 @@ def test_read_video_from_file_rescale_width_and_height(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, _config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) - - tv_result = torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - self.assertEqual(tv_result[0].size(1), height) - self.assertEqual(tv_result[0].size(2), width) + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + assert tv_result[0].size(1) == height + assert tv_result[0].size(2) == width - def test_read_video_from_file_audio_resampling(self): + @pytest.mark.parametrize("test_video", test_videos.keys()) + @pytest.mark.parametrize("samples", [9600, 96000]) + def test_read_video_from_file_audio_resampling(self, test_video, samples): """ Test the case when decoder starts with a video file to decode frames, and audio waveform are resampled """ + # video related + width, height, min_dimension, max_dimension = 0, 0, 0, 0 + video_start_pts, video_end_pts = 0, -1 + 
video_timebase_num, video_timebase_den = 0, 1 + # audio related + channels = 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 - for samples in [ - 9600, # downsampling - 96000, # upsampling - ]: - # video related - width, height, min_dimension = 0, 0, 0 - video_start_pts, video_end_pts = 0, -1 - video_timebase_num, video_timebase_den = 0, 1 - # audio related - channels = 0 - audio_start_pts, audio_end_pts = 0, -1 - audio_timebase_num, audio_timebase_den = 0, 1 - - for test_video, _config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) - - tv_result = torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result - if aframes.numel() > 0: - self.assertEqual(samples, asample_rate.item()) - self.assertEqual(1, aframes.size(1)) - # when audio stream is found - duration = float(aframe_pts[-1]) * float(atimebase[0]) / float(atimebase[1]) - self.assertAlmostEqual( - aframes.size(0), - int(duration * asample_rate.item()), - delta=0.1 * asample_rate.item(), - ) - - def test_compare_read_video_from_memory_and_file(self): + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + ( + vframes, + vframe_pts, 
+ vtimebase, + vfps, + vduration, + aframes, + aframe_pts, + atimebase, + asample_rate, + aduration, + ) = tv_result + if aframes.numel() > 0: + assert samples == asample_rate.item() + assert 1 == aframes.size(1) + # when audio stream is found + duration = float(aframe_pts[-1]) * float(atimebase[0]) / float(atimebase[1]) + assert aframes.size(0) == approx(int(duration * asample_rate.item()), abs=0.1 * asample_rate.item()) + + @pytest.mark.parametrize("test_video,config", test_videos.items()) + def test_compare_read_video_from_memory_and_file(self, test_video, config): """ Test the case when video is already in memory, and decoder reads data in memory """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -731,63 +836,65 @@ def test_compare_read_video_from_memory_and_file(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, config in test_videos.items(): - full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) - - # pass 1: decode all frames using cpp decoder - tv_result_memory = torch.ops.video_reader.read_video_from_memory( - video_tensor, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - self.check_separate_decoding_result(tv_result_memory, config) - # pass 2: decode all frames from file - tv_result_file = torch.ops.video_reader.read_video_from_file( - full_path, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - 
channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) + full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + + # pass 1: decode all frames using cpp decoder + tv_result_memory = torch.ops.video_reader.read_video_from_memory( + video_tensor, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + self.check_separate_decoding_result(tv_result_memory, config) + # pass 2: decode all frames from file + tv_result_file = torch.ops.video_reader.read_video_from_file( + full_path, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) - self.check_separate_decoding_result(tv_result_file, config) - # finally, compare results decoded from memory and file - self.compare_decoding_result(tv_result_memory, tv_result_file) + self.check_separate_decoding_result(tv_result_file, config) + # finally, compare results decoded from memory and file + self.compare_decoding_result(tv_result_memory, tv_result_file) - def test_read_video_from_memory(self): + @pytest.mark.parametrize("test_video,config", test_videos.items()) + def test_read_video_from_memory(self, test_video, config): """ Test the case when video is already in memory, and decoder reads data in memory """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -795,44 +902,45 @@ def 
test_read_video_from_memory(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, config in test_videos.items(): - full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) - - # pass 1: decode all frames using cpp decoder - tv_result = torch.ops.video_reader.read_video_from_memory( - video_tensor, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - # pass 2: decode all frames using av - pyav_result = _decode_frames_by_av_module(full_path) + full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + + # pass 1: decode all frames using cpp decoder + tv_result = torch.ops.video_reader.read_video_from_memory( + video_tensor, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + # pass 2: decode all frames using av + pyav_result = _decode_frames_by_av_module(full_path) - self.check_separate_decoding_result(tv_result, config) - self.compare_decoding_result(tv_result, pyav_result, config) + self.check_separate_decoding_result(tv_result, config) + self.compare_decoding_result(tv_result, pyav_result, config) - def test_read_video_from_memory_get_pts_only(self): + @pytest.mark.parametrize("test_video,config", test_videos.items()) + def test_read_video_from_memory_get_pts_only(self, test_video, config): """ Test the case when video is already in memory, and decoder reads data in memory. 
Compare frame pts between decoding for pts only and full decoding for both pts and frame data """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -840,210 +948,307 @@ def test_read_video_from_memory_get_pts_only(self): audio_start_pts, audio_end_pts = 0, -1 audio_timebase_num, audio_timebase_den = 0, 1 - for test_video, config in test_videos.items(): - full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) - - # pass 1: decode all frames using cpp decoder - tv_result = torch.ops.video_reader.read_video_from_memory( - video_tensor, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - self.assertAlmostEqual(config.video_fps, tv_result[3].item(), delta=0.01) - - # pass 2: decode all frames to get PTS only using cpp decoder - tv_result_pts_only = torch.ops.video_reader.read_video_from_memory( - video_tensor, - seek_frame_margin, - 1, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) + _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + + # pass 1: decode all frames using cpp decoder + tv_result = torch.ops.video_reader.read_video_from_memory( + video_tensor, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + 
audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + assert abs(config.video_fps - tv_result[3].item()) < 0.01 + + # pass 2: decode all frames to get PTS only using cpp decoder + tv_result_pts_only = torch.ops.video_reader.read_video_from_memory( + video_tensor, + SEEK_FRAME_MARGIN, + 1, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) - self.assertEqual(tv_result_pts_only[0].numel(), 0) - self.assertEqual(tv_result_pts_only[5].numel(), 0) - self.compare_decoding_result(tv_result, tv_result_pts_only) + assert not tv_result_pts_only[0].numel() + assert not tv_result_pts_only[5].numel() + self.compare_decoding_result(tv_result, tv_result_pts_only) - def test_read_video_in_range_from_memory(self): + @pytest.mark.parametrize("test_video,config", test_videos.items()) + @pytest.mark.parametrize("num_frames", [4, 8, 16, 32, 64, 128]) + def test_read_video_in_range_from_memory(self, test_video, config, num_frames): """ Test the case when video is already in memory, and decoder reads data in memory. 
In addition, decoder takes meaningful start- and end PTS as input, and decode frames within that interval """ - for test_video, config in test_videos.items(): - full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) - # video related - width, height, min_dimension = 0, 0, 0 - video_start_pts, video_end_pts = 0, -1 - video_timebase_num, video_timebase_den = 0, 1 - # audio related - samples, channels = 0, 0 - audio_start_pts, audio_end_pts = 0, -1 - audio_timebase_num, audio_timebase_den = 0, 1 - # pass 1: decode all frames using new decoder - tv_result = torch.ops.video_reader.read_video_from_memory( - video_tensor, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, + full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + # video related + width, height, min_dimension, max_dimension = 0, 0, 0, 0 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related + samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 + # pass 1: decode all frames using new decoder + tv_result = torch.ops.video_reader.read_video_from_memory( + video_tensor, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + ( + vframes, + vframe_pts, + vtimebase, + vfps, + vduration, + aframes, + aframe_pts, + atimebase, + asample_rate, + aduration, + ) = tv_result + assert abs(config.video_fps - vfps.item()) < 0.01 + + start_pts_ind_max = vframe_pts.size(0) 
- num_frames + if start_pts_ind_max <= 0: + return + # randomly pick start pts + start_pts_ind = randint(0, start_pts_ind_max) + end_pts_ind = start_pts_ind + num_frames - 1 + video_start_pts = vframe_pts[start_pts_ind] + video_end_pts = vframe_pts[end_pts_ind] + + video_timebase_num, video_timebase_den = vtimebase[0], vtimebase[1] + if len(atimebase) > 0: + # when audio stream is available + audio_timebase_num, audio_timebase_den = atimebase[0], atimebase[1] + audio_start_pts = _pts_convert( + video_start_pts.item(), + Fraction(video_timebase_num.item(), video_timebase_den.item()), + Fraction(audio_timebase_num.item(), audio_timebase_den.item()), + math.floor, + ) + audio_end_pts = _pts_convert( + video_end_pts.item(), + Fraction(video_timebase_num.item(), video_timebase_den.item()), + Fraction(audio_timebase_num.item(), audio_timebase_den.item()), + math.ceil, ) - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result - self.assertAlmostEqual(config.video_fps, vfps.item(), delta=0.01) - - for num_frames in [4, 8, 16, 32, 64, 128]: - start_pts_ind_max = vframe_pts.size(0) - num_frames - if start_pts_ind_max <= 0: - continue - # randomly pick start pts - start_pts_ind = randint(0, start_pts_ind_max) - end_pts_ind = start_pts_ind + num_frames - 1 - video_start_pts = vframe_pts[start_pts_ind] - video_end_pts = vframe_pts[end_pts_ind] - - video_timebase_num, video_timebase_den = vtimebase[0], vtimebase[1] - if len(atimebase) > 0: - # when audio stream is available - audio_timebase_num, audio_timebase_den = atimebase[0], atimebase[1] - audio_start_pts = _pts_convert( - video_start_pts.item(), - Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(audio_timebase_num.item(), audio_timebase_den.item()), - math.floor, - ) - audio_end_pts = _pts_convert( - video_end_pts.item(), - Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(audio_timebase_num.item(), 
audio_timebase_den.item()), - math.ceil, - ) - - # pass 2: decode frames in the randomly generated range - tv_result = torch.ops.video_reader.read_video_from_memory( - video_tensor, - seek_frame_margin, - 0, # getPtsOnly - 1, # readVideoStream - width, - height, - min_dimension, - video_start_pts, - video_end_pts, - video_timebase_num, - video_timebase_den, - 1, # readAudioStream - samples, - channels, - audio_start_pts, - audio_end_pts, - audio_timebase_num, - audio_timebase_den, - ) - - # pass 3: decode frames in range using PyAv - video_timebase_av, audio_timebase_av = _get_timebase_by_av_module(full_path) - - video_start_pts_av = _pts_convert( - video_start_pts.item(), - Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(video_timebase_av.numerator, video_timebase_av.denominator), - math.floor, - ) - video_end_pts_av = _pts_convert( - video_end_pts.item(), - Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(video_timebase_av.numerator, video_timebase_av.denominator), - math.ceil, - ) - if audio_timebase_av: - audio_start_pts = _pts_convert( - video_start_pts.item(), - Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator), - math.floor, - ) - audio_end_pts = _pts_convert( - video_end_pts.item(), - Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator), - math.ceil, - ) - - pyav_result = _decode_frames_by_av_module( - full_path, - video_start_pts_av, - video_end_pts_av, - audio_start_pts, - audio_end_pts, - ) - - self.assertEqual(tv_result[0].size(0), num_frames) - if pyav_result.vframes.size(0) == num_frames: - # if PyAv decodes a different number of video frames, skip - # comparing the decoding results between Torchvision video reader - # and PyAv - self.compare_decoding_result(tv_result, pyav_result, config) - - def test_probe_video_from_file(self): + + 
# pass 2: decode frames in the randomly generated range + tv_result = torch.ops.video_reader.read_video_from_memory( + video_tensor, + SEEK_FRAME_MARGIN, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + + # pass 3: decode frames in range using PyAv + video_timebase_av, audio_timebase_av = _get_timebase_by_av_module(full_path) + + video_start_pts_av = _pts_convert( + video_start_pts.item(), + Fraction(video_timebase_num.item(), video_timebase_den.item()), + Fraction(video_timebase_av.numerator, video_timebase_av.denominator), + math.floor, + ) + video_end_pts_av = _pts_convert( + video_end_pts.item(), + Fraction(video_timebase_num.item(), video_timebase_den.item()), + Fraction(video_timebase_av.numerator, video_timebase_av.denominator), + math.ceil, + ) + if audio_timebase_av: + audio_start_pts = _pts_convert( + video_start_pts.item(), + Fraction(video_timebase_num.item(), video_timebase_den.item()), + Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator), + math.floor, + ) + audio_end_pts = _pts_convert( + video_end_pts.item(), + Fraction(video_timebase_num.item(), video_timebase_den.item()), + Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator), + math.ceil, + ) + + pyav_result = _decode_frames_by_av_module( + full_path, + video_start_pts_av, + video_end_pts_av, + audio_start_pts, + audio_end_pts, + ) + + assert tv_result[0].size(0) == num_frames + if pyav_result.vframes.size(0) == num_frames: + # if PyAv decodes a different number of video frames, skip + # comparing the decoding results between Torchvision video reader + # and PyAv + self.compare_decoding_result(tv_result, pyav_result, config) + + @pytest.mark.parametrize("test_video,config", test_videos.items()) + def 
test_probe_video_from_file(self, test_video, config): """ Test the case when decoder probes a video file """ - for test_video, config in test_videos.items(): - full_path = os.path.join(VIDEO_DIR, test_video) - probe_result = torch.ops.video_reader.probe_video_from_file(full_path) - self.check_probe_result(probe_result, config) + full_path = os.path.join(VIDEO_DIR, test_video) + probe_result = torch.ops.video_reader.probe_video_from_file(full_path) + self.check_probe_result(probe_result, config) - def test_probe_video_from_memory(self): + @pytest.mark.parametrize("test_video,config", test_videos.items()) + def test_probe_video_from_memory(self, test_video, config): """ Test the case when decoder probes a video in memory """ - for test_video, config in test_videos.items(): - full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) - probe_result = torch.ops.video_reader.probe_video_from_memory(video_tensor) - self.check_probe_result(probe_result, config) + _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + probe_result = torch.ops.video_reader.probe_video_from_memory(video_tensor) + self.check_probe_result(probe_result, config) + + @pytest.mark.parametrize("test_video,config", test_videos.items()) + def test_probe_video_from_memory_script(self, test_video, config): + scripted_fun = torch.jit.script(io._probe_video_from_memory) + assert scripted_fun is not None + _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + probe_result = scripted_fun(video_tensor) + self.check_meta_result(probe_result, config) -if __name__ == '__main__': - unittest.main() + @pytest.mark.parametrize("test_video", test_videos.keys()) + def test_read_video_from_memory_scripted(self, test_video): + """ + Test the case when video is already in memory, and decoder reads data in memory + """ + # video related + width, height, min_dimension, max_dimension = 0, 0, 0, 0 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related 
+ samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 + + scripted_fun = torch.jit.script(io._read_video_from_memory) + assert scripted_fun is not None + + _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + + # decode all frames using cpp decoder + scripted_fun( + video_tensor, + SEEK_FRAME_MARGIN, + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + [video_start_pts, video_end_pts], + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + [audio_start_pts, audio_end_pts], + audio_timebase_num, + audio_timebase_den, + ) + # FUTURE: check value of video / audio frames + + def test_invalid_file(self): + set_video_backend("video_reader") + with pytest.raises(RuntimeError): + io.read_video("foo.mp4") + + set_video_backend("pyav") + with pytest.raises(RuntimeError): + io.read_video("foo.mp4") + + @pytest.mark.parametrize("test_video", test_videos.keys()) + @pytest.mark.parametrize("backend", ["video_reader", "pyav"]) + @pytest.mark.parametrize("start_offset", [0, 500]) + @pytest.mark.parametrize("end_offset", [3000, None]) + def test_audio_present_pts(self, test_video, backend, start_offset, end_offset): + """Test if audio frames are returned with pts unit.""" + full_path = os.path.join(VIDEO_DIR, test_video) + container = av.open(full_path) + if container.streams.audio: + set_video_backend(backend) + _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="pts") + assert all([dimension > 0 for dimension in audio.shape[:2]]) + + @pytest.mark.parametrize("test_video", test_videos.keys()) + @pytest.mark.parametrize("backend", ["video_reader", "pyav"]) + @pytest.mark.parametrize("start_offset", [0, 0.1]) + @pytest.mark.parametrize("end_offset", [0.3, None]) + def test_audio_present_sec(self, test_video, backend, start_offset, end_offset): + """Test if audio frames are returned with sec unit.""" + full_path = 
os.path.join(VIDEO_DIR, test_video) + container = av.open(full_path) + if container.streams.audio: + set_video_backend(backend) + _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="sec") + assert all([dimension > 0 for dimension in audio.shape[:2]]) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/test/test_videoapi.py b/test/test_videoapi.py new file mode 100644 index 00000000000..aabcf6407f7 --- /dev/null +++ b/test/test_videoapi.py @@ -0,0 +1,312 @@ +import collections +import os +import urllib + +import pytest +import torch +import torchvision +from pytest import approx +from torchvision.datasets.utils import download_url +from torchvision.io import _HAS_CPU_VIDEO_DECODER, VideoReader + + +# WARNING: these tests have been skipped forever on the CI because the video ops +# are never properly available. This is bad, but things have been in a terrible +# state for a long time already as we write this comment, and we'll hopefully be +# able to get rid of this all soon. + + +try: + import av + + # Do a version test too + torchvision.io.video._check_av_available() +except ImportError: + av = None + + +VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos") + +CheckerConfig = ["duration", "video_fps", "audio_sample_rate"] +GroundTruth = collections.namedtuple("GroundTruth", " ".join(CheckerConfig)) + + +def backends(): + backends_ = ["video_reader"] + if av is not None: + backends_.append("pyav") + return backends_ + + +def fate(name, path="."): + """Download and return a path to a sample from the FFmpeg test suite. 
+ See the `FFmpeg Automated Test Environment `_ + """ + + file_name = name.split("/")[1] + download_url("http://fate.ffmpeg.org/fate-suite/" + name, path, file_name) + return os.path.join(path, file_name) + + +test_videos = { + "RATRACE_wave_f_nm_np1_fr_goo_37.avi": GroundTruth(duration=2.0, video_fps=30.0, audio_sample_rate=None), + "SchoolRulesHowTheyHelpUs_wave_f_nm_np1_ba_med_0.avi": GroundTruth( + duration=2.0, video_fps=30.0, audio_sample_rate=None + ), + "TrumanShow_wave_f_nm_np1_fr_med_26.avi": GroundTruth(duration=2.0, video_fps=30.0, audio_sample_rate=None), + "v_SoccerJuggling_g23_c01.avi": GroundTruth(duration=8.0, video_fps=29.97, audio_sample_rate=None), + "v_SoccerJuggling_g24_c01.avi": GroundTruth(duration=8.0, video_fps=29.97, audio_sample_rate=None), + "R6llTwEh07w.mp4": GroundTruth(duration=10.0, video_fps=30.0, audio_sample_rate=44100), + "SOX5yA1l24A.mp4": GroundTruth(duration=11.0, video_fps=29.97, audio_sample_rate=48000), + "WUzgd7C1pWA.mp4": GroundTruth(duration=11.0, video_fps=29.97, audio_sample_rate=48000), +} + + +@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg") +class TestVideoApi: + @pytest.mark.skipif(av is None, reason="PyAV unavailable") + @pytest.mark.parametrize("test_video", test_videos.keys()) + @pytest.mark.parametrize("backend", backends()) + def test_frame_reading(self, test_video, backend): + torchvision.set_video_backend(backend) + full_path = os.path.join(VIDEO_DIR, test_video) + with av.open(full_path) as av_reader: + if av_reader.streams.video: + av_frames, vr_frames = [], [] + av_pts, vr_pts = [], [] + # get av frames + for av_frame in av_reader.decode(av_reader.streams.video[0]): + av_frames.append(torch.tensor(av_frame.to_rgb().to_ndarray()).permute(2, 0, 1)) + av_pts.append(av_frame.pts * av_frame.time_base) + + # get vr frames + video_reader = VideoReader(full_path, "video") + for vr_frame in video_reader: + vr_frames.append(vr_frame["data"]) + vr_pts.append(vr_frame["pts"]) 
+ + # same number of frames + assert len(vr_frames) == len(av_frames) + assert len(vr_pts) == len(av_pts) + + # compare the frames and ptss + for i in range(len(vr_frames)): + assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1) + + mean_delta = torch.mean(torch.abs(av_frames[i].float() - vr_frames[i].float())) + # on average the difference is very small and caused + # by decoding (around 1%) + # TODO: asses empirically how to set this? atm it's 1% + # averaged over all frames + assert mean_delta.item() < 2.55 + + del vr_frames, av_frames, vr_pts, av_pts + + # test audio reading compared to PYAV + with av.open(full_path) as av_reader: + if av_reader.streams.audio: + av_frames, vr_frames = [], [] + av_pts, vr_pts = [], [] + # get av frames + for av_frame in av_reader.decode(av_reader.streams.audio[0]): + av_frames.append(torch.tensor(av_frame.to_ndarray()).permute(1, 0)) + av_pts.append(av_frame.pts * av_frame.time_base) + av_reader.close() + + # get vr frames + video_reader = VideoReader(full_path, "audio") + for vr_frame in video_reader: + vr_frames.append(vr_frame["data"]) + vr_pts.append(vr_frame["pts"]) + + # same number of frames + assert len(vr_frames) == len(av_frames) + assert len(vr_pts) == len(av_pts) + + # compare the frames and ptss + for i in range(len(vr_frames)): + assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1) + max_delta = torch.max(torch.abs(av_frames[i].float() - vr_frames[i].float())) + # we assure that there is never more than 1% difference in signal + assert max_delta.item() < 0.001 + + @pytest.mark.parametrize("stream", ["video", "audio"]) + @pytest.mark.parametrize("test_video", test_videos.keys()) + @pytest.mark.parametrize("backend", backends()) + def test_frame_reading_mem_vs_file(self, test_video, stream, backend): + torchvision.set_video_backend(backend) + full_path = os.path.join(VIDEO_DIR, test_video) + + reader = VideoReader(full_path) + reader_md = reader.get_metadata() + + if stream in reader_md: + # Test video reading from 
file vs from memory + vr_frames, vr_frames_mem = [], [] + vr_pts, vr_pts_mem = [], [] + # get vr frames + video_reader = VideoReader(full_path, stream) + for vr_frame in video_reader: + vr_frames.append(vr_frame["data"]) + vr_pts.append(vr_frame["pts"]) + + # get vr frames = read from memory + f = open(full_path, "rb") + fbytes = f.read() + f.close() + video_reader_from_mem = VideoReader(fbytes, stream) + + for vr_frame_from_mem in video_reader_from_mem: + vr_frames_mem.append(vr_frame_from_mem["data"]) + vr_pts_mem.append(vr_frame_from_mem["pts"]) + + # same number of frames + assert len(vr_frames) == len(vr_frames_mem) + assert len(vr_pts) == len(vr_pts_mem) + + # compare the frames and ptss + for i in range(len(vr_frames)): + assert vr_pts[i] == vr_pts_mem[i] + mean_delta = torch.mean(torch.abs(vr_frames[i].float() - vr_frames_mem[i].float())) + # on average the difference is very small and caused + # by decoding (around 1%) + # TODO: asses empirically how to set this? atm it's 1% + # averaged over all frames + assert mean_delta.item() < 2.55 + + del vr_frames, vr_pts, vr_frames_mem, vr_pts_mem + else: + del reader, reader_md + + @pytest.mark.parametrize("test_video,config", test_videos.items()) + @pytest.mark.parametrize("backend", backends()) + def test_metadata(self, test_video, config, backend): + """ + Test that the metadata returned via pyav corresponds to the one returned + by the new video decoder API + """ + torchvision.set_video_backend(backend) + full_path = os.path.join(VIDEO_DIR, test_video) + reader = VideoReader(full_path, "video") + reader_md = reader.get_metadata() + assert config.video_fps == approx(reader_md["video"]["fps"][0], abs=0.0001) + assert config.duration == approx(reader_md["video"]["duration"][0], abs=0.5) + + @pytest.mark.parametrize("test_video", test_videos.keys()) + @pytest.mark.parametrize("backend", backends()) + def test_seek_start(self, test_video, backend): + torchvision.set_video_backend(backend) + full_path = 
os.path.join(VIDEO_DIR, test_video) + video_reader = VideoReader(full_path, "video") + num_frames = 0 + for _ in video_reader: + num_frames += 1 + + # now seek the container to 0 and do it again + # It's often that starting seek can be inprecise + # this way and it doesn't start at 0 + video_reader.seek(0) + start_num_frames = 0 + for _ in video_reader: + start_num_frames += 1 + + assert start_num_frames == num_frames + + # now seek the container to < 0 to check for unexpected behaviour + video_reader.seek(-1) + start_num_frames = 0 + for _ in video_reader: + start_num_frames += 1 + + assert start_num_frames == num_frames + + @pytest.mark.parametrize("test_video", test_videos.keys()) + @pytest.mark.parametrize("backend", ["video_reader"]) + def test_accurateseek_middle(self, test_video, backend): + torchvision.set_video_backend(backend) + full_path = os.path.join(VIDEO_DIR, test_video) + stream = "video" + video_reader = VideoReader(full_path, stream) + md = video_reader.get_metadata() + duration = md[stream]["duration"][0] + if duration is not None: + num_frames = 0 + for _ in video_reader: + num_frames += 1 + + video_reader.seek(duration / 2) + middle_num_frames = 0 + for _ in video_reader: + middle_num_frames += 1 + + assert middle_num_frames < num_frames + assert middle_num_frames == approx(num_frames // 2, abs=1) + + video_reader.seek(duration / 2) + frame = next(video_reader) + lb = duration / 2 - 1 / md[stream]["fps"][0] + ub = duration / 2 + 1 / md[stream]["fps"][0] + assert (lb <= frame["pts"]) and (ub >= frame["pts"]) + + def test_fate_suite(self): + # TODO: remove the try-except statement once the connectivity issues are resolved + try: + video_path = fate("sub/MovText_capability_tester.mp4", VIDEO_DIR) + except (urllib.error.URLError, ConnectionError) as error: + pytest.skip(f"Skipping due to connectivity issues: {error}") + vr = VideoReader(video_path) + metadata = vr.get_metadata() + + assert metadata["subtitles"]["duration"] is not None + 
os.remove(video_path) + + @pytest.mark.skipif(av is None, reason="PyAV unavailable") + @pytest.mark.parametrize("test_video,config", test_videos.items()) + @pytest.mark.parametrize("backend", backends()) + def test_keyframe_reading(self, test_video, config, backend): + torchvision.set_video_backend(backend) + full_path = os.path.join(VIDEO_DIR, test_video) + + av_reader = av.open(full_path) + # reduce streams to only keyframes + av_stream = av_reader.streams.video[0] + av_stream.codec_context.skip_frame = "NONKEY" + + av_keyframes = [] + vr_keyframes = [] + if av_reader.streams.video: + + # get all keyframes using pyav. Then, seek randomly into video reader + # and assert that all the returned values are in AV_KEYFRAMES + + for av_frame in av_reader.decode(av_stream): + av_keyframes.append(float(av_frame.pts * av_frame.time_base)) + + if len(av_keyframes) > 1: + video_reader = VideoReader(full_path, "video") + for i in range(1, len(av_keyframes)): + seek_val = (av_keyframes[i] + av_keyframes[i - 1]) / 2 + data = next(video_reader.seek(seek_val, True)) + vr_keyframes.append(data["pts"]) + + data = next(video_reader.seek(config.duration, True)) + vr_keyframes.append(data["pts"]) + + assert len(av_keyframes) == len(vr_keyframes) + # NOTE: this video gets different keyframe with different + # loaders (0.333 pyav, 0.666 for us) + if test_video != "TrumanShow_wave_f_nm_np1_fr_med_26.avi": + for i in range(len(av_keyframes)): + assert av_keyframes[i] == approx(vr_keyframes[i], rel=0.001) + + def test_src(self): + with pytest.raises(ValueError, match="src cannot be empty"): + VideoReader(src="") + with pytest.raises(ValueError, match="src must be either string"): + VideoReader(src=2) + with pytest.raises(TypeError, match="unexpected keyword argument"): + VideoReader(path="path") + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/torchvision/__init__.py b/torchvision/__init__.py index ca155712671..dd1e4ea6e94 100644 --- a/torchvision/__init__.py +++ 
b/torchvision/__init__.py @@ -1,20 +1,32 @@ +import os import warnings +from modulefinder import Module -from torchvision import models -from torchvision import datasets -from torchvision import ops -from torchvision import transforms -from torchvision import utils -from torchvision import io +import torch -from .extension import _HAS_OPS +# Don't re-order these, we need to load the _C extension (done when importing +# .extensions) before entering _meta_registrations. +from .extension import _HAS_OPS # usort:skip +from torchvision import _meta_registrations, datasets, io, models, ops, transforms, utils # usort:skip try: - from .version import __version__ # noqa: F401 + from .version import __version__ # @manual=fbcode//pytorch/vision:version except ImportError: pass -_image_backend = 'PIL' + +# Check if torchvision is being imported within the root folder +if not _HAS_OPS and os.path.dirname(os.path.realpath(__file__)) == os.path.join( + os.path.realpath(os.getcwd()), "torchvision" +): + message = ( + "You are importing torchvision within its own root folder ({}). " + "This is not expected to work and may give errors. Please exit the " + "torchvision project source and relaunch your python interpreter." + ) + warnings.warn(message.format(os.getcwd())) + +_image_backend = "PIL" _video_backend = "pyav" @@ -29,9 +41,8 @@ def set_image_backend(backend): generally faster than PIL, but does not support as many operations. """ global _image_backend - if backend not in ['PIL', 'accimage']: - raise ValueError("Invalid backend '{}'. Options are 'PIL' and 'accimage'" - .format(backend)) + if backend not in ["PIL", "accimage"]: + raise ValueError(f"Invalid backend '{backend}'. Options are 'PIL' and 'accimage'") _image_backend = backend @@ -49,26 +60,46 @@ def set_video_backend(backend): Args: backend (string): Name of the video backend. one of {'pyav', 'video_reader'}. The :mod:`pyav` package uses the 3rd party PyAv library. It is a Pythonic - binding for the FFmpeg libraries. 
- The :mod:`video_reader` package includes a native c++ implementation on - top of FFMPEG libraries, and a python API of TorchScript custom operator. - It is generally decoding faster than pyav, but perhaps is less robust. + binding for the FFmpeg libraries. + The :mod:`video_reader` package includes a native C++ implementation on + top of FFMPEG libraries, and a python API of TorchScript custom operator. + It generally decodes faster than :mod:`pyav`, but is perhaps less robust. + + .. note:: + Building with FFMPEG is disabled by default in the latest `main`. If you want to use the 'video_reader' + backend, please compile torchvision from source. """ global _video_backend - if backend not in ["pyav", "video_reader"]: - raise ValueError( - "Invalid video backend '%s'. Options are 'pyav' and 'video_reader'" % backend - ) - if backend == "video_reader" and not io._HAS_VIDEO_OPT: - warnings.warn("video_reader video backend is not available") + if backend not in ["pyav", "video_reader", "cuda"]: + raise ValueError("Invalid video backend '%s'. Options are 'pyav', 'video_reader' and 'cuda'" % backend) + if backend == "video_reader" and not io._HAS_CPU_VIDEO_DECODER: + # TODO: better messages + message = "video_reader video backend is not available. Please compile torchvision from source and try again" + raise RuntimeError(message) + elif backend == "cuda" and not io._HAS_GPU_VIDEO_DECODER: + # TODO: better messages + message = "cuda video backend is not available." + raise RuntimeError(message) else: _video_backend = backend def get_video_backend(): + """ + Returns the currently active video backend used to decode videos. + + Returns: + str: Name of the video backend. one of {'pyav', 'video_reader'}. + """ + return _video_backend def _is_tracing(): - import torch return torch._C._get_tracing_state() + + +def disable_beta_transforms_warning(): + # Noop, only exists to avoid breaking existing code. 
+ # See https://github.com/pytorch/vision/issues/7896 + pass diff --git a/torchvision/_internally_replaced_utils.py b/torchvision/_internally_replaced_utils.py new file mode 100644 index 00000000000..e0fa72489f1 --- /dev/null +++ b/torchvision/_internally_replaced_utils.py @@ -0,0 +1,51 @@ +import importlib.machinery +import os + +from torch.hub import _get_torch_home + + +_HOME = os.path.join(_get_torch_home(), "datasets", "vision") +_USE_SHARDED_DATASETS = False +IN_FBCODE = False + + +def _download_file_from_remote_location(fpath: str, url: str) -> None: + pass + + +def _is_remote_location_available() -> bool: + return False + + +try: + from torch.hub import load_state_dict_from_url # noqa: 401 +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url # noqa: 401 + + +def _get_extension_path(lib_name): + + lib_dir = os.path.dirname(__file__) + if os.name == "nt": + # Register the main torchvision library location on the default DLL path + import ctypes + + kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True) + with_load_library_flags = hasattr(kernel32, "AddDllDirectory") + prev_error_mode = kernel32.SetErrorMode(0x0001) + + if with_load_library_flags: + kernel32.AddDllDirectory.restype = ctypes.c_void_p + + os.add_dll_directory(lib_dir) + + kernel32.SetErrorMode(prev_error_mode) + + loader_details = (importlib.machinery.ExtensionFileLoader, importlib.machinery.EXTENSION_SUFFIXES) + + extfinder = importlib.machinery.FileFinder(lib_dir, loader_details) + ext_specs = extfinder.find_spec(lib_name) + if ext_specs is None: + raise ImportError + + return ext_specs.origin diff --git a/torchvision/_meta_registrations.py b/torchvision/_meta_registrations.py new file mode 100644 index 00000000000..f75bfb77a7f --- /dev/null +++ b/torchvision/_meta_registrations.py @@ -0,0 +1,225 @@ +import functools + +import torch +import torch._custom_ops +import torch.library + +# Ensure that torch.ops.torchvision is visible +import 
torchvision.extension # noqa: F401 + + +@functools.lru_cache(None) +def get_meta_lib(): + return torch.library.Library("torchvision", "IMPL", "Meta") + + +def register_meta(op_name, overload_name="default"): + def wrapper(fn): + if torchvision.extension._has_ops(): + get_meta_lib().impl(getattr(getattr(torch.ops.torchvision, op_name), overload_name), fn) + return fn + + return wrapper + + +@register_meta("roi_align") +def meta_roi_align(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as tensor for rois; " + f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + num_rois = rois.size(0) + channels = input.size(1) + return input.new_empty((num_rois, channels, pooled_height, pooled_width)) + + +@register_meta("_roi_align_backward") +def meta_roi_align_backward( + grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio, aligned +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@register_meta("ps_roi_align") +def meta_ps_roi_align(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as tensor for rois; " + f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + channels = input.size(1) + torch._check( + channels % (pooled_height * pooled_width) == 0, + "input channels must be a multiple of pooling height * pooling width", + ) + + num_rois = rois.size(0) + out_size 
= (num_rois, channels // (pooled_height * pooled_width), pooled_height, pooled_width) + return input.new_empty(out_size), torch.empty(out_size, dtype=torch.int32, device="meta") + + +@register_meta("_ps_roi_align_backward") +def meta_ps_roi_align_backward( + grad, + rois, + channel_mapping, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio, + batch_size, + channels, + height, + width, +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@register_meta("roi_pool") +def meta_roi_pool(input, rois, spatial_scale, pooled_height, pooled_width): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as tensor for rois; " + f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + num_rois = rois.size(0) + channels = input.size(1) + out_size = (num_rois, channels, pooled_height, pooled_width) + return input.new_empty(out_size), torch.empty(out_size, device="meta", dtype=torch.int32) + + +@register_meta("_roi_pool_backward") +def meta_roi_pool_backward( + grad, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@register_meta("ps_roi_pool") +def meta_ps_roi_pool(input, rois, spatial_scale, pooled_height, pooled_width): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as 
tensor for rois; " + f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + channels = input.size(1) + torch._check( + channels % (pooled_height * pooled_width) == 0, + "input channels must be a multiple of pooling height * pooling width", + ) + num_rois = rois.size(0) + out_size = (num_rois, channels // (pooled_height * pooled_width), pooled_height, pooled_width) + return input.new_empty(out_size), torch.empty(out_size, device="meta", dtype=torch.int32) + + +@register_meta("_ps_roi_pool_backward") +def meta_ps_roi_pool_backward( + grad, rois, channel_mapping, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@torch.library.register_fake("torchvision::nms") +def meta_nms(dets, scores, iou_threshold): + torch._check(dets.dim() == 2, lambda: f"boxes should be a 2d tensor, got {dets.dim()}D") + torch._check(dets.size(1) == 4, lambda: f"boxes should have 4 elements in dimension 1, got {dets.size(1)}") + torch._check(scores.dim() == 1, lambda: f"scores should be a 1d tensor, got {scores.dim()}") + torch._check( + dets.size(0) == scores.size(0), + lambda: f"boxes and scores should have same number of elements in dimension 0, got {dets.size(0)} and {scores.size(0)}", + ) + ctx = torch._custom_ops.get_ctx() + num_to_keep = ctx.create_unbacked_symint() + return dets.new_empty(num_to_keep, dtype=torch.long) + + +@register_meta("deform_conv2d") +def meta_deform_conv2d( + input, + weight, + offset, + mask, + bias, + stride_h, + stride_w, + pad_h, + pad_w, + dil_h, + dil_w, + n_weight_grps, + n_offset_grps, + use_mask, +): + + out_height, out_width = offset.shape[-2:] + out_channels = weight.shape[0] + batch_size = input.shape[0] + return input.new_empty((batch_size, 
out_channels, out_height, out_width)) + + +@register_meta("_deform_conv2d_backward") +def meta_deform_conv2d_backward( + grad, + input, + weight, + offset, + mask, + bias, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + groups, + offset_groups, + use_mask, +): + + grad_input = input.new_empty(input.shape) + grad_weight = weight.new_empty(weight.shape) + grad_offset = offset.new_empty(offset.shape) + grad_mask = mask.new_empty(mask.shape) + grad_bias = bias.new_empty(bias.shape) + return grad_input, grad_weight, grad_offset, grad_mask, grad_bias diff --git a/torchvision/_utils.py b/torchvision/_utils.py new file mode 100644 index 00000000000..b739ef0966e --- /dev/null +++ b/torchvision/_utils.py @@ -0,0 +1,32 @@ +import enum +from typing import Sequence, Type, TypeVar + +T = TypeVar("T", bound=enum.Enum) + + +class StrEnumMeta(enum.EnumMeta): + auto = enum.auto + + def from_str(self: Type[T], member: str) -> T: # type: ignore[misc] + try: + return self[member] + except KeyError: + # TODO: use `add_suggestion` from torchvision.prototype.utils._internal to improve the error message as + # soon as it is migrated. 
+ raise ValueError(f"Unknown value '{member}' for {self.__name__}.") from None + + +class StrEnum(enum.Enum, metaclass=StrEnumMeta): + pass + + +def sequence_to_str(seq: Sequence, separate_last: str = "") -> str: + if not seq: + return "" + if len(seq) == 1: + return f"'{seq[0]}'" + + head = "'" + "', '".join([str(item) for item in seq[:-1]]) + "'" + tail = f"{'' if separate_last and len(seq) == 2 else ','} {separate_last}'{seq[-1]}'" + + return head + tail diff --git a/torchvision/csrc/PSROIAlign.h b/torchvision/csrc/PSROIAlign.h deleted file mode 100644 index a5998df2891..00000000000 --- a/torchvision/csrc/PSROIAlign.h +++ /dev/null @@ -1,150 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "cuda/vision_cuda.h" -#endif - -#include - -std::tuple PSROIAlign_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIAlign_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIAlign_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -at::Tensor PSROIAlign_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const int batch_size, - const int channels, - const int height, - const int width) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIAlign_backward_cuda( - grad, - rois, - mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - batch_size, - channels, - height, - width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIAlign_backward_cpu( - grad, - rois, - 
mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - batch_size, - channels, - height, - width); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class PSROIAlignFunction - : public torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["sampling_ratio"] = sampling_ratio; - ctx->saved_data["input_shape"] = input.sizes(); - auto result = PSROIAlign_forward( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); - auto output = std::get<0>(result); - auto channel_mapping = std::get<1>(result); - ctx->save_for_backward({rois, channel_mapping}); - ctx->mark_non_differentiable({channel_mapping}); - return {output, channel_mapping}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = ctx->get_saved_variables(); - auto rois = saved[0]; - auto channel_mapping = saved[1]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = PSROIAlign_backward( - grad_output[0], - rois, - channel_mapping, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - ctx->saved_data["sampling_ratio"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3]); - return { - grad_in, Variable(), Variable(), Variable(), Variable(), Variable()}; - } -}; - -std::tuple ps_roi_align( - const Tensor& input, - const Tensor& rois, - const double 
spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - auto result = PSROIAlignFunction::apply( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); - return std::tuple(result[0], result[1]); -} diff --git a/torchvision/csrc/PSROIPool.h b/torchvision/csrc/PSROIPool.h deleted file mode 100644 index c67ce92f54e..00000000000 --- a/torchvision/csrc/PSROIPool.h +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "cuda/vision_cuda.h" -#endif - -std::tuple PSROIPool_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIPool_forward_cuda( - input, rois, spatial_scale, pooled_height, pooled_width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIPool_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width); -} - -at::Tensor PSROIPool_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIPool_backward_cuda( - grad, - rois, - mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIPool_backward_cpu( - grad, - rois, - mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class PSROIPoolFunction : public 
torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["input_shape"] = input.sizes(); - auto result = PSROIPool_forward( - input, rois, spatial_scale, pooled_height, pooled_width); - auto output = std::get<0>(result); - auto channel_mapping = std::get<1>(result); - ctx->save_for_backward({rois, channel_mapping}); - ctx->mark_non_differentiable({channel_mapping}); - return {output, channel_mapping}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = ctx->get_saved_variables(); - auto rois = saved[0]; - auto channel_mapping = saved[1]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = PSROIPool_backward( - grad_output[0], - rois, - channel_mapping, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3]); - return {grad_in, Variable(), Variable(), Variable(), Variable()}; - } -}; - -std::tuple ps_roi_pool( - const Tensor& input, - const Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - auto result = PSROIPoolFunction::apply( - input, rois, spatial_scale, pooled_height, pooled_width); - return std::tuple(result[0], result[1]); -} diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h deleted file mode 100644 index 765d4879d99..00000000000 --- a/torchvision/csrc/ROIAlign.h +++ /dev/null @@ -1,147 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include 
"cuda/vision_cuda.h" -#endif - -// Interface for Python -at::Tensor ROIAlign_forward( - const at::Tensor& input, // Input feature map. - const at::Tensor& rois, // List of ROIs to pool over. - const double spatial_scale, // The scale of the image features. ROIs will be - // scaled to this. - const int64_t pooled_height, // The height of the pooled feature map. - const int64_t pooled_width, // The width of the pooled feature - const int64_t sampling_ratio) // The number of points to sample in each bin -// along each axis. -{ - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -at::Tensor ROIAlign_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_backward_cuda( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_backward_cpu( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class ROIAlignFunction : public torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t 
pooled_width, - const int64_t sampling_ratio) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["sampling_ratio"] = sampling_ratio; - ctx->saved_data["input_shape"] = input.sizes(); - ctx->save_for_backward({rois}); - auto result = ROIAlign_forward( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); - return {result}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = ctx->get_saved_variables(); - auto rois = saved[0]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = ROIAlign_backward( - grad_output[0], - rois, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3], - ctx->saved_data["sampling_ratio"].toInt()); - return { - grad_in, Variable(), Variable(), Variable(), Variable(), Variable()}; - } -}; - -Tensor roi_align( - const Tensor& input, - const Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - return ROIAlignFunction::apply( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio)[0]; -} diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/ROIPool.h deleted file mode 100644 index 79b40293176..00000000000 --- a/torchvision/csrc/ROIPool.h +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "cuda/vision_cuda.h" -#endif - -std::tuple ROIPool_forward( - const at::Tensor& input, - const at::Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return 
ROIPool_forward_cuda( - input, rois, spatial_scale, pooled_height, pooled_width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIPool_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width); -} - -at::Tensor ROIPool_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIPool_backward_cuda( - grad, - rois, - argmax, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIPool_backward_cpu( - grad, - rois, - argmax, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class ROIPoolFunction : public torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["input_shape"] = input.sizes(); - auto result = ROIPool_forward( - input, rois, spatial_scale, pooled_height, pooled_width); - auto output = std::get<0>(result); - auto argmax = std::get<1>(result); - ctx->save_for_backward({rois, argmax}); - ctx->mark_non_differentiable({argmax}); - return {output, argmax}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = 
ctx->get_saved_variables(); - auto rois = saved[0]; - auto argmax = saved[1]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = ROIPool_backward( - grad_output[0], - rois, - argmax, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3]); - return {grad_in, Variable(), Variable(), Variable(), Variable()}; - } -}; - -std::tuple roi_pool( - const Tensor& input, - const Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - auto result = ROIPoolFunction::apply( - input, rois, spatial_scale, pooled_height, pooled_width); - return std::tuple(result[0], result[1]); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.cpp b/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.cpp deleted file mode 100644 index 24aecacf946..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include "FfmpegAudioSampler.h" -#include -#include "FfmpegUtil.h" - -using namespace std; - -FfmpegAudioSampler::FfmpegAudioSampler( - const AudioFormat& in, - const AudioFormat& out) - : inFormat_(in), outFormat_(out) {} - -FfmpegAudioSampler::~FfmpegAudioSampler() { - if (swrContext_) { - swr_free(&swrContext_); - } -} - -int FfmpegAudioSampler::init() { - swrContext_ = swr_alloc_set_opts( - nullptr, // we're allocating a new context - av_get_default_channel_layout(outFormat_.channels), // out_ch_layout - static_cast(outFormat_.format), // out_sample_fmt - outFormat_.samples, // out_sample_rate - av_get_default_channel_layout(inFormat_.channels), // in_ch_layout - static_cast(inFormat_.format), // in_sample_fmt - inFormat_.samples, // in_sample_rate - 0, // log_offset - nullptr); // log_ctx - if (swrContext_ == nullptr) { - LOG(ERROR) << "swr_alloc_set_opts fails"; - return -1; - } - int result = 
0; - if ((result = swr_init(swrContext_)) < 0) { - LOG(ERROR) << "swr_init failed, err: " << ffmpeg_util::getErrorDesc(result) - << ", in -> format: " << inFormat_.format - << ", channels: " << inFormat_.channels - << ", samples: " << inFormat_.samples - << ", out -> format: " << outFormat_.format - << ", channels: " << outFormat_.channels - << ", samples: " << outFormat_.samples; - return -1; - } - return 0; -} - -int64_t FfmpegAudioSampler::getSampleBytes(const AVFrame* frame) const { - auto outSamples = getOutNumSamples(frame->nb_samples); - - return av_samples_get_buffer_size( - nullptr, - outFormat_.channels, - outSamples, - static_cast(outFormat_.format), - 1); -} - -// https://www.ffmpeg.org/doxygen/3.2/group__lswr.html -unique_ptr FfmpegAudioSampler::sample(const AVFrame* frame) { - if (!frame) { - return nullptr; // no flush for videos - } - - auto inNumSamples = frame->nb_samples; - auto outNumSamples = getOutNumSamples(frame->nb_samples); - - auto outSampleSize = getSampleBytes(frame); - AvDataPtr frameData(static_cast(av_malloc(outSampleSize))); - - uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS]; - int result = 0; - if ((result = av_samples_fill_arrays( - outPlanes, - nullptr, // linesize is not needed - frameData.get(), - outFormat_.channels, - outNumSamples, - static_cast(outFormat_.format), - 1)) < 0) { - LOG(ERROR) << "av_samples_fill_arrays failed, err: " - << ffmpeg_util::getErrorDesc(result) - << ", outNumSamples: " << outNumSamples - << ", format: " << outFormat_.format; - return nullptr; - } - - if ((result = swr_convert( - swrContext_, - &outPlanes[0], - outNumSamples, - (const uint8_t**)&frame->data[0], - inNumSamples)) < 0) { - LOG(ERROR) << "swr_convert faield, err: " - << ffmpeg_util::getErrorDesc(result); - return nullptr; - } - // result returned by swr_convert is the No. of actual output samples. 
- // So update the buffer size using av_samples_get_buffer_size - result = av_samples_get_buffer_size( - nullptr, - outFormat_.channels, - result, - static_cast(outFormat_.format), - 1); - - return make_unique(std::move(frameData), result, 0); -} -/* -Because of decoding delay, the returned value is an upper bound of No. of -output samples -*/ -int64_t FfmpegAudioSampler::getOutNumSamples(int inNumSamples) const { - return av_rescale_rnd( - swr_get_delay(swrContext_, inFormat_.samples) + inNumSamples, - outFormat_.samples, - inFormat_.samples, - AV_ROUND_UP); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.h b/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.h deleted file mode 100644 index 767a5ca6e4f..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "FfmpegSampler.h" - -#define AVRESAMPLE_MAX_CHANNELS 32 - -/** - * Class transcode audio frames from one format into another - */ -class FfmpegAudioSampler : public FfmpegSampler { - public: - explicit FfmpegAudioSampler(const AudioFormat& in, const AudioFormat& out); - ~FfmpegAudioSampler() override; - - int init() override; - - int64_t getSampleBytes(const AVFrame* frame) const; - // FfmpegSampler overrides - // returns number of bytes of the sampled data - std::unique_ptr sample(const AVFrame* frame) override; - - const AudioFormat& getInFormat() const { - return inFormat_; - } - - private: - int64_t getOutNumSamples(int inNumSamples) const; - - AudioFormat inFormat_; - AudioFormat outFormat_; - SwrContext* swrContext_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.cpp b/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.cpp deleted file mode 100644 index b5b1e2fbda5..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "FfmpegAudioStream.h" -#include "FfmpegUtil.h" - -using namespace std; - -namespace { - -bool 
operator==(const AudioFormat& x, const AVCodecContext& y) { - return x.samples == y.sample_rate && x.channels == y.channels && - x.format == y.sample_fmt; -} - -AudioFormat& toAudioFormat( - AudioFormat& audioFormat, - const AVCodecContext& codecCtx) { - audioFormat.samples = codecCtx.sample_rate; - audioFormat.channels = codecCtx.channels; - audioFormat.format = codecCtx.sample_fmt; - - return audioFormat; -} - -} // namespace - -FfmpegAudioStream::FfmpegAudioStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin) - : FfmpegStream(inputCtx, index, avMediaType, seekFrameMargin), - mediaFormat_(mediaFormat) {} - -FfmpegAudioStream::~FfmpegAudioStream() {} - -void FfmpegAudioStream::checkStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first > 0) { - CHECK_EQ(timeBase.first, inputCtx_->streams[index_]->time_base.num); - CHECK_EQ(timeBase.second, inputCtx_->streams[index_]->time_base.den); - } -} - -void FfmpegAudioStream::updateStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first == 0) { - mediaFormat_.format.audio.timeBaseNum = - inputCtx_->streams[index_]->time_base.num; - mediaFormat_.format.audio.timeBaseDen = - inputCtx_->streams[index_]->time_base.den; - } - mediaFormat_.format.audio.duration = inputCtx_->streams[index_]->duration; -} - -int FfmpegAudioStream::initFormat() { - AudioFormat& format = mediaFormat_.format.audio; - - if (format.samples == 0) { - format.samples = codecCtx_->sample_rate; - } - if (format.channels == 0) { - format.channels = codecCtx_->channels; - } - if (format.format == AV_SAMPLE_FMT_NONE) { - format.format = codecCtx_->sample_fmt; - VLOG(2) << "set stream format sample_fmt: " << format.format; - } - - checkStreamDecodeParams(); - - updateStreamDecodeParams(); - - if (format.samples > 0 && format.channels > 0 && - format.format != AV_SAMPLE_FMT_NONE) { - return 0; - } else { - return -1; - } -} - -unique_ptr 
FfmpegAudioStream::sampleFrameData() { - AudioFormat& audioFormat = mediaFormat_.format.audio; - - if (!sampler_ || !(sampler_->getInFormat() == *codecCtx_)) { - AudioFormat newInFormat; - newInFormat = toAudioFormat(newInFormat, *codecCtx_); - sampler_ = make_unique(newInFormat, audioFormat); - VLOG(1) << "Set sampler input audio format" - << ", samples: " << newInFormat.samples - << ", channels: " << newInFormat.channels - << ", format: " << newInFormat.format - << " : output audio sampler format" - << ", samples: " << audioFormat.samples - << ", channels: " << audioFormat.channels - << ", format: " << audioFormat.format; - int ret = sampler_->init(); - if (ret < 0) { - VLOG(1) << "Fail to initialize audio sampler"; - return nullptr; - } - } - return sampler_->sample(frame_); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.h b/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.h deleted file mode 100644 index 1d4f7a2f2ee..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include "FfmpegAudioSampler.h" -#include "FfmpegStream.h" - -/** - * Class uses FFMPEG library to decode one video stream. 
- */ -class FfmpegAudioStream : public FfmpegStream { - public: - explicit FfmpegAudioStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin); - - ~FfmpegAudioStream() override; - - // FfmpegStream overrides - MediaType getMediaType() const override { - return MediaType::TYPE_AUDIO; - } - - FormatUnion getMediaFormat() const override { - return mediaFormat_.format; - } - - int64_t getStartPts() const override { - return mediaFormat_.format.audio.startPts; - } - int64_t getEndPts() const override { - return mediaFormat_.format.audio.endPts; - } - // return numerator and denominator of time base - std::pair getTimeBase() const { - return std::make_pair( - mediaFormat_.format.audio.timeBaseNum, - mediaFormat_.format.audio.timeBaseDen); - } - - void checkStreamDecodeParams(); - - void updateStreamDecodeParams(); - - protected: - int initFormat() override; - std::unique_ptr sampleFrameData() override; - - private: - MediaFormat mediaFormat_; - std::unique_ptr sampler_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.cpp b/torchvision/csrc/cpu/video_reader/FfmpegDecoder.cpp deleted file mode 100644 index fb4d302cc03..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.cpp +++ /dev/null @@ -1,412 +0,0 @@ -#include "FfmpegDecoder.h" -#include "FfmpegAudioStream.h" -#include "FfmpegUtil.h" -#include "FfmpegVideoStream.h" - -using namespace std; - -static AVPacket avPkt; - -namespace { - -unique_ptr createFfmpegStream( - MediaType type, - AVFormatContext* ctx, - int idx, - MediaFormat& mediaFormat, - double seekFrameMargin) { - enum AVMediaType avType; - CHECK(ffmpeg_util::mapMediaType(type, &avType)); - switch (type) { - case MediaType::TYPE_VIDEO: - return make_unique( - ctx, idx, avType, mediaFormat, seekFrameMargin); - case MediaType::TYPE_AUDIO: - return make_unique( - ctx, idx, avType, mediaFormat, seekFrameMargin); - default: - return nullptr; - } -} 
- -} // namespace - -FfmpegAvioContext::FfmpegAvioContext() - : workBuffersize_(VIO_BUFFER_SZ), - workBuffer_((uint8_t*)av_malloc(workBuffersize_)), - inputFile_(nullptr), - inputBuffer_(nullptr), - inputBufferSize_(0) {} - -int FfmpegAvioContext::initAVIOContext(const uint8_t* buffer, int64_t size) { - inputBuffer_ = buffer; - inputBufferSize_ = size; - avioCtx_ = avio_alloc_context( - workBuffer_, - workBuffersize_, - 0, - reinterpret_cast(this), - &FfmpegAvioContext::readMemory, - nullptr, // no write function - &FfmpegAvioContext::seekMemory); - return 0; -} - -FfmpegAvioContext::~FfmpegAvioContext() { - /* note: the internal buffer could have changed, and be != workBuffer_ */ - if (avioCtx_) { - av_freep(&avioCtx_->buffer); - av_freep(&avioCtx_); - } else { - av_freep(&workBuffer_); - } - if (inputFile_) { - fclose(inputFile_); - } -} - -int FfmpegAvioContext::read(uint8_t* buf, int buf_size) { - if (inputBuffer_) { - return readMemory(this, buf, buf_size); - } else { - return -1; - } -} - -int FfmpegAvioContext::readMemory(void* opaque, uint8_t* buf, int buf_size) { - FfmpegAvioContext* h = static_cast(opaque); - if (buf_size < 0) { - return -1; - } - - int reminder = h->inputBufferSize_ - h->offset_; - int r = buf_size < reminder ? 
buf_size : reminder; - if (r < 0) { - return AVERROR_EOF; - } - - memcpy(buf, h->inputBuffer_ + h->offset_, r); - h->offset_ += r; - return r; -} - -int64_t FfmpegAvioContext::seek(int64_t offset, int whence) { - if (inputBuffer_) { - return seekMemory(this, offset, whence); - } else { - return -1; - } -} - -int64_t FfmpegAvioContext::seekMemory( - void* opaque, - int64_t offset, - int whence) { - FfmpegAvioContext* h = static_cast(opaque); - switch (whence) { - case SEEK_CUR: // from current position - h->offset_ += offset; - break; - case SEEK_END: // from eof - h->offset_ = h->inputBufferSize_ + offset; - break; - case SEEK_SET: // from beginning of file - h->offset_ = offset; - break; - case AVSEEK_SIZE: - return h->inputBufferSize_; - } - return h->offset_; -} - -int FfmpegDecoder::init( - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput) { - cleanUp(); - - int ret = 0; - if (!isDecodeFile) { - formatCtx_ = avformat_alloc_context(); - if (!formatCtx_) { - LOG(ERROR) << "avformat_alloc_context failed"; - return -1; - } - formatCtx_->pb = ioctx.get_avio(); - formatCtx_->flags |= AVFMT_FLAG_CUSTOM_IO; - - // Determining the input format: - int probeSz = AVPROBE_SIZE + AVPROBE_PADDING_SIZE; - uint8_t* probe((uint8_t*)av_malloc(probeSz)); - memset(probe, 0, probeSz); - int len = ioctx.read(probe, probeSz - AVPROBE_PADDING_SIZE); - if (len < probeSz - AVPROBE_PADDING_SIZE) { - LOG(ERROR) << "Insufficient data to determine video format"; - av_freep(&probe); - return -1; - } - // seek back to start of stream - ioctx.seek(0, SEEK_SET); - - unique_ptr probeData(new AVProbeData()); - probeData->buf = probe; - probeData->buf_size = len; - probeData->filename = ""; - // Determine the input-format: - formatCtx_->iformat = av_probe_input_format(probeData.get(), 1); - // this is to avoid the double-free error - if (formatCtx_->iformat == nullptr) { - LOG(ERROR) << "av_probe_input_format fails"; - return -1; - } - 
VLOG(1) << "av_probe_input_format succeeds"; - av_freep(&probe); - - ret = avformat_open_input(&formatCtx_, "", nullptr, nullptr); - } else { - ret = avformat_open_input(&formatCtx_, filename.c_str(), nullptr, nullptr); - } - - if (ret < 0) { - LOG(ERROR) << "avformat_open_input failed, error: " - << ffmpeg_util::getErrorDesc(ret); - cleanUp(); - return ret; - } - ret = avformat_find_stream_info(formatCtx_, nullptr); - if (ret < 0) { - LOG(ERROR) << "avformat_find_stream_info failed, error: " - << ffmpeg_util::getErrorDesc(ret); - cleanUp(); - return ret; - } - if (!initStreams()) { - LOG(ERROR) << "Cannot activate streams"; - cleanUp(); - return -1; - } - - for (auto& stream : streams_) { - MediaType mediaType = stream.second->getMediaType(); - decoderOutput.initMediaType(mediaType, stream.second->getMediaFormat()); - } - VLOG(1) << "FfmpegDecoder initialized"; - return 0; -} - -int FfmpegDecoder::decodeFile( - unique_ptr params, - const string& fileName, - DecoderOutput& decoderOutput) { - VLOG(1) << "decode file: " << fileName; - FfmpegAvioContext ioctx; - int ret = decodeLoop(std::move(params), fileName, true, ioctx, decoderOutput); - return ret; -} - -int FfmpegDecoder::decodeMemory( - unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput) { - VLOG(1) << "decode video data in memory"; - FfmpegAvioContext ioctx; - int ret = ioctx.initAVIOContext(buffer, size); - if (ret == 0) { - ret = - decodeLoop(std::move(params), string(""), false, ioctx, decoderOutput); - } - return ret; -} - -int FfmpegDecoder::probeFile( - unique_ptr params, - const string& fileName, - DecoderOutput& decoderOutput) { - VLOG(1) << "probe file: " << fileName; - FfmpegAvioContext ioctx; - return probeVideo(std::move(params), fileName, true, ioctx, decoderOutput); -} - -int FfmpegDecoder::probeMemory( - unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput) { - VLOG(1) << "probe video data in memory"; - 
FfmpegAvioContext ioctx; - int ret = ioctx.initAVIOContext(buffer, size); - if (ret == 0) { - ret = - probeVideo(std::move(params), string(""), false, ioctx, decoderOutput); - } - return ret; -} - -void FfmpegDecoder::cleanUp() { - if (formatCtx_) { - for (auto& stream : streams_) { - // Drain stream buffers. - DecoderOutput decoderOutput; - stream.second->flush(1, decoderOutput); - stream.second.reset(); - } - streams_.clear(); - avformat_close_input(&formatCtx_); - } -} - -FfmpegStream* FfmpegDecoder::findStreamByIndex(int streamIndex) const { - auto it = streams_.find(streamIndex); - return it != streams_.end() ? it->second.get() : nullptr; -} - -/* -Reference implementation: -https://ffmpeg.org/doxygen/3.4/demuxing_decoding_8c-example.html -*/ -int FfmpegDecoder::decodeLoop( - unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput) { - params_ = std::move(params); - - int ret = init(filename, isDecodeFile, ioctx, decoderOutput); - if (ret < 0) { - return ret; - } - // init package - av_init_packet(&avPkt); - avPkt.data = nullptr; - avPkt.size = 0; - - int result = 0; - bool ptsInRange = true; - while (ptsInRange) { - result = av_read_frame(formatCtx_, &avPkt); - if (result == AVERROR(EAGAIN)) { - VLOG(1) << "Decoder is busy"; - ret = 0; - break; - } else if (result == AVERROR_EOF) { - VLOG(1) << "Stream decoding is completed"; - ret = 0; - break; - } else if (result < 0) { - VLOG(1) << "av_read_frame fails. Break decoder loop. Error: " - << ffmpeg_util::getErrorDesc(result); - ret = result; - break; - } - - ret = 0; - auto stream = findStreamByIndex(avPkt.stream_index); - if (stream == nullptr) { - // the packet is from a stream the caller is not interested. Ignore it - VLOG(2) << "avPkt ignored. stream index: " << avPkt.stream_index; - // Need to free the memory of AVPacket. 
Otherwise, memory leak happens - av_packet_unref(&avPkt); - continue; - } - - do { - result = stream->sendPacket(&avPkt); - if (result == AVERROR(EAGAIN)) { - VLOG(2) << "avcodec_send_packet returns AVERROR(EAGAIN)"; - // start to recevie available frames from internal buffer - stream->receiveAvailFrames(params_->getPtsOnly, decoderOutput); - if (isPtsExceedRange()) { - // exit the most-outer while loop - VLOG(1) << "In all streams, exceed the end pts. Exit decoding loop"; - ret = 0; - ptsInRange = false; - break; - } - } else if (result < 0) { - LOG(WARNING) << "avcodec_send_packet failed. Error: " - << ffmpeg_util::getErrorDesc(result); - ret = result; - break; - } else { - VLOG(2) << "avcodec_send_packet succeeds"; - // succeed. Read the next AVPacket and send out it - break; - } - } while (ptsInRange); - // Need to free the memory of AVPacket. Otherwise, memory leak happens - av_packet_unref(&avPkt); - } - /* flush cached frames */ - flushStreams(decoderOutput); - return ret; -} - -int FfmpegDecoder::probeVideo( - unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput) { - params_ = std::move(params); - return init(filename, isDecodeFile, ioctx, decoderOutput); -} - -bool FfmpegDecoder::initStreams() { - for (auto it = params_->formats.begin(); it != params_->formats.end(); ++it) { - AVMediaType mediaType; - if (!ffmpeg_util::mapMediaType(it->first, &mediaType)) { - LOG(ERROR) << "Unknown media type: " << it->first; - return false; - } - int streamIdx = - av_find_best_stream(formatCtx_, mediaType, -1, -1, nullptr, 0); - - if (streamIdx >= 0) { - VLOG(2) << "find stream index: " << streamIdx; - auto stream = createFfmpegStream( - it->first, - formatCtx_, - streamIdx, - it->second, - params_->seekFrameMargin); - - CHECK(stream); - if (stream->openCodecContext() < 0) { - LOG(ERROR) << "Cannot open codec. 
Stream index: " << streamIdx; - return false; - } - streams_.emplace(streamIdx, move(stream)); - } else { - VLOG(1) << "Cannot open find stream of type " << it->first; - } - } - // Seek frames in each stream - int ret = 0; - for (auto& stream : streams_) { - auto startPts = stream.second->getStartPts(); - VLOG(1) << "stream: " << stream.first << " startPts: " << startPts; - if (startPts > 0 && (ret = stream.second->seekFrame(startPts)) < 0) { - LOG(WARNING) << "seekFrame in stream fails"; - return false; - } - } - VLOG(1) << "initStreams succeeds"; - return true; -} - -bool FfmpegDecoder::isPtsExceedRange() { - bool exceed = true; - for (auto& stream : streams_) { - exceed = exceed && stream.second->isFramePtsExceedRange(); - } - return exceed; -} - -void FfmpegDecoder::flushStreams(DecoderOutput& decoderOutput) { - for (auto& stream : streams_) { - stream.second->flush(params_->getPtsOnly, decoderOutput); - } -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.h b/torchvision/csrc/cpu/video_reader/FfmpegDecoder.h deleted file mode 100644 index a0a564a4214..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -#include -#include - -#include "FfmpegHeaders.h" -#include "FfmpegStream.h" -#include "Interface.h" - -#define VIO_BUFFER_SZ 81920 -#define AVPROBE_SIZE 8192 - -class DecoderParameters { - public: - std::unordered_map formats; - // av_seek_frame is imprecise so seek to a timestamp earlier by a margin - // The unit of margin is second - double seekFrameMargin{1.0}; - // When getPtsOnly is set to 1, we only get pts of each frame and don not - // output frame data. 
It will be much faster - int64_t getPtsOnly{0}; -}; - -class FfmpegAvioContext { - public: - FfmpegAvioContext(); - - int initAVIOContext(const uint8_t* buffer, int64_t size); - - ~FfmpegAvioContext(); - - int read(uint8_t* buf, int buf_size); - - static int readMemory(void* opaque, uint8_t* buf, int buf_size); - - int64_t seek(int64_t offset, int whence); - - static int64_t seekMemory(void* opaque, int64_t offset, int whence); - - AVIOContext* get_avio() { - return avioCtx_; - } - - private: - int workBuffersize_; - uint8_t* workBuffer_; - // for file mode - FILE* inputFile_; - // for memory mode - const uint8_t* inputBuffer_; - int inputBufferSize_; - int offset_ = 0; - - AVIOContext* avioCtx_{nullptr}; -}; - -class FfmpegDecoder { - public: - FfmpegDecoder() { - av_register_all(); - } - ~FfmpegDecoder() { - cleanUp(); - } - // return 0 on success - // return negative number on failure - int decodeFile( - std::unique_ptr params, - const std::string& filename, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int decodeMemory( - std::unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int probeFile( - std::unique_ptr params, - const std::string& filename, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int probeMemory( - std::unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput); - - void cleanUp(); - - private: - FfmpegStream* findStreamByIndex(int streamIndex) const; - - int init( - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int decodeLoop( - std::unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput); - - int 
probeVideo( - std::unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput); - - bool initStreams(); - - void flushStreams(DecoderOutput& decoderOutput); - // whether in all streams, the pts of most recent frame exceeds range - bool isPtsExceedRange(); - - std::unordered_map> streams_; - AVFormatContext* formatCtx_{nullptr}; - std::unique_ptr params_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegHeaders.h b/torchvision/csrc/cpu/video_reader/FfmpegHeaders.h deleted file mode 100644 index ff26aa30a8d..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegHeaders.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -extern "C" { -#include -#include -#include -#include -#include -#include -#include -#include -#include -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegSampler.h b/torchvision/csrc/cpu/video_reader/FfmpegSampler.h deleted file mode 100644 index 3d00be3486f..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegSampler.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include "FfmpegHeaders.h" -#include "Interface.h" - -/** - * Class sample data from AVFrame - */ -class FfmpegSampler { - public: - virtual ~FfmpegSampler() = default; - // return 0 on success and negative number on failure - virtual int init() = 0; - // sample from the given frame - virtual std::unique_ptr sample(const AVFrame* frame) = 0; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegStream.cpp b/torchvision/csrc/cpu/video_reader/FfmpegStream.cpp deleted file mode 100644 index b745170baf4..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegStream.cpp +++ /dev/null @@ -1,188 +0,0 @@ -#include "FfmpegStream.h" -#include "FfmpegUtil.h" - -using namespace std; - -// (TODO) Currently, disable the use of refCount -static int refCount = 0; - -FfmpegStream::FfmpegStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - double seekFrameMargin) - : 
inputCtx_(inputCtx), - index_(index), - avMediaType_(avMediaType), - seekFrameMargin_(seekFrameMargin) {} - -FfmpegStream::~FfmpegStream() { - if (frame_) { - av_frame_free(&frame_); - } - avcodec_free_context(&codecCtx_); -} - -int FfmpegStream::openCodecContext() { - VLOG(2) << "stream start_time: " << inputCtx_->streams[index_]->start_time; - - auto typeString = av_get_media_type_string(avMediaType_); - AVStream* st = inputCtx_->streams[index_]; - auto codec_id = st->codecpar->codec_id; - VLOG(1) << "codec_id: " << codec_id; - AVCodec* codec = avcodec_find_decoder(codec_id); - if (!codec) { - LOG(ERROR) << "avcodec_find_decoder failed for codec_id: " << int(codec_id); - return AVERROR(EINVAL); - } - VLOG(1) << "Succeed to find decoder"; - - codecCtx_ = avcodec_alloc_context3(codec); - if (!codecCtx_) { - LOG(ERROR) << "avcodec_alloc_context3 fails"; - return AVERROR(ENOMEM); - } - - int ret; - /* Copy codec parameters from input stream to output codec context */ - if ((ret = avcodec_parameters_to_context(codecCtx_, st->codecpar)) < 0) { - LOG(ERROR) << "Failed to copy " << typeString - << " codec parameters to decoder context"; - return ret; - } - - AVDictionary* opts = nullptr; - av_dict_set(&opts, "refcounted_frames", refCount ? "1" : "0", 0); - - // after avcodec_open2, value of codecCtx_->time_base is NOT meaningful - // But inputCtx_->streams[index_]->time_base has meaningful values - if ((ret = avcodec_open2(codecCtx_, codec, &opts)) < 0) { - LOG(ERROR) << "avcodec_open2 failed. 
" << ffmpeg_util::getErrorDesc(ret); - return ret; - } - VLOG(1) << "Succeed to open codec"; - - frame_ = av_frame_alloc(); - return initFormat(); -} - -unique_ptr FfmpegStream::getFrameData(int getPtsOnly) { - if (!codecCtx_) { - LOG(ERROR) << "Codec is not initialized"; - return nullptr; - } - if (getPtsOnly) { - unique_ptr decodedFrame = make_unique(); - decodedFrame->pts_ = frame_->pts; - return decodedFrame; - } else { - unique_ptr decodedFrame = sampleFrameData(); - if (decodedFrame) { - decodedFrame->pts_ = frame_->pts; - } - return decodedFrame; - } -} - -void FfmpegStream::flush(int getPtsOnly, DecoderOutput& decoderOutput) { - VLOG(1) << "Media Type: " << getMediaType() << ", flush stream."; - // need to receive frames before entering draining mode - receiveAvailFrames(getPtsOnly, decoderOutput); - - VLOG(2) << "send nullptr packet"; - sendPacket(nullptr); - // receive remaining frames after entering draining mode - receiveAvailFrames(getPtsOnly, decoderOutput); - - avcodec_flush_buffers(codecCtx_); -} - -bool FfmpegStream::isFramePtsInRange() { - CHECK(frame_); - auto pts = frame_->pts; - auto startPts = this->getStartPts(); - auto endPts = this->getEndPts(); - VLOG(2) << "isPtsInRange. pts: " << pts << ", startPts: " << startPts - << ", endPts: " << endPts; - return (pts == AV_NOPTS_VALUE) || - (pts >= startPts && (endPts >= 0 ? pts <= endPts : true)); -} - -bool FfmpegStream::isFramePtsExceedRange() { - if (frame_) { - auto endPts = this->getEndPts(); - VLOG(2) << "isFramePtsExceedRange. last_pts_: " << last_pts_ - << ", endPts: " << endPts; - return endPts >= 0 ? last_pts_ >= endPts : false; - } else { - return true; - } -} - -// seek a frame -int FfmpegStream::seekFrame(int64_t seekPts) { - // translate margin from second to pts - int64_t margin = (int64_t)( - seekFrameMargin_ * (double)inputCtx_->streams[index_]->time_base.den / - (double)inputCtx_->streams[index_]->time_base.num); - int64_t real_seekPts = (seekPts - margin) > 0 ? 
(seekPts - margin) : 0; - VLOG(2) << "seek margin: " << margin; - VLOG(2) << "real seekPts: " << real_seekPts; - int ret = av_seek_frame( - inputCtx_, - index_, - (seekPts - margin) > 0 ? (seekPts - margin) : 0, - AVSEEK_FLAG_BACKWARD); - if (ret < 0) { - LOG(WARNING) << "av_seek_frame fails. Stream index: " << index_; - return ret; - } - return 0; -} - -// send/receive encoding and decoding API overview -// https://ffmpeg.org/doxygen/3.4/group__lavc__encdec.html -int FfmpegStream::sendPacket(const AVPacket* packet) { - return avcodec_send_packet(codecCtx_, packet); -} - -int FfmpegStream::receiveFrame() { - int ret = avcodec_receive_frame(codecCtx_, frame_); - if (ret >= 0) { - // succeed - frame_->pts = av_frame_get_best_effort_timestamp(frame_); - if (frame_->pts == AV_NOPTS_VALUE) { - // Trick: if we can not figure out pts, we just set it to be (last_pts + - // 1) - frame_->pts = last_pts_ + 1; - } - last_pts_ = frame_->pts; - - VLOG(2) << "avcodec_receive_frame succeed"; - } else if (ret == AVERROR(EAGAIN)) { - VLOG(2) << "avcodec_receive_frame fails and returns AVERROR(EAGAIN). "; - } else if (ret == AVERROR_EOF) { - // no more frame to read - VLOG(2) << "avcodec_receive_frame returns AVERROR_EOF"; - } else { - LOG(WARNING) << "avcodec_receive_frame failed. 
Error: " - << ffmpeg_util::getErrorDesc(ret); - } - return ret; -} - -void FfmpegStream::receiveAvailFrames( - int getPtsOnly, - DecoderOutput& decoderOutput) { - int result = 0; - while ((result = receiveFrame()) >= 0) { - unique_ptr decodedFrame = getFrameData(getPtsOnly); - - if (decodedFrame && - ((!getPtsOnly && decodedFrame->frameSize_ > 0) || getPtsOnly)) { - if (isFramePtsInRange()) { - decoderOutput.addMediaFrame(getMediaType(), std::move(decodedFrame)); - } - } // end-if - } // end-while -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegStream.h b/torchvision/csrc/cpu/video_reader/FfmpegStream.h deleted file mode 100644 index b66a36977ec..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegStream.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#pragma once - -#include -#include -#include -#include "FfmpegHeaders.h" -#include "Interface.h" - -/* -Class uses FFMPEG library to decode one media stream (audio or video). -*/ -class FfmpegStream { - public: - FfmpegStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - double seekFrameMargin); - virtual ~FfmpegStream(); - - // returns 0 - on success or negative error - int openCodecContext(); - // returns stream index - int getIndex() const { - return index_; - } - // returns number decoded/sampled bytes - std::unique_ptr getFrameData(int getPtsOnly); - // flush the stream at the end of decoding. 
- // Return 0 on success and -1 when cache is drained - void flush(int getPtsOnly, DecoderOutput& decoderOutput); - // seek a frame - int seekFrame(int64_t ts); - // send an AVPacket - int sendPacket(const AVPacket* packet); - // receive AVFrame - int receiveFrame(); - // receive all available frames from the internal buffer - void receiveAvailFrames(int getPtsOnly, DecoderOutput& decoderOutput); - // return media type - virtual MediaType getMediaType() const = 0; - // return media format - virtual FormatUnion getMediaFormat() const = 0; - // return start presentation timestamp - virtual int64_t getStartPts() const = 0; - // return end presentation timestamp - virtual int64_t getEndPts() const = 0; - // is the pts of most recent frame within range? - bool isFramePtsInRange(); - // does the pts of most recent frame exceed range? - bool isFramePtsExceedRange(); - - protected: - virtual int initFormat() = 0; - // returns a decoded frame - virtual std::unique_ptr sampleFrameData() = 0; - - protected: - AVFormatContext* const inputCtx_; - const int index_; - enum AVMediaType avMediaType_; - - AVCodecContext* codecCtx_{nullptr}; - AVFrame* frame_{nullptr}; - // pts of last decoded frame - int64_t last_pts_{0}; - double seekFrameMargin_{1.0}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegUtil.cpp b/torchvision/csrc/cpu/video_reader/FfmpegUtil.cpp deleted file mode 100644 index 9e804ee67c0..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegUtil.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "FfmpegUtil.h" - -using namespace std; - -namespace ffmpeg_util { - -bool mapFfmpegType(AVMediaType media, MediaType* type) { - switch (media) { - case AVMEDIA_TYPE_VIDEO: - *type = MediaType::TYPE_VIDEO; - return true; - case AVMEDIA_TYPE_AUDIO: - *type = MediaType::TYPE_AUDIO; - return true; - default: - return false; - } -} - -bool mapMediaType(MediaType type, AVMediaType* media) { - switch (type) { - case MediaType::TYPE_VIDEO: - *media = AVMEDIA_TYPE_VIDEO; - 
return true; - case MediaType::TYPE_AUDIO: - *media = AVMEDIA_TYPE_AUDIO; - return true; - default: - return false; - } -} - -void setFormatDimensions( - int& destW, - int& destH, - int userW, - int userH, - int srcW, - int srcH, - int minDimension) { - // rounding rules - // int -> double -> round - // round up if fraction is >= 0.5 or round down if fraction is < 0.5 - // int result = double(value) + 0.5 - // here we rounding double to int according to the above rule - if (userW == 0 && userH == 0) { - if (minDimension > 0) { // #2 - if (srcW > srcH) { - // landscape - destH = minDimension; - destW = round(double(srcW * minDimension) / srcH); - } else { - // portrait - destW = minDimension; - destH = round(double(srcH * minDimension) / srcW); - } - } else { // #1 - destW = srcW; - destH = srcH; - } - } else if (userW != 0 && userH == 0) { // #3 - destW = userW; - destH = round(double(srcH * userW) / srcW); - } else if (userW == 0 && userH != 0) { // #4 - destW = round(double(srcW * userH) / srcH); - destH = userH; - } else { - // userW != 0 && userH != 0. 
#5 - destW = userW; - destH = userH; - } - // prevent zeros - destW = std::max(destW, 1); - destH = std::max(destH, 1); -} - -bool validateVideoFormat(const VideoFormat& f) { - /* - Valid parameters values for decoder - ___________________________________________________ - | W | H | minDimension | algorithm | - |_________________________________________________| - | 0 | 0 | 0 | original | - |_________________________________________________| - | 0 | 0 | >0 |scale to min dimension| - |_____|_____|____________________________________ | - | >0 | 0 | 0 | scale keeping W | - |_________________________________________________| - | 0 | >0 | 0 | scale keeping H | - |_________________________________________________| - | >0 | >0 | 0 | stretch/scale | - |_________________________________________________| - - */ - return (f.width == 0 && f.height == 0) || // #1 and #2 - (f.width != 0 && f.height != 0 && f.minDimension == 0) || // # 5 - (((f.width != 0 && f.height == 0) || // #3 and #4 - (f.width == 0 && f.height != 0)) && - f.minDimension == 0); -} - -string getErrorDesc(int errnum) { - array buffer; - if (av_strerror(errnum, buffer.data(), buffer.size()) < 0) { - return string("Unknown error code"); - } - buffer.back() = 0; - return string(buffer.data()); -} - -} // namespace ffmpeg_util diff --git a/torchvision/csrc/cpu/video_reader/FfmpegUtil.h b/torchvision/csrc/cpu/video_reader/FfmpegUtil.h deleted file mode 100644 index 9f42eb53c97..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegUtil.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include -#include "FfmpegHeaders.h" -#include "Interface.h" - -namespace ffmpeg_util { - -bool mapFfmpegType(AVMediaType media, enum MediaType* type); - -bool mapMediaType(MediaType type, enum AVMediaType* media); - -void setFormatDimensions( - int& destW, - int& destH, - int userW, - int userH, - int srcW, - int srcH, - int minDimension); - -bool validateVideoFormat(const VideoFormat& f); - -std::string 
getErrorDesc(int errnum); - -} // namespace ffmpeg_util diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.cpp b/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.cpp deleted file mode 100644 index d87b3104dd5..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "FfmpegVideoSampler.h" -#include "FfmpegUtil.h" - -using namespace std; - -FfmpegVideoSampler::FfmpegVideoSampler( - const VideoFormat& in, - const VideoFormat& out, - int swsFlags) - : inFormat_(in), outFormat_(out), swsFlags_(swsFlags) {} - -FfmpegVideoSampler::~FfmpegVideoSampler() { - if (scaleContext_) { - sws_freeContext(scaleContext_); - scaleContext_ = nullptr; - } -} - -int FfmpegVideoSampler::init() { - VLOG(1) << "Input format: width " << inFormat_.width << ", height " - << inFormat_.height << ", format " << inFormat_.format - << ", minDimension " << inFormat_.minDimension; - VLOG(1) << "Scale format: width " << outFormat_.width << ", height " - << outFormat_.height << ", format " << outFormat_.format - << ", minDimension " << outFormat_.minDimension; - - scaleContext_ = sws_getContext( - inFormat_.width, - inFormat_.height, - (AVPixelFormat)inFormat_.format, - outFormat_.width, - outFormat_.height, - static_cast(outFormat_.format), - swsFlags_, - nullptr, - nullptr, - nullptr); - if (scaleContext_) { - return 0; - } else { - return -1; - } -} - -int32_t FfmpegVideoSampler::getImageBytes() const { - return av_image_get_buffer_size( - (AVPixelFormat)outFormat_.format, outFormat_.width, outFormat_.height, 1); -} - -// https://ffmpeg.org/doxygen/3.4/scaling_video_8c-example.html#a10 -unique_ptr FfmpegVideoSampler::sample(const AVFrame* frame) { - if (!frame) { - return nullptr; // no flush for videos - } - // scaled and cropped image - auto outImageSize = getImageBytes(); - AvDataPtr frameData(static_cast(av_malloc(outImageSize))); - - uint8_t* scalePlanes[4] = {nullptr}; - int scaleLines[4] = {0}; - - int result; - if 
((result = av_image_fill_arrays( - scalePlanes, - scaleLines, - frameData.get(), - static_cast(outFormat_.format), - outFormat_.width, - outFormat_.height, - 1)) < 0) { - LOG(ERROR) << "av_image_fill_arrays failed, err: " - << ffmpeg_util::getErrorDesc(result); - return nullptr; - } - - if ((result = sws_scale( - scaleContext_, - frame->data, - frame->linesize, - 0, - inFormat_.height, - scalePlanes, - scaleLines)) < 0) { - LOG(ERROR) << "sws_scale failed, err: " - << ffmpeg_util::getErrorDesc(result); - return nullptr; - } - - return make_unique(std::move(frameData), outImageSize, 0); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.h b/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.h deleted file mode 100644 index 1fd6862f537..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "FfmpegSampler.h" - -/** - * Class transcode video frames from one format into another - */ - -class FfmpegVideoSampler : public FfmpegSampler { - public: - explicit FfmpegVideoSampler( - const VideoFormat& in, - const VideoFormat& out, - int swsFlags = SWS_AREA); - ~FfmpegVideoSampler() override; - - int init() override; - - int32_t getImageBytes() const; - // returns number of bytes of the sampled data - std::unique_ptr sample(const AVFrame* frame) override; - - const VideoFormat& getInFormat() const { - return inFormat_; - } - - private: - VideoFormat inFormat_; - VideoFormat outFormat_; - int swsFlags_; - SwsContext* scaleContext_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.cpp b/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.cpp deleted file mode 100644 index 7a429249a71..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "FfmpegVideoStream.h" -#include "FfmpegUtil.h" - -using namespace std; - -namespace { - -bool operator==(const VideoFormat& x, const AVFrame& y) { - return 
x.width == y.width && x.height == y.height && - x.format == static_cast(y.format); -} - -VideoFormat toVideoFormat(const AVFrame& frame) { - VideoFormat videoFormat; - videoFormat.width = frame.width; - videoFormat.height = frame.height; - videoFormat.format = static_cast(frame.format); - - return videoFormat; -} - -} // namespace - -FfmpegVideoStream::FfmpegVideoStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin) - : FfmpegStream(inputCtx, index, avMediaType, seekFrameMargin), - mediaFormat_(mediaFormat) {} - -FfmpegVideoStream::~FfmpegVideoStream() {} - -void FfmpegVideoStream::checkStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first > 0) { - CHECK_EQ(timeBase.first, inputCtx_->streams[index_]->time_base.num); - CHECK_EQ(timeBase.second, inputCtx_->streams[index_]->time_base.den); - } -} - -void FfmpegVideoStream::updateStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first == 0) { - mediaFormat_.format.video.timeBaseNum = - inputCtx_->streams[index_]->time_base.num; - mediaFormat_.format.video.timeBaseDen = - inputCtx_->streams[index_]->time_base.den; - } - mediaFormat_.format.video.duration = inputCtx_->streams[index_]->duration; -} - -int FfmpegVideoStream::initFormat() { - // set output format - VideoFormat& format = mediaFormat_.format.video; - if (!ffmpeg_util::validateVideoFormat(format)) { - LOG(ERROR) << "Invalid video format"; - return -1; - } - - format.fps = av_q2d( - av_guess_frame_rate(inputCtx_, inputCtx_->streams[index_], nullptr)); - - // keep aspect ratio - ffmpeg_util::setFormatDimensions( - format.width, - format.height, - format.width, - format.height, - codecCtx_->width, - codecCtx_->height, - format.minDimension); - - VLOG(1) << "After adjusting, video format" - << ", width: " << format.width << ", height: " << format.height - << ", format: " << format.format - << ", minDimension: " << format.minDimension; - - if 
(format.format == AV_PIX_FMT_NONE) { - format.format = codecCtx_->pix_fmt; - VLOG(1) << "Set pixel format: " << format.format; - } - - checkStreamDecodeParams(); - - updateStreamDecodeParams(); - - return format.width != 0 && format.height != 0 && - format.format != AV_PIX_FMT_NONE - ? 0 - : -1; -} - -unique_ptr FfmpegVideoStream::sampleFrameData() { - VideoFormat& format = mediaFormat_.format.video; - if (!sampler_ || !(sampler_->getInFormat() == *frame_)) { - VideoFormat newInFormat = toVideoFormat(*frame_); - sampler_ = make_unique(newInFormat, format, SWS_AREA); - VLOG(1) << "Set input video sampler format" - << ", width: " << newInFormat.width - << ", height: " << newInFormat.height - << ", format: " << newInFormat.format - << " : output video sampler format" - << ", width: " << format.width << ", height: " << format.height - << ", format: " << format.format - << ", minDimension: " << format.minDimension; - int ret = sampler_->init(); - if (ret < 0) { - VLOG(1) << "Fail to initialize video sampler"; - return nullptr; - } - } - return sampler_->sample(frame_); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.h b/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.h deleted file mode 100644 index 9bfbc9f665b..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include "FfmpegStream.h" -#include "FfmpegVideoSampler.h" - -/** - * Class uses FFMPEG library to decode one video stream. 
- */ -class FfmpegVideoStream : public FfmpegStream { - public: - explicit FfmpegVideoStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin); - - ~FfmpegVideoStream() override; - - // FfmpegStream overrides - MediaType getMediaType() const override { - return MediaType::TYPE_VIDEO; - } - - FormatUnion getMediaFormat() const override { - return mediaFormat_.format; - } - - int64_t getStartPts() const override { - return mediaFormat_.format.video.startPts; - } - int64_t getEndPts() const override { - return mediaFormat_.format.video.endPts; - } - // return numerator and denominator of time base - std::pair getTimeBase() const { - return std::make_pair( - mediaFormat_.format.video.timeBaseNum, - mediaFormat_.format.video.timeBaseDen); - } - - void checkStreamDecodeParams(); - - void updateStreamDecodeParams(); - - protected: - int initFormat() override; - std::unique_ptr sampleFrameData() override; - - private: - MediaFormat mediaFormat_; - std::unique_ptr sampler_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/Interface.cpp b/torchvision/csrc/cpu/video_reader/Interface.cpp deleted file mode 100644 index 0ec9f155821..00000000000 --- a/torchvision/csrc/cpu/video_reader/Interface.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "Interface.h" - -void DecoderOutput::initMediaType(MediaType mediaType, FormatUnion format) { - MediaData mediaData(format); - media_data_.emplace(mediaType, std::move(mediaData)); -} - -void DecoderOutput::addMediaFrame( - MediaType mediaType, - std::unique_ptr frame) { - if (media_data_.find(mediaType) != media_data_.end()) { - VLOG(1) << "media type: " << mediaType - << " add frame with pts: " << frame->pts_; - media_data_[mediaType].frames_.push_back(std::move(frame)); - } else { - VLOG(1) << "media type: " << mediaType << " not found. 
Skip the frame."; - } -} - -void DecoderOutput::clear() { - media_data_.clear(); -} diff --git a/torchvision/csrc/cpu/video_reader/Interface.h b/torchvision/csrc/cpu/video_reader/Interface.h deleted file mode 100644 index e137008ce7b..00000000000 --- a/torchvision/csrc/cpu/video_reader/Interface.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -extern "C" { - -#include -#include -void av_free(void* ptr); -} - -struct avDeleter { - void operator()(uint8_t* p) const { - av_free(p); - } -}; - -const AVPixelFormat defaultVideoPixelFormat = AV_PIX_FMT_RGB24; -const AVSampleFormat defaultAudioSampleFormat = AV_SAMPLE_FMT_FLT; - -using AvDataPtr = std::unique_ptr; - -enum MediaType : uint32_t { - TYPE_VIDEO = 1, - TYPE_AUDIO = 2, -}; - -struct EnumClassHash { - template - uint32_t operator()(T t) const { - return static_cast(t); - } -}; - -struct VideoFormat { - // fields are initialized for the auto detection - // caller can specify some/all of field values if specific output is desirable - - int width{0}; // width in pixels - int height{0}; // height in pixels - int minDimension{0}; // choose min dimension and rescale accordingly - // Output image pixel format. 
data type AVPixelFormat - AVPixelFormat format{defaultVideoPixelFormat}; // type AVPixelFormat - int64_t startPts{0}, endPts{0}; // Start and end presentation timestamp - int timeBaseNum{0}; - int timeBaseDen{1}; // numerator and denominator of time base - float fps{0.0}; - int64_t duration{0}; // duration of the stream, in stream time base -}; - -struct AudioFormat { - // fields are initialized for the auto detection - // caller can specify some/all of field values if specific output is desirable - - int samples{0}; // number samples per second (frequency) - int channels{0}; // number of channels - AVSampleFormat format{defaultAudioSampleFormat}; // type AVSampleFormat - int64_t startPts{0}, endPts{0}; // Start and end presentation timestamp - int timeBaseNum{0}; - int timeBaseDen{1}; // numerator and denominator of time base - int64_t duration{0}; // duration of the stream, in stream time base -}; - -union FormatUnion { - FormatUnion() {} - VideoFormat video; - AudioFormat audio; -}; - -struct MediaFormat { - MediaFormat() {} - - MediaFormat(const MediaFormat& mediaFormat) : type(mediaFormat.type) { - if (type == MediaType::TYPE_VIDEO) { - format.video = mediaFormat.format.video; - } else if (type == MediaType::TYPE_AUDIO) { - format.audio = mediaFormat.format.audio; - } - } - - MediaFormat(MediaType mediaType) : type(mediaType) { - if (mediaType == MediaType::TYPE_VIDEO) { - format.video = VideoFormat(); - } else if (mediaType == MediaType::TYPE_AUDIO) { - format.audio = AudioFormat(); - } - } - // media type - MediaType type; - // format data - FormatUnion format; -}; - -class DecodedFrame { - public: - explicit DecodedFrame() : frame_(nullptr), frameSize_(0), pts_(0) {} - explicit DecodedFrame(AvDataPtr frame, int frameSize, int64_t pts) - : frame_(std::move(frame)), frameSize_(frameSize), pts_(pts) {} - AvDataPtr frame_{nullptr}; - int frameSize_{0}; - int64_t pts_{0}; -}; - -struct MediaData { - MediaData() {} - MediaData(FormatUnion format) : 
format_(format) {} - FormatUnion format_; - std::vector> frames_; -}; - -class DecoderOutput { - public: - explicit DecoderOutput() {} - - ~DecoderOutput() {} - - void initMediaType(MediaType mediaType, FormatUnion format); - - void addMediaFrame(MediaType mediaType, std::unique_ptr frame); - - void clear(); - - std::unordered_map media_data_; -}; diff --git a/torchvision/csrc/cpu/video_reader/VideoReader.cpp b/torchvision/csrc/cpu/video_reader/VideoReader.cpp deleted file mode 100644 index dfe7f46bf39..00000000000 --- a/torchvision/csrc/cpu/video_reader/VideoReader.cpp +++ /dev/null @@ -1,500 +0,0 @@ -#include "VideoReader.h" -#include -#include -#include -#include -#include "FfmpegDecoder.h" -#include "FfmpegHeaders.h" -#include "util.h" - -using namespace std; - -// If we are in a Windows environment, we need to define -// initialization functions for the _custom_ops extension -#ifdef _WIN32 -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC init_video_reader(void) { - // No need to do anything. - return NULL; -} -#else -PyMODINIT_FUNC PyInit_video_reader(void) { - // No need to do anything. - return NULL; -} -#endif -#endif - -namespace video_reader { - -class UnknownPixelFormatException : public exception { - const char* what() const throw() override { - return "Unknown pixel format"; - } -}; - -int getChannels(AVPixelFormat format) { - int numChannels = 0; - switch (format) { - case AV_PIX_FMT_BGR24: - case AV_PIX_FMT_RGB24: - numChannels = 3; - break; - default: - LOG(ERROR) << "Unknown format: " << format; - throw UnknownPixelFormatException(); - } - return numChannels; -} - -void fillVideoTensor( - std::vector>& frames, - torch::Tensor& videoFrame, - torch::Tensor& videoFramePts) { - int frameSize = 0; - if (videoFrame.numel() > 0) { - frameSize = videoFrame.numel() / frames.size(); - } - - int frameCount = 0; - - uint8_t* videoFrameData = - videoFrame.numel() > 0 ? 
videoFrame.data_ptr() : nullptr; - int64_t* videoFramePtsData = videoFramePts.data_ptr(); - - for (size_t i = 0; i < frames.size(); ++i) { - const auto& frame = frames[i]; - if (videoFrameData) { - memcpy( - videoFrameData + (size_t)(frameCount++) * (size_t)frameSize, - frame->frame_.get(), - frameSize * sizeof(uint8_t)); - } - videoFramePtsData[i] = frame->pts_; - } -} - -void getVideoMeta( - DecoderOutput& decoderOutput, - int& numFrames, - int& height, - int& width, - int& numChannels) { - auto& videoFrames = decoderOutput.media_data_[TYPE_VIDEO].frames_; - numFrames = videoFrames.size(); - - FormatUnion& videoFormat = decoderOutput.media_data_[TYPE_VIDEO].format_; - height = videoFormat.video.height; - width = videoFormat.video.width; - numChannels = getChannels(videoFormat.video.format); -} - -void fillAudioTensor( - std::vector>& frames, - torch::Tensor& audioFrame, - torch::Tensor& audioFramePts) { - if (frames.size() == 0) { - return; - } - - float* audioFrameData = - audioFrame.numel() > 0 ? 
audioFrame.data_ptr() : nullptr; - CHECK_EQ(audioFramePts.size(0), frames.size()); - int64_t* audioFramePtsData = audioFramePts.data_ptr(); - - int bytesPerSample = av_get_bytes_per_sample(defaultAudioSampleFormat); - - int64_t frameDataOffset = 0; - for (size_t i = 0; i < frames.size(); ++i) { - audioFramePtsData[i] = frames[i]->pts_; - if (audioFrameData) { - memcpy( - audioFrameData + frameDataOffset, - frames[i]->frame_.get(), - frames[i]->frameSize_); - frameDataOffset += (frames[i]->frameSize_ / bytesPerSample); - } - } -} - -void getAudioMeta( - DecoderOutput& decoderOutput, - int64_t& numSamples, - int64_t& channels, - int64_t& numFrames) { - FormatUnion& audioFormat = decoderOutput.media_data_[TYPE_AUDIO].format_; - - channels = audioFormat.audio.channels; - CHECK_EQ(audioFormat.audio.format, AV_SAMPLE_FMT_FLT); - int bytesPerSample = av_get_bytes_per_sample( - static_cast(audioFormat.audio.format)); - - // auto& audioFrames = decoderOutput.media_frames_[TYPE_AUDIO]; - auto& audioFrames = decoderOutput.media_data_[TYPE_AUDIO].frames_; - numFrames = audioFrames.size(); - int64_t frameSizeTotal = 0; - for (auto const& decodedFrame : audioFrames) { - frameSizeTotal += static_cast(decodedFrame->frameSize_); - } - VLOG(2) << "numFrames: " << numFrames; - VLOG(2) << "frameSizeTotal: " << frameSizeTotal; - VLOG(2) << "channels: " << channels; - VLOG(2) << "bytesPerSample: " << bytesPerSample; - CHECK_EQ(frameSizeTotal % (channels * bytesPerSample), 0); - numSamples = frameSizeTotal / (channels * bytesPerSample); -} - -torch::List readVideo( - bool isReadFile, - const torch::Tensor& input_video, - std::string videoPath, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t 
audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen) { - unique_ptr params = util::getDecoderParams( - seekFrameMargin, - getPtsOnly, - readVideoStream, - width, - height, - minDimension, - videoStartPts, - videoEndPts, - videoTimeBaseNum, - videoTimeBaseDen, - readAudioStream, - audioSamples, - audioChannels, - audioStartPts, - audioEndPts, - audioTimeBaseNum, - audioTimeBaseDen); - - FfmpegDecoder decoder; - DecoderOutput decoderOutput; - - if (isReadFile) { - decoder.decodeFile(std::move(params), videoPath, decoderOutput); - } else { - decoder.decodeMemory( - std::move(params), - input_video.data_ptr(), - input_video.size(0), - decoderOutput); - } - - // video section - torch::Tensor videoFrame = torch::zeros({0}, torch::kByte); - torch::Tensor videoFramePts = torch::zeros({0}, torch::kLong); - torch::Tensor videoTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor videoFps = torch::zeros({0}, torch::kFloat); - torch::Tensor videoDuration = torch::zeros({0}, torch::kLong); - - if (readVideoStream == 1) { - auto it = decoderOutput.media_data_.find(TYPE_VIDEO); - if (it != decoderOutput.media_data_.end()) { - int numVideoFrames, outHeight, outWidth, numChannels; - getVideoMeta( - decoderOutput, numVideoFrames, outHeight, outWidth, numChannels); - - if (getPtsOnly == 0) { - videoFrame = torch::zeros( - {numVideoFrames, outHeight, outWidth, numChannels}, torch::kByte); - } - - videoFramePts = torch::zeros({numVideoFrames}, torch::kLong); - - fillVideoTensor( - decoderOutput.media_data_[TYPE_VIDEO].frames_, - videoFrame, - videoFramePts); - - videoTimeBase = torch::zeros({2}, torch::kInt); - int* videoTimeBaseData = videoTimeBase.data_ptr(); - videoTimeBaseData[0] = it->second.format_.video.timeBaseNum; - videoTimeBaseData[1] = it->second.format_.video.timeBaseDen; - - videoFps = torch::zeros({1}, torch::kFloat); - float* videoFpsData = videoFps.data_ptr(); - videoFpsData[0] = it->second.format_.video.fps; - - videoDuration = torch::zeros({1}, 
torch::kLong); - int64_t* videoDurationData = videoDuration.data_ptr(); - videoDurationData[0] = it->second.format_.video.duration; - } else { - VLOG(1) << "Miss video stream"; - } - } - - // audio section - torch::Tensor audioFrame = torch::zeros({0}, torch::kFloat); - torch::Tensor audioFramePts = torch::zeros({0}, torch::kLong); - torch::Tensor audioTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor audioSampleRate = torch::zeros({0}, torch::kInt); - torch::Tensor audioDuration = torch::zeros({0}, torch::kLong); - if (readAudioStream == 1) { - auto it = decoderOutput.media_data_.find(TYPE_AUDIO); - if (it != decoderOutput.media_data_.end()) { - VLOG(1) << "Find audio stream"; - int64_t numAudioSamples = 0, outAudioChannels = 0, numAudioFrames = 0; - getAudioMeta( - decoderOutput, numAudioSamples, outAudioChannels, numAudioFrames); - VLOG(2) << "numAudioSamples: " << numAudioSamples; - VLOG(2) << "outAudioChannels: " << outAudioChannels; - VLOG(2) << "numAudioFrames: " << numAudioFrames; - - if (getPtsOnly == 0) { - audioFrame = - torch::zeros({numAudioSamples, outAudioChannels}, torch::kFloat); - } - audioFramePts = torch::zeros({numAudioFrames}, torch::kLong); - fillAudioTensor( - decoderOutput.media_data_[TYPE_AUDIO].frames_, - audioFrame, - audioFramePts); - - audioTimeBase = torch::zeros({2}, torch::kInt); - int* audioTimeBaseData = audioTimeBase.data_ptr(); - audioTimeBaseData[0] = it->second.format_.audio.timeBaseNum; - audioTimeBaseData[1] = it->second.format_.audio.timeBaseDen; - - audioSampleRate = torch::zeros({1}, torch::kInt); - int* audioSampleRateData = audioSampleRate.data_ptr(); - audioSampleRateData[0] = it->second.format_.audio.samples; - - audioDuration = torch::zeros({1}, torch::kLong); - int64_t* audioDurationData = audioDuration.data_ptr(); - audioDurationData[0] = it->second.format_.audio.duration; - } else { - VLOG(1) << "Miss audio stream"; - } - } - - torch::List result; - result.push_back(std::move(videoFrame)); - 
result.push_back(std::move(videoFramePts)); - result.push_back(std::move(videoTimeBase)); - result.push_back(std::move(videoFps)); - result.push_back(std::move(videoDuration)); - result.push_back(std::move(audioFrame)); - result.push_back(std::move(audioFramePts)); - result.push_back(std::move(audioTimeBase)); - result.push_back(std::move(audioSampleRate)); - result.push_back(std::move(audioDuration)); - - return result; -} - -torch::List readVideoFromMemory( - torch::Tensor input_video, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen) { - return readVideo( - false, - input_video, - "", // videoPath - seekFrameMargin, - getPtsOnly, - readVideoStream, - width, - height, - minDimension, - videoStartPts, - videoEndPts, - videoTimeBaseNum, - videoTimeBaseDen, - readAudioStream, - audioSamples, - audioChannels, - audioStartPts, - audioEndPts, - audioTimeBaseNum, - audioTimeBaseDen); -} - -torch::List readVideoFromFile( - std::string videoPath, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen) { - torch::Tensor dummy_input_video = torch::ones({0}); - return readVideo( - true, - dummy_input_video, - videoPath, - seekFrameMargin, - getPtsOnly, - readVideoStream, - width, - height, - minDimension, - videoStartPts, - videoEndPts, - videoTimeBaseNum, - 
videoTimeBaseDen, - readAudioStream, - audioSamples, - audioChannels, - audioStartPts, - audioEndPts, - audioTimeBaseNum, - audioTimeBaseDen); -} - -torch::List probeVideo( - bool isReadFile, - const torch::Tensor& input_video, - std::string videoPath) { - unique_ptr params = util::getDecoderParams( - 0, // seekFrameMargin - 0, // getPtsOnly - 1, // readVideoStream - 0, // width - 0, // height - 0, // minDimension - 0, // videoStartPts - 0, // videoEndPts - 0, // videoTimeBaseNum - 1, // videoTimeBaseDen - 1, // readAudioStream - 0, // audioSamples - 0, // audioChannels - 0, // audioStartPts - 0, // audioEndPts - 0, // audioTimeBaseNum - 1 // audioTimeBaseDen - ); - - FfmpegDecoder decoder; - DecoderOutput decoderOutput; - if (isReadFile) { - decoder.probeFile(std::move(params), videoPath, decoderOutput); - } else { - decoder.probeMemory( - std::move(params), - input_video.data_ptr(), - input_video.size(0), - decoderOutput); - } - // video section - torch::Tensor videoTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor videoFps = torch::zeros({0}, torch::kFloat); - torch::Tensor videoDuration = torch::zeros({0}, torch::kLong); - - auto it = decoderOutput.media_data_.find(TYPE_VIDEO); - if (it != decoderOutput.media_data_.end()) { - VLOG(1) << "Find video stream"; - videoTimeBase = torch::zeros({2}, torch::kInt); - int* videoTimeBaseData = videoTimeBase.data_ptr(); - videoTimeBaseData[0] = it->second.format_.video.timeBaseNum; - videoTimeBaseData[1] = it->second.format_.video.timeBaseDen; - - videoFps = torch::zeros({1}, torch::kFloat); - float* videoFpsData = videoFps.data_ptr(); - videoFpsData[0] = it->second.format_.video.fps; - - videoDuration = torch::zeros({1}, torch::kLong); - int64_t* videoDurationData = videoDuration.data_ptr(); - videoDurationData[0] = it->second.format_.video.duration; - } else { - VLOG(1) << "Miss video stream"; - } - - // audio section - torch::Tensor audioTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor audioSampleRate 
= torch::zeros({0}, torch::kInt); - torch::Tensor audioDuration = torch::zeros({0}, torch::kLong); - - it = decoderOutput.media_data_.find(TYPE_AUDIO); - if (it != decoderOutput.media_data_.end()) { - VLOG(1) << "Find audio stream"; - audioTimeBase = torch::zeros({2}, torch::kInt); - int* audioTimeBaseData = audioTimeBase.data_ptr(); - audioTimeBaseData[0] = it->second.format_.audio.timeBaseNum; - audioTimeBaseData[1] = it->second.format_.audio.timeBaseDen; - - audioSampleRate = torch::zeros({1}, torch::kInt); - int* audioSampleRateData = audioSampleRate.data_ptr(); - audioSampleRateData[0] = it->second.format_.audio.samples; - - audioDuration = torch::zeros({1}, torch::kLong); - int64_t* audioDurationData = audioDuration.data_ptr(); - audioDurationData[0] = it->second.format_.audio.duration; - } else { - VLOG(1) << "Miss audio stream"; - } - - torch::List result; - result.push_back(std::move(videoTimeBase)); - result.push_back(std::move(videoFps)); - result.push_back(std::move(videoDuration)); - result.push_back(std::move(audioTimeBase)); - result.push_back(std::move(audioSampleRate)); - result.push_back(std::move(audioDuration)); - - return result; -} - -torch::List probeVideoFromMemory(torch::Tensor input_video) { - return probeVideo(false, input_video, ""); -} - -torch::List probeVideoFromFile(std::string videoPath) { - torch::Tensor dummy_input_video = torch::ones({0}); - return probeVideo(true, dummy_input_video, videoPath); -} - -} // namespace video_reader - -static auto registry = torch::RegisterOperators() - .op("video_reader::read_video_from_memory", - &video_reader::readVideoFromMemory) - .op("video_reader::read_video_from_file", - &video_reader::readVideoFromFile) - .op("video_reader::probe_video_from_memory", - &video_reader::probeVideoFromMemory) - .op("video_reader::probe_video_from_file", - &video_reader::probeVideoFromFile); diff --git a/torchvision/csrc/cpu/video_reader/VideoReader.h b/torchvision/csrc/cpu/video_reader/VideoReader.h deleted file 
mode 100644 index efc2e4709a6..00000000000 --- a/torchvision/csrc/cpu/video_reader/VideoReader.h +++ /dev/null @@ -1,99 +0,0 @@ -#pragma once - -#include - -// Interface for Python - -/* - return: - videoFrame: tensor (N, H, W, C) kByte - videoFramePts: tensor (N) kLong - videoTimeBase: tensor (2) kInt - videoFps: tensor (1) kFloat - audioFrame: tensor (N, C) kFloat - audioFramePts: tensor (N) kLong - audioTimeBase: tensor (2) kInt - audioSampleRate: tensor (1) kInt -*/ -torch::List readVideoFromMemory( - // 1D tensor of data type uint8, storing the comparessed video data - torch::Tensor input_video, - // seeking frame in the video/audio stream is imprecise so seek to a - // timestamp earlier by a margin The unit of margin is second - double seekFrameMargin, - // If only pts is needed and video/audio frames are not needed, set it - // to 1 - int64_t getPtsOnly, - // bool variable. Set it to 1 if video stream should be read. Otherwise, set - // it to 0 - int64_t readVideoStream, - /* - Valid parameters values for rescaling video frames - ___________________________________________________ - | width | height | min_dimension | algorithm | - |_________________________________________________| - | 0 | 0 | 0 | original | - |_________________________________________________| - | 0 | 0 | >0 |scale to min dimension| - |_____|_____|____________________________________ | - | >0 | 0 | 0 | scale keeping W | - |_________________________________________________| - | 0 | >0 | 0 | scale keeping H | - |_________________________________________________| - | >0 | >0 | 0 | stretch/scale | - |_________________________________________________| - */ - int64_t width, - int64_t height, - int64_t minDimension, - // video frames with pts in [videoStartPts, videoEndPts] will be decoded - // For decoding all video frames, use [0, -1] - int64_t videoStartPts, - int64_t videoEndPts, - // numerator and denominator of time base of video stream. 
- // For decoding all video frames, supply dummy 0 (numerator) and 1 - // (denominator). For decoding localized video frames, need to supply - // them which will be checked during decoding - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - // bool variable. Set it to 1 if audio stream should be read. Otherwise, set - // it to 0 - int64_t readAudioStream, - // audio stream sampling rate. - // If not resampling audio waveform, supply 0 - // Otherwise, supply a positive integer. - int64_t audioSamples, - // audio stream channels - // Supply 0 to use the same number of channels as in the original audio - // stream - int64_t audioChannels, - // audio frames with pts in [audioStartPts, audioEndPts] will be decoded - // For decoding all audio frames, use [0, -1] - int64_t audioStartPts, - int64_t audioEndPts, - // numerator and denominator of time base of audio stream. - // For decoding all audio frames, supply dummy 0 (numerator) and 1 - // (denominator). For decoding localized audio frames, need to supply - // them which will be checked during decoding - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen); - -torch::List readVideoFromFile( - std::string videoPath, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen); diff --git a/torchvision/csrc/cpu/video_reader/util.cpp b/torchvision/csrc/cpu/video_reader/util.cpp deleted file mode 100644 index ae3c3df0f0a..00000000000 --- a/torchvision/csrc/cpu/video_reader/util.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "util.h" - -using namespace std; - -namespace util { - -unique_ptr getDecoderParams( - double seekFrameMargin, - int64_t getPtsOnly, - int64_t 
readVideoStream, - int videoWidth, - int videoHeight, - int videoMinDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int videoTimeBaseNum, - int videoTimeBaseDen, - int64_t readAudioStream, - int audioSamples, - int audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int audioTimeBaseNum, - int audioTimeBaseDen) { - unique_ptr params = make_unique(); - - if (readVideoStream == 1) { - params->formats.emplace( - MediaType::TYPE_VIDEO, MediaFormat(MediaType::TYPE_VIDEO)); - MediaFormat& videoFormat = params->formats[MediaType::TYPE_VIDEO]; - - videoFormat.format.video.width = videoWidth; - videoFormat.format.video.height = videoHeight; - videoFormat.format.video.minDimension = videoMinDimension; - videoFormat.format.video.startPts = videoStartPts; - videoFormat.format.video.endPts = videoEndPts; - videoFormat.format.video.timeBaseNum = videoTimeBaseNum; - videoFormat.format.video.timeBaseDen = videoTimeBaseDen; - } - - if (readAudioStream == 1) { - params->formats.emplace( - MediaType::TYPE_AUDIO, MediaFormat(MediaType::TYPE_AUDIO)); - MediaFormat& audioFormat = params->formats[MediaType::TYPE_AUDIO]; - - audioFormat.format.audio.samples = audioSamples; - audioFormat.format.audio.channels = audioChannels; - audioFormat.format.audio.startPts = audioStartPts; - audioFormat.format.audio.endPts = audioEndPts; - audioFormat.format.audio.timeBaseNum = audioTimeBaseNum; - audioFormat.format.audio.timeBaseDen = audioTimeBaseDen; - } - - params->seekFrameMargin = seekFrameMargin; - params->getPtsOnly = getPtsOnly; - - return params; -} - -} // namespace util diff --git a/torchvision/csrc/cpu/video_reader/util.h b/torchvision/csrc/cpu/video_reader/util.h deleted file mode 100644 index 6b5fd55388b..00000000000 --- a/torchvision/csrc/cpu/video_reader/util.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once -#include -#include "FfmpegDecoder.h" - -namespace util { - -std::unique_ptr getDecoderParams( - double seekFrameMargin, - int64_t getPtsOnly, - int64_t 
readVideoStream, - int videoWidth, - int videoHeight, - int videoMinDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int videoTimeBaseNum, - int videoTimeBaseDen, - int64_t readAudioStream, - int audioSamples, - int audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int audioTimeBaseNum, - int audioTimeBaseDen); - -} // namespace util diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h deleted file mode 100644 index d84b172ba49..00000000000 --- a/torchvision/csrc/cpu/vision_cpu.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once -#include - -std::tuple ROIPool_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor ROIPool_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -at::Tensor ROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - -std::tuple PSROIPool_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor PSROIPool_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int 
width); - -std::tuple PSROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor PSROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const int batch_size, - const int channels, - const int height, - const int width); - -at::Tensor nms_cpu( - const at::Tensor& dets, - const at::Tensor& scores, - const float iou_threshold); diff --git a/torchvision/csrc/cuda/cuda_helpers.h b/torchvision/csrc/cuda/cuda_helpers.h deleted file mode 100644 index af32f60e815..00000000000 --- a/torchvision/csrc/cuda/cuda_helpers.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \ - i += (blockDim.x * gridDim.x)) diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h deleted file mode 100644 index b35c4c909c1..00000000000 --- a/torchvision/csrc/cuda/vision_cuda.h +++ /dev/null @@ -1,87 +0,0 @@ -#pragma once -#include -#include - -at::Tensor ROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - -std::tuple ROIPool_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor ROIPool_backward_cuda( - const at::Tensor& grad, - const 
at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -std::tuple PSROIPool_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor PSROIPool_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -std::tuple PSROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor PSROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const int batch_size, - const int channels, - const int height, - const int width); - -at::Tensor nms_cuda( - const at::Tensor& dets, - const at::Tensor& scores, - const float iou_threshold); diff --git a/torchvision/csrc/io/decoder/audio_sampler.cpp b/torchvision/csrc/io/decoder/audio_sampler.cpp new file mode 100644 index 00000000000..d46b93ddc69 --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_sampler.cpp @@ -0,0 +1,251 @@ +#include "audio_sampler.h" +#include +#include "util.h" + +#define AVRESAMPLE_MAX_CHANNELS 32 + +// www.ffmpeg.org/doxygen/1.1/doc_2examples_2resampling_audio_8c-example.html#a24 +namespace ffmpeg { + +namespace { +int preparePlanes( + const AudioFormat& fmt, + const uint8_t* buffer, + int numSamples, + uint8_t** planes) { + int result; + if ((result = av_samples_fill_arrays( + planes, + nullptr, // linesize is not needed + buffer, 
+ fmt.channels, + numSamples, + (AVSampleFormat)fmt.format, + 1)) < 0) { + LOG(ERROR) << "av_samples_fill_arrays failed, err: " + << Util::generateErrorDesc(result) + << ", numSamples: " << numSamples << ", fmt: " << fmt.format; + } + return result; +} +} // namespace + +AudioSampler::AudioSampler(void* logCtx) : logCtx_(logCtx) {} + +AudioSampler::~AudioSampler() { + cleanUp(); +} + +void AudioSampler::shutdown() { + cleanUp(); +} + +bool AudioSampler::init(const SamplerParameters& params) { + cleanUp(); + + if (params.type != MediaType::TYPE_AUDIO) { + LOG(ERROR) << "Invalid media type, expected MediaType::TYPE_AUDIO"; + return false; + } + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) + SwrContext* swrContext_ = NULL; + AVChannelLayout channel_out; + AVChannelLayout channel_in; + av_channel_layout_default(&channel_out, params.out.audio.channels); + av_channel_layout_default(&channel_in, params.in.audio.channels); + int ret = swr_alloc_set_opts2( + &swrContext_, + &channel_out, + (AVSampleFormat)params.out.audio.format, + params.out.audio.samples, + &channel_in, + (AVSampleFormat)params.in.audio.format, + params.in.audio.samples, + 0, + logCtx_); +#else + swrContext_ = swr_alloc_set_opts( + nullptr, + av_get_default_channel_layout(params.out.audio.channels), + (AVSampleFormat)params.out.audio.format, + params.out.audio.samples, + av_get_default_channel_layout(params.in.audio.channels), + (AVSampleFormat)params.in.audio.format, + params.in.audio.samples, + 0, + logCtx_); +#endif + if (swrContext_ == nullptr) { + LOG(ERROR) << "Cannot allocate SwrContext"; + return false; + } + + int result; + if ((result = swr_init(swrContext_)) < 0) { + LOG(ERROR) << "swr_init failed, err: " << Util::generateErrorDesc(result) + << ", in -> format: " << params.in.audio.format + << ", channels: " << params.in.audio.channels + << ", samples: " << params.in.audio.samples + << ", out -> format: " << params.out.audio.format + << ", channels: " << params.out.audio.channels + 
<< ", samples: " << params.out.audio.samples; + return false; + } + + // set formats + params_ = params; + return true; +} + +int AudioSampler::numOutputSamples(int inSamples) const { + return swr_get_out_samples(swrContext_, inSamples); +} + +int AudioSampler::sample( + const uint8_t* inPlanes[], + int inNumSamples, + ByteStorage* out, + int outNumSamples) { + int result; + int outBufferBytes = av_samples_get_buffer_size( + nullptr, + params_.out.audio.channels, + outNumSamples, + (AVSampleFormat)params_.out.audio.format, + 1); + + if (out) { + out->ensure(outBufferBytes); + + uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr}; + + if ((result = preparePlanes( + params_.out.audio, + out->writableTail(), + outNumSamples, + outPlanes)) < 0) { + return result; + } + + if ((result = swr_convert( + swrContext_, + &outPlanes[0], + outNumSamples, + inPlanes, + inNumSamples)) < 0) { + LOG(ERROR) << "swr_convert failed, err: " + << Util::generateErrorDesc(result); + return result; + } + + TORCH_CHECK_LE(result, outNumSamples); + + if (result) { + if ((result = av_samples_get_buffer_size( + nullptr, + params_.out.audio.channels, + result, + (AVSampleFormat)params_.out.audio.format, + 1)) >= 0) { + out->append(result); + } else { + LOG(ERROR) << "av_samples_get_buffer_size failed, err: " + << Util::generateErrorDesc(result); + } + } + } else { + // allocate a temporary buffer + auto* tmpBuffer = static_cast(av_malloc(outBufferBytes)); + if (!tmpBuffer) { + LOG(ERROR) << "av_alloc failed, for size: " << outBufferBytes; + return -1; + } + + uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr}; + + if ((result = preparePlanes( + params_.out.audio, tmpBuffer, outNumSamples, outPlanes)) < 0) { + av_free(tmpBuffer); + return result; + } + + if ((result = swr_convert( + swrContext_, + &outPlanes[0], + outNumSamples, + inPlanes, + inNumSamples)) < 0) { + LOG(ERROR) << "swr_convert failed, err: " + << Util::generateErrorDesc(result); + av_free(tmpBuffer); + return result; + } 
+ + av_free(tmpBuffer); + + TORCH_CHECK_LE(result, outNumSamples); + + if (result) { + result = av_samples_get_buffer_size( + nullptr, + params_.out.audio.channels, + result, + (AVSampleFormat)params_.out.audio.format, + 1); + } + } + + return result; +} + +int AudioSampler::sample(AVFrame* frame, ByteStorage* out) { + const auto outNumSamples = numOutputSamples(frame ? frame->nb_samples : 0); + + if (!outNumSamples) { + return 0; + } + + return sample( + frame ? (const uint8_t**)&frame->data[0] : nullptr, + frame ? frame->nb_samples : 0, + out, + outNumSamples); +} + +int AudioSampler::sample(const ByteStorage* in, ByteStorage* out) { + const auto inSampleSize = + av_get_bytes_per_sample((AVSampleFormat)params_.in.audio.format); + + const auto inNumSamples = + !in ? 0 : in->length() / inSampleSize / params_.in.audio.channels; + + const auto outNumSamples = numOutputSamples(inNumSamples); + + if (!outNumSamples) { + return 0; + } + + uint8_t* inPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr}; + int result; + if (in && + (result = preparePlanes( + params_.in.audio, in->data(), inNumSamples, inPlanes)) < 0) { + return result; + } + + return sample( + in ? 
(const uint8_t**)inPlanes : nullptr, + inNumSamples, + out, + outNumSamples); +} + +void AudioSampler::cleanUp() { + if (swrContext_) { + swr_free(&swrContext_); + swrContext_ = nullptr; + } +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/audio_sampler.h b/torchvision/csrc/io/decoder/audio_sampler.h new file mode 100644 index 00000000000..e105bbe4de2 --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_sampler.h @@ -0,0 +1,39 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class transcode audio frames from one format into another + */ + +class AudioSampler : public MediaSampler { + public: + explicit AudioSampler(void* logCtx); + ~AudioSampler() override; + + // MediaSampler overrides + bool init(const SamplerParameters& params) override; + int sample(const ByteStorage* in, ByteStorage* out) override; + void shutdown() override; + + int sample(AVFrame* frame, ByteStorage* out); + + private: + // close resources + void cleanUp(); + // helper functions for rescaling, cropping, etc. + int numOutputSamples(int inSamples) const; + int sample( + const uint8_t* inPlanes[], + int inNumSamples, + ByteStorage* out, + int outNumSamples); + + private: + SwrContext* swrContext_{nullptr}; + void* logCtx_{nullptr}; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/audio_stream.cpp b/torchvision/csrc/io/decoder/audio_stream.cpp new file mode 100644 index 00000000000..9d7354e02f5 --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_stream.cpp @@ -0,0 +1,120 @@ +#include "audio_stream.h" +#include +#include +#include "util.h" + +namespace ffmpeg { + +namespace { +static int get_nb_channels(const AVFrame* frame, const AVCodecContext* codec) { +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) + return frame ? frame->ch_layout.nb_channels : codec->ch_layout.nb_channels; +#else + return frame ? 
frame->channels : codec->channels; +#endif +} + +bool operator==(const AudioFormat& x, const AVFrame& y) { + return x.samples == static_cast(y.sample_rate) && + x.channels == static_cast(get_nb_channels(&y, nullptr)) && + x.format == y.format; +} + +bool operator==(const AudioFormat& x, const AVCodecContext& y) { + return x.samples == static_cast(y.sample_rate) && + x.channels == static_cast(get_nb_channels(nullptr, &y)) && + x.format == y.sample_fmt; +} + +AudioFormat& toAudioFormat(AudioFormat& x, const AVFrame& y) { + x.samples = y.sample_rate; + x.channels = get_nb_channels(&y, nullptr); + x.format = y.format; + return x; +} + +AudioFormat& toAudioFormat(AudioFormat& x, const AVCodecContext& y) { + x.samples = y.sample_rate; + x.channels = get_nb_channels(nullptr, &y); + x.format = y.sample_fmt; + return x; +} +} // namespace + +AudioStream::AudioStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const AudioFormat& format) + : Stream( + inputCtx, + MediaFormat::makeMediaFormat(format, index), + convertPtsToWallTime, + 0) {} + +AudioStream::~AudioStream() { + if (sampler_) { + sampler_->shutdown(); + sampler_.reset(); + } +} + +int AudioStream::initFormat() { + // set output format + if (format_.format.audio.samples == 0) { + format_.format.audio.samples = codecCtx_->sample_rate; + } +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) + if (format_.format.audio.channels == 0) { + format_.format.audio.channels = codecCtx_->ch_layout.nb_channels; + } +#else + if (format_.format.audio.channels == 0) { + format_.format.audio.channels = codecCtx_->channels; + } +#endif + if (format_.format.audio.format == AV_SAMPLE_FMT_NONE) { + format_.format.audio.format = codecCtx_->sample_fmt; + } + + return format_.format.audio.samples != 0 && + format_.format.audio.channels != 0 && + format_.format.audio.format != AV_SAMPLE_FMT_NONE + ? 
0 + : -1; +} + +// copies audio sample bytes via swr_convert call in audio_sampler.cpp +int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) { + if (!sampler_) { + sampler_ = std::make_unique(codecCtx_); + } + // check if input format gets changed + if (flush ? !(sampler_->getInputFormat().audio == *codecCtx_) + : !(sampler_->getInputFormat().audio == *frame_)) { + // - reinit sampler + SamplerParameters params; + params.type = format_.type; + params.out = format_.format; + params.in = FormatUnion(); + flush ? toAudioFormat(params.in.audio, *codecCtx_) + : toAudioFormat(params.in.audio, *frame_); + if (!sampler_->init(params)) { + return -1; + } + + VLOG(1) << "Set input audio sampler format" + << ", samples: " << params.in.audio.samples + << ", channels: " << params.in.audio.channels + << ", format: " << params.in.audio.format + << " : output audio sampler format" + << ", samples: " << format_.format.audio.samples + << ", channels: " << format_.format.audio.channels + << ", format: " << format_.format.audio.format; + } + // calls to a sampler that converts the audio samples and copies them to the + // out buffer via ffmpeg::swr_convert + return sampler_->sample(flush ? nullptr : frame_, out); +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/audio_stream.h b/torchvision/csrc/io/decoder/audio_stream.h new file mode 100644 index 00000000000..2d6457b68f5 --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_stream.h @@ -0,0 +1,29 @@ +#pragma once + +#include "audio_sampler.h" +#include "stream.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one audio stream. 
+ */ + +class AudioStream : public Stream { + public: + AudioStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const AudioFormat& format); + ~AudioStream() override; + + private: + int initFormat() override; + int copyFrameBytes(ByteStorage* out, bool flush) override; + + private: + std::unique_ptr sampler_; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/cc_stream.cpp b/torchvision/csrc/io/decoder/cc_stream.cpp new file mode 100644 index 00000000000..89174c396fd --- /dev/null +++ b/torchvision/csrc/io/decoder/cc_stream.cpp @@ -0,0 +1,24 @@ +#include "cc_stream.h" + +namespace ffmpeg { + +CCStream::CCStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const SubtitleFormat& format) + : SubtitleStream(inputCtx, index, convertPtsToWallTime, format) { + format_.type = TYPE_CC; +} + +AVCodec* CCStream::findCodec(AVCodecParameters* params) { + if (params->codec_id == AV_CODEC_ID_BIN_DATA && + params->codec_type == AVMEDIA_TYPE_DATA) { + // obtain subtitles codec + params->codec_id = AV_CODEC_ID_MOV_TEXT; + params->codec_type = AVMEDIA_TYPE_SUBTITLE; + } + return Stream::findCodec(params); +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/cc_stream.h b/torchvision/csrc/io/decoder/cc_stream.h new file mode 100644 index 00000000000..3a1d169f014 --- /dev/null +++ b/torchvision/csrc/io/decoder/cc_stream.h @@ -0,0 +1,22 @@ +#pragma once + +#include "subtitle_stream.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one closed captions stream. 
+ */ +class CCStream : public SubtitleStream { + public: + CCStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const SubtitleFormat& format); + + private: + AVCodec* findCodec(AVCodecParameters* params) override; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/decoder.cpp b/torchvision/csrc/io/decoder/decoder.cpp new file mode 100644 index 00000000000..cfe762bbc6e --- /dev/null +++ b/torchvision/csrc/io/decoder/decoder.cpp @@ -0,0 +1,763 @@ +#include "decoder.h" +#include +#include +#include +#include +#include +#include "audio_stream.h" +#include "cc_stream.h" +#include "subtitle_stream.h" +#include "util.h" +#include "video_stream.h" + +namespace ffmpeg { + +namespace { + +constexpr size_t kIoBufferSize = 96 * 1024; +constexpr size_t kIoPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE; +constexpr size_t kLogBufferSize = 1024; + +bool mapFfmpegType(AVMediaType media, MediaType* type) { + switch (media) { + case AVMEDIA_TYPE_AUDIO: + *type = TYPE_AUDIO; + return true; + case AVMEDIA_TYPE_VIDEO: + *type = TYPE_VIDEO; + return true; + case AVMEDIA_TYPE_SUBTITLE: + *type = TYPE_SUBTITLE; + return true; + case AVMEDIA_TYPE_DATA: + *type = TYPE_CC; + return true; + default: + return false; + } +} + +std::unique_ptr createStream( + MediaType type, + AVFormatContext* ctx, + int idx, + bool convertPtsToWallTime, + const FormatUnion& format, + int64_t loggingUuid) { + switch (type) { + case TYPE_AUDIO: + return std::make_unique( + ctx, idx, convertPtsToWallTime, format.audio); + case TYPE_VIDEO: + return std::make_unique( + // negative loggingUuid indicates video streams. 
+ ctx, + idx, + convertPtsToWallTime, + format.video, + -loggingUuid); + case TYPE_SUBTITLE: + return std::make_unique( + ctx, idx, convertPtsToWallTime, format.subtitle); + case TYPE_CC: + return std::make_unique( + ctx, idx, convertPtsToWallTime, format.subtitle); + default: + return nullptr; + } +} + +} // Namespace + +/* static */ +void Decoder::logFunction(void* avcl, int level, const char* cfmt, va_list vl) { + if (!avcl) { + // Nothing can be done here + return; + } + + AVClass* avclass = *reinterpret_cast(avcl); + if (!avclass) { + // Nothing can be done here + return; + } + Decoder* decoder = nullptr; + if (strcmp(avclass->class_name, "AVFormatContext") == 0) { + AVFormatContext* context = reinterpret_cast(avcl); + if (context) { + decoder = reinterpret_cast(context->opaque); + } + } else if (strcmp(avclass->class_name, "AVCodecContext") == 0) { + AVCodecContext* context = reinterpret_cast(avcl); + if (context) { + decoder = reinterpret_cast(context->opaque); + } + } else if (strcmp(avclass->class_name, "AVIOContext") == 0) { + AVIOContext* context = reinterpret_cast(avcl); + // only if opaque was assigned to Decoder pointer + if (context && context->read_packet == Decoder::readFunction) { + decoder = reinterpret_cast(context->opaque); + } + } else if (strcmp(avclass->class_name, "SWResampler") == 0) { + // expect AVCodecContext as parent + if (avclass->parent_log_context_offset) { + AVClass** parent = + *(AVClass***)(((uint8_t*)avcl) + avclass->parent_log_context_offset); + AVCodecContext* context = reinterpret_cast(parent); + if (context) { + decoder = reinterpret_cast(context->opaque); + } + } + } else if (strcmp(avclass->class_name, "SWScaler") == 0) { + // cannot find a way to pass context pointer through SwsContext struct + } else { + VLOG(2) << "Unknown context class: " << avclass->class_name; + } + + if (decoder != nullptr && decoder->enableLogLevel(level)) { + char buf[kLogBufferSize] = {0}; + // Format the line + int* prefix = 
decoder->getPrintPrefix(); + *prefix = 1; + av_log_format_line(avcl, level, cfmt, vl, buf, sizeof(buf) - 1, prefix); + // pass message to the decoder instance + std::string msg(buf); + decoder->logCallback(level, msg); + } +} + +bool Decoder::enableLogLevel(int level) const { + return ssize_t(level) <= params_.logLevel; +} + +void Decoder::logCallback(int level, const std::string& message) { + LOG(INFO) << "Msg, uuid=" << params_.loggingUuid << " level=" << level + << " msg=" << message; +} + +/* static */ +int Decoder::shutdownFunction(void* ctx) { + Decoder* decoder = (Decoder*)ctx; + if (decoder == nullptr) { + return 1; + } + return decoder->shutdownCallback(); +} + +int Decoder::shutdownCallback() { + return interrupted_ ? 1 : 0; +} + +/* static */ +int Decoder::readFunction(void* opaque, uint8_t* buf, int size) { + Decoder* decoder = reinterpret_cast(opaque); + if (decoder == nullptr) { + return 0; + } + return decoder->readCallback(buf, size); +} + +/* static */ +int64_t Decoder::seekFunction(void* opaque, int64_t offset, int whence) { + Decoder* decoder = reinterpret_cast(opaque); + if (decoder == nullptr) { + return -1; + } + return decoder->seekCallback(offset, whence); +} + +int Decoder::readCallback(uint8_t* buf, int size) { + return seekableBuffer_.read(buf, size, params_.timeoutMs); +} + +int64_t Decoder::seekCallback(int64_t offset, int whence) { + return seekableBuffer_.seek(offset, whence, params_.timeoutMs); +} + +/* static */ +void Decoder::initOnce() { + static std::once_flag flagInit; + std::call_once(flagInit, []() { +#if LIBAVUTIL_VERSION_MAJOR < 56 // Before FFMPEG 4.0 + av_register_all(); + avcodec_register_all(); +#endif + avformat_network_init(); + av_log_set_callback(Decoder::logFunction); + av_log_set_level(AV_LOG_ERROR); + VLOG(1) << "Registered ffmpeg libs"; + }); +} + +Decoder::Decoder() { + initOnce(); +} + +Decoder::~Decoder() { + cleanUp(); +} + +// Initialise the format context that holds information about the container and +// 
fill it with minimal information about the format (codecs are not opened +// here). Function reads in information about the streams from the container +// into inputCtx and then passes it to decoder::openStreams. Finally, if seek is +// specified within the decoder parameters, it seeks into the correct frame +// (note, the seek defined here is "precise" seek). +bool Decoder::init( + const DecoderParameters& params, + DecoderInCallback&& in, + std::vector* metadata) { + cleanUp(); + + if ((params.uri.empty() || in) && (!params.uri.empty() || !in)) { + LOG(ERROR) + << "uuid=" << params_.loggingUuid + << " either external URI gets provided or explicit input callback"; + return false; + } + + // set callback and params + params_ = params; + + if (!(inputCtx_ = avformat_alloc_context())) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " cannot allocate format context"; + return false; + } + + AVInputFormat* fmt = nullptr; + int result = 0; + if (in) { + ImageType type = ImageType::UNKNOWN; + if ((result = seekableBuffer_.init( + std::forward(in), + params_.timeoutMs, + params_.maxSeekableBytes, + params_.isImage ? 
&type : nullptr)) < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " can't initiate seekable buffer"; + cleanUp(); + return false; + } + + if (params_.isImage) { + const char* fmtName = "image2"; + switch (type) { + case ImageType::JPEG: + fmtName = "jpeg_pipe"; + break; + case ImageType::PNG: + fmtName = "png_pipe"; + break; + case ImageType::TIFF: + fmtName = "tiff_pipe"; + break; + default: + break; + } + + fmt = (AVInputFormat*)av_find_input_format(fmtName); + } + + const size_t avioCtxBufferSize = kIoBufferSize; + uint8_t* avioCtxBuffer = + (uint8_t*)av_malloc(avioCtxBufferSize + kIoPaddingSize); + if (!avioCtxBuffer) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " av_malloc cannot allocate " << avioCtxBufferSize + << " bytes"; + cleanUp(); + return false; + } + + if (!(avioCtx_ = avio_alloc_context( + avioCtxBuffer, + avioCtxBufferSize, + 0, + reinterpret_cast(this), + &Decoder::readFunction, + nullptr, + result == 1 ? &Decoder::seekFunction : nullptr))) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " avio_alloc_context failed"; + av_free(avioCtxBuffer); + cleanUp(); + return false; + } + + avioCtx_->max_packet_size = params.maxEncodedBufferSize; + + inputCtx_->pb = avioCtx_; + inputCtx_->flags |= AVFMT_FLAG_CUSTOM_IO; + } + + inputCtx_->opaque = reinterpret_cast(this); + inputCtx_->interrupt_callback.callback = Decoder::shutdownFunction; + inputCtx_->interrupt_callback.opaque = reinterpret_cast(this); + + // add network timeout + inputCtx_->flags |= AVFMT_FLAG_NONBLOCK; + + AVDictionary* options = nullptr; + if (params_.listen) { + av_dict_set_int(&options, "listen", 1, 0); + } + if (params_.timeoutMs > 0) { + av_dict_set_int(&options, "analyzeduration", params_.timeoutMs * 1000, 0); + av_dict_set_int(&options, "stimeout", params_.timeoutMs * 1000, 0); + av_dict_set_int(&options, "rw_timeout", params_.timeoutMs * 1000, 0); + if (!params_.tlsCertFile.empty()) { + av_dict_set(&options, "cert_file", params_.tlsCertFile.data(), 0); + } 
+ if (!params_.tlsKeyFile.empty()) { + av_dict_set(&options, "key_file", params_.tlsKeyFile.data(), 0); + } + } + + av_dict_set_int(&options, "probesize", params_.probeSize, 0); + + interrupted_ = false; + + // ffmpeg avformat_open_input call can hang if media source doesn't respond + // set a guard for handle such situations, if requested + std::promise p; + std::future f = p.get_future(); + std::unique_ptr guard; + if (params_.preventStaleness) { + guard = std::make_unique([&f, this]() { + auto timeout = std::chrono::milliseconds(params_.timeoutMs); + if (std::future_status::timeout == f.wait_for(timeout)) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " cannot open stream within " << params_.timeoutMs + << " ms"; + interrupted_ = true; + } + }); + } + + if (fmt) { + result = avformat_open_input(&inputCtx_, nullptr, fmt, &options); + } else { + result = + avformat_open_input(&inputCtx_, params_.uri.c_str(), nullptr, &options); + } + + av_dict_free(&options); + + if (guard) { + p.set_value(true); + guard->join(); + guard.reset(); + } + + if (result < 0 || interrupted_) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " avformat_open_input failed, error=" + << Util::generateErrorDesc(result); + cleanUp(); + return false; + } + + result = avformat_find_stream_info(inputCtx_, nullptr); + + if (result < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " avformat_find_stream_info failed, error=" + << Util::generateErrorDesc(result); + cleanUp(); + return false; + } + + if (!openStreams(metadata)) { + LOG(ERROR) << "uuid=" << params_.loggingUuid << " cannot activate streams"; + cleanUp(); + return false; + } + // SyncDecoder inherits Decoder which would override onInit. + onInit(); + + if (params.startOffset != 0) { + auto offset = params.startOffset <= params.seekAccuracy + ? 
0 + : params.startOffset - params.seekAccuracy; + + av_seek_frame(inputCtx_, -1, offset, AVSEEK_FLAG_BACKWARD); + } + + for (unsigned int i = 0; i < inputCtx_->nb_streams; i++) { + if ( +#if LIBAVUTIL_VERSION_MAJOR < 56 // Before FFMPEG 4.0 + inputCtx_->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO +#else // FFMPEG 4.0+ + inputCtx_->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO +#endif + && inputCtx_->streams[i]->duration > 0) { + // There is at least two 1/r_frame_rates from the frame before the last + // one until the video duration, let's prefer to set duration after the + // frame before the last one, but as early as possible + double correction = 2 * inputCtx_->streams[i]->r_frame_rate.den / + (double)inputCtx_->streams[i]->r_frame_rate.num - + 1 / (double)AV_TIME_BASE; + videoDurationMs_ = 1000 * inputCtx_->streams[i]->duration * + inputCtx_->streams[i]->time_base.num / + (double)inputCtx_->streams[i]->time_base.den - + 1000 * correction; + break; + } + } + + VLOG(1) << "Decoder initialized, log level: " << params_.logLevel; + VLOG(1) << "Video duration: " << videoDurationMs_; + return true; +} + +// open appropriate CODEC for every type of stream and move it to the class +// variable `streams_` and make sure it is in range for decoding +bool Decoder::openStreams(std::vector* metadata) { + for (unsigned int i = 0; i < inputCtx_->nb_streams; i++) { + // - find the corespondent format at params_.formats set + MediaFormat format; +#if LIBAVUTIL_VERSION_MAJOR < 56 // Before FFMPEG 4.0 + const auto media = inputCtx_->streams[i]->codec->codec_type; +#else // FFMPEG 4.0+ + const auto media = inputCtx_->streams[i]->codecpar->codec_type; +#endif + if (!mapFfmpegType(media, &format.type)) { + VLOG(1) << "Stream media: " << media << " at index " << i + << " gets ignored, unknown type"; + + continue; // unsupported type + } + + // check format + auto it = params_.formats.find(format); + if (it == params_.formats.end()) { + VLOG(1) << "Stream type: " << 
format.type << " at index: " << i + << " gets ignored, caller is not interested"; + continue; // clients don't care about this media format + } + + // do we have stream of this type? + auto stream = findByType(format); + + // should we process this stream? + + if (it->stream == -2 || // all streams of this type are welcome + (!stream && (it->stream == -1 || it->stream == i))) { // new stream + VLOG(1) << "Stream type: " << format.type << " found, at index: " << i; + auto stream_2 = createStream( + format.type, + inputCtx_, + i, + params_.convertPtsToWallTime, + it->format, + params_.loggingUuid); + CHECK(stream_2); + if (stream_2->openCodec(metadata, params_.numThreads) < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " open codec failed, stream_idx=" << i; + return false; + } + streams_.emplace(i, std::move(stream_2)); + inRange_.set(i, true); + } + } + + return true; +} + +void Decoder::shutdown() { + cleanUp(); +} + +void Decoder::interrupt() { + interrupted_ = true; +} + +void Decoder::cleanUp() { + if (!interrupted_) { + interrupted_ = true; + } + + if (inputCtx_) { + for (auto& stream : streams_) { + // Drain stream buffers. + DecoderOutputMessage msg; + while (msg.payload = nullptr, stream.second->flush(&msg, true) > 0) { + } + stream.second.reset(); + } + streams_.clear(); + avformat_close_input(&inputCtx_); + } + if (avioCtx_) { + av_freep(&avioCtx_->buffer); + av_freep(&avioCtx_); + } + + // reset callback + seekableBuffer_.shutdown(); +} + +// function does actual work, derived class calls it in working thread +// periodically. 
On success method returns 0, ENODATA on EOF, ETIMEDOUT if +// no frames got decoded in the specified timeout time, AVERROR_BUFFER_TOO_SMALL +// when unable to allocate packet and error on unrecoverable error +int Decoder::getFrame(size_t workingTimeInMs) { + if (inRange_.none()) { + return ENODATA; + } + // decode frames until cache is full and leave thread + // once decode() method gets called and grab some bytes + // run this method again + // init package + // update 03/22: moving memory management to ffmpeg + AVPacket* avPacket; + avPacket = av_packet_alloc(); + if (avPacket == nullptr) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " decoder as not able to allocate the packet."; + return AVERROR_BUFFER_TOO_SMALL; + } + avPacket->data = nullptr; + avPacket->size = 0; + + auto end = std::chrono::steady_clock::now() + + std::chrono::milliseconds(workingTimeInMs); + // return true if elapsed time less than timeout + auto watcher = [end]() -> bool { + return std::chrono::steady_clock::now() <= end; + }; + + int result = 0; + size_t decodingErrors = 0; + bool decodedFrame = false; + while (!interrupted_ && inRange_.any() && !decodedFrame) { + if (watcher() == false) { + LOG(ERROR) << "uuid=" << params_.loggingUuid << " hit ETIMEDOUT"; + result = ETIMEDOUT; + break; + } + result = av_read_frame(inputCtx_, avPacket); + if (result == AVERROR(EAGAIN)) { + VLOG(4) << "Decoder is busy..."; + std::this_thread::yield(); + result = 0; // reset error, EAGAIN is not an error at all + // reset the packet to default settings + av_packet_unref(avPacket); + continue; + } else if (result == AVERROR_EOF) { + flushStreams(); + VLOG(1) << "End of stream"; + result = ENODATA; + break; + } else if ( + result == AVERROR(EPERM) && params_.skipOperationNotPermittedPackets) { + // reset error, lets skip packets with EPERM + result = 0; + // reset the packet to default settings + av_packet_unref(avPacket); + continue; + } else if (result < 0) { + flushStreams(); + LOG(ERROR) << 
"uuid=" << params_.loggingUuid + << " error detected: " << Util::generateErrorDesc(result); + break; + } + + // get stream; if stream cannot be found reset the packet to + // default settings + auto stream = findByIndex(avPacket->stream_index); + if (stream == nullptr || !inRange_.test(stream->getIndex())) { + av_packet_unref(avPacket); + continue; + } + + size_t numConsecutiveNoBytes = 0; + // it can be only partial decoding of the package bytes + do { + // decode package + bool gotFrame = false; + bool hasMsg = false; + // packet either got consumed completely or not at all + if ((result = processPacket( + stream, avPacket, &gotFrame, &hasMsg, params_.fastSeek)) < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " processPacket failed with code: " << result; + break; + } + + if (!gotFrame && params_.maxProcessNoBytes != 0 && + ++numConsecutiveNoBytes > params_.maxProcessNoBytes) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " exceeding max amount of consecutive no bytes"; + break; + } + if (result > 0) { + numConsecutiveNoBytes = 0; + } + + decodedFrame |= hasMsg; + } while (result == 0); + + // post loop check + if (result < 0) { + if (params_.maxPackageErrors != 0 && // check errors + ++decodingErrors >= params_.maxPackageErrors) { // reached the limit + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " exceeding max amount of consecutive package errors"; + break; + } + } else { + decodingErrors = 0; // reset on success + } + + result = 0; + + av_packet_unref(avPacket); + + if (params_.uniformSampling > 1) { + if (doSeek_) { + double duration = + videoDurationMs_ > 0 ? 
videoDurationMs_ : params_.expectedDuration; + double step = + (duration * AV_TIME_BASE) / (1000 * (params_.uniformSampling - 1)); + avformat_seek_file( + inputCtx_, + -1, + static_cast(step * kFramesDecoded_) + 1, + static_cast(step * (kFramesDecoded_ + 1)), + static_cast(step * (kFramesDecoded_ + 1)), + 0); + ++kFramesDecoded_; + doSeek_ = false; + } + } + } + + av_packet_free(&avPacket); + VLOG(2) << "Interrupted loop" << ", interrupted_ " << interrupted_ + << ", inRange_.any() " << inRange_.any() << ", decodedFrame " + << decodedFrame << ", result " << result; + + // loop can be terminated, either by: + // 1. explicitly interrupted + // 3. unrecoverable error or ENODATA (end of stream) or ETIMEDOUT (timeout) + // 4. decoded frames pts are out of the specified range + // 5. success decoded frame + if (interrupted_) { + return EINTR; + } + if (result != 0) { + return result; + } + if (inRange_.none()) { + return ENODATA; + } + return 0; +} + +// find stream by stream index +Stream* Decoder::findByIndex(int streamIndex) const { + auto it = streams_.find(streamIndex); + return it != streams_.end() ? it->second.get() : nullptr; +} + +// find stream by type; note finds only the first stream of a given type +Stream* Decoder::findByType(const MediaFormat& format) const { + for (auto& stream : streams_) { + if (stream.second->getMediaFormat().type == format.type) { + return stream.second.get(); + } + } + return nullptr; +} + +// given the stream and packet, decode the frame buffers into the +// DecoderOutputMessage data structure via stream::decodePacket function. +int Decoder::processPacket( + Stream* stream, + AVPacket* packet, + bool* gotFrame, + bool* hasMsg, + bool fastSeek) { + // decode package + int result; + DecoderOutputMessage msg; + msg.payload = params_.headerOnly ? 
nullptr : createByteStorage(0); + *hasMsg = false; + if ((result = stream->decodePacket( + packet, &msg, params_.headerOnly, gotFrame)) >= 0 && + *gotFrame) { + // check end offset + bool endInRange = + params_.endOffset <= 0 || msg.header.pts <= params_.endOffset; + inRange_.set(stream->getIndex(), endInRange); + // if fastseek is enabled, we're returning the first + // frame that we decode after (potential) seek. + // By default, we perform accurate seek to the closest + // following frame + bool startCondition = true; + if (!fastSeek) { + startCondition = msg.header.pts >= params_.startOffset; + } + if (endInRange && startCondition) { + *hasMsg = pushMsg(std::move(msg)); + } + } + return result; +} + +bool Decoder::pushMsg(DecoderOutputMessage&& msg) { + pastDecodedPTS_ = currentDecodedPTS_; + currentDecodedPTS_ = msg.header.pts; + + if (params_.uniformSampling <= 1) { + push(std::move(msg)); + return true; + } + + double duration = + videoDurationMs_ > 0 ? videoDurationMs_ : params_.expectedDuration; + double step = + (duration * AV_TIME_BASE) / (1000 * (params_.uniformSampling - 1)); + if (pastDecodedPTS_ < step * kFramesDecoded_ && + step * kFramesDecoded_ <= currentDecodedPTS_) { + push(std::move(msg)); + doSeek_ = true; + return true; + } + + return false; +} + +void Decoder::flushStreams() { + VLOG(1) << "Flushing streams..."; + for (auto& stream : streams_) { + DecoderOutputMessage msg; + while (msg.payload = (params_.headerOnly ? 
nullptr : createByteStorage(0)), + stream.second->flush(&msg, params_.headerOnly) > 0) { + // check end offset + bool endInRange = + params_.endOffset <= 0 || msg.header.pts <= params_.endOffset; + inRange_.set(stream.second->getIndex(), endInRange); + if (endInRange && msg.header.pts >= params_.startOffset) { + pushMsg(std::move(msg)); + } else { + msg.payload.reset(); + } + } + } +} + +int Decoder::decode_all(const DecoderOutCallback& callback) { + int result; + do { + DecoderOutputMessage out; + if (0 == (result = decode(&out, params_.timeoutMs))) { + callback(std::move(out)); + } + } while (result == 0); + return result; +} +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/decoder.h b/torchvision/csrc/io/decoder/decoder.h new file mode 100644 index 00000000000..172a011f93e --- /dev/null +++ b/torchvision/csrc/io/decoder/decoder.h @@ -0,0 +1,100 @@ +#pragma once + +#include +#include +#include "seekable_buffer.h" +#include "stream.h" + +#if defined(_MSC_VER) +#include +using ssize_t = SSIZE_T; +#endif + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode media streams. + * Media bytes can be explicitly provided through read-callback + * or fetched internally by FFMPEG library + */ +class Decoder : public MediaDecoder { + public: + Decoder(); + ~Decoder() override; + + // MediaDecoder overrides + bool init( + const DecoderParameters& params, + DecoderInCallback&& in, + std::vector* metadata) override; + int decode_all(const DecoderOutCallback& callback) override; + void shutdown() override; + void interrupt() override; + + protected: + // function does actual work, derived class calls it in working thread + // periodically. On success method returns 0, ENOADATA on EOF, ETIMEDOUT if + // no frames got decoded in the specified timeout time, and error on + // unrecoverable error. 
+ int getFrame(size_t workingTimeInMs = 100); + + // Derived class must override method and consume the provided message + virtual void push(DecoderOutputMessage&& buffer) = 0; + + // Fires on init call + virtual void onInit() {} + + public: + // C-style FFMPEG API requires C/static methods for callbacks + static void logFunction(void* avcl, int level, const char* cfmt, va_list vl); + static int shutdownFunction(void* ctx); + static int readFunction(void* opaque, uint8_t* buf, int size); + static int64_t seekFunction(void* opaque, int64_t offset, int whence); + // can be called by any classes or API + static void initOnce(); + + int* getPrintPrefix() { + return &printPrefix; + } + double videoDurationMs_ = -1; + + private: + // mark below function for a proper invocation + bool enableLogLevel(int level) const; + void logCallback(int level, const std::string& message); + int readCallback(uint8_t* buf, int size); + int64_t seekCallback(int64_t offset, int whence); + int shutdownCallback(); + + bool openStreams(std::vector* metadata); + Stream* findByIndex(int streamIndex) const; + Stream* findByType(const MediaFormat& format) const; + int processPacket( + Stream* stream, + AVPacket* packet, + bool* gotFrame, + bool* hasMsg, + bool fastSeek = false); + void flushStreams(); + void cleanUp(); + bool pushMsg(DecoderOutputMessage&& + msg); // returns whether frame is passed to downstream + + protected: + DecoderParameters params_; + + private: + SeekableBuffer seekableBuffer_; + int printPrefix{1}; + + std::atomic interrupted_{false}; + AVFormatContext* inputCtx_{nullptr}; + AVIOContext* avioCtx_{nullptr}; + std::unordered_map> streams_; + std::bitset<64> inRange_; + int kFramesDecoded_{0}; + int64_t pastDecodedPTS_{-1}; + int64_t currentDecodedPTS_{-1}; + bool doSeek_{false}; +}; +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/defs.h b/torchvision/csrc/io/decoder/defs.h new file mode 100644 index 00000000000..d2dc5c7935b --- /dev/null +++ 
b/torchvision/csrc/io/decoder/defs.h @@ -0,0 +1,415 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +#include "libswscale/swscale.h" +} + +namespace ffmpeg { + +// bit mask of formats, keep them in form 2^n +enum MediaType : size_t { + TYPE_AUDIO = 1, + TYPE_VIDEO = 2, + TYPE_SUBTITLE = 4, + TYPE_CC = 8, // closed captions from transport streams +}; + +// audio +struct AudioFormat { + // fields are initialized for the auto detection + // caller can specify some/all of field values if specific output is desirable + bool operator==(const AudioFormat& x) const { + return x.format == format && x.samples == samples && x.channels == channels; + } + + size_t samples{0}; // number samples per second (frequency) + size_t channels{0}; // number of channels + long format{-1}; // AVSampleFormat, auto AV_SAMPLE_FMT_NONE + size_t padding[2]; + // -- alignment 40 bytes +}; + +// video +struct VideoFormat { + // fields are initialized for the auto detection + // caller can specify some/all of field values if specific output is desirable + bool operator==(const VideoFormat& x) const { + return x.format == format && x.width == width && x.height == height; + } + /* + When width = 0, height = 0, minDimension = 0, and maxDimension = 0, + keep the original frame resolution + When width = 0, height = 0, minDimension != 0, and maxDimension = 0, + keep the aspect ratio and resize the frame so that shorter edge size is + minDimension + When width = 0, height = 0, minDimension = 0, and maxDimension != 0, + keep the aspect ratio and resize the frame so that longer edge size is + maxDimension + When width = 0, height = 0, minDimension != 0, and maxDimension != 0, + resize the frame so that shorter edge size is minDimension, and + longer edge size is maxDimension. 
The aspect ratio may not be preserved + When width = 0, height != 0, minDimension = 0, and maxDimension = 0, + keep the aspect ratio and resize the frame so that frame height is $height + When width != 0, height = 0, minDimension = 0, and maxDimension = 0, + keep the aspect ratio and resize the frame so that frame width is $width + When width != 0, height != 0, minDimension = 0, and maxDimension = 0, + resize the frame so that frame width and height are set to $width and + $height, + respectively + */ + size_t width{0}; // width in pixels + size_t height{0}; // height in pixels + long format{-1}; // AVPixelFormat, auto AV_PIX_FMT_NONE + size_t minDimension{0}; // choose min dimension and rescale accordingly + size_t maxDimension{0}; // choose max dimension and rescale accordingly + size_t cropImage{0}; // request image crop + // -- alignment 40 bytes +}; + +// subtitle/cc +struct SubtitleFormat { + long type{0}; // AVSubtitleType, auto SUBTITLE_NONE + size_t padding[4]; + // -- alignment 40 bytes +}; + +union FormatUnion { + FormatUnion() : audio() {} + explicit FormatUnion(int) : video() {} + explicit FormatUnion(char) : subtitle() {} + explicit FormatUnion(double) : subtitle() {} + AudioFormat audio; + VideoFormat video; + SubtitleFormat subtitle; + // -- alignment 40 bytes +}; + +/* + MediaFormat data structure serves as input/output parameter. 
+ Caller assigns values for input formats + or leave default values for auto detection + For output formats all fields will be set to the specific values +*/ +struct MediaFormat { + // for using map/set data structures + bool operator<(const MediaFormat& x) const { + return type < x.type; + } + bool operator==(const MediaFormat& x) const { + if (type != x.type) { + return false; + } + switch (type) { + case TYPE_AUDIO: + return format.audio == x.format.audio; + case TYPE_VIDEO: + return format.video == x.format.video; + case TYPE_SUBTITLE: + case TYPE_CC: + return true; + default: + return false; + } + } + + explicit MediaFormat(long s = -1) : type(TYPE_AUDIO), stream(s), format() {} + explicit MediaFormat(int x, long s = -1) + : type(TYPE_VIDEO), stream(s), format(x) {} + explicit MediaFormat(char x, long s = -1) + : type(TYPE_SUBTITLE), stream(s), format(x) {} + explicit MediaFormat(double x, long s = -1) + : type(TYPE_CC), stream(s), format(x) {} + + static MediaFormat makeMediaFormat(AudioFormat format, long stream) { + MediaFormat result(stream); + result.format.audio = format; + return result; + } + + static MediaFormat makeMediaFormat(VideoFormat format, long stream) { + MediaFormat result(0, stream); + result.format.video = format; + return result; + } + + static MediaFormat makeMediaFormat(SubtitleFormat format, long stream) { + MediaFormat result('0', stream); + result.format.subtitle = format; + return result; + } + + // format type + MediaType type; + // stream index: + // set -1 for one stream auto detection, -2 for all streams auto detection, + // >= 0, specified stream, if caller knows the stream index (unlikely) + long stream; + // union keeps one of the possible formats, defined by MediaType + FormatUnion format; +}; + +struct DecoderParameters { + // local file, remote file, http url, rtmp stream uri, etc. 
anything that + // ffmpeg can recognize + std::string uri{std::string()}; + // timeout on getting bytes for decoding + size_t timeoutMs{1000}; + // logging level, default AV_LOG_PANIC + long logLevel{0}; + // when decoder would give up, 0 means never + size_t maxPackageErrors{0}; + // max allowed consecutive times no bytes are processed. 0 means for infinite. + size_t maxProcessNoBytes{0}; + // start offset (us) + long startOffset{0}; + // end offset (us) + long endOffset{-1}; + // logging id + int64_t loggingUuid{0}; + // internal max seekable buffer size + size_t maxSeekableBytes{0}; + // adjust header pts to the epoch time + bool convertPtsToWallTime{false}; + // indicate if input stream is an encoded image + bool isImage{false}; + // listen and wait for new rtmp stream + bool listen{false}; + // don't copy frame body, only header + bool headerOnly{false}; + // enable fast seek (seek only to keyframes) + bool fastSeek{false}; + // interrupt init method on timeout + bool preventStaleness{true}; + // seek tolerated accuracy (us) + double seekAccuracy{1000000.0}; + // Allow multithreaded decoding for numThreads > 1; + // 0 numThreads=0 sets up sensible defaults + int numThreads{1}; + // what media types should be processed, default none + std::set formats; + + // can be used for asynchronous decoders + size_t cacheSize{8192}; // mow many bytes to cache before stop reading bytes + size_t cacheTimeoutMs{1000}; // timeout on bytes writing + bool enforceCacheSize{false}; // drop output frames if cache is full + bool mergeAudioMessages{false}; // combine collocated audio messages together + + std::string tlsCertFile; + std::string tlsKeyFile; + + // Skip packets that fail with EPERM errors and continue decoding. + bool skipOperationNotPermittedPackets{false}; + + // probing size in bytes, i.e. the size of the data to analyze to get stream + // information. 
A higher value will enable detecting more information in case + // it is dispersed into the stream, but will increase latency. Must be an + // integer not lesser than 32. It is 5000000 by default. + int64_t probeSize{5000000}; + + // Expected duration of the video to be decoded, mainly used with uniform + // sampling + float expectedDuration{0.0f}; + + // Sample N key-frames from the video roughly uniformly across the timeline + int uniformSampling{0}; + + // with 0, ffmpeg allocates buffers of size 32768 bytes for encoded frames. + // Override this with bigger buffer size if needed. + int64_t maxEncodedBufferSize{0}; +}; + +struct DecoderHeader { + // message id, from 0 till ... + size_t seqno{0}; + // decoded timestamp in microseconds from either beginning of the stream or + // from epoch time, see DecoderParameters::convertPtsToWallTime + long pts{0}; + // decoded key frame + size_t keyFrame{0}; + // frames per second, valid only for video streams + double fps{0}; + // format specifies what kind frame is in a payload + MediaFormat format; +}; + +// Abstract interface ByteStorage class +class ByteStorage { + public: + virtual ~ByteStorage() = default; + // makes sure that buffer has at least n bytes available for writing, if not + // storage must reallocate memory. 
+ virtual void ensure(size_t n) = 0; + // caller must not to write more than available bytes + virtual uint8_t* writableTail() = 0; + // caller confirms that n bytes were written to the writable tail + virtual void append(size_t n) = 0; + // caller confirms that n bytes were read from the read buffer + virtual void trim(size_t n) = 0; + // gives an access to the beginning of the read buffer + virtual const uint8_t* data() const = 0; + // returns the stored size in bytes + virtual size_t length() const = 0; + // returns available capacity for writable tail + virtual size_t tail() const = 0; + // clears content, keeps capacity + virtual void clear() = 0; +}; + +struct DecoderOutputMessage { + DecoderHeader header; + std::unique_ptr payload; +}; + +/* + * External provider of the ecnoded bytes, specific implementation is left for + * different use cases, like file, memory, external network end-points, etc. + * Normally input/output parameter @out set to valid, not null buffer pointer, + * which indicates "read" call, however there are "seek" modes as well. + + * @out != nullptr => read from the current offset, @whence got ignored, + * @size bytes to read => return number bytes got read, 0 if no more bytes + * available, < 0 on error. + + * @out == nullptr, @timeoutMs == 0 => does provider support "seek" + * capability in a first place? @size & @whence got ignored, return 0 on + * success, < 0 if "seek" mode is not supported. + + * @out == nullptr, @timeoutMs != 0 => normal seek call + * offset == @size, i.e. @whence = [SEEK_SET, SEEK_CUR, SEEK_END, AVSEEK_SIZE) + * return < 0 on error, position if @whence = [SEEK_SET, SEEK_CUR, SEEK_END], + * length of buffer if @whence = [AVSEEK_SIZE]. 
+ */ +using DecoderInCallback = + std::function; + +using DecoderOutCallback = std::function; + +struct DecoderMetadata { + // time base numerator + long num{0}; + // time base denominator + long den{1}; + // duration of the stream, in miscroseconds, if available + long duration{-1}; + // frames per second, valid only for video streams + double fps{0}; + // format specifies what kind frame is in a payload + MediaFormat format; +}; +/** + * Abstract class for decoding media bytes + * It has two different modes. Internal media bytes retrieval for given uri and + * external media bytes provider in case of memory streams + */ +class MediaDecoder { + public: + virtual ~MediaDecoder() = default; + + /** + * Initializes media decoder with parameters, + * calls callback when media bytes are available. + * Media bytes get fetched internally from provided URI + * or invokes provided input callback to get media bytes. + * Input callback must be empty for the internal media provider + * Caller can provide non-null pointer for the input container + * if headers to obtain the streams metadata (optional) + */ + virtual bool init( + const DecoderParameters& params, + DecoderInCallback&& in, + std::vector* metadata) = 0; + + /** + * Polls available decoded one frame from decoder + * Returns error code, 0 - for success + */ + virtual int decode(DecoderOutputMessage* out, uint64_t timeoutMs) = 0; + + /** + * Polls available decoded bytes from decoder, till EOF or error + */ + virtual int decode_all(const DecoderOutCallback& callback) = 0; + + /** + * Stops calling callback, releases resources + */ + virtual void shutdown() = 0; + + /** + * Interrupts whatever decoder is doing at any time + */ + virtual void interrupt() = 0; + + /** + * Factory to create ByteStorage class instances, particular implementation is + * left to the derived class. 
Caller provides the initially allocated size + */ + virtual std::unique_ptr createByteStorage(size_t n) = 0; +}; + +struct SamplerParameters { + MediaType type{TYPE_AUDIO}; + FormatUnion in; + FormatUnion out; + int64_t loggingUuid{0}; +}; + +/** + * Abstract class for sampling media bytes + */ +class MediaSampler { + public: + virtual ~MediaSampler() = default; + + /** + * Initializes media sampler with parameters + */ + virtual bool init(const SamplerParameters& params) = 0; + + /** + * Samples media bytes + * Returns error code < 0, or >=0 - for success, indicating number of bytes + * processed. + * set @in to null for flushing data + */ + virtual int sample(const ByteStorage* in, ByteStorage* out) = 0; + + /** + * Releases resources + */ + virtual void shutdown() = 0; + + /* + * Returns media type + */ + MediaType getMediaType() const { + return params_.type; + } + /* + * Returns formats + */ + FormatUnion getInputFormat() const { + return params_.in; + } + FormatUnion getOutFormat() const { + return params_.out; + } + + protected: + SamplerParameters params_; +}; +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/gpu/README.rst b/torchvision/csrc/io/decoder/gpu/README.rst new file mode 100644 index 00000000000..cebd31cb557 --- /dev/null +++ b/torchvision/csrc/io/decoder/gpu/README.rst @@ -0,0 +1,21 @@ +GPU Decoder +=========== + +GPU decoder depends on ffmpeg for demuxing, uses NVDECODE APIs from the nvidia-video-codec sdk and uses cuda for processing on gpu. In order to use this, please follow the following steps: + +* Download the latest `nvidia-video-codec-sdk `_ +* Extract the zipped file. +* Set TORCHVISION_INCLUDE environment variable to the location of the video codec headers(`nvcuvid.h` and `cuviddec.h`), which would be under `Interface` directory. +* Set TORCHVISION_LIBRARY environment variable to the location of the video codec library(`libnvcuvid.so`), which would be under `Lib/linux/stubs/x86_64` directory. 
+* Install the latest ffmpeg from `conda-forge` channel. + +.. code:: bash + + conda install -c conda-forge ffmpeg + +* Set CUDA_HOME environment variable to the cuda root directory. +* Build torchvision from source: + +.. code:: bash + + python setup.py install diff --git a/torchvision/csrc/io/decoder/gpu/decoder.cpp b/torchvision/csrc/io/decoder/gpu/decoder.cpp new file mode 100644 index 00000000000..f7377ede38b --- /dev/null +++ b/torchvision/csrc/io/decoder/gpu/decoder.cpp @@ -0,0 +1,405 @@ +#include "decoder.h" +#include +#include +#include +#include +#include + +static float chroma_height_factor(cudaVideoSurfaceFormat surface_format) { + return (surface_format == cudaVideoSurfaceFormat_YUV444 || + surface_format == cudaVideoSurfaceFormat_YUV444_16Bit) + ? 1.0 + : 0.5; +} + +static int chroma_plane_count(cudaVideoSurfaceFormat surface_format) { + return (surface_format == cudaVideoSurfaceFormat_YUV444 || + surface_format == cudaVideoSurfaceFormat_YUV444_16Bit) + ? 2 + : 1; +} + +/* Initialise cu_context and video_codec, create context lock and create parser + * object. + */ +void Decoder::init(CUcontext context, cudaVideoCodec codec) { + cu_context = context; + video_codec = codec; + check_for_cuda_errors( + cuvidCtxLockCreate(&ctx_lock, cu_context), __LINE__, __FILE__); + + CUVIDPARSERPARAMS parser_params = {}; + parser_params.CodecType = codec; + parser_params.ulMaxNumDecodeSurfaces = 1; + parser_params.ulClockRate = 1000; + parser_params.ulMaxDisplayDelay = 0u; + parser_params.pUserData = this; + parser_params.pfnSequenceCallback = video_sequence_handler; + parser_params.pfnDecodePicture = picture_decode_handler; + parser_params.pfnDisplayPicture = picture_display_handler; + parser_params.pfnGetOperatingPoint = operating_point_handler; + + check_for_cuda_errors( + cuvidCreateVideoParser(&parser, &parser_params), __LINE__, __FILE__); +} + +/* Destroy parser object and context lock. 
+ */ +Decoder::~Decoder() { + if (parser) { + cuvidDestroyVideoParser(parser); + } + cuvidCtxLockDestroy(ctx_lock); +} + +/* Destroy CUvideodecoder object and free up all the unreturned decoded frames. + */ +void Decoder::release() { + cuCtxPushCurrent(cu_context); + if (decoder) { + cuvidDestroyDecoder(decoder); + } + cuCtxPopCurrent(nullptr); +} + +/* Trigger video decoding. + */ +void Decoder::decode(const uint8_t* data, unsigned long size) { + CUVIDSOURCEDATAPACKET pkt = {}; + pkt.flags = CUVID_PKT_TIMESTAMP; + pkt.payload_size = size; + pkt.payload = data; + pkt.timestamp = 0; + if (!data || size == 0) { + pkt.flags |= CUVID_PKT_ENDOFSTREAM; + } + check_for_cuda_errors(cuvidParseVideoData(parser, &pkt), __LINE__, __FILE__); + cuvidStream = 0; +} + +/* Fetch a decoded frame and remove it from the queue. + */ +torch::Tensor Decoder::fetch_frame() { + if (decoded_frames.empty()) { + auto options = + torch::TensorOptions().dtype(torch::kU8).device(torch::kCUDA); + return torch::zeros({0}, options); + } + torch::Tensor frame = decoded_frames.front(); + decoded_frames.pop(); + return frame; +} + +/* Called when a picture is ready to be decoded. + */ +int Decoder::handle_picture_decode(CUVIDPICPARAMS* pic_params) { + if (!decoder) { + TORCH_CHECK(false, "Uninitialised decoder"); + } + pic_num_in_decode_order[pic_params->CurrPicIdx] = decode_pic_count++; + check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__); + check_for_cuda_errors( + cuvidDecodePicture(decoder, pic_params), __LINE__, __FILE__); + check_for_cuda_errors(cuCtxPopCurrent(nullptr), __LINE__, __FILE__); + return 1; +} + +/* Process the decoded data and copy it to a cuda memory location. 
+ */ +int Decoder::handle_picture_display(CUVIDPARSERDISPINFO* disp_info) { + CUVIDPROCPARAMS proc_params = {}; + proc_params.progressive_frame = disp_info->progressive_frame; + proc_params.second_field = disp_info->repeat_first_field + 1; + proc_params.top_field_first = disp_info->top_field_first; + proc_params.unpaired_field = disp_info->repeat_first_field < 0; + proc_params.output_stream = cuvidStream; + + CUdeviceptr source_frame = 0; + unsigned int source_pitch = 0; + check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__); + check_for_cuda_errors( + cuvidMapVideoFrame( + decoder, + disp_info->picture_index, + &source_frame, + &source_pitch, + &proc_params), + __LINE__, + __FILE__); + + CUVIDGETDECODESTATUS decode_status; + memset(&decode_status, 0, sizeof(decode_status)); + CUresult result = + cuvidGetDecodeStatus(decoder, disp_info->picture_index, &decode_status); + if (result == CUDA_SUCCESS && + (decode_status.decodeStatus == cuvidDecodeStatus_Error || + decode_status.decodeStatus == cuvidDecodeStatus_Error_Concealed)) { + VLOG(1) << "Decode Error occurred for picture " + << pic_num_in_decode_order[disp_info->picture_index]; + } + + auto options = torch::TensorOptions().dtype(torch::kU8).device(torch::kCUDA); + torch::Tensor decoded_frame = torch::empty({get_height(), width, 3}, options); + uint8_t* frame_ptr = decoded_frame.data_ptr(); + const uint8_t* const source_arr[] = { + (const uint8_t* const)source_frame, + (const uint8_t* const)(source_frame + + source_pitch * ((surface_height + 1) & ~1))}; + + auto err = nppiNV12ToRGB_709CSC_8u_P2C3R( + source_arr, + source_pitch, + frame_ptr, + width * 3, + {(int)decoded_frame.size(1), (int)decoded_frame.size(0)}); + + TORCH_CHECK( + err == NPP_NO_ERROR, + "Failed to convert from NV12 to RGB. 
Error code:", + err); + + check_for_cuda_errors(cuStreamSynchronize(cuvidStream), __LINE__, __FILE__); + decoded_frames.push(decoded_frame); + check_for_cuda_errors(cuCtxPopCurrent(nullptr), __LINE__, __FILE__); + + check_for_cuda_errors( + cuvidUnmapVideoFrame(decoder, source_frame), __LINE__, __FILE__); + return 1; +} + +/* Query the capabilities of the underlying hardware video decoder and + * verify if the hardware supports decoding the passed video. + */ +void Decoder::query_hardware(CUVIDEOFORMAT* video_format) { + CUVIDDECODECAPS decode_caps = {}; + decode_caps.eCodecType = video_format->codec; + decode_caps.eChromaFormat = video_format->chroma_format; + decode_caps.nBitDepthMinus8 = video_format->bit_depth_luma_minus8; + + check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__); + check_for_cuda_errors(cuvidGetDecoderCaps(&decode_caps), __LINE__, __FILE__); + check_for_cuda_errors(cuCtxPopCurrent(nullptr), __LINE__, __FILE__); + + if (!decode_caps.bIsSupported) { + TORCH_CHECK(false, "Codec not supported on this GPU"); + } + if ((video_format->coded_width > decode_caps.nMaxWidth) || + (video_format->coded_height > decode_caps.nMaxHeight)) { + TORCH_CHECK( + false, + "Resolution : ", + video_format->coded_width, + "x", + video_format->coded_height, + "\nMax Supported (wxh) : ", + decode_caps.nMaxWidth, + "x", + decode_caps.nMaxHeight, + "\nResolution not supported on this GPU"); + } + if ((video_format->coded_width >> 4) * (video_format->coded_height >> 4) > + decode_caps.nMaxMBCount) { + TORCH_CHECK( + false, + "MBCount : ", + (video_format->coded_width >> 4) * (video_format->coded_height >> 4), + "\nMax Supported mbcnt : ", + decode_caps.nMaxMBCount, + "\nMBCount not supported on this GPU"); + } + // Check if output format supported. 
If not, check fallback options + if (!(decode_caps.nOutputFormatMask & (1 << video_output_format))) { + if (decode_caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12)) { + video_output_format = cudaVideoSurfaceFormat_NV12; + } else if ( + decode_caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016)) { + video_output_format = cudaVideoSurfaceFormat_P016; + } else if ( + decode_caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444)) { + video_output_format = cudaVideoSurfaceFormat_YUV444; + } else if ( + decode_caps.nOutputFormatMask & + (1 << cudaVideoSurfaceFormat_YUV444_16Bit)) { + video_output_format = cudaVideoSurfaceFormat_YUV444_16Bit; + } else { + TORCH_CHECK(false, "No supported output format found"); + } + } +} + +/* Called before decoding frames and/or whenever there is a configuration + * change. + */ +int Decoder::handle_video_sequence(CUVIDEOFORMAT* video_format) { + // video_codec has been set in init(). Here it's set + // again for potential correction. + video_codec = video_format->codec; + video_chroma_format = video_format->chroma_format; + bit_depth_minus8 = video_format->bit_depth_luma_minus8; + bytes_per_pixel = bit_depth_minus8 > 0 ? 2 : 1; + // Set the output surface format same as chroma format + switch (video_chroma_format) { + case cudaVideoChromaFormat_Monochrome: + case cudaVideoChromaFormat_420: + video_output_format = video_format->bit_depth_luma_minus8 + ? cudaVideoSurfaceFormat_P016 + : cudaVideoSurfaceFormat_NV12; + break; + case cudaVideoChromaFormat_444: + video_output_format = video_format->bit_depth_luma_minus8 + ? cudaVideoSurfaceFormat_YUV444_16Bit + : cudaVideoSurfaceFormat_YUV444; + break; + case cudaVideoChromaFormat_422: + video_output_format = cudaVideoSurfaceFormat_NV12; + } + + query_hardware(video_format); + + if (width && luma_height && chroma_height) { + // cuvidCreateDecoder() has been called before and now there's possible + // config change. 
+ return reconfigure_decoder(video_format); + } + + cu_video_format = *video_format; + unsigned long decode_surface = video_format->min_num_decode_surfaces; + cudaVideoDeinterlaceMode deinterlace_mode = cudaVideoDeinterlaceMode_Adaptive; + + if (video_format->progressive_sequence) { + deinterlace_mode = cudaVideoDeinterlaceMode_Weave; + } + + CUVIDDECODECREATEINFO video_decode_create_info = {}; + video_decode_create_info.ulWidth = video_format->coded_width; + video_decode_create_info.ulHeight = video_format->coded_height; + video_decode_create_info.ulNumDecodeSurfaces = decode_surface; + video_decode_create_info.CodecType = video_format->codec; + video_decode_create_info.ChromaFormat = video_format->chroma_format; + // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded + // by NVDEC hardware + video_decode_create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID; + video_decode_create_info.bitDepthMinus8 = video_format->bit_depth_luma_minus8; + video_decode_create_info.OutputFormat = video_output_format; + video_decode_create_info.DeinterlaceMode = deinterlace_mode; + video_decode_create_info.ulNumOutputSurfaces = 2; + video_decode_create_info.vidLock = ctx_lock; + + // AV1 has max width/height of sequence in sequence header + if (video_format->codec == cudaVideoCodec_AV1 && + video_format->seqhdr_data_length > 0) { + CUVIDEOFORMATEX* video_format_ex = (CUVIDEOFORMATEX*)video_format; + max_width = video_format_ex->av1.max_width; + max_height = video_format_ex->av1.max_height; + } + if (max_width < video_format->coded_width) { + max_width = video_format->coded_width; + } + if (max_height < video_format->coded_height) { + max_height = video_format->coded_height; + } + video_decode_create_info.ulMaxWidth = max_width; + video_decode_create_info.ulMaxHeight = max_height; + width = video_format->display_area.right - video_format->display_area.left; + luma_height = + video_format->display_area.bottom - video_format->display_area.top; + 
video_decode_create_info.ulTargetWidth = video_format->coded_width; + video_decode_create_info.ulTargetHeight = video_format->coded_height; + chroma_height = + (int)(ceil(luma_height * chroma_height_factor(video_output_format))); + num_chroma_planes = chroma_plane_count(video_output_format); + surface_height = video_decode_create_info.ulTargetHeight; + surface_width = video_decode_create_info.ulTargetWidth; + display_rect.bottom = video_decode_create_info.display_area.bottom; + display_rect.top = video_decode_create_info.display_area.top; + display_rect.left = video_decode_create_info.display_area.left; + display_rect.right = video_decode_create_info.display_area.right; + + check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__); + check_for_cuda_errors( + cuvidCreateDecoder(&decoder, &video_decode_create_info), + __LINE__, + __FILE__); + check_for_cuda_errors(cuCtxPopCurrent(nullptr), __LINE__, __FILE__); + return decode_surface; +} + +int Decoder::reconfigure_decoder(CUVIDEOFORMAT* video_format) { + if (video_format->bit_depth_luma_minus8 != + cu_video_format.bit_depth_luma_minus8 || + video_format->bit_depth_chroma_minus8 != + cu_video_format.bit_depth_chroma_minus8) { + TORCH_CHECK(false, "Reconfigure not supported for bit depth change"); + } + if (video_format->chroma_format != cu_video_format.chroma_format) { + TORCH_CHECK(false, "Reconfigure not supported for chroma format change"); + } + + bool decode_res_change = + !(video_format->coded_width == cu_video_format.coded_width && + video_format->coded_height == cu_video_format.coded_height); + bool display_rect_change = + !(video_format->display_area.bottom == + cu_video_format.display_area.bottom && + video_format->display_area.top == cu_video_format.display_area.top && + video_format->display_area.left == cu_video_format.display_area.left && + video_format->display_area.right == cu_video_format.display_area.right); + + unsigned int decode_surface = video_format->min_num_decode_surfaces; + + 
if ((video_format->coded_width > max_width) || + (video_format->coded_height > max_height)) { + // For VP9, let driver handle the change if new width/height > + // maxwidth/maxheight + if (video_codec != cudaVideoCodec_VP9) { + TORCH_CHECK( + false, + "Reconfigure not supported when width/height > maxwidth/maxheight"); + } + return 1; + } + + if (!decode_res_change) { + // If the coded_width/coded_height hasn't changed but display resolution has + // changed, then need to update width/height for correct output without + // cropping. Example : 1920x1080 vs 1920x1088. + if (display_rect_change) { + width = + video_format->display_area.right - video_format->display_area.left; + luma_height = + video_format->display_area.bottom - video_format->display_area.top; + chroma_height = + (int)ceil(luma_height * chroma_height_factor(video_output_format)); + num_chroma_planes = chroma_plane_count(video_output_format); + } + return 1; + } + cu_video_format.coded_width = video_format->coded_width; + cu_video_format.coded_height = video_format->coded_height; + CUVIDRECONFIGUREDECODERINFO reconfig_params = {}; + reconfig_params.ulWidth = video_format->coded_width; + reconfig_params.ulHeight = video_format->coded_height; + reconfig_params.ulTargetWidth = surface_width; + reconfig_params.ulTargetHeight = surface_height; + reconfig_params.ulNumDecodeSurfaces = decode_surface; + reconfig_params.display_area.bottom = display_rect.bottom; + reconfig_params.display_area.top = display_rect.top; + reconfig_params.display_area.left = display_rect.left; + reconfig_params.display_area.right = display_rect.right; + + check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__); + check_for_cuda_errors( + cuvidReconfigureDecoder(decoder, &reconfig_params), __LINE__, __FILE__); + check_for_cuda_errors(cuCtxPopCurrent(nullptr), __LINE__, __FILE__); + + return decode_surface; +} + +/* Called from AV1 sequence header to get operating point of an AV1 bitstream. 
+ */ +int Decoder::get_operating_point(CUVIDOPERATINGPOINTINFO* oper_point_info) { + return oper_point_info->codec == cudaVideoCodec_AV1 && + oper_point_info->av1.operating_points_cnt > 1 + ? 0 + : -1; +} diff --git a/torchvision/csrc/io/decoder/gpu/decoder.h b/torchvision/csrc/io/decoder/gpu/decoder.h new file mode 100644 index 00000000000..5ad685ec746 --- /dev/null +++ b/torchvision/csrc/io/decoder/gpu/decoder.h @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include + +static auto check_for_cuda_errors = + [](CUresult result, int line_num, std::string file_name) { + if (CUDA_SUCCESS != result) { + const char* error_name = nullptr; + + TORCH_CHECK( + CUDA_SUCCESS != cuGetErrorName(result, &error_name), + "CUDA error: ", + error_name, + " in ", + file_name, + " at line ", + line_num) + TORCH_CHECK( + false, "Error: ", result, " in ", file_name, " at line ", line_num); + } + }; + +struct Rect { + int left, top, right, bottom; +}; + +class Decoder { + public: + Decoder() {} + ~Decoder(); + void init(CUcontext, cudaVideoCodec); + void release(); + void decode(const uint8_t*, unsigned long); + torch::Tensor fetch_frame(); + int get_height() const { + return luma_height; + } + + private: + unsigned int width = 0, luma_height = 0, chroma_height = 0; + unsigned int surface_height = 0, surface_width = 0; + unsigned int max_width = 0, max_height = 0; + unsigned int num_chroma_planes = 0; + int bit_depth_minus8 = 0, bytes_per_pixel = 1; + int decode_pic_count = 0, pic_num_in_decode_order[32]; + std::queue decoded_frames; + CUcontext cu_context = NULL; + CUvideoctxlock ctx_lock; + CUvideoparser parser = NULL; + CUvideodecoder decoder = NULL; + CUstream cuvidStream = 0; + cudaVideoCodec video_codec = cudaVideoCodec_NumCodecs; + cudaVideoChromaFormat video_chroma_format = cudaVideoChromaFormat_420; + cudaVideoSurfaceFormat video_output_format = cudaVideoSurfaceFormat_NV12; + CUVIDEOFORMAT cu_video_format = {}; + Rect display_rect = {}; + + static 
int video_sequence_handler( + void* user_data, + CUVIDEOFORMAT* video_format) { + return ((Decoder*)user_data)->handle_video_sequence(video_format); + } + static int picture_decode_handler( + void* user_data, + CUVIDPICPARAMS* pic_params) { + return ((Decoder*)user_data)->handle_picture_decode(pic_params); + } + static int picture_display_handler( + void* user_data, + CUVIDPARSERDISPINFO* disp_info) { + return ((Decoder*)user_data)->handle_picture_display(disp_info); + } + static int operating_point_handler( + void* user_data, + CUVIDOPERATINGPOINTINFO* operating_info) { + return ((Decoder*)user_data)->get_operating_point(operating_info); + } + + void query_hardware(CUVIDEOFORMAT*); + int reconfigure_decoder(CUVIDEOFORMAT*); + int handle_video_sequence(CUVIDEOFORMAT*); + int handle_picture_decode(CUVIDPICPARAMS*); + int handle_picture_display(CUVIDPARSERDISPINFO*); + int get_operating_point(CUVIDOPERATINGPOINTINFO*); +}; diff --git a/torchvision/csrc/io/decoder/gpu/demuxer.h b/torchvision/csrc/io/decoder/gpu/demuxer.h new file mode 100644 index 00000000000..f6e72dceee1 --- /dev/null +++ b/torchvision/csrc/io/decoder/gpu/demuxer.h @@ -0,0 +1,257 @@ +extern "C" { +#include +#include +#include +#include +} + +class Demuxer { + private: + AVFormatContext* fmtCtx = NULL; + AVBSFContext* bsfCtx = NULL; + AVPacket pkt, pktFiltered; + AVCodecID eVideoCodec; + uint8_t* dataWithHeader = NULL; + bool bMp4H264, bMp4HEVC, bMp4MPEG4; + unsigned int frameCount = 0; + int iVideoStream; + double timeBase = 0.0; + + public: + Demuxer(const char* filePath) { + avformat_network_init(); + TORCH_CHECK( + 0 <= avformat_open_input(&fmtCtx, filePath, NULL, NULL), + "avformat_open_input() failed at line ", + __LINE__, + " in demuxer.h\n"); + if (!fmtCtx) { + TORCH_CHECK( + false, + "Encountered NULL AVFormatContext at line ", + __LINE__, + " in demuxer.h\n"); + } + + TORCH_CHECK( + 0 <= avformat_find_stream_info(fmtCtx, NULL), + "avformat_find_stream_info() failed at line ", + __LINE__, + " 
in demuxer.h\n"); + iVideoStream = + av_find_best_stream(fmtCtx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); + if (iVideoStream < 0) { + TORCH_CHECK( + false, + "av_find_best_stream() failed at line ", + __LINE__, + " in demuxer.h\n"); + } + + eVideoCodec = fmtCtx->streams[iVideoStream]->codecpar->codec_id; + AVRational rTimeBase = fmtCtx->streams[iVideoStream]->time_base; + timeBase = av_q2d(rTimeBase); + + bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && + (!strcmp(fmtCtx->iformat->long_name, "QuickTime / MOV") || + !strcmp(fmtCtx->iformat->long_name, "FLV (Flash Video)") || + !strcmp(fmtCtx->iformat->long_name, "Matroska / WebM")); + bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && + (!strcmp(fmtCtx->iformat->long_name, "QuickTime / MOV") || + !strcmp(fmtCtx->iformat->long_name, "FLV (Flash Video)") || + !strcmp(fmtCtx->iformat->long_name, "Matroska / WebM")); + bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && + (!strcmp(fmtCtx->iformat->long_name, "QuickTime / MOV") || + !strcmp(fmtCtx->iformat->long_name, "FLV (Flash Video)") || + !strcmp(fmtCtx->iformat->long_name, "Matroska / WebM")); + + av_init_packet(&pkt); + pkt.data = NULL; + pkt.size = 0; + av_init_packet(&pktFiltered); + pktFiltered.data = NULL; + pktFiltered.size = 0; + + if (bMp4H264) { + const AVBitStreamFilter* bsf = av_bsf_get_by_name("h264_mp4toannexb"); + if (!bsf) { + TORCH_CHECK( + false, + "av_bsf_get_by_name() failed at line ", + __LINE__, + " in demuxer.h\n"); + } + TORCH_CHECK( + 0 <= av_bsf_alloc(bsf, &bsfCtx), + "av_bsf_alloc() failed at line ", + __LINE__, + " in demuxer.h\n"); + avcodec_parameters_copy( + bsfCtx->par_in, fmtCtx->streams[iVideoStream]->codecpar); + TORCH_CHECK( + 0 <= av_bsf_init(bsfCtx), + "av_bsf_init() failed at line ", + __LINE__, + " in demuxer.h\n"); + } + if (bMp4HEVC) { + const AVBitStreamFilter* bsf = av_bsf_get_by_name("hevc_mp4toannexb"); + if (!bsf) { + TORCH_CHECK( + false, + "av_bsf_get_by_name() failed at line ", + __LINE__, + " in demuxer.h\n"); + } + TORCH_CHECK( + 
0 <= av_bsf_alloc(bsf, &bsfCtx), + "av_bsf_alloc() failed at line ", + __LINE__, + " in demuxer.h\n"); + avcodec_parameters_copy( + bsfCtx->par_in, fmtCtx->streams[iVideoStream]->codecpar); + TORCH_CHECK( + 0 <= av_bsf_init(bsfCtx), + "av_bsf_init() failed at line ", + __LINE__, + " in demuxer.h\n"); + } + } + + ~Demuxer() { + if (!fmtCtx) { + return; + } + if (pkt.data) { + av_packet_unref(&pkt); + } + if (pktFiltered.data) { + av_packet_unref(&pktFiltered); + } + if (bsfCtx) { + av_bsf_free(&bsfCtx); + } + avformat_close_input(&fmtCtx); + if (dataWithHeader) { + av_free(dataWithHeader); + } + } + + AVCodecID get_video_codec() { + return eVideoCodec; + } + + double get_duration() const { + return (double)fmtCtx->duration / AV_TIME_BASE; + } + + double get_fps() const { + return av_q2d(fmtCtx->streams[iVideoStream]->r_frame_rate); + } + + bool demux(uint8_t** video, unsigned long* videoBytes) { + if (!fmtCtx) { + return false; + } + *videoBytes = 0; + + if (pkt.data) { + av_packet_unref(&pkt); + } + int e = 0; + while ((e = av_read_frame(fmtCtx, &pkt)) >= 0 && + pkt.stream_index != iVideoStream) { + av_packet_unref(&pkt); + } + if (e < 0) { + return false; + } + + if (bMp4H264 || bMp4HEVC) { + if (pktFiltered.data) { + av_packet_unref(&pktFiltered); + } + TORCH_CHECK( + 0 <= av_bsf_send_packet(bsfCtx, &pkt), + "av_bsf_send_packet() failed at line ", + __LINE__, + " in demuxer.h\n"); + TORCH_CHECK( + 0 <= av_bsf_receive_packet(bsfCtx, &pktFiltered), + "av_bsf_receive_packet() failed at line ", + __LINE__, + " in demuxer.h\n"); + *video = pktFiltered.data; + *videoBytes = pktFiltered.size; + } else { + if (bMp4MPEG4 && (frameCount == 0)) { + int extraDataSize = + fmtCtx->streams[iVideoStream]->codecpar->extradata_size; + + if (extraDataSize > 0) { + dataWithHeader = (uint8_t*)av_malloc( + extraDataSize + pkt.size - 3 * sizeof(uint8_t)); + if (!dataWithHeader) { + TORCH_CHECK( + false, + "av_malloc() failed at line ", + __LINE__, + " in demuxer.h\n"); + } + memcpy( + 
dataWithHeader, + fmtCtx->streams[iVideoStream]->codecpar->extradata, + extraDataSize); + memcpy( + dataWithHeader + extraDataSize, + pkt.data + 3, + pkt.size - 3 * sizeof(uint8_t)); + *video = dataWithHeader; + *videoBytes = extraDataSize + pkt.size - 3 * sizeof(uint8_t); + } + } else { + *video = pkt.data; + *videoBytes = pkt.size; + } + } + frameCount++; + return true; + } + + void seek(double timestamp, int flag) { + int64_t time = timestamp * AV_TIME_BASE; + TORCH_CHECK( + 0 <= av_seek_frame(fmtCtx, -1, time, flag), + "av_seek_frame() failed at line ", + __LINE__, + " in demuxer.h\n"); + } +}; + +inline cudaVideoCodec ffmpeg_to_codec(AVCodecID id) { + switch (id) { + case AV_CODEC_ID_MPEG1VIDEO: + return cudaVideoCodec_MPEG1; + case AV_CODEC_ID_MPEG2VIDEO: + return cudaVideoCodec_MPEG2; + case AV_CODEC_ID_MPEG4: + return cudaVideoCodec_MPEG4; + case AV_CODEC_ID_WMV3: + case AV_CODEC_ID_VC1: + return cudaVideoCodec_VC1; + case AV_CODEC_ID_H264: + return cudaVideoCodec_H264; + case AV_CODEC_ID_HEVC: + return cudaVideoCodec_HEVC; + case AV_CODEC_ID_VP8: + return cudaVideoCodec_VP8; + case AV_CODEC_ID_VP9: + return cudaVideoCodec_VP9; + case AV_CODEC_ID_MJPEG: + return cudaVideoCodec_JPEG; + case AV_CODEC_ID_AV1: + return cudaVideoCodec_AV1; + default: + return cudaVideoCodec_NumCodecs; + } +} diff --git a/torchvision/csrc/io/decoder/gpu/gpu_decoder.cpp b/torchvision/csrc/io/decoder/gpu/gpu_decoder.cpp new file mode 100644 index 00000000000..aef1ef93b09 --- /dev/null +++ b/torchvision/csrc/io/decoder/gpu/gpu_decoder.cpp @@ -0,0 +1,65 @@ +#include "gpu_decoder.h" +#include + +/* Set cuda device, create cuda context and initialise the demuxer and decoder. 
+ */ +GPUDecoder::GPUDecoder(std::string src_file, torch::Device dev) + : demuxer(src_file.c_str()) { + at::cuda::CUDAGuard device_guard(dev); + device = device_guard.current_device().index(); + check_for_cuda_errors( + cuDevicePrimaryCtxRetain(&ctx, device), __LINE__, __FILE__); + decoder.init(ctx, ffmpeg_to_codec(demuxer.get_video_codec())); + initialised = true; +} + +GPUDecoder::~GPUDecoder() { + at::cuda::CUDAGuard device_guard(device); + decoder.release(); + if (initialised) { + check_for_cuda_errors( + cuDevicePrimaryCtxRelease(device), __LINE__, __FILE__); + } +} + +/* Fetch a decoded frame tensor after demuxing and decoding. + */ +torch::Tensor GPUDecoder::decode() { + torch::Tensor frameTensor; + unsigned long videoBytes = 0; + uint8_t* video = nullptr; + at::cuda::CUDAGuard device_guard(device); + torch::Tensor frame; + do { + demuxer.demux(&video, &videoBytes); + decoder.decode(video, videoBytes); + frame = decoder.fetch_frame(); + } while (frame.numel() == 0 && videoBytes > 0); + return frame; +} + +/* Seek to a passed timestamp. The second argument controls whether to seek to a + * keyframe. + */ +void GPUDecoder::seek(double timestamp, bool keyframes_only) { + int flag = keyframes_only ? 
0 : AVSEEK_FLAG_ANY; + demuxer.seek(timestamp, flag); +} + +c10::Dict> +GPUDecoder::get_metadata() const { + c10::Dict> metadata; + c10::Dict video_metadata; + video_metadata.insert("duration", demuxer.get_duration()); + video_metadata.insert("fps", demuxer.get_fps()); + metadata.insert("video", video_metadata); + return metadata; +} + +TORCH_LIBRARY(torchvision, m) { + m.class_("GPUDecoder") + .def(torch::init()) + .def("seek", &GPUDecoder::seek) + .def("get_metadata", &GPUDecoder::get_metadata) + .def("next", &GPUDecoder::decode); +} diff --git a/torchvision/csrc/io/decoder/gpu/gpu_decoder.h b/torchvision/csrc/io/decoder/gpu/gpu_decoder.h new file mode 100644 index 00000000000..22bf680a982 --- /dev/null +++ b/torchvision/csrc/io/decoder/gpu/gpu_decoder.h @@ -0,0 +1,20 @@ +#include +#include +#include "decoder.h" +#include "demuxer.h" + +class GPUDecoder : public torch::CustomClassHolder { + public: + GPUDecoder(std::string, torch::Device); + ~GPUDecoder(); + torch::Tensor decode(); + void seek(double, bool); + c10::Dict> get_metadata() const; + + private: + Demuxer demuxer; + CUcontext ctx; + Decoder decoder; + int64_t device; + bool initialised = false; +}; diff --git a/torchvision/csrc/io/decoder/memory_buffer.cpp b/torchvision/csrc/io/decoder/memory_buffer.cpp new file mode 100644 index 00000000000..4e420c3b3cd --- /dev/null +++ b/torchvision/csrc/io/decoder/memory_buffer.cpp @@ -0,0 +1,71 @@ +#include "memory_buffer.h" +#include + +namespace ffmpeg { + +MemoryBuffer::MemoryBuffer(const uint8_t* buffer, size_t size) + : buffer_(buffer), len_(size) {} + +int MemoryBuffer::read(uint8_t* buf, int size) { + if (pos_ < len_) { + auto available = std::min(int(len_ - pos_), size); + memcpy(buf, buffer_ + pos_, available); + pos_ += available; + return available; + } + + return 0; +} + +int64_t MemoryBuffer::seek(int64_t offset, int whence) { + if (whence & AVSEEK_SIZE) { + return len_; + } + + // remove force flag + whence &= ~AVSEEK_FORCE; + + switch (whence) { + 
case SEEK_SET: + if (offset >= 0 && offset <= len_) { + pos_ = offset; + } + break; + case SEEK_END: + if (len_ + offset >= 0 && len_ + offset <= len_) { + pos_ = len_ + offset; + } + break; + case SEEK_CUR: + if (pos_ + offset > 0 && pos_ + offset <= len_) { + pos_ += offset; + } + break; + default: + LOG(ERROR) << "Unknown whence flag gets provided: " << whence; + } + return pos_; +} + +/* static */ +DecoderInCallback MemoryBuffer::getCallback( + const uint8_t* buffer, + size_t size) { + MemoryBuffer object(buffer, size); + return + [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable + -> int { + if (out) { // see defs.h file + // read mode + return object.read(out, size); + } + // seek mode + if (!timeoutMs) { + // seek capability, yes - supported + return 0; + } + return object.seek(size, whence); + }; +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/memory_buffer.h b/torchvision/csrc/io/decoder/memory_buffer.h new file mode 100644 index 00000000000..909626d3cae --- /dev/null +++ b/torchvision/csrc/io/decoder/memory_buffer.h @@ -0,0 +1,25 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class uses external memory buffer and implements a seekable interface. + */ +class MemoryBuffer { + public: + explicit MemoryBuffer(const uint8_t* buffer, size_t size); + int64_t seek(int64_t offset, int whence); + int read(uint8_t* buf, int size); + + // static constructor for decoder callback. 
+ static DecoderInCallback getCallback(const uint8_t* buffer, size_t size); + + private: + const uint8_t* buffer_; // set at construction time + long pos_{0}; // current position + long len_{0}; // bytes in buffer +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/seekable_buffer.cpp b/torchvision/csrc/io/decoder/seekable_buffer.cpp new file mode 100644 index 00000000000..41e3e689c7b --- /dev/null +++ b/torchvision/csrc/io/decoder/seekable_buffer.cpp @@ -0,0 +1,139 @@ +#include "seekable_buffer.h" +#include +#include +#include "memory_buffer.h" + +namespace ffmpeg { + +int SeekableBuffer::init( + DecoderInCallback&& in, + uint64_t timeoutMs, + size_t maxSeekableBytes, + ImageType* type) { + shutdown(); + isSeekable_ = in(nullptr, 0, 0, 0) == 0; + if (isSeekable_) { // seekable + if (type) { + if (!readBytes(in, 8, timeoutMs)) { + return -1; + } + setImageType(type); + end_ = 0; + eof_ = false; + std::vector().swap(buffer_); + // reset callback + if (in(nullptr, 0, SEEK_SET, timeoutMs)) { + return -1; + } + } + inCallback_ = std::forward(in); + return 1; + } + + if (!readBytes(in, maxSeekableBytes + (type ? 
8 : 0), timeoutMs)) { + return -1; + } + + if (type) { + setImageType(type); + } + + if (eof_) { + end_ = 0; + eof_ = false; + // reuse MemoryBuffer functionality + inCallback_ = MemoryBuffer::getCallback(buffer_.data(), buffer_.size()); + isSeekable_ = true; + return 1; + } + inCallback_ = std::forward(in); + return 0; +} + +bool SeekableBuffer::readBytes( + DecoderInCallback& in, + size_t maxBytes, + uint64_t timeoutMs) { + // Resize to th minimum 4K page or less + buffer_.resize(std::min(maxBytes, size_t(4 * 1024UL))); + end_ = 0; + eof_ = false; + + auto end = + std::chrono::steady_clock::now() + std::chrono::milliseconds(timeoutMs); + auto watcher = [end]() -> bool { + return std::chrono::steady_clock::now() <= end; + }; + + bool hasTime = true; + while (!eof_ && end_ < maxBytes && (hasTime = watcher())) { + // lets read all bytes into available buffer + auto res = in(buffer_.data() + end_, buffer_.size() - end_, 0, timeoutMs); + if (res > 0) { + end_ += res; + if (end_ == buffer_.size()) { + buffer_.resize(std::min(size_t(end_ * 4UL), maxBytes)); + } + } else if (res == 0) { + eof_ = true; + } else { + // error + return false; + } + } + + buffer_.resize(end_); + + return hasTime; +} + +void SeekableBuffer::setImageType(ImageType* type) { + if (buffer_.size() > 2 && buffer_[0] == 0xFF && buffer_[1] == 0xD8 && + buffer_[2] == 0xFF) { + *type = ImageType::JPEG; + } else if ( + buffer_.size() > 3 && buffer_[1] == 'P' && buffer_[2] == 'N' && + buffer_[3] == 'G') { + *type = ImageType::PNG; + } else if ( + buffer_.size() > 1 && + ((buffer_[0] == 0x49 && buffer_[1] == 0x49) || + (buffer_[0] == 0x4D && buffer_[1] == 0x4D))) { + *type = ImageType::TIFF; + } else { + *type = ImageType::UNKNOWN; + } +} + +int SeekableBuffer::read(uint8_t* buf, int size, uint64_t timeoutMs) { + if (isSeekable_) { + return inCallback_(buf, size, 0, timeoutMs); + } + if (pos_ < end_) { + // read cached bytes for non-seekable callback + auto available = std::min(int(end_ - pos_), size); + 
memcpy(buf, buffer_.data() + pos_, available); + pos_ += available; + return available; + } else if (!eof_) { + // normal sequential read (see defs.h file), i.e. @buf != null + auto res = inCallback_(buf, size, 0, timeoutMs); // read through + eof_ = res == 0; + return res; + } else { + return 0; + } +} + +int64_t SeekableBuffer::seek(int64_t offset, int whence, uint64_t timeoutMs) { + return inCallback_(nullptr, offset, whence, timeoutMs); +} + +void SeekableBuffer::shutdown() { + pos_ = end_ = 0; + eof_ = false; + std::vector().swap(buffer_); + inCallback_ = nullptr; +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/seekable_buffer.h b/torchvision/csrc/io/decoder/seekable_buffer.h new file mode 100644 index 00000000000..9d5729f5306 --- /dev/null +++ b/torchvision/csrc/io/decoder/seekable_buffer.h @@ -0,0 +1,45 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class uses internal buffer to store initial size bytes as a seekable cache + * from Media provider and let ffmpeg to seek and read bytes from cache + * and beyond - reading bytes directly from Media provider + */ +enum class ImageType { + UNKNOWN = 0, + JPEG = 1, + PNG = 2, + TIFF = 3, +}; + +class SeekableBuffer { + public: + // @type is optional, not nullptr only is image detection required + // \returns 1 is buffer seekable, 0 - if not seekable, < 0 on error + int init( + DecoderInCallback&& in, + uint64_t timeoutMs, + size_t maxSeekableBytes, + ImageType* type); + int read(uint8_t* buf, int size, uint64_t timeoutMs); + int64_t seek(int64_t offset, int whence, uint64_t timeoutMs); + void shutdown(); + + private: + bool readBytes(DecoderInCallback& in, size_t maxBytes, uint64_t timeoutMs); + void setImageType(ImageType* type); + + private: + DecoderInCallback inCallback_; + std::vector buffer_; // resized at init time + long pos_{0}; // current position (SEEK_CUR iff pos_ < end_) + long end_{0}; // current buffer size + bool eof_{0}; // indicates the EOF + bool 
isSeekable_{false}; // is callback seekable +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/stream.cpp b/torchvision/csrc/io/decoder/stream.cpp new file mode 100644 index 00000000000..8c914050587 --- /dev/null +++ b/torchvision/csrc/io/decoder/stream.cpp @@ -0,0 +1,289 @@ +#include "stream.h" +#include +#include +#include +#include "util.h" + +namespace ffmpeg { +const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE}; + +Stream::Stream( + AVFormatContext* inputCtx, + MediaFormat format, + bool convertPtsToWallTime, + int64_t loggingUuid) + : inputCtx_(inputCtx), + format_(format), + convertPtsToWallTime_(convertPtsToWallTime), + loggingUuid_(loggingUuid) {} + +Stream::~Stream() { + if (frame_) { + av_free(frame_); + } + if (codecCtx_) { + avcodec_free_context(&codecCtx_); + } +} + +// look up the proper CODEC querying the function +AVCodec* Stream::findCodec(AVCodecParameters* params) { + return (AVCodec*)avcodec_find_decoder(params->codec_id); +} + +// Allocate memory for the AVCodecContext, which will hold the context for +// decode/encode process. Then fill this codec context with CODEC parameters +// defined in stream parameters. 
Open the codec, and allocate the global frame +// defined in the header file +int Stream::openCodec(std::vector* metadata, int num_threads) { + AVStream* steam = inputCtx_->streams[format_.stream]; + + AVCodec* codec = findCodec(steam->codecpar); + if (!codec) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_find_decoder failed for codec_id: " + << int(steam->codecpar->codec_id); + return AVERROR(EINVAL); + } + + if (!(codecCtx_ = avcodec_alloc_context3(codec))) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_alloc_context3 failed"; + return AVERROR(ENOMEM); + } + // multithreading heuristics + // if user defined, + if (num_threads > max_threads) { + num_threads = max_threads; + } + + if (num_threads > 0) { + // if user defined, respect that + // note that default thread_type will be used + codecCtx_->thread_count = num_threads; + } else { + // otherwise set sensible defaults + codecCtx_->thread_count = 8; + codecCtx_->thread_type = FF_THREAD_SLICE; + } + + int ret; + // Copy codec parameters from input stream to output codec context + if ((ret = avcodec_parameters_to_context(codecCtx_, steam->codecpar)) < 0) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_parameters_to_context failed"; + return ret; + } + + // after avcodec_open2, value of codecCtx_->time_base is NOT meaningful + if ((ret = avcodec_open2(codecCtx_, codec, nullptr)) < 0) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_open2 failed: " << Util::generateErrorDesc(ret); + avcodec_free_context(&codecCtx_); + codecCtx_ = nullptr; + return ret; + } + + frame_ = av_frame_alloc(); + + switch (format_.type) { + case TYPE_VIDEO: + fps_ = av_q2d(av_guess_frame_rate(inputCtx_, steam, nullptr)); + break; + case TYPE_AUDIO: + fps_ = codecCtx_->sample_rate; + break; + default: + fps_ = 30.0; + } + + if ((ret = initFormat())) { + LOG(ERROR) << "initFormat failed, type: " << format_.type; + } + + if (metadata) { + DecoderMetadata header; + 
header.format = format_; + header.fps = fps_; + header.num = steam->time_base.num; + header.den = steam->time_base.den; + header.duration = + av_rescale_q(steam->duration, steam->time_base, timeBaseQ); + metadata->push_back(header); + } + + return ret; +} + +// send the raw data packet (compressed frame) to the decoder, through the codec +// context and receive the raw data frame (uncompressed frame) from the +// decoder, through the same codec context +int Stream::analyzePacket(const AVPacket* packet, bool* gotFrame) { + int consumed = 0; + int result = avcodec_send_packet(codecCtx_, packet); + if (result == AVERROR(EAGAIN)) { + *gotFrame = false; // no bytes get consumed, fetch frame + } else if (result == AVERROR_EOF) { + *gotFrame = false; // more than one flush packet + if (packet) { + // got packet after flush, this is an error + return result; + } + } else if (result < 0) { + LOG(ERROR) << "avcodec_send_packet failed, err: " + << Util::generateErrorDesc(result); + return result; // error + } else { + consumed = packet ? packet->size : 0; // all bytes get consumed + } + + result = avcodec_receive_frame(codecCtx_, frame_); + + if (result >= 0) { + *gotFrame = true; // frame is available + } else if (result == AVERROR(EAGAIN)) { + *gotFrame = false; // no frames at this time, needs more packets + if (!consumed) { + // precaution, if no packages got consumed and no frames are available + return result; + } + } else if (result == AVERROR_EOF) { + *gotFrame = false; // the last frame has been flushed + // precaution, if no more frames are available assume we consume all bytes + consumed = 0; + } else { // error + LOG(ERROR) << "avcodec_receive_frame failed, err: " + << Util::generateErrorDesc(result); + return result; + } + return consumed; +} + +// General decoding function: +// given the packet, analyse the metadata, and write the +// metadata and the buffer to the DecoderOutputImage. 
+// Feeds `packet` (nullptr - flush) to analyzePacket() and, when a frame came
+// out or on flush, builds the output message via getMessage().
+// Returns what analyzePacket() returned, or a negative getMessage() error;
+// *hasMsg tells whether getMessage() reported a message in `out`.
+int Stream::decodePacket(
+    const AVPacket* packet,
+    DecoderOutputMessage* out,
+    bool headerOnly,
+    bool* hasMsg) {
+  int consumed;
+  bool gotFrame = false;
+  *hasMsg = false;
+  if ((consumed = analyzePacket(packet, &gotFrame)) >= 0 &&
+      (packet == nullptr || gotFrame)) {
+    int result;
+    if ((result = getMessage(out, !gotFrame, headerOnly)) < 0) {
+      return result; // report error
+    }
+    *hasMsg = result > 0;
+  }
+  return consumed;
+}
+
+// Drains the decoder by decoding a null packet.
+// Returns 1 if that produced a message, 0 if nothing is left, or a negative
+// error; in the last two cases the codec buffers are reset.
+int Stream::flush(DecoderOutputMessage* out, bool headerOnly) {
+  bool hasMsg = false;
+  int result = decodePacket(nullptr, out, headerOnly, &hasMsg);
+  if (result < 0) {
+    avcodec_flush_buffers(codecCtx_);
+    return result;
+  }
+  if (!hasMsg) {
+    avcodec_flush_buffers(codecCtx_);
+    return 0;
+  }
+  return 1;
+}
+
+// Sets the header and payload via stream::setHeader and copyFrameBytes
+// functions that are defined in type stream subclass (VideoStream, AudioStream,
+// ...)
+int Stream::getMessage(DecoderOutputMessage* out, bool flush, bool headerOnly) {
+  if (flush) {
+    // only flush of audio frames makes sense
+    if (format_.type == TYPE_AUDIO) {
+      int processed = 0;
+      size_t total = 0;
+      // grab all audio bytes by chunks
+      do {
+        if ((processed = copyFrameBytes(out->payload.get(), flush)) < 0) {
+          return processed;
+        }
+        total += processed;
+      } while (processed);
+
+      if (total) {
+        // set header if message bytes are available
+        setHeader(&out->header, flush);
+        return 1;
+      }
+    }
+    return 0;
+  } else {
+    if (format_.type == TYPE_AUDIO) {
+      int processed = 0;
+      if ((processed = copyFrameBytes(out->payload.get(), flush)) < 0) {
+        return processed;
+      }
+      if (processed) {
+        // set header if message bytes are available
+        setHeader(&out->header, flush);
+        return 1;
+      }
+      return 0;
+    } else {
+      // set header
+      setHeader(&out->header, flush);
+
+      if (headerOnly) {
+        // Only header is requested
+        return 1;
+      }
+
+      return copyFrameBytes(out->payload.get(), flush);
+    }
+  }
+}
+
+// Fills the message header: next sequence number, pts (passed through
+// keeper_.adjust() when convertPtsToWallTime_ is set) and the stream format;
+// keyFrame is reset to 0 and fps to NaN.
+void Stream::setHeader(DecoderHeader* header, bool flush) {
+  header->seqno = numGenerator_++;
+
+  setFramePts(header, flush);
+
+  if (convertPtsToWallTime_) {
+    keeper_.adjust(header->pts);
+  }
+
+  header->format = format_;
+  header->keyFrame = 0;
+  header->fps = std::numeric_limits::quiet_NaN();
+}
+
+// Sets header->pts (in us) for the current frame, or to the estimate kept in
+// nextPts_ on flush / when the frame carries no timestamp, and updates
+// nextPts_ for the frame that follows.
+void Stream::setFramePts(DecoderHeader* header, bool flush) {
+  if (flush) {
+    header->pts = nextPts_; // already in us
+  } else {
+    header->pts = frame_->best_effort_timestamp;
+    if (header->pts == AV_NOPTS_VALUE) {
+      header->pts = nextPts_;
+    } else {
+      header->pts = av_rescale_q(
+          header->pts,
+          inputCtx_->streams[format_.stream]->time_base,
+          timeBaseQ);
+    }
+
+    // Without a positive rate there is no usable frame duration (dividing by
+    // a zero or NaN fps_ and converting the result back to an integer is
+    // undefined), so the estimate then stays at the current pts.
+    nextPts_ = header->pts;
+    if (fps_ > 0) {
+      switch (format_.type) {
+        case TYPE_AUDIO:
+          // multiply in double: nb_samples and AV_TIME_BASE are both int and
+          // their product overflows for frames with more than ~2147 samples
+          nextPts_ = header->pts +
+              static_cast<double>(frame_->nb_samples) * AV_TIME_BASE / fps_;
+          break;
+        case TYPE_VIDEO:
+          nextPts_ = header->pts + AV_TIME_BASE / fps_;
+          break;
+        default:
+          break;
+      }
+    }
+  }
+}
+
+} // namespace ffmpeg
diff --git a/torchvision/csrc/io/decoder/stream.h b/torchvision/csrc/io/decoder/stream.h
new file mode 100644
index 00000000000..6250dd9ecd2
--- /dev/null
+++ b/torchvision/csrc/io/decoder/stream.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include
+#include "defs.h"
+#include "time_keeper.h"
+
+namespace ffmpeg {
+
+/**
+ * Class uses FFMPEG library to decode one media stream (audio or video).
+ */
+
+class Stream {
+ public:
+  Stream(
+      AVFormatContext* inputCtx,
+      MediaFormat format,
+      bool convertPtsToWallTime,
+      int64_t loggingUuid);
+  virtual ~Stream();
+
+  // returns 0 - on success or negative error
+  // num_threads sets up the codec context for multithreading if needed
+  // default is set to single thread in order to not break BC
+  int openCodec(std::vector* metadata, int num_threads = 1);
+  // returns number of consumed packet bytes (0 - if none), or < 0 on error
+  int decodePacket(
+      const AVPacket* packet,
+      DecoderOutputMessage* out,
+      bool headerOnly,
+      bool* hasMsg);
+  // returns stream index
+  int getIndex() const {
+    return format_.stream;
+  }
+  // returns 1 - if message got a payload, 0 - if it's not, and < 0 on error
+  int flush(DecoderOutputMessage* out, bool headerOnly);
+  // return media format
+  MediaFormat getMediaFormat() const {
+    return format_;
+  }
+
+ protected:
+  virtual int initFormat() = 0;
+  // returns number processed bytes from packet, or negative error
+  virtual int analyzePacket(const AVPacket* packet, bool* gotFrame);
+  // returns number processed bytes from packet, or negative error
+  virtual int copyFrameBytes(ByteStorage* out, bool flush) = 0;
+  // sets output format
+  virtual void setHeader(DecoderHeader* header, bool flush);
+  // set frame pts
+  virtual void setFramePts(DecoderHeader* header, bool flush);
+  // finds codec
+  virtual AVCodec* findCodec(AVCodecParameters* params);
+
+ private:
+  // returns 1 - if message got a payload, 0 - if it's not, and < 0 on error
+  int getMessage(DecoderOutputMessage* out, bool flush, bool headerOnly);
+
+ protected:
+  AVFormatContext* const inputCtx_;
+  MediaFormat format_;
+  const bool convertPtsToWallTime_;
+  int64_t loggingUuid_;
+
+  AVCodecContext* codecCtx_{nullptr};
+  AVFrame* frame_{nullptr};
+
+  std::atomic numGenerator_{0};
+  TimeKeeper keeper_;
+  // estimated next frame pts for flushing the last frame
+  int64_t nextPts_{0};
+  double fps_{30.};
+  // this is a dumb conservative limit; ideally we'd use
+  // int max_threads = at::get_num_threads(); but this would cause
+  // fb sync to fail as it would add dependency to ATen to the decoder API
+  const int max_threads = 12;
+};
+
+} // namespace ffmpeg
diff --git a/torchvision/csrc/io/decoder/subtitle_sampler.cpp b/torchvision/csrc/io/decoder/subtitle_sampler.cpp
new file mode 100644
index 00000000000..d0df24d3e35
--- /dev/null
+++ b/torchvision/csrc/io/decoder/subtitle_sampler.cpp
@@ -0,0 +1,52 @@
+#include "subtitle_sampler.h"
+#include
+#include "util.h"
+
+namespace ffmpeg {
+
+SubtitleSampler::~SubtitleSampler() {
+  cleanUp();
+}
+
+void SubtitleSampler::shutdown() {
+  cleanUp();
+}
+
+bool SubtitleSampler::init(const SamplerParameters& params) {
+  cleanUp();
+  // set formats
+  params_ = params;
+  return true;
+}
+
+// Serializes `sub` to the tail of `out`; returns the number of bytes written,
+// or 0 when either argument is null (flush) or serialization fails.
+int SubtitleSampler::sample(AVSubtitle* sub, ByteStorage* out) {
+  if (!sub || !out) {
+    return 0; // flush
+  }
+
+  out->ensure(Util::size(*sub));
+
+  return Util::serialize(*sub, out);
+}
+
+// Appends the bytes of `in` to `out` and returns the resulting length of
+// `out`; returns 0 when either argument is null.
+int SubtitleSampler::sample(const ByteStorage* in, ByteStorage* out) {
+  if (in && out) {
+    // Get a writable copy
+    if (size_t len = in->length()) {
+      out->ensure(len);
+      memcpy(out->writableTail(), in->data(), len);
+      // commit the copied bytes, otherwise length() below never sees them
+      out->append(len);
+    }
+    return out->length();
+  }
+  return 0;
+}
+
+void SubtitleSampler::cleanUp() {}
+
+} // namespace ffmpeg
diff --git a/torchvision/csrc/io/decoder/subtitle_sampler.h b/torchvision/csrc/io/decoder/subtitle_sampler.h
new file mode 100644
index 00000000000..4aee811ed56
--- /dev/null
+++ b/torchvision/csrc/io/decoder/subtitle_sampler.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "defs.h"
+
+namespace ffmpeg {
+
+/**
+ * Class serializes decoded subtitles (AVSubtitle) into a ByteStorage
+ */
+
+class SubtitleSampler : public MediaSampler {
+ public:
+  SubtitleSampler() = default;
+  ~SubtitleSampler() override;
+
+  bool init(const SamplerParameters& params) override;
+  int sample(const ByteStorage* in, ByteStorage* out) override;
+  void shutdown() override;
+
+  // returns number of serialized bytes (0 - on flush or failure)
+  int sample(AVSubtitle* sub, ByteStorage* out);
+
+  // helper serialization/deserialization methods
+  static void serialize(const AVSubtitle& sub, ByteStorage* out);
+  static bool deserialize(const ByteStorage& buf, AVSubtitle* sub);
+
+ private:
+  // close resources
+  void cleanUp();
+};
+
+} // namespace ffmpeg
diff --git a/torchvision/csrc/io/decoder/subtitle_stream.cpp b/torchvision/csrc/io/decoder/subtitle_stream.cpp
new file mode 100644
index 00000000000..27c61d4dbd9
--- /dev/null
+++ b/torchvision/csrc/io/decoder/subtitle_stream.cpp
@@ -0,0 +1,102 @@
+#include "subtitle_stream.h"
+#include
+#include
+#include "util.h"
+
+namespace ffmpeg {
+const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE};
+
+SubtitleStream::SubtitleStream(
+    AVFormatContext* inputCtx,
+    int index,
+    bool convertPtsToWallTime,
+    const SubtitleFormat& format)
+    : Stream(
+          inputCtx,
+          MediaFormat::makeMediaFormat(format, index),
+          convertPtsToWallTime,
+          0) {
+  memset(&sub_, 0, sizeof(sub_));
+}
+
+void SubtitleStream::releaseSubtitle() {
+  if (sub_.release) {
+    avsubtitle_free(&sub_);
+    memset(&sub_, 0, sizeof(sub_));
+  }
+}
+
+SubtitleStream::~SubtitleStream() {
+  releaseSubtitle();
+  sampler_.shutdown();
+}
+
+int SubtitleStream::initFormat() {
+  if (!codecCtx_->subtitle_header) {
+    LOG(ERROR) << "No subtitle header found";
+  } else {
+    VLOG(1) << "Subtitle header found!";
+  }
+  return 0;
+}
+
+// Decodes one subtitle packet into sub_ (a null `packet` flushes the decoder).
+// Returns the number of consumed bytes or a negative error; on a non-negative
+// return *gotFrame tells whether sub_ now holds a subtitle.
+int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
+  // clean-up
+  releaseSubtitle();
+
+  // An empty packet of our own is needed only to flush the decoder, i.e. when
+  // the caller passed no packet.
+  AVPacket* flushPacket = nullptr;
+  if (!packet) {
+    flushPacket = av_packet_alloc();
+    if (flushPacket == nullptr) {
+      LOG(ERROR)
+          << "decoder as not able to allocate the subtitle-specific packet.";
+      // alternative to ENOMEM
+      return AVERROR_BUFFER_TOO_SMALL;
+    }
+    flushPacket->data = nullptr;
+    flushPacket->size = 0;
+  }
+  const AVPacket* pkt = packet ? packet : flushPacket;
+
+  int gotFramePtr = 0;
+  // the decoder call is handed a non-const packet pointer, hence the cast
+  int result = avcodec_decode_subtitle2(
+      codecCtx_, &sub_, &gotFramePtr, const_cast<AVPacket*>(pkt));
+
+  if (result < 0) {
+    LOG(ERROR) << "avcodec_decode_subtitle2 failed, err: "
+               << Util::generateErrorDesc(result);
+    // free the packet we've created (no-op when none was allocated)
+    av_packet_free(&flushPacket);
+    return result;
+  } else if (result == 0) {
+    result = pkt->size; // discard the rest of the package
+  }
+
+  sub_.release = gotFramePtr;
+  *gotFrame = gotFramePtr > 0;
+
+  // set proper pts in us
+  if (gotFramePtr) {
+    sub_.pts = av_rescale_q(
+        pkt->pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ);
+  }
+
+  av_packet_free(&flushPacket);
+  return result;
+}
+
+int SubtitleStream::copyFrameBytes(ByteStorage* out, bool flush) {
+  return sampler_.sample(flush ? nullptr : &sub_, out);
+}
+
+void SubtitleStream::setFramePts(DecoderHeader* header, bool) {
+  header->pts = sub_.pts; // already in us
+}
+
+} // namespace ffmpeg
diff --git a/torchvision/csrc/io/decoder/subtitle_stream.h b/torchvision/csrc/io/decoder/subtitle_stream.h
new file mode 100644
index 00000000000..6c366e11f50
--- /dev/null
+++ b/torchvision/csrc/io/decoder/subtitle_stream.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "stream.h"
+#include "subtitle_sampler.h"
+
+namespace ffmpeg {
+
+/**
+ * Class uses FFMPEG library to decode one subtitle stream.
+ */ +struct AVSubtitleKeeper : AVSubtitle { + int64_t release{0}; +}; + +class SubtitleStream : public Stream { + public: + SubtitleStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const SubtitleFormat& format); + ~SubtitleStream() override; + + protected: + void setFramePts(DecoderHeader* header, bool flush) override; + + private: + int initFormat() override; + int analyzePacket(const AVPacket* packet, bool* gotFrame) override; + int copyFrameBytes(ByteStorage* out, bool flush) override; + void releaseSubtitle(); + + private: + SubtitleSampler sampler_; + AVSubtitleKeeper sub_; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/sync_decoder.cpp b/torchvision/csrc/io/decoder/sync_decoder.cpp new file mode 100644 index 00000000000..1f03ef8eb95 --- /dev/null +++ b/torchvision/csrc/io/decoder/sync_decoder.cpp @@ -0,0 +1,97 @@ +#include "sync_decoder.h" +#include + +namespace ffmpeg { + +SyncDecoder::AVByteStorage::AVByteStorage(size_t n) { + ensure(n); +} + +SyncDecoder::AVByteStorage::~AVByteStorage() { + av_free(buffer_); +} + +void SyncDecoder::AVByteStorage::ensure(size_t n) { + if (tail() < n) { + capacity_ = offset_ + length_ + n; + buffer_ = static_cast(av_realloc(buffer_, capacity_)); + } +} + +uint8_t* SyncDecoder::AVByteStorage::writableTail() { + TORCH_CHECK_LE(offset_ + length_, capacity_); + return buffer_ + offset_ + length_; +} + +void SyncDecoder::AVByteStorage::append(size_t n) { + TORCH_CHECK_LE(n, tail()); + length_ += n; +} + +void SyncDecoder::AVByteStorage::trim(size_t n) { + TORCH_CHECK_LE(n, length_); + offset_ += n; + length_ -= n; +} + +const uint8_t* SyncDecoder::AVByteStorage::data() const { + return buffer_ + offset_; +} + +size_t SyncDecoder::AVByteStorage::length() const { + return length_; +} + +size_t SyncDecoder::AVByteStorage::tail() const { + TORCH_CHECK_LE(offset_ + length_, capacity_); + return capacity_ - offset_ - length_; +} + +void SyncDecoder::AVByteStorage::clear() { + 
offset_ = 0; + length_ = 0; +} + +std::unique_ptr SyncDecoder::createByteStorage(size_t n) { + return std::make_unique(n); +} + +void SyncDecoder::onInit() { + eof_ = false; + queue_.clear(); +} + +int SyncDecoder::decode(DecoderOutputMessage* out, uint64_t timeoutMs) { + if (eof_ && queue_.empty()) { + return ENODATA; + } + + if (queue_.empty()) { + int result = getFrame(timeoutMs); + // assign EOF + eof_ = result == ENODATA; + // check unrecoverable error, any error but ENODATA + if (result && result != ENODATA) { + return result; + } + + // still empty + if (queue_.empty()) { + if (eof_) { + return ENODATA; + } else { + LOG(INFO) << "Queue is empty"; + return ETIMEDOUT; + } + } + } + + *out = std::move(queue_.front()); + queue_.pop_front(); + return 0; +} + +void SyncDecoder::push(DecoderOutputMessage&& buffer) { + queue_.push_back(std::move(buffer)); +} +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/sync_decoder.h b/torchvision/csrc/io/decoder/sync_decoder.h new file mode 100644 index 00000000000..b7cf7b625ac --- /dev/null +++ b/torchvision/csrc/io/decoder/sync_decoder.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include "decoder.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode media streams. + * Media bytes can be explicitly provided through read-callback + * or fetched internally by FFMPEG library + */ +class SyncDecoder : public Decoder { + public: + // Allocation of memory must be done with a proper alignment. 
+ class AVByteStorage : public ByteStorage { + public: + explicit AVByteStorage(size_t n); + ~AVByteStorage() override; + void ensure(size_t n) override; + uint8_t* writableTail() override; + void append(size_t n) override; + void trim(size_t n) override; + const uint8_t* data() const override; + size_t length() const override; + size_t tail() const override; + void clear() override; + + private: + size_t offset_{0}; + size_t length_{0}; + size_t capacity_{0}; + uint8_t* buffer_{nullptr}; + }; + + public: + int decode(DecoderOutputMessage* out, uint64_t timeoutMs) override; + + private: + void push(DecoderOutputMessage&& buffer) override; + void onInit() override; + std::unique_ptr createByteStorage(size_t n) override; + + private: + std::list queue_; + bool eof_{false}; +}; +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/sync_decoder_test.cpp b/torchvision/csrc/io/decoder/sync_decoder_test.cpp new file mode 100644 index 00000000000..085966ce687 --- /dev/null +++ b/torchvision/csrc/io/decoder/sync_decoder_test.cpp @@ -0,0 +1,416 @@ +#include +#include +#include +#include "memory_buffer.h" +#include "sync_decoder.h" +#include "util.h" + +using namespace ffmpeg; + +namespace { +struct VideoFileStats { + std::string name; + size_t durationPts{0}; + int num{0}; + int den{0}; + int fps{0}; +}; + +void gotAllTestFiles( + const std::string& folder, + std::vector* stats) { + DIR* d = opendir(folder.c_str()); + CHECK(d); + struct dirent* dir; + while ((dir = readdir(d))) { + if (dir->d_type != DT_DIR && 0 != strcmp(dir->d_name, "README")) { + VideoFileStats item; + item.name = folder + '/' + dir->d_name; + LOG(INFO) << "Found video file: " << item.name; + stats->push_back(std::move(item)); + } + } + closedir(d); +} + +void gotFilesStats(std::vector& stats) { + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.formats = {MediaFormat(0)}; + params.headerOnly = true; + 
params.preventStaleness = false; + size_t avgProvUs = 0; + const size_t rounds = 100; + for (auto& item : stats) { + LOG(INFO) << "Decoding video file in memory: " << item.name; + FILE* f = fopen(item.name.c_str(), "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + size_t s = fread(buffer.data(), 1, buffer.size(), f); + TORCH_CHECK_EQ(buffer.size(), s); + fclose(f); + + for (size_t i = 0; i < rounds; ++i) { + SyncDecoder decoder; + std::vector metadata; + const auto now = std::chrono::steady_clock::now(); + CHECK(decoder.init( + params, + MemoryBuffer::getCallback(buffer.data(), buffer.size()), + &metadata)); + const auto then = std::chrono::steady_clock::now(); + decoder.shutdown(); + avgProvUs += + std::chrono::duration_cast(then - now) + .count(); + TORCH_CHECK_EQ(metadata.size(), 1); + item.num = metadata[0].num; + item.den = metadata[0].den; + item.fps = metadata[0].fps; + item.durationPts = + av_rescale_q(metadata[0].duration, AV_TIME_BASE_Q, {1, item.fps}); + } + } + LOG(INFO) << "Probing (us) " << avgProvUs / stats.size() / rounds; +} + +size_t measurePerformanceUs( + const std::vector& stats, + size_t rounds, + size_t num, + size_t stride) { + size_t avgClipDecodingUs = 0; + std::srand(time(nullptr)); + for (const auto& item : stats) { + FILE* f = fopen(item.name.c_str(), "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + size_t s = fread(buffer.data(), 1, buffer.size(), f); + TORCH_CHECK_EQ(buffer.size(), s); + fclose(f); + + for (size_t i = 0; i < rounds; ++i) { + // randomy select clip + size_t rOffset = std::rand(); + size_t fOffset = rOffset % item.durationPts; + size_t clipFrames = num + (num - 1) * stride; + if (fOffset + clipFrames > item.durationPts) { + fOffset = item.durationPts - clipFrames; + } + + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.preventStaleness = 
false; + + for (size_t n = 0; n < num; ++n) { + std::list msgs; + + params.startOffset = + av_rescale_q(fOffset, {1, item.fps}, AV_TIME_BASE_Q); + params.endOffset = params.startOffset + 100; + + auto now = std::chrono::steady_clock::now(); + SyncDecoder decoder; + CHECK(decoder.init( + params, + MemoryBuffer::getCallback(buffer.data(), buffer.size()), + nullptr)); + DecoderOutputMessage out; + while (0 == decoder.decode(&out, params.timeoutMs)) { + msgs.push_back(std::move(out)); + } + + decoder.shutdown(); + + const auto then = std::chrono::steady_clock::now(); + + fOffset += 1 + stride; + + avgClipDecodingUs += + std::chrono::duration_cast(then - now) + .count(); + } + } + } + + return avgClipDecodingUs / rounds / num / stats.size(); +} + +void runDecoder(SyncDecoder& decoder) { + DecoderOutputMessage out; + size_t audioFrames = 0, videoFrames = 0, totalBytes = 0; + while (0 == decoder.decode(&out, 10000)) { + if (out.header.format.type == TYPE_AUDIO) { + ++audioFrames; + } else if (out.header.format.type == TYPE_VIDEO) { + ++videoFrames; + } else if (out.header.format.type == TYPE_SUBTITLE && out.payload) { + // deserialize + LOG(INFO) << "Deserializing subtitle"; + AVSubtitle sub; + memset(&sub, 0, sizeof(sub)); + EXPECT_TRUE(Util::deserialize(*out.payload, &sub)); + LOG(INFO) << "Found subtitles" << ", num rects: " << sub.num_rects; + for (int i = 0; i < sub.num_rects; ++i) { + std::string text = "picture"; + if (sub.rects[i]->type == SUBTITLE_TEXT) { + text = sub.rects[i]->text; + } else if (sub.rects[i]->type == SUBTITLE_ASS) { + text = sub.rects[i]->ass; + } + + LOG(INFO) << "Rect num: " << i << ", type:" << sub.rects[i]->type + << ", text: " << text; + } + + avsubtitle_free(&sub); + } + if (out.payload) { + totalBytes += out.payload->length(); + } + } + LOG(INFO) << "Decoded audio frames: " << audioFrames + << ", video frames: " << videoFrames + << ", total bytes: " << totalBytes; +} +} // namespace + +TEST(SyncDecoder, TestSyncDecoderPerformance) { + // 
Measure the average time of decoding per clip + // 1. list of the videos in testing directory + // 2. for each video got number of frames with timestamps + // 3. randomly select frame offset + // 4. adjust offset for number frames and strides, + // if it's out out upper boundary + // 5. repeat multiple times, measuring and accumulating decoding time + // per clip. + /* + 1) 4 x 2 + 2) 8 x 8 + 3) 16 x 8 + 4) 32 x 4 + */ + const std::string kFolder = "pytorch/vision/test/assets/videos"; + std::vector stats; + gotAllTestFiles(kFolder, &stats); + gotFilesStats(stats); + + const size_t kRounds = 10; + + auto new4x2 = measurePerformanceUs(stats, kRounds, 4, 2); + auto new8x8 = measurePerformanceUs(stats, kRounds, 8, 8); + auto new16x8 = measurePerformanceUs(stats, kRounds, 16, 8); + auto new32x4 = measurePerformanceUs(stats, kRounds, 32, 4); + LOG(INFO) << "Clip decoding (us)" << ", new(4x2): " << new4x2 + << ", new(8x8): " << new8x8 << ", new(16x8): " << new16x8 + << ", new(32x4): " << new32x4; +} + +TEST(SyncDecoder, Test) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestSubtitles) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + params.uri = "vue/synergy/data/robotsub.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestHeadersOnly) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.headerOnly = true; + params.formats = {MediaFormat(), 
MediaFormat(0), MediaFormat('0')}; + + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/SOX5yA1l24A.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/WUzgd7C1pWA.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestHeadersOnlyDownSampling) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.headerOnly = true; + MediaFormat format; + format.type = TYPE_AUDIO; + format.format.audio.samples = 8000; + params.formats.insert(format); + + format.type = TYPE_VIDEO; + format.format.video.width = 224; + format.format.video.height = 224; + params.formats.insert(format); + + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/SOX5yA1l24A.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/WUzgd7C1pWA.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestInitOnlyNoShutdown) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.headerOnly = false; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + std::vector metadata; + CHECK(decoder.init(params, nullptr, &metadata)); +} + +TEST(SyncDecoder, TestMemoryBuffer) { + SyncDecoder 
decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.endOffset = 9000000; + params.seekAccuracy = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + + FILE* f = fopen( + "pytorch/vision/test/assets/videos/RATRACE_wave_f_nm_np1_fr_goo_37.avi", + "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + size_t s = fread(buffer.data(), 1, buffer.size(), f); + TORCH_CHECK_EQ(buffer.size(), s); + fclose(f); + CHECK(decoder.init( + params, + MemoryBuffer::getCallback(buffer.data(), buffer.size()), + nullptr)); + LOG(INFO) << "Decoding from memory bytes: " << buffer.size(); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestMemoryBufferNoSeekableWithFullRead) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.endOffset = 9000000; + params.seekAccuracy = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + + FILE* f = fopen("pytorch/vision/test/assets/videos/R6llTwEh07w.mp4", "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + size_t s = fread(buffer.data(), 1, buffer.size(), f); + TORCH_CHECK_EQ(buffer.size(), s); + fclose(f); + + params.maxSeekableBytes = buffer.size() + 1; + MemoryBuffer object(buffer.data(), buffer.size()); + CHECK(decoder.init( + params, + [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable + -> int { + if (out) { // see defs.h file + // read mode + return object.read(out, size); + } + // seek mode + if (!timeoutMs) { + // seek capability, yes - no + return -1; + } + return object.seek(size, whence); + }, + nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestMemoryBufferNoSeekableWithPartialRead) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + 
params.endOffset = 9000000; + params.seekAccuracy = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + + FILE* f = fopen("pytorch/vision/test/assets/videos/R6llTwEh07w.mp4", "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + size_t s = fread(buffer.data(), 1, buffer.size(), f); + TORCH_CHECK_EQ(buffer.size(), s); + fclose(f); + + params.maxSeekableBytes = buffer.size() / 2; + MemoryBuffer object(buffer.data(), buffer.size()); + CHECK(!decoder.init( + params, + [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable + -> int { + if (out) { // see defs.h file + // read mode + return object.read(out, size); + } + // seek mode + if (!timeoutMs) { + // seek capability, yes - no + return -1; + } + return object.seek(size, whence); + }, + nullptr)); +} diff --git a/torchvision/csrc/io/decoder/time_keeper.cpp b/torchvision/csrc/io/decoder/time_keeper.cpp new file mode 100644 index 00000000000..845c76cddc8 --- /dev/null +++ b/torchvision/csrc/io/decoder/time_keeper.cpp @@ -0,0 +1,35 @@ +#include "time_keeper.h" +#include "defs.h" + +namespace ffmpeg { + +namespace { +const long kMaxTimeBaseDiference = 10; +} + +long TimeKeeper::adjust(long& decoderTimestamp) { + const long now = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + if (startTime_ == 0) { + startTime_ = now; + } + if (streamTimestamp_ == 0) { + streamTimestamp_ = decoderTimestamp; + } + + const auto runOut = startTime_ + decoderTimestamp - streamTimestamp_; + + if (std::labs((now - runOut) / AV_TIME_BASE) > kMaxTimeBaseDiference) { + streamTimestamp_ = startTime_ - now + decoderTimestamp; + } + + const auto sleepAdvised = runOut - now; + + decoderTimestamp += startTime_ - streamTimestamp_; + + return sleepAdvised > 0 ? 
sleepAdvised : 0; +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/time_keeper.h b/torchvision/csrc/io/decoder/time_keeper.h new file mode 100644 index 00000000000..e4d4718c705 --- /dev/null +++ b/torchvision/csrc/io/decoder/time_keeper.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace ffmpeg { + +/** + * Class keeps the track of the decoded timestamps (us) for media streams. + */ + +class TimeKeeper { + public: + TimeKeeper() = default; + + // adjust provided @timestamp to the corrected value + // return advised sleep time before next frame processing in (us) + long adjust(long& decoderTimestamp); + + private: + long startTime_{0}; + long streamTimestamp_{0}; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/util.cpp b/torchvision/csrc/io/decoder/util.cpp new file mode 100644 index 00000000000..149f402c5dc --- /dev/null +++ b/torchvision/csrc/io/decoder/util.cpp @@ -0,0 +1,389 @@ +#include "util.h" +#include + +namespace ffmpeg { + +namespace Serializer { + +// fixed size types +template +inline size_t getSize(const T& x) { + return sizeof(x); +} + +template +inline bool +serializeItem(uint8_t* dest, size_t len, size_t& pos, const T& src) { + VLOG(6) << "Generic serializeItem"; + const auto required = sizeof(src); + if (len < pos + required) { + return false; + } + memcpy(dest + pos, &src, required); + pos += required; + return true; +} + +template +inline bool +deserializeItem(const uint8_t* src, size_t len, size_t& pos, T& dest) { + const auto required = sizeof(dest); + if (len < pos + required) { + return false; + } + memcpy(&dest, src + pos, required); + pos += required; + return true; +} + +// AVSubtitleRect specialization +inline size_t getSize(const AVSubtitleRect& x) { + auto rectBytes = [](const AVSubtitleRect& y) -> size_t { + size_t s = 0; + switch (y.type) { + case SUBTITLE_BITMAP: + for (int i = 0; i < y.nb_colors; ++i) { + s += sizeof(y.linesize[i]); + s += y.linesize[i]; + } + break; + case 
SUBTITLE_TEXT: + s += sizeof(size_t); + s += strlen(y.text); + break; + case SUBTITLE_ASS: + s += sizeof(size_t); + s += strlen(y.ass); + break; + default: + break; + } + return s; + }; + return getSize(x.x) + getSize(x.y) + getSize(x.w) + getSize(x.h) + + getSize(x.nb_colors) + getSize(x.type) + getSize(x.flags) + rectBytes(x); +} + +// AVSubtitle specialization +inline size_t getSize(const AVSubtitle& x) { + auto rectBytes = [](const AVSubtitle& y) -> size_t { + size_t s = getSize(y.num_rects); + for (unsigned i = 0; i < y.num_rects; ++i) { + s += getSize(*y.rects[i]); + } + return s; + }; + return getSize(x.format) + getSize(x.start_display_time) + + getSize(x.end_display_time) + getSize(x.pts) + rectBytes(x); +} + +inline bool serializeItem( + uint8_t* dest, + size_t len, + size_t& pos, + const AVSubtitleRect& src) { + auto rectSerialize = + [](uint8_t* d, size_t l, size_t& p, const AVSubtitleRect& x) -> size_t { + switch (x.type) { + case SUBTITLE_BITMAP: + for (int i = 0; i < x.nb_colors; ++i) { + if (!serializeItem(d, l, p, x.linesize[i])) { + return false; + } + if (p + x.linesize[i] > l) { + return false; + } + memcpy(d + p, x.data[i], x.linesize[i]); + p += x.linesize[i]; + } + return true; + case SUBTITLE_TEXT: { + const size_t s = strlen(x.text); + if (!serializeItem(d, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + memcpy(d + p, x.text, s); + p += s; + return true; + } + case SUBTITLE_ASS: { + const size_t s = strlen(x.ass); + if (!serializeItem(d, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + memcpy(d + p, x.ass, s); + p += s; + return true; + } + default: + return true; + } + }; + return serializeItem(dest, len, pos, src.x) && + serializeItem(dest, len, pos, src.y) && + serializeItem(dest, len, pos, src.w) && + serializeItem(dest, len, pos, src.h) && + serializeItem(dest, len, pos, src.nb_colors) && + serializeItem(dest, len, pos, src.type) && + serializeItem(dest, len, pos, src.flags) && + 
rectSerialize(dest, len, pos, src); +} + +inline bool +serializeItem(uint8_t* dest, size_t len, size_t& pos, const AVSubtitle& src) { + auto rectSerialize = + [](uint8_t* d, size_t l, size_t& p, const AVSubtitle& x) -> bool { + bool res = serializeItem(d, l, p, x.num_rects); + for (unsigned i = 0; res && i < x.num_rects; ++i) { + res = serializeItem(d, l, p, *(x.rects[i])); + } + return res; + }; + VLOG(6) << "AVSubtitle serializeItem"; + return serializeItem(dest, len, pos, src.format) && + serializeItem(dest, len, pos, src.start_display_time) && + serializeItem(dest, len, pos, src.end_display_time) && + serializeItem(dest, len, pos, src.pts) && + rectSerialize(dest, len, pos, src); +} + +inline bool deserializeItem( + const uint8_t* src, + size_t len, + size_t& pos, + AVSubtitleRect& dest) { + auto rectDeserialize = + [](const uint8_t* y, size_t l, size_t& p, AVSubtitleRect& x) -> bool { + switch (x.type) { + case SUBTITLE_BITMAP: + for (int i = 0; i < x.nb_colors; ++i) { + if (!deserializeItem(y, l, p, x.linesize[i])) { + return false; + } + if (p + x.linesize[i] > l) { + return false; + } + x.data[i] = (uint8_t*)av_malloc(x.linesize[i]); + memcpy(x.data[i], y + p, x.linesize[i]); + p += x.linesize[i]; + } + return true; + case SUBTITLE_TEXT: { + size_t s = 0; + if (!deserializeItem(y, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + x.text = (char*)av_malloc(s + 1); + memcpy(x.text, y + p, s); + x.text[s] = 0; + p += s; + return true; + } + case SUBTITLE_ASS: { + size_t s = 0; + if (!deserializeItem(y, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + x.ass = (char*)av_malloc(s + 1); + memcpy(x.ass, y + p, s); + x.ass[s] = 0; + p += s; + return true; + } + default: + return true; + } + }; + + return deserializeItem(src, len, pos, dest.x) && + deserializeItem(src, len, pos, dest.y) && + deserializeItem(src, len, pos, dest.w) && + deserializeItem(src, len, pos, dest.h) && + deserializeItem(src, len, pos, 
dest.nb_colors) && + deserializeItem(src, len, pos, dest.type) && + deserializeItem(src, len, pos, dest.flags) && + rectDeserialize(src, len, pos, dest); +} + +inline bool +deserializeItem(const uint8_t* src, size_t len, size_t& pos, AVSubtitle& dest) { + auto rectDeserialize = + [](const uint8_t* y, size_t l, size_t& p, AVSubtitle& x) -> bool { + bool res = deserializeItem(y, l, p, x.num_rects); + if (res && x.num_rects) { + x.rects = + (AVSubtitleRect**)av_malloc(x.num_rects * sizeof(AVSubtitleRect*)); + } + for (unsigned i = 0; res && i < x.num_rects; ++i) { + x.rects[i] = (AVSubtitleRect*)av_malloc(sizeof(AVSubtitleRect)); + memset(x.rects[i], 0, sizeof(AVSubtitleRect)); + res = deserializeItem(y, l, p, *x.rects[i]); + } + return res; + }; + return deserializeItem(src, len, pos, dest.format) && + deserializeItem(src, len, pos, dest.start_display_time) && + deserializeItem(src, len, pos, dest.end_display_time) && + deserializeItem(src, len, pos, dest.pts) && + rectDeserialize(src, len, pos, dest); +} +} // namespace Serializer + +namespace Util { +std::string generateErrorDesc(int errorCode) { + std::array buffer; + if (av_strerror(errorCode, buffer.data(), buffer.size()) < 0) { + return std::string("Unknown error code: ") + std::to_string(errorCode); + } + buffer.back() = 0; + return std::string(buffer.data()); +} + +size_t serialize(const AVSubtitle& sub, ByteStorage* out) { + const auto len = size(sub); + size_t pos = 0; + if (!Serializer::serializeItem(out->writableTail(), len, pos, sub)) { + return 0; + } + out->append(len); + return len; +} + +bool deserialize(const ByteStorage& buf, AVSubtitle* sub) { + size_t pos = 0; + return Serializer::deserializeItem(buf.data(), buf.length(), pos, *sub); +} + +size_t size(const AVSubtitle& sub) { + return Serializer::getSize(sub); +} + +bool validateVideoFormat(const VideoFormat& f) { + // clang-format off + /* + Valid parameters values for decoder + 
____________________________________________________________________________________ + | W | H | minDimension | maxDimension | cropImage | algorithm | + |__________________________________________________________________________________| + | 0 | 0 | 0 | 0 | N/A | original | + |__________________________________________________________________________________| + | >0 | 0 | N/A | N/A | N/A | scale keeping W | + |__________________________________________________________________________________| + | 0 | >0 | N/A | N/A | N/A | scale keeping H | + |__________________________________________________________________________________| + | >0 | >0 | N/A | N/A | 0 | stretch/scale | + |__________________________________________________________________________________| + | >0 | >0 | N/A | N/A | >0 | scale/crop | + |__________________________________________________________________________________| + | 0 | 0 | >0 | 0 | N/A |scale to min dimension | + |__________________________________________________________________________________| + | 0 | 0 | 0 | >0 | N/A |scale to max dimension | + |__________________________________________________________________________________| + | 0 | 0 | >0 | >0 | N/A |stretch to min/max dimension| + |_____|_____|______________|______________|___________|____________________________| + + */ + // clang-format on + return (f.width == 0 && // #1, #6, #7 and #8 + f.height == 0 && f.cropImage == 0) || + (f.width != 0 && // #4 and #5 + f.height != 0 && f.minDimension == 0 && f.maxDimension == 0) || + (((f.width != 0 && // #2 + f.height == 0) || + (f.width == 0 && // #3 + f.height != 0)) && + f.minDimension == 0 && f.maxDimension == 0 && f.cropImage == 0); +} + +void setFormatDimensions( + size_t& destW, + size_t& destH, + size_t userW, + size_t userH, + size_t srcW, + size_t srcH, + size_t minDimension, + size_t maxDimension, + size_t cropImage) { + // rounding rules + // int -> double -> round up + // if fraction is >= 0.5 or round down if fraction is < 0.5 
+ // int result = double(value) + 0.5 + // here we rounding double to int according to the above rule + + // #1, #6, #7 and #8 + if (userW == 0 && userH == 0) { + if (minDimension > 0 && maxDimension == 0) { // #6 + if (srcW > srcH) { + // landscape + destH = minDimension; + destW = round(double(srcW * minDimension) / srcH); + } else { + // portrait + destW = minDimension; + destH = round(double(srcH * minDimension) / srcW); + } + } else if (minDimension == 0 && maxDimension > 0) { // #7 + if (srcW > srcH) { + // landscape + destW = maxDimension; + destH = round(double(srcH * maxDimension) / srcW); + } else { + // portrait + destH = maxDimension; + destW = round(double(srcW * maxDimension) / srcH); + } + } else if (minDimension > 0 && maxDimension > 0) { // #8 + if (srcW > srcH) { + // landscape + destW = maxDimension; + destH = minDimension; + } else { + // portrait + destW = minDimension; + destH = maxDimension; + } + } else { // #1 + destW = srcW; + destH = srcH; + } + } else if (userW != 0 && userH == 0) { // #2 + destW = userW; + destH = round(double(srcH * userW) / srcW); + } else if (userW == 0 && userH != 0) { // #3 + destW = round(double(srcW * userH) / srcH); + destH = userH; + } else { // userW != 0 && userH != 0 + if (cropImage == 0) { // #4 + destW = userW; + destH = userH; + } else { // #5 + double userSlope = double(userH) / userW; + double srcSlope = double(srcH) / srcW; + if (srcSlope < userSlope) { + destW = round(double(srcW * userH) / srcH); + destH = userH; + } else { + destW = userW; + destH = round(double(srcH * userW) / srcW); + } + } + } + // prevent zeros + destW = std::max(destW, size_t(1UL)); + destH = std::max(destH, size_t(1UL)); +} +} // namespace Util +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/util.h b/torchvision/csrc/io/decoder/util.h new file mode 100644 index 00000000000..01b550e5bbc --- /dev/null +++ b/torchvision/csrc/io/decoder/util.h @@ -0,0 +1,28 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg 
{ + +/** + * FFMPEG library utility functions. + */ + +namespace Util { +std::string generateErrorDesc(int errorCode); +size_t serialize(const AVSubtitle& sub, ByteStorage* out); +bool deserialize(const ByteStorage& buf, AVSubtitle* sub); +size_t size(const AVSubtitle& sub); +void setFormatDimensions( + size_t& destW, + size_t& destH, + size_t userW, + size_t userH, + size_t srcW, + size_t srcH, + size_t minDimension, + size_t maxDimension, + size_t cropImage); +bool validateVideoFormat(const VideoFormat& format); +} // namespace Util +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/util_test.cpp b/torchvision/csrc/io/decoder/util_test.cpp new file mode 100644 index 00000000000..78de08b7139 --- /dev/null +++ b/torchvision/csrc/io/decoder/util_test.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include "util.h" + +TEST(Util, TestSetFormatDimensions) { + // clang-format off + const size_t test_cases[][9] = { + // (userW, userH, srcW, srcH, minDimension, maxDimension, cropImage, destW, destH) + {0, 0, 172, 128, 0, 0, 0, 172, 128}, // #1 + {86, 0, 172, 128, 0, 0, 0, 86, 64}, // #2 + {64, 0, 128, 172, 0, 0, 0, 64, 86}, // #2 + {0, 32, 172, 128, 0, 0, 0, 43, 32}, // #3 + {32, 0, 128, 172, 0, 0, 0, 32, 43}, // #3 + {60, 50, 172, 128, 0, 0, 0, 60, 50}, // #4 + {50, 60, 128, 172, 0, 0, 0, 50, 60}, // #4 + {86, 40, 172, 128, 0, 0, 1, 86, 64}, // #5 + {86, 92, 172, 128, 0, 0, 1, 124, 92}, // #5 + {0, 0, 172, 128, 256, 0, 0, 344, 256}, // #6 + {0, 0, 128, 172, 256, 0, 0, 256, 344}, // #6 + {0, 0, 128, 172, 0, 344, 0, 256, 344}, // #7 + {0, 0, 172, 128, 0, 344, 0, 344, 256}, // #7 + {0, 0, 172, 128, 100, 344, 0, 344, 100},// #8 + {0, 0, 128, 172, 100, 344, 0, 100, 344} // #8 + }; + // clang-format onn + + for (const auto& tc : test_cases) { + size_t destW = 0; + size_t destH = 0; + ffmpeg::Util::setFormatDimensions(destW, destH, tc[0], tc[1], tc[2], tc[3], tc[4], tc[5], tc[6]); + CHECK(destW == tc[7]); + CHECK(destH == tc[8]); + } +} diff --git 
a/torchvision/csrc/io/decoder/video_sampler.cpp b/torchvision/csrc/io/decoder/video_sampler.cpp new file mode 100644 index 00000000000..8b712609e34 --- /dev/null +++ b/torchvision/csrc/io/decoder/video_sampler.cpp @@ -0,0 +1,337 @@ +#include "video_sampler.h" +#include +#include "util.h" + +// www.ffmpeg.org/doxygen/0.5/swscale-example_8c-source.html + +namespace ffmpeg { + +namespace { + +// Setup the data pointers and linesizes based on the specified image +// parameters and the provided array. This sets up "planes" to point to a +// "buffer" +// NOTE: this is most likely culprit behind #3534 +// +// Args: +// fmt: desired output video format +// buffer: source constant image buffer (in different format) that will contain +// the final image after SWScale planes: destination data pointer to be filled +// lineSize: target destination linesize (always {0}) +int preparePlanes( + const VideoFormat& fmt, + const uint8_t* buffer, + uint8_t** planes, + int* lineSize) { + int result; + + // NOTE: 1 at the end of av_fill_arrays is the value used for alignment + if ((result = av_image_fill_arrays( + planes, + lineSize, + buffer, + (AVPixelFormat)fmt.format, + fmt.width, + fmt.height, + 1)) < 0) { + LOG(ERROR) << "av_image_fill_arrays failed, err: " + << Util::generateErrorDesc(result); + } + return result; +} + +// Scale (and crop) the image slice in srcSlice and put the resulting scaled +// slice to `planes` buffer, which is mapped to be `out` via preparePlanes as +// `sws_scale` cannot access buffers directly. 
+// +// Args: +// context: SWSContext allocated on line 119 (if crop, optional) or 163 (if +// scale) srcSlice: frame data in YUV420P srcStride: the array containing the +// strides for each plane of the source +// image (from AVFrame->linesize[0]) +// out: destination buffer +// planes: indirect destination buffer (mapped to "out" via preparePlanes) +// lines: destination linesize; constant {0} +int transformImage( + SwsContext* context, + const uint8_t* const srcSlice[], + int srcStride[], + VideoFormat inFormat, + VideoFormat outFormat, + uint8_t* out, + uint8_t* planes[], + int lines[]) { + int result; + if ((result = preparePlanes(outFormat, out, planes, lines)) < 0) { + return result; + } + if (context) { + // NOTE: srcY stride always 0: this is a parameter of YUV format + if ((result = sws_scale( + context, srcSlice, srcStride, 0, inFormat.height, planes, lines)) < + 0) { + LOG(ERROR) << "sws_scale failed, err: " + << Util::generateErrorDesc(result); + return result; + } + } else if ( + inFormat.width == outFormat.width && + inFormat.height == outFormat.height && + inFormat.format == outFormat.format) { + // Copy planes without using sws_scale if sws_getContext failed. 
+ av_image_copy( + planes, + lines, + (const uint8_t**)srcSlice, + srcStride, + (AVPixelFormat)inFormat.format, + inFormat.width, + inFormat.height); + } else { + LOG(ERROR) << "Invalid scale context format " << inFormat.format; + return AVERROR(EINVAL); + } + return 0; +} +} // namespace + +VideoSampler::VideoSampler(int swsFlags, int64_t loggingUuid) + : swsFlags_(swsFlags), loggingUuid_(loggingUuid) {} + +VideoSampler::~VideoSampler() { + cleanUp(); +} + +void VideoSampler::shutdown() { + cleanUp(); +} + +bool VideoSampler::init(const SamplerParameters& params) { + cleanUp(); + + if (params.out.video.cropImage != 0) { + if (!Util::validateVideoFormat(params.out.video)) { + LOG(ERROR) << "Invalid video format" + << ", width: " << params.out.video.width + << ", height: " << params.out.video.height + << ", format: " << params.out.video.format + << ", minDimension: " << params.out.video.minDimension + << ", crop: " << params.out.video.cropImage; + + return false; + } + + scaleFormat_.format = params.out.video.format; + Util::setFormatDimensions( + scaleFormat_.width, + scaleFormat_.height, + params.out.video.width, + params.out.video.height, + params.in.video.width, + params.in.video.height, + 0, + 0, + 1); + + if (!(scaleFormat_ == params_.out.video)) { // crop required + cropContext_ = sws_getContext( + params.out.video.width, + params.out.video.height, + (AVPixelFormat)params.out.video.format, + params.out.video.width, + params.out.video.height, + (AVPixelFormat)params.out.video.format, + swsFlags_, + nullptr, + nullptr, + nullptr); + + if (!cropContext_) { + LOG(ERROR) << "sws_getContext failed for crop context"; + return false; + } + + const auto scaleImageSize = av_image_get_buffer_size( + (AVPixelFormat)scaleFormat_.format, + scaleFormat_.width, + scaleFormat_.height, + 1); + scaleBuffer_.resize(scaleImageSize); + } + } else { + scaleFormat_ = params.out.video; + } + + VLOG(1) << "Input format #" << loggingUuid_ << ", width " + << params.in.video.width << ", 
height " << params.in.video.height + << ", format " << params.in.video.format << ", minDimension " + << params.in.video.minDimension << ", cropImage " + << params.in.video.cropImage; + VLOG(1) << "Scale format #" << loggingUuid_ << ", width " + << scaleFormat_.width << ", height " << scaleFormat_.height + << ", format " << scaleFormat_.format << ", minDimension " + << scaleFormat_.minDimension << ", cropImage " + << scaleFormat_.cropImage; + VLOG(1) << "Crop format #" << loggingUuid_ << ", width " + << params.out.video.width << ", height " << params.out.video.height + << ", format " << params.out.video.format << ", minDimension " + << params.out.video.minDimension << ", cropImage " + << params.out.video.cropImage; + + // set output format + params_ = params; + + if (params.in.video.format == AV_PIX_FMT_YUV420P) { + /* When the video width and height are not multiples of 8, + * and there is no size change in the conversion, + * a blurry screen will appear on the right side + * This problem was discovered in 2012 and + * continues to exist in version 4.1.3 in 2019 + * This problem can be avoided by increasing SWS_ACCURATE_RND + * details https://trac.ffmpeg.org/ticket/1582 + */ + if ((params.in.video.width & 0x7) || (params.in.video.height & 0x7)) { + VLOG(1) << "The width " << params.in.video.width << " and height " + << params.in.video.height << " the image is not a multiple of 8, " + << "the decoding speed may be reduced"; + swsFlags_ |= SWS_ACCURATE_RND; + } + } + + scaleContext_ = sws_getContext( + params.in.video.width, + params.in.video.height, + (AVPixelFormat)params.in.video.format, + scaleFormat_.width, + scaleFormat_.height, + (AVPixelFormat)scaleFormat_.format, + swsFlags_, + nullptr, + nullptr, + nullptr); + // sws_getContext might fail if in/out format == AV_PIX_FMT_PAL8 (png format) + // Return true if input and output formats/width/height are identical + // Check scaleContext_ for nullptr in transformImage to copy planes directly + + if 
(params.in.video.width == scaleFormat_.width && + params.in.video.height == scaleFormat_.height && + params.in.video.format == scaleFormat_.format) { + return true; + } + return scaleContext_ != nullptr; +} + +// Main body of the sample function called from one of the overloads below +// +// Args: +// srcSlice: decoded AVFrame->data perpared buffer +// srcStride: linesize (usually obtained from AVFrame->linesize) +// out: return buffer (ByteStorage*) +int VideoSampler::sample( + const uint8_t* const srcSlice[], + int srcStride[], + ByteStorage* out) { + int result; + // scaled and cropped image + int outImageSize = av_image_get_buffer_size( + (AVPixelFormat)params_.out.video.format, + params_.out.video.width, + params_.out.video.height, + 1); + + out->ensure(outImageSize); + + uint8_t* scalePlanes[4] = {nullptr}; + int scaleLines[4] = {0}; + // perform scale first + if ((result = transformImage( + scaleContext_, + srcSlice, + srcStride, + params_.in.video, + scaleFormat_, + // for crop use internal buffer + cropContext_ ? scaleBuffer_.data() : out->writableTail(), + scalePlanes, + scaleLines))) { + return result; + } + + // is crop required? 
+ if (cropContext_) { + uint8_t* cropPlanes[4] = {nullptr}; + int cropLines[4] = {0}; + + if (params_.out.video.height < scaleFormat_.height) { + // Destination image is wider of source image: cut top and bottom + for (size_t i = 0; i < 4 && scalePlanes[i] != nullptr; ++i) { + scalePlanes[i] += scaleLines[i] * + (scaleFormat_.height - params_.out.video.height) / 2; + } + } else { + // Source image is wider of destination image: cut sides + for (size_t i = 0; i < 4 && scalePlanes[i] != nullptr; ++i) { + scalePlanes[i] += scaleLines[i] * + (scaleFormat_.width - params_.out.video.width) / 2 / + scaleFormat_.width; + } + } + + // crop image + if ((result = transformImage( + cropContext_, + scalePlanes, + scaleLines, + params_.out.video, + params_.out.video, + out->writableTail(), + cropPlanes, + cropLines))) { + return result; + } + } + + out->append(outImageSize); + return outImageSize; +} + +// Call from `video_stream.cpp::114` - occurs during file reads +int VideoSampler::sample(AVFrame* frame, ByteStorage* out) { + if (!frame) { + return 0; // no flush for videos + } + + return sample(frame->data, frame->linesize, out); +} + +// Call from `video_stream.cpp::114` - not sure when this occurs +int VideoSampler::sample(const ByteStorage* in, ByteStorage* out) { + if (!in) { + return 0; // no flush for videos + } + + int result; + uint8_t* inPlanes[4] = {nullptr}; + int inLineSize[4] = {0}; + + if ((result = preparePlanes( + params_.in.video, in->data(), inPlanes, inLineSize)) < 0) { + return result; + } + + return sample(inPlanes, inLineSize, out); +} + +void VideoSampler::cleanUp() { + if (scaleContext_) { + sws_freeContext(scaleContext_); + scaleContext_ = nullptr; + } + if (cropContext_) { + sws_freeContext(cropContext_); + cropContext_ = nullptr; + scaleBuffer_.clear(); + } +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/video_sampler.h b/torchvision/csrc/io/decoder/video_sampler.h new file mode 100644 index 00000000000..47247f2c0c5 --- 
/dev/null +++ b/torchvision/csrc/io/decoder/video_sampler.h @@ -0,0 +1,44 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class transcode video frames from one format into another + */ + +class VideoSampler : public MediaSampler { + public: + VideoSampler(int swsFlags = SWS_AREA, int64_t loggingUuid = 0); + + ~VideoSampler() override; + + // MediaSampler overrides + bool init(const SamplerParameters& params) override; + int sample(const ByteStorage* in, ByteStorage* out) override; + void shutdown() override; + + // returns number processed/scaling bytes + int sample(AVFrame* frame, ByteStorage* out); + int getImageBytes() const; + + private: + // close resources + void cleanUp(); + // helper functions for rescaling, cropping, etc. + int sample( + const uint8_t* const srcSlice[], + int srcStride[], + ByteStorage* out); + + private: + VideoFormat scaleFormat_; + SwsContext* scaleContext_{nullptr}; + SwsContext* cropContext_{nullptr}; + int swsFlags_{SWS_AREA}; + std::vector scaleBuffer_; + int64_t loggingUuid_{0}; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/video_stream.cpp b/torchvision/csrc/io/decoder/video_stream.cpp new file mode 100644 index 00000000000..fa08c65cac1 --- /dev/null +++ b/torchvision/csrc/io/decoder/video_stream.cpp @@ -0,0 +1,131 @@ +#include "video_stream.h" +#include +#include "util.h" + +namespace ffmpeg { + +namespace { +bool operator==(const VideoFormat& x, const AVFrame& y) { + return x.width == static_cast(y.width) && + x.height == static_cast(y.height) && x.format == y.format; +} + +bool operator==(const VideoFormat& x, const AVCodecContext& y) { + return x.width == static_cast(y.width) && + x.height == static_cast(y.height) && x.format == y.pix_fmt; +} + +VideoFormat& toVideoFormat(VideoFormat& x, const AVFrame& y) { + x.width = y.width; + x.height = y.height; + x.format = y.format; + return x; +} + +VideoFormat& toVideoFormat(VideoFormat& x, const AVCodecContext& y) { + x.width = y.width; 
+ x.height = y.height; + x.format = y.pix_fmt; + return x; +} +} // namespace + +VideoStream::VideoStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const VideoFormat& format, + int64_t loggingUuid) + : Stream( + inputCtx, + MediaFormat::makeMediaFormat(format, index), + convertPtsToWallTime, + loggingUuid) {} + +VideoStream::~VideoStream() { + if (sampler_) { + sampler_->shutdown(); + sampler_.reset(); + } +} + +int VideoStream::initFormat() { + // set output format + if (!Util::validateVideoFormat(format_.format.video)) { + LOG(ERROR) << "Invalid video format" + << ", width: " << format_.format.video.width + << ", height: " << format_.format.video.height + << ", format: " << format_.format.video.format + << ", minDimension: " << format_.format.video.minDimension + << ", crop: " << format_.format.video.cropImage; + return -1; + } + + // keep aspect ratio + Util::setFormatDimensions( + format_.format.video.width, + format_.format.video.height, + format_.format.video.width, + format_.format.video.height, + codecCtx_->width, + codecCtx_->height, + format_.format.video.minDimension, + format_.format.video.maxDimension, + 0); + + if (format_.format.video.format == AV_PIX_FMT_NONE) { + format_.format.video.format = codecCtx_->pix_fmt; + } + return format_.format.video.width != 0 && format_.format.video.height != 0 && + format_.format.video.format != AV_PIX_FMT_NONE + ? 0 + : -1; +} + +// copies frame bytes via sws_scale call in video_sampler.cpp +int VideoStream::copyFrameBytes(ByteStorage* out, bool flush) { + if (!sampler_) { + sampler_ = std::make_unique(SWS_AREA, loggingUuid_); + } + + // check if input format gets changed + if (flush ? !(sampler_->getInputFormat().video == *codecCtx_) + : !(sampler_->getInputFormat().video == *frame_)) { + // - reinit sampler + SamplerParameters params; + params.type = format_.type; + params.out = format_.format; + params.in = FormatUnion(0); + flush ? 
toVideoFormat(params.in.video, *codecCtx_) + : toVideoFormat(params.in.video, *frame_); + if (!sampler_->init(params)) { + return -1; + } + + VLOG(1) << "Set input video sampler format" + << ", width: " << params.in.video.width + << ", height: " << params.in.video.height + << ", format: " << params.in.video.format + << " : output video sampler format" + << ", width: " << format_.format.video.width + << ", height: " << format_.format.video.height + << ", format: " << format_.format.video.format + << ", minDimension: " << format_.format.video.minDimension + << ", crop: " << format_.format.video.cropImage; + } + // calls to a sampler that converts the frame from YUV422 to RGB24, and + // optionally crops and resizes the frame. Frame bytes are copied from + // frame_->data to out buffer + return sampler_->sample(flush ? nullptr : frame_, out); +} + +void VideoStream::setHeader(DecoderHeader* header, bool flush) { + Stream::setHeader(header, flush); + if (!flush) { // no frames for video flush + header->keyFrame = frame_->key_frame; + header->fps = av_q2d(av_guess_frame_rate( + inputCtx_, inputCtx_->streams[format_.stream], nullptr)); + } +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/video_stream.h b/torchvision/csrc/io/decoder/video_stream.h new file mode 100644 index 00000000000..e6a8bf02b65 --- /dev/null +++ b/torchvision/csrc/io/decoder/video_stream.h @@ -0,0 +1,31 @@ +#pragma once + +#include "stream.h" +#include "video_sampler.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one video stream. 
+ */ + +class VideoStream : public Stream { + public: + VideoStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const VideoFormat& format, + int64_t loggingUuid); + ~VideoStream() override; + + private: + int initFormat() override; + int copyFrameBytes(ByteStorage* out, bool flush) override; + void setHeader(DecoderHeader* header, bool flush) override; + + private: + std::unique_ptr sampler_; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/image/common.cpp b/torchvision/csrc/io/image/common.cpp new file mode 100644 index 00000000000..16b7ac2f91e --- /dev/null +++ b/torchvision/csrc/io/image/common.cpp @@ -0,0 +1,43 @@ + +#include "common.h" +#include + +namespace vision { +namespace image { + +void validate_encoded_data(const torch::Tensor& encoded_data) { + TORCH_CHECK(encoded_data.is_contiguous(), "Input tensor must be contiguous."); + TORCH_CHECK( + encoded_data.dtype() == torch::kU8, + "Input tensor must have uint8 data type, got ", + encoded_data.dtype()); + TORCH_CHECK( + encoded_data.dim() == 1 && encoded_data.numel() > 0, + "Input tensor must be 1-dimensional and non-empty, got ", + encoded_data.dim(), + " dims and ", + encoded_data.numel(), + " numels."); +} + +bool should_this_return_rgb_or_rgba_let_me_know_in_the_comments_down_below_guys_see_you_in_the_next_video( + ImageReadMode mode, + bool has_alpha) { + // Return true if the calling decoding function should return a 3D RGB tensor, + // and false if it should return a 4D RGBA tensor. + // This function ignores the requested "grayscale" modes and treats it as + // "unchanged", so it should only used on decoders who don't support grayscale + // outputs. + + if (mode == IMAGE_READ_MODE_RGB) { + return true; + } + if (mode == IMAGE_READ_MODE_RGB_ALPHA) { + return false; + } + // From here we assume mode is "unchanged", even for grayscale ones. 
+ return !has_alpha; +} + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/common.h b/torchvision/csrc/io/image/common.h new file mode 100644 index 00000000000..d81acfda7d4 --- /dev/null +++ b/torchvision/csrc/io/image/common.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace vision { +namespace image { + +/* Should be kept in-sync with Python ImageReadMode enum */ +using ImageReadMode = int64_t; +const ImageReadMode IMAGE_READ_MODE_UNCHANGED = 0; +const ImageReadMode IMAGE_READ_MODE_GRAY = 1; +const ImageReadMode IMAGE_READ_MODE_GRAY_ALPHA = 2; +const ImageReadMode IMAGE_READ_MODE_RGB = 3; +const ImageReadMode IMAGE_READ_MODE_RGB_ALPHA = 4; + +void validate_encoded_data(const torch::Tensor& encoded_data); + +bool should_this_return_rgb_or_rgba_let_me_know_in_the_comments_down_below_guys_see_you_in_the_next_video( + ImageReadMode mode, + bool has_alpha); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/common_jpeg.cpp b/torchvision/csrc/io/image/cpu/common_jpeg.cpp new file mode 100644 index 00000000000..4c993106b45 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/common_jpeg.cpp @@ -0,0 +1,26 @@ +#include "common_jpeg.h" + +namespace vision { +namespace image { +namespace detail { + +#if JPEG_FOUND +void torch_jpeg_error_exit(j_common_ptr cinfo) { + /* cinfo->err really points to a torch_jpeg_error_mgr struct, so coerce + * pointer */ + torch_jpeg_error_ptr myerr = (torch_jpeg_error_ptr)cinfo->err; + + /* Always display the message. */ + /* We could postpone this until after returning, if we chose. 
*/ + // (*cinfo->err->output_message)(cinfo); + /* Create the message */ + (*(cinfo->err->format_message))(cinfo, myerr->jpegLastErrorMsg); + + /* Return control to the setjmp point */ + longjmp(myerr->setjmp_buffer, 1); +} +#endif + +} // namespace detail +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/common_jpeg.h b/torchvision/csrc/io/image/cpu/common_jpeg.h new file mode 100644 index 00000000000..7f7f9f0ccf1 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/common_jpeg.h @@ -0,0 +1,27 @@ +#pragma once + +#if JPEG_FOUND +#include + +#include +#include + +namespace vision { +namespace image { +namespace detail { + +static const JOCTET EOI_BUFFER[1] = {JPEG_EOI}; +struct torch_jpeg_error_mgr { + struct jpeg_error_mgr pub; /* "public" fields */ + char jpegLastErrorMsg[JMSG_LENGTH_MAX]; /* error messages */ + jmp_buf setjmp_buffer; /* for return to caller */ +}; + +using torch_jpeg_error_ptr = struct torch_jpeg_error_mgr*; +void torch_jpeg_error_exit(j_common_ptr cinfo); + +} // namespace detail +} // namespace image +} // namespace vision + +#endif diff --git a/torchvision/csrc/io/image/cpu/common_png.h b/torchvision/csrc/io/image/cpu/common_png.h new file mode 100644 index 00000000000..68400d48e05 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/common_png.h @@ -0,0 +1,6 @@ +#pragma once + +#if PNG_FOUND +#include +#include +#endif diff --git a/torchvision/csrc/io/image/cpu/decode_gif.cpp b/torchvision/csrc/io/image/cpu/decode_gif.cpp new file mode 100644 index 00000000000..f26d37950e3 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_gif.cpp @@ -0,0 +1,165 @@ +#include "decode_gif.h" +#include +#include "../common.h" +#include "giflib/gif_lib.h" + +namespace vision { +namespace image { + +typedef struct reader_helper_t { + uint8_t const* encoded_data; // input tensor data pointer + size_t encoded_data_size; // size of input tensor in bytes + size_t num_bytes_read; // number of bytes read so far in the tensor +} 
reader_helper_t; + +// That function is used by GIFLIB routines to read the encoded bytes. +// This reads `len` bytes and writes them into `buf`. The data is read from the +// input tensor passed to decode_gif() starting at the `num_bytes_read` +// position. +int read_from_tensor(GifFileType* gifFile, GifByteType* buf, int len) { + // the UserData field was set in DGifOpen() + reader_helper_t* reader_helper = + static_cast(gifFile->UserData); + + size_t num_bytes_to_read = std::min( + (size_t)len, + reader_helper->encoded_data_size - reader_helper->num_bytes_read); + std::memcpy( + buf, reader_helper->encoded_data + reader_helper->num_bytes_read, len); + reader_helper->num_bytes_read += num_bytes_to_read; + return num_bytes_to_read; +} + +torch::Tensor decode_gif(const torch::Tensor& encoded_data) { + // LibGif docs: https://giflib.sourceforge.net/intro.html + // Refer over there for more details on the libgif API, API ref, and a + // detailed description of the GIF format. + + validate_encoded_data(encoded_data); + + int error = D_GIF_SUCCEEDED; + + // We're using DGidOpen. The other entrypoints of libgif are + // DGifOpenFileName and DGifOpenFileHandle but we don't want to use those, + // since we need to read the encoded bytes from a tensor of encoded bytes, not + // from a file (for consistency with existing jpeg and png decoders). Using + // DGifOpen is the only way to read from a custom source. + // For that we need to provide a reader function `read_from_tensor` that + // reads from the tensor, and we have to keep track of the number of bytes + // read so far: this is why we need the reader_helper struct. + + // TODO: We are potentially doing an unnecessary copy of the encoded bytes: + // - 1 copy in from file to tensor (in read_file()) + // - 1 copy from tensor to GIFLIB buffers (in read_from_tensor()) + // Since we're vendoring GIFLIB we can potentially modify the calls to + // InternalRead() and just set the `buf` pointer to the tensor data directly. 
+ // That might even save allocation of those buffers. + // If we do that, we'd have to make sure the buffers are never written to by + // GIFLIB, otherwise we'd be overridding the tensor data. + reader_helper_t reader_helper; + reader_helper.encoded_data = encoded_data.data_ptr(); + reader_helper.encoded_data_size = encoded_data.numel(); + reader_helper.num_bytes_read = 0; + GifFileType* gifFile = + DGifOpen(static_cast(&reader_helper), read_from_tensor, &error); + + TORCH_CHECK( + (gifFile != nullptr) && (error == D_GIF_SUCCEEDED), + "DGifOpenFileName() failed - ", + error); + + if (DGifSlurp(gifFile) == GIF_ERROR) { + auto gifFileError = gifFile->Error; + DGifCloseFile(gifFile, &error); + TORCH_CHECK(false, "DGifSlurp() failed - ", gifFileError); + } + auto num_images = gifFile->ImageCount; + + // This check should already done within DGifSlurp(), just to be safe + TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!"); + + GifColorType bg = {0, 0, 0}; + if (gifFile->SColorMap) { + bg = gifFile->SColorMap->Colors[gifFile->SBackGroundColor]; + } + + // The GIFLIB docs say that the canvas's height and width are potentially + // ignored by modern viewers, so to be on the safe side we set the output + // height to max(canvas_heigh, first_image_height). Same for width. + // https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html + auto out_h = + std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height); + auto out_w = + std::max(gifFile->SWidth, gifFile->SavedImages[0].ImageDesc.Width); + + // We output a channels-last tensor for consistency with other image decoders. + // Torchvision's resize tends to be is faster on uint8 channels-last tensors. 
+ auto options = torch::TensorOptions() + .dtype(torch::kU8) + .memory_format(torch::MemoryFormat::ChannelsLast); + auto out = torch::empty( + {int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options); + auto out_a = out.accessor(); + for (int i = 0; i < num_images; i++) { + const SavedImage& img = gifFile->SavedImages[i]; + + GraphicsControlBlock gcb; + DGifSavedExtensionToGCB(gifFile, i, &gcb); + + const GifImageDesc& desc = img.ImageDesc; + const ColorMapObject* cmap = + desc.ColorMap ? desc.ColorMap : gifFile->SColorMap; + TORCH_CHECK( + cmap != nullptr, + "Global and local color maps are missing. This should never happen!"); + + // When going from one image to another, there is a "disposal method" which + // specifies how to handle the transition. E.g. DISPOSE_DO_NOT means that + // the current image should essentially be drawn on top of the previous + // canvas. The pixels of that previous canvas will appear on the new one if + // either: + // - a pixel is transparent in the current image + // - the current image is smaller than the canvas, hence exposing its pixels + // The "background" disposal method means that the current canvas should be + // set to the background color. + // We only support these 2 modes and default to "background" when the + // disposal method is unspecified, or when it's set to "DISPOSE_PREVIOUS" + // which according to GIFLIB is not widely supported. + // (https://giflib.sourceforge.net/whatsinagif/animation_and_transparency.html). + if (i > 0 && gcb.DisposalMode == DISPOSE_DO_NOT) { + out[i] = out[i - 1]; + } else { + // Background. 
If bg wasn't defined, it will be (0, 0, 0) + for (int h = 0; h < gifFile->SHeight; h++) { + for (int w = 0; w < gifFile->SWidth; w++) { + out_a[i][0][h][w] = bg.Red; + out_a[i][1][h][w] = bg.Green; + out_a[i][2][h][w] = bg.Blue; + } + } + } + + for (int h = 0; h < desc.Height; h++) { + for (int w = 0; w < desc.Width; w++) { + auto c = img.RasterBits[h * desc.Width + w]; + if (c == gcb.TransparentColor || c >= cmap->ColorCount || h + desc.Top >= out_h || w + desc.Left >= out_w) { + continue; // transparent, bad palette index, or outside the canvas + } + GifColorType rgb = cmap->Colors[c]; + out_a[i][0][h + desc.Top][w + desc.Left] = rgb.Red; + out_a[i][1][h + desc.Top][w + desc.Left] = rgb.Green; + out_a[i][2][h + desc.Top][w + desc.Left] = rgb.Blue; + } + } + } + + out = out.squeeze(0); // remove batch dim if there's only one image + + DGifCloseFile(gifFile, &error); + TORCH_CHECK(error == D_GIF_SUCCEEDED, "DGifCloseFile() failed - ", error); + + return out; +} + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_gif.h b/torchvision/csrc/io/image/cpu/decode_gif.h new file mode 100644 index 00000000000..68d5073c91b --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_gif.h @@ -0,0 +1,12 @@ +#pragma once + +#include <torch/types.h> + +namespace vision { +namespace image { + +// encoded_data tensor must be 1D uint8 and contiguous +C10_EXPORT torch::Tensor decode_gif(const torch::Tensor& encoded_data); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_image.cpp b/torchvision/csrc/io/image/cpu/decode_image.cpp new file mode 100644 index 00000000000..43a688604f6 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_image.cpp @@ -0,0 +1,64 @@ +#include "decode_image.h" + +#include "decode_gif.h" +#include "decode_jpeg.h" +#include "decode_png.h" +#include "decode_webp.h" + +namespace vision { +namespace image { + +torch::Tensor decode_image( + const torch::Tensor& data, + ImageReadMode mode, + bool apply_exif_orientation) { + // Check that tensor is a CPU tensor + TORCH_CHECK(data.device() == torch::kCPU, "Expected a CPU 
tensor"); + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); + // Check that the input tensor is 1-dimensional + TORCH_CHECK( + data.dim() == 1 && data.numel() > 0, + "Expected a non empty 1-dimensional tensor"); + + auto err_msg = + "Unsupported image file. Only jpeg, png, webp and gif are currently supported. For avif and heic format, please rely on `decode_avif` and `decode_heic` directly."; + + auto datap = data.data_ptr(); + + const uint8_t jpeg_signature[3] = {255, 216, 255}; // == "\xFF\xD8\xFF" + TORCH_CHECK(data.numel() >= 3, err_msg); + if (memcmp(jpeg_signature, datap, 3) == 0) { + return decode_jpeg(data, mode, apply_exif_orientation); + } + + const uint8_t png_signature[4] = {137, 80, 78, 71}; // == "\211PNG" + TORCH_CHECK(data.numel() >= 4, err_msg); + if (memcmp(png_signature, datap, 4) == 0) { + return decode_png(data, mode, apply_exif_orientation); + } + + const uint8_t gif_signature_1[6] = { + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61}; // == "GIF89a" + const uint8_t gif_signature_2[6] = { + 0x47, 0x49, 0x46, 0x38, 0x37, 0x61}; // == "GIF87a" + TORCH_CHECK(data.numel() >= 6, err_msg); + if (memcmp(gif_signature_1, datap, 6) == 0 || + memcmp(gif_signature_2, datap, 6) == 0) { + return decode_gif(data); + } + + const uint8_t webp_signature_begin[4] = {0x52, 0x49, 0x46, 0x46}; // == "RIFF" + const uint8_t webp_signature_end[7] = { + 0x57, 0x45, 0x42, 0x50, 0x56, 0x50, 0x38}; // == "WEBPVP8" + TORCH_CHECK(data.numel() >= 15, err_msg); + if ((memcmp(webp_signature_begin, datap, 4) == 0) && + (memcmp(webp_signature_end, datap + 8, 7) == 0)) { + return decode_webp(data, mode); + } + + TORCH_CHECK(false, err_msg); +} + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_image.h b/torchvision/csrc/io/image/cpu/decode_image.h new file mode 100644 index 00000000000..f66d47eccd4 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_image.h @@ -0,0 
+1,15 @@ +#pragma once + +#include +#include "../common.h" + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor decode_image( + const torch::Tensor& data, + ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED, + bool apply_exif_orientation = false); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp new file mode 100644 index 00000000000..052b98e1be9 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp @@ -0,0 +1,268 @@ +#include "decode_jpeg.h" +#include "../common.h" +#include "common_jpeg.h" +#include "exif.h" + +namespace vision { +namespace image { + +#if !JPEG_FOUND +torch::Tensor decode_jpeg( + const torch::Tensor& data, + ImageReadMode mode, + bool apply_exif_orientation) { + TORCH_CHECK( + false, "decode_jpeg: torchvision not compiled with libjpeg support"); +} +#else + +using namespace detail; +using namespace exif_private; + +namespace { + +struct torch_jpeg_mgr { + struct jpeg_source_mgr pub; + const JOCTET* data; + size_t len; +}; + +static void torch_jpeg_init_source(j_decompress_ptr cinfo) {} + +static boolean torch_jpeg_fill_input_buffer(j_decompress_ptr cinfo) { + // No more data. Probably an incomplete image; Raise exception. + torch_jpeg_error_ptr myerr = (torch_jpeg_error_ptr)cinfo->err; + strcpy(myerr->jpegLastErrorMsg, "Image is incomplete or truncated"); + longjmp(myerr->setjmp_buffer, 1); +} + +static void torch_jpeg_skip_input_data(j_decompress_ptr cinfo, long num_bytes) { + torch_jpeg_mgr* src = (torch_jpeg_mgr*)cinfo->src; + if (src->pub.bytes_in_buffer < (size_t)num_bytes) { + // Skipping over all of remaining data; output EOI. + src->pub.next_input_byte = EOI_BUFFER; + src->pub.bytes_in_buffer = 1; + } else { + // Skipping over only some of the remaining data. 
+ src->pub.next_input_byte += num_bytes; + src->pub.bytes_in_buffer -= num_bytes; + } +} + +static void torch_jpeg_term_source(j_decompress_ptr cinfo) {} + +static void torch_jpeg_set_source_mgr( + j_decompress_ptr cinfo, + const unsigned char* data, + size_t len) { + torch_jpeg_mgr* src; + if (cinfo->src == 0) { // if this is first time; allocate memory + cinfo->src = (struct jpeg_source_mgr*)(*cinfo->mem->alloc_small)( + (j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(torch_jpeg_mgr)); + } + src = (torch_jpeg_mgr*)cinfo->src; + src->pub.init_source = torch_jpeg_init_source; + src->pub.fill_input_buffer = torch_jpeg_fill_input_buffer; + src->pub.skip_input_data = torch_jpeg_skip_input_data; + src->pub.resync_to_restart = jpeg_resync_to_restart; // default + src->pub.term_source = torch_jpeg_term_source; + // fill the buffers + src->data = (const JOCTET*)data; + src->len = len; + src->pub.bytes_in_buffer = len; + src->pub.next_input_byte = src->data; + + jpeg_save_markers(cinfo, APP1, 0xffff); +} + +inline unsigned char clamped_cmyk_rgb_convert( + unsigned char k, + unsigned char cmy) { + // Inspired from Pillow: + // https://github.com/python-pillow/Pillow/blob/07623d1a7cc65206a5355fba2ae256550bfcaba6/src/libImaging/Convert.c#L568-L569 + int v = k * cmy + 128; + v = ((v >> 8) + v) >> 8; + return std::clamp(k - v, 0, 255); +} + +void convert_line_cmyk_to_rgb( + j_decompress_ptr cinfo, + const unsigned char* cmyk_line, + unsigned char* rgb_line) { + int width = cinfo->output_width; + for (int i = 0; i < width; ++i) { + int c = cmyk_line[i * 4 + 0]; + int m = cmyk_line[i * 4 + 1]; + int y = cmyk_line[i * 4 + 2]; + int k = cmyk_line[i * 4 + 3]; + + rgb_line[i * 3 + 0] = clamped_cmyk_rgb_convert(k, 255 - c); + rgb_line[i * 3 + 1] = clamped_cmyk_rgb_convert(k, 255 - m); + rgb_line[i * 3 + 2] = clamped_cmyk_rgb_convert(k, 255 - y); + } +} + +inline unsigned char rgb_to_gray(int r, int g, int b) { + // Inspired from Pillow: + // 
https://github.com/python-pillow/Pillow/blob/07623d1a7cc65206a5355fba2ae256550bfcaba6/src/libImaging/Convert.c#L226 + return (r * 19595 + g * 38470 + b * 7471 + 0x8000) >> 16; +} + +void convert_line_cmyk_to_gray( + j_decompress_ptr cinfo, + const unsigned char* cmyk_line, + unsigned char* gray_line) { + int width = cinfo->output_width; + for (int i = 0; i < width; ++i) { + int c = cmyk_line[i * 4 + 0]; + int m = cmyk_line[i * 4 + 1]; + int y = cmyk_line[i * 4 + 2]; + int k = cmyk_line[i * 4 + 3]; + + int r = clamped_cmyk_rgb_convert(k, 255 - c); + int g = clamped_cmyk_rgb_convert(k, 255 - m); + int b = clamped_cmyk_rgb_convert(k, 255 - y); + + gray_line[i] = rgb_to_gray(r, g, b); + } +} + +} // namespace + +torch::Tensor decode_jpeg( + const torch::Tensor& data, + ImageReadMode mode, + bool apply_exif_orientation) { + C10_LOG_API_USAGE_ONCE( + "torchvision.csrc.io.image.cpu.decode_jpeg.decode_jpeg"); + + validate_encoded_data(data); + + struct jpeg_decompress_struct cinfo; + struct torch_jpeg_error_mgr jerr; + + auto datap = data.data_ptr(); + // Setup decompression structure + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = torch_jpeg_error_exit; + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. + * We need to clean up the JPEG object. + */ + jpeg_destroy_decompress(&cinfo); + TORCH_CHECK(false, jerr.jpegLastErrorMsg); + } + + jpeg_create_decompress(&cinfo); + torch_jpeg_set_source_mgr(&cinfo, datap, data.numel()); + + // read info from header. 
+ jpeg_read_header(&cinfo, TRUE); + + int channels = cinfo.num_components; + bool cmyk_to_rgb_or_gray = false; + + if (mode != IMAGE_READ_MODE_UNCHANGED) { + switch (mode) { + case IMAGE_READ_MODE_GRAY: + if (cinfo.jpeg_color_space == JCS_CMYK || + cinfo.jpeg_color_space == JCS_YCCK) { + cinfo.out_color_space = JCS_CMYK; + cmyk_to_rgb_or_gray = true; + } else { + cinfo.out_color_space = JCS_GRAYSCALE; + } + channels = 1; + break; + case IMAGE_READ_MODE_RGB: + if (cinfo.jpeg_color_space == JCS_CMYK || + cinfo.jpeg_color_space == JCS_YCCK) { + cinfo.out_color_space = JCS_CMYK; + cmyk_to_rgb_or_gray = true; + } else { + cinfo.out_color_space = JCS_RGB; + } + channels = 3; + break; + /* + * Libjpeg does not support converting from CMYK to grayscale etc. There + * is a way to do this but it involves converting it manually to RGB: + * https://github.com/tensorflow/tensorflow/blob/86871065265b04e0db8ca360c046421efb2bdeb4/tensorflow/core/lib/jpeg/jpeg_mem.cc#L284-L313 + */ + default: + jpeg_destroy_decompress(&cinfo); + TORCH_CHECK(false, "The provided mode is not supported for JPEG files"); + } + + jpeg_calc_output_dimensions(&cinfo); + } + + int exif_orientation = -1; + if (apply_exif_orientation) { + exif_orientation = fetch_jpeg_exif_orientation(&cinfo); + } + + jpeg_start_decompress(&cinfo); + + int height = cinfo.output_height; + int width = cinfo.output_width; + + int stride = width * channels; + auto tensor = + torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8); + auto ptr = tensor.data_ptr(); + torch::Tensor cmyk_line_tensor; + if (cmyk_to_rgb_or_gray) { + cmyk_line_tensor = torch::empty({int64_t(width), 4}, torch::kU8); + } + + while (cinfo.output_scanline < cinfo.output_height) { + /* jpeg_read_scanlines expects an array of pointers to scanlines. + * Here the array is only one element long, but you could ask for + * more than one scanline at a time if that's more convenient. 
+ */ + if (cmyk_to_rgb_or_gray) { + auto cmyk_line_ptr = cmyk_line_tensor.data_ptr(); + jpeg_read_scanlines(&cinfo, &cmyk_line_ptr, 1); + + if (channels == 3) { + convert_line_cmyk_to_rgb(&cinfo, cmyk_line_ptr, ptr); + } else if (channels == 1) { + convert_line_cmyk_to_gray(&cinfo, cmyk_line_ptr, ptr); + } + } else { + jpeg_read_scanlines(&cinfo, &ptr, 1); + } + ptr += stride; + } + + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + auto output = tensor.permute({2, 0, 1}); + + if (apply_exif_orientation) { + return exif_orientation_transform(output, exif_orientation); + } + return output; +} +#endif // #if !JPEG_FOUND + +int64_t _jpeg_version() { +#if JPEG_FOUND + return JPEG_LIB_VERSION; +#else + return -1; +#endif +} + +bool _is_compiled_against_turbo() { +#ifdef LIBJPEG_TURBO_VERSION + return true; +#else + return false; +#endif +} + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.h b/torchvision/csrc/io/image/cpu/decode_jpeg.h new file mode 100644 index 00000000000..7412a46d2ea --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include "../common.h" + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor decode_jpeg( + const torch::Tensor& data, + ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED, + bool apply_exif_orientation = false); + +C10_EXPORT int64_t _jpeg_version(); +C10_EXPORT bool _is_compiled_against_turbo(); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp new file mode 100644 index 00000000000..ede14c1e94a --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -0,0 +1,229 @@ +#include "decode_png.h" +#include "../common.h" +#include "common_png.h" +#include "exif.h" + +namespace vision { +namespace image { + +using namespace exif_private; + +#if !PNG_FOUND +torch::Tensor decode_png( + const 
torch::Tensor& data, + ImageReadMode mode, + bool apply_exif_orientation) { + TORCH_CHECK( + false, "decode_png: torchvision not compiled with libPNG support"); +} +#else + +bool is_little_endian() { + uint32_t x = 1; + return *(uint8_t*)&x; +} + +torch::Tensor decode_png( + const torch::Tensor& data, + ImageReadMode mode, + bool apply_exif_orientation) { + C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.image.cpu.decode_png.decode_png"); + + validate_encoded_data(data); + + auto png_ptr = + png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + TORCH_CHECK(png_ptr, "libpng read structure allocation failed!") + auto info_ptr = png_create_info_struct(png_ptr); + if (!info_ptr) { + png_destroy_read_struct(&png_ptr, nullptr, nullptr); + // Seems redundant with the if statement. done here to avoid leaking memory. + TORCH_CHECK(info_ptr, "libpng info structure allocation failed!") + } + + auto accessor = data.accessor(); + auto datap = accessor.data(); + auto datap_len = accessor.size(0); + + if (setjmp(png_jmpbuf(png_ptr)) != 0) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(false, "Internal error."); + } + TORCH_CHECK(datap_len >= 8, "Content is too small for png!") + auto is_png = !png_sig_cmp(datap, 0, 8); + TORCH_CHECK(is_png, "Content is not png!") + + struct Reader { + png_const_bytep ptr; + png_size_t count; + } reader; + reader.ptr = png_const_bytep(datap) + 8; + reader.count = datap_len - 8; + + auto read_callback = [](png_structp png_ptr, + png_bytep output, + png_size_t bytes) { + auto reader = static_cast(png_get_io_ptr(png_ptr)); + TORCH_CHECK( + reader->count >= bytes, + "Out of bound read in decode_png. 
Probably, the input image is corrupted"); + std::copy(reader->ptr, reader->ptr + bytes, output); + reader->ptr += bytes; + reader->count -= bytes; + }; + png_set_sig_bytes(png_ptr, 8); + png_set_read_fn(png_ptr, &reader, read_callback); + png_read_info(png_ptr, info_ptr); + + png_uint_32 width, height; + int bit_depth, color_type; + int interlace_type; + auto retval = png_get_IHDR( + png_ptr, + info_ptr, + &width, + &height, + &bit_depth, + &color_type, + &interlace_type, + nullptr, + nullptr); + + if (retval != 1) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(retval == 1, "Could read image metadata from content.") + } + + if (bit_depth > 8 && bit_depth != 16) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK( + false, + "bit depth of png image is " + std::to_string(bit_depth) + + ". Only <=8 and 16 are supported.") + } + + int channels = png_get_channels(png_ptr, info_ptr); + + if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) + png_set_expand_gray_1_2_4_to_8(png_ptr); + + int number_of_passes; + if (interlace_type == PNG_INTERLACE_ADAM7) { + number_of_passes = png_set_interlace_handling(png_ptr); + } else { + number_of_passes = 1; + } + + if (mode != IMAGE_READ_MODE_UNCHANGED) { + // TODO: consider supporting PNG_INFO_tRNS + bool is_palette = (color_type & PNG_COLOR_MASK_PALETTE) != 0; + bool has_color = (color_type & PNG_COLOR_MASK_COLOR) != 0; + bool has_alpha = (color_type & PNG_COLOR_MASK_ALPHA) != 0; + + switch (mode) { + case IMAGE_READ_MODE_GRAY: + if (color_type != PNG_COLOR_TYPE_GRAY) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } + + if (has_alpha) { + png_set_strip_alpha(png_ptr); + } + + if (has_color) { + png_set_rgb_to_gray(png_ptr, 1, 0.2989, 0.587); + } + channels = 1; + } + break; + case IMAGE_READ_MODE_GRAY_ALPHA: + if (color_type != PNG_COLOR_TYPE_GRAY_ALPHA) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } + + if 
(!has_alpha) { + png_set_add_alpha(png_ptr, (1 << bit_depth) - 1, PNG_FILLER_AFTER); + } + + if (has_color) { + png_set_rgb_to_gray(png_ptr, 1, 0.2989, 0.587); + } + channels = 2; + } + break; + case IMAGE_READ_MODE_RGB: + if (color_type != PNG_COLOR_TYPE_RGB) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } else if (!has_color) { + png_set_gray_to_rgb(png_ptr); + } + + if (has_alpha) { + png_set_strip_alpha(png_ptr); + } + channels = 3; + } + break; + case IMAGE_READ_MODE_RGB_ALPHA: + if (color_type != PNG_COLOR_TYPE_RGB_ALPHA) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } else if (!has_color) { + png_set_gray_to_rgb(png_ptr); + } + + if (!has_alpha) { + png_set_add_alpha(png_ptr, (1 << bit_depth) - 1, PNG_FILLER_AFTER); + } + channels = 4; + } + break; + default: + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(false, "The provided mode is not supported for PNG files"); + } + + png_read_update_info(png_ptr, info_ptr); + } + + auto num_pixels_per_row = width * channels; + auto is_16_bits = bit_depth == 16; + auto tensor = torch::empty( + {int64_t(height), int64_t(width), channels}, + is_16_bits ? at::kUInt16 : torch::kU8); + if (is_little_endian()) { + png_set_swap(png_ptr); + } + auto t_ptr = (uint8_t*)tensor.data_ptr(); + for (int pass = 0; pass < number_of_passes; pass++) { + for (png_uint_32 i = 0; i < height; ++i) { + png_read_row(png_ptr, t_ptr, nullptr); + t_ptr += num_pixels_per_row * (is_16_bits ? 
2 : 1); + } + t_ptr = (uint8_t*)tensor.data_ptr(); + } + + int exif_orientation = -1; + if (apply_exif_orientation) { + exif_orientation = fetch_png_exif_orientation(png_ptr, info_ptr); + } + + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + + auto output = tensor.permute({2, 0, 1}); + if (apply_exif_orientation) { + return exif_orientation_transform(output, exif_orientation); + } + return output; +} +#endif + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_png.h b/torchvision/csrc/io/image/cpu/decode_png.h new file mode 100644 index 00000000000..faaffa7ae49 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_png.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include "../common.h" + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor decode_png( + const torch::Tensor& data, + ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED, + bool apply_exif_orientation = false); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_webp.cpp b/torchvision/csrc/io/image/cpu/decode_webp.cpp new file mode 100644 index 00000000000..4c13c5c2b1a --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_webp.cpp @@ -0,0 +1,60 @@ +#include "decode_webp.h" +#include "../common.h" + +#if WEBP_FOUND +#include "webp/decode.h" +#include "webp/types.h" +#endif // WEBP_FOUND + +namespace vision { +namespace image { + +#if !WEBP_FOUND +torch::Tensor decode_webp( + const torch::Tensor& encoded_data, + ImageReadMode mode) { + TORCH_CHECK( + false, "decode_webp: torchvision not compiled with libwebp support"); +} +#else + +torch::Tensor decode_webp( + const torch::Tensor& encoded_data, + ImageReadMode mode) { + validate_encoded_data(encoded_data); + + auto encoded_data_p = encoded_data.data_ptr(); + auto encoded_data_size = encoded_data.numel(); + + WebPBitstreamFeatures features; + auto res = WebPGetFeatures(encoded_data_p, encoded_data_size, &features); + TORCH_CHECK( + res == 
VP8_STATUS_OK, "WebPGetFeatures failed with error code ", res); + TORCH_CHECK( + !features.has_animation, "Animated webp files are not supported."); + + auto return_rgb = + should_this_return_rgb_or_rgba_let_me_know_in_the_comments_down_below_guys_see_you_in_the_next_video( + mode, features.has_alpha); + + auto decoding_func = return_rgb ? WebPDecodeRGB : WebPDecodeRGBA; + auto num_channels = return_rgb ? 3 : 4; + + int width = 0; + int height = 0; + + auto decoded_data = + decoding_func(encoded_data_p, encoded_data_size, &width, &height); + + TORCH_CHECK(decoded_data != nullptr, "WebPDecodeRGB[A] failed."); + + auto deleter = [decoded_data](void*) { WebPFree(decoded_data); }; + auto out = torch::from_blob( + decoded_data, {height, width, num_channels}, deleter, torch::kUInt8); + + return out.permute({2, 0, 1}); +} +#endif // WEBP_FOUND + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_webp.h b/torchvision/csrc/io/image/cpu/decode_webp.h new file mode 100644 index 00000000000..d5c81547c42 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_webp.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include "../common.h" + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor decode_webp( + const torch::Tensor& encoded_data, + ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp new file mode 100644 index 00000000000..d2ed73071a2 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp @@ -0,0 +1,113 @@ +#include "encode_jpeg.h" + +#include "common_jpeg.h" + +namespace vision { +namespace image { + +#if !JPEG_FOUND + +torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { + TORCH_CHECK( + false, "encode_jpeg: torchvision not compiled with libjpeg support"); +} + +#else +// For libjpeg version <= 9b, the out_size parameter in 
jpeg_mem_dest() is +// defined as unsigned long, whereas in later version, it is defined as size_t. +#if !defined(JPEG_LIB_VERSION_MAJOR) || JPEG_LIB_VERSION_MAJOR < 9 || \ + (JPEG_LIB_VERSION_MAJOR == 9 && JPEG_LIB_VERSION_MINOR <= 2) +using JpegSizeType = unsigned long; +#else +using JpegSizeType = size_t; +#endif + +using namespace detail; + +torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { + C10_LOG_API_USAGE_ONCE( + "torchvision.csrc.io.image.cpu.encode_jpeg.encode_jpeg"); + // Define compression structures and error handling + struct jpeg_compress_struct cinfo {}; + struct torch_jpeg_error_mgr jerr {}; + + // Define buffer to write JPEG information to and its size + JpegSizeType jpegSize = 0; + uint8_t* jpegBuf = nullptr; + + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = torch_jpeg_error_exit; + + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. + * We need to clean up the JPEG object and the buffer. 
+ */ + jpeg_destroy_compress(&cinfo); + if (jpegBuf != nullptr) { + free(jpegBuf); + } + + TORCH_CHECK(false, (const char*)jerr.jpegLastErrorMsg); + } + + // Check that the input tensor is on CPU + TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + + // Check that the input tensor is 3-dimensional + TORCH_CHECK(data.dim() == 3, "Input data should be a 3-dimensional tensor"); + + // Get image info + int channels = data.size(0); + int height = data.size(1); + int width = data.size(2); + auto input = data.permute({1, 2, 0}).contiguous(); + + TORCH_CHECK( + channels == 1 || channels == 3, + "The number of channels should be 1 or 3, got: ", + channels); + + // Initialize JPEG structure + jpeg_create_compress(&cinfo); + + // Set output image information + cinfo.image_width = width; + cinfo.image_height = height; + cinfo.input_components = channels; + cinfo.in_color_space = channels == 1 ? 
JCS_GRAYSCALE : JCS_RGB; + + jpeg_set_defaults(&cinfo); + jpeg_set_quality(&cinfo, quality, TRUE); + + // Save JPEG output to a buffer + jpeg_mem_dest(&cinfo, &jpegBuf, &jpegSize); + + // Start JPEG compression + jpeg_start_compress(&cinfo, TRUE); + + auto stride = width * channels; + auto ptr = input.data_ptr(); + + // Encode JPEG file + while (cinfo.next_scanline < cinfo.image_height) { + jpeg_write_scanlines(&cinfo, &ptr, 1); + ptr += stride; + } + + jpeg_finish_compress(&cinfo); + jpeg_destroy_compress(&cinfo); + + torch::TensorOptions options = torch::TensorOptions{torch::kU8}; + auto out_tensor = + torch::from_blob(jpegBuf, {(long)jpegSize}, ::free, options); + jpegBuf = nullptr; + return out_tensor; +} +#endif + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.h b/torchvision/csrc/io/image/cpu/encode_jpeg.h new file mode 100644 index 00000000000..25084e154d6 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_jpeg.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor encode_jpeg( + const torch::Tensor& data, + int64_t quality); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp new file mode 100644 index 00000000000..5596d3a6789 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_png.cpp @@ -0,0 +1,180 @@ +#include "encode_jpeg.h" + +#include "common_png.h" + +namespace vision { +namespace image { + +#if !PNG_FOUND + +torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { + TORCH_CHECK( + false, "encode_png: torchvision not compiled with libpng support"); +} + +#else + +namespace { + +struct torch_mem_encode { + char* buffer; + size_t size; +}; + +struct torch_png_error_mgr { + const char* pngLastErrorMsg; /* error messages */ + jmp_buf setjmp_buffer; /* for return to caller */ +}; + +using torch_png_error_mgr_ptr 
= torch_png_error_mgr*; + +void torch_png_error(png_structp png_ptr, png_const_charp error_msg) { + /* png_ptr->err really points to a torch_png_error_mgr struct, so coerce + * pointer */ + auto error_ptr = (torch_png_error_mgr_ptr)png_get_error_ptr(png_ptr); + /* Replace the error message on the error structure */ + error_ptr->pngLastErrorMsg = error_msg; + /* Return control to the setjmp point */ + longjmp(error_ptr->setjmp_buffer, 1); +} + +void torch_png_write_data( + png_structp png_ptr, + png_bytep data, + png_size_t length) { + struct torch_mem_encode* p = + (struct torch_mem_encode*)png_get_io_ptr(png_ptr); + size_t nsize = p->size + length; + + /* Allocate or grow the buffer. realloc(nullptr, n) behaves like malloc(n). */ + /* Keep the old pointer until the new one is known to be valid, so that a */ + /* failed realloc neither leaks the old buffer nor hides it from the caller */ + /* (encode_png frees buf_info.buffer in its error path). */ + char* grown = (char*)realloc(p->buffer, nsize); + if (!grown) + png_error(png_ptr, "Write Error"); + p->buffer = grown; + + /* copy new bytes to end of buffer */ + memcpy(p->buffer + p->size, data, length); + p->size += length; +} + +} // namespace + +torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { + C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.image.cpu.encode_png.encode_png"); + // Define compression structures and error handling + png_structp png_write = nullptr; // null until created; the error path tests it + png_infop info_ptr = nullptr; // null until created; the error path tests it + struct torch_png_error_mgr err_ptr; + + // Define output buffer + struct torch_mem_encode buf_info; + buf_info.buffer = nullptr; + buf_info.size = 0; + + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(err_ptr.setjmp_buffer)) { + /* If we get here, the PNG code has signaled an error. + * We need to clean up the PNG object and the buffer. 
+ */ + if (info_ptr != nullptr) { + png_destroy_info_struct(png_write, &info_ptr); + } + + if (png_write != nullptr) { + png_destroy_write_struct(&png_write, nullptr); + } + + if (buf_info.buffer != nullptr) { + free(buf_info.buffer); + } + + TORCH_CHECK(false, err_ptr.pngLastErrorMsg); + } + + // Check that the compression level is between 0 and 9 + TORCH_CHECK( + compression_level >= 0 && compression_level <= 9, + "Compression level should be between 0 and 9"); + + // Check that the input tensor is on CPU + TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + + // Check that the input tensor is 3-dimensional + TORCH_CHECK(data.dim() == 3, "Input data should be a 3-dimensional tensor"); + + // Get image info + int channels = data.size(0); + int height = data.size(1); + int width = data.size(2); + auto input = data.permute({1, 2, 0}).contiguous(); + + TORCH_CHECK( + channels == 1 || channels == 3, + "The number of channels should be 1 or 3, got: ", + channels); + + // Initialize PNG structures + png_write = png_create_write_struct( + PNG_LIBPNG_VER_STRING, &err_ptr, torch_png_error, nullptr); + + info_ptr = png_create_info_struct(png_write); + + // Define custom buffer output + png_set_write_fn(png_write, &buf_info, torch_png_write_data, nullptr); + + // Set output image information + auto color_type = channels == 1 ? 
PNG_COLOR_TYPE_GRAY : PNG_COLOR_TYPE_RGB; + png_set_IHDR( + png_write, + info_ptr, + width, + height, + 8, + color_type, + PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); + + // Set image compression level + png_set_compression_level(png_write, compression_level); + + // Write file header + png_write_info(png_write, info_ptr); + + auto stride = width * channels; + auto ptr = input.data_ptr(); + + // Encode PNG file + for (int y = 0; y < height; ++y) { + png_write_row(png_write, ptr); + ptr += stride; + } + + // Write EOF + png_write_end(png_write, info_ptr); + + // Destroy structures + png_destroy_write_struct(&png_write, &info_ptr); + + torch::TensorOptions options = torch::TensorOptions{torch::kU8}; + auto outTensor = torch::empty({(long)buf_info.size}, options); + + // Copy memory from png buffer, since torch cannot get ownership of it via + // `from_blob` + auto outPtr = outTensor.data_ptr(); + std::memcpy(outPtr, buf_info.buffer, sizeof(uint8_t) * outTensor.numel()); + free(buf_info.buffer); + + return outTensor; +} + +#endif + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_png.h b/torchvision/csrc/io/image/cpu/encode_png.h new file mode 100644 index 00000000000..86a67c8706e --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_png.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor encode_png( + const torch::Tensor& data, + int64_t compression_level); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/exif.h b/torchvision/csrc/io/image/cpu/exif.h new file mode 100644 index 00000000000..7680737f8c0 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/exif.h @@ -0,0 +1,257 @@ +// @nolint (improperly imported third-party code) +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR 
USING. +// +// By downloading, copying, installing or using the software you agree to this +license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without +modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright +notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote +products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" +and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are +disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any +direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#pragma once +// Functions in this module are taken from OpenCV +// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp + +#if JPEG_FOUND +#include +#endif +#if PNG_FOUND +#include +#endif + +#include + +namespace vision { +namespace image { +namespace exif_private { + +constexpr uint16_t APP1 = 0xe1; +constexpr uint16_t ENDIANNESS_INTEL = 0x49; +constexpr uint16_t ENDIANNESS_MOTO = 0x4d; +constexpr uint16_t REQ_EXIF_TAG_MARK = 0x2a; +constexpr uint16_t ORIENTATION_EXIF_TAG = 0x0112; +constexpr uint16_t INCORRECT_TAG = -1; + +class ExifDataReader { + public: + ExifDataReader(unsigned char* p, size_t s) : _ptr(p), _size(s) {} + size_t size() const { + return _size; + } + const unsigned char& operator[](size_t index) const { + TORCH_CHECK(index >= 0 && index < _size); + return _ptr[index]; + } + + protected: + unsigned char* _ptr; + size_t _size; +}; + +inline uint16_t get_endianness(const ExifDataReader& exif_data) { + if ((exif_data.size() < 1) || + (exif_data.size() > 1 && exif_data[0] != exif_data[1])) { + return 0; + } + if (exif_data[0] == 'I') { + return ENDIANNESS_INTEL; + } + if (exif_data[0] == 'M') { + return ENDIANNESS_MOTO; + } + return 0; +} + +inline uint16_t get_uint16( + const ExifDataReader& exif_data, + uint16_t endianness, + const size_t offset) { + if (offset + 1 >= exif_data.size()) { + return INCORRECT_TAG; + } + + if (endianness == ENDIANNESS_INTEL) { + return exif_data[offset] + (exif_data[offset + 1] << 8); + } + return (exif_data[offset] << 8) + exif_data[offset + 1]; +} + +inline uint32_t get_uint32( + const ExifDataReader& exif_data, + uint16_t endianness, + const size_t offset) { + if (offset + 3 >= exif_data.size()) { + return INCORRECT_TAG; + } + + if (endianness == ENDIANNESS_INTEL) { + return exif_data[offset] + (exif_data[offset + 1] << 8) + + (exif_data[offset + 2] << 16) + (exif_data[offset + 3] << 24); + } + return (exif_data[offset] << 24) + (exif_data[offset 
+ 1] << 16) + + (exif_data[offset + 2] << 8) + exif_data[offset + 3]; +} + +inline int fetch_exif_orientation(unsigned char* exif_data_ptr, size_t size) { + int exif_orientation = -1; + + // Exif binary structure looks like this + // First 6 bytes: [E, x, i, f, 0, 0] + // Endianness, 2 bytes : [M, M] or [I, I] + // Tag mark, 2 bytes: [0, 0x2a] + // Offset, 4 bytes + // Num entries, 2 bytes + // Tag entries and data, tag has 2 bytes and its data has 10 bytes + // For more details: + // http://www.media.mit.edu/pia/Research/deepview/exif.html + + ExifDataReader exif_data(exif_data_ptr, size); + auto endianness = get_endianness(exif_data); + + // Checking whether Tag Mark (0x002A) correspond to one contained in the + // Jpeg file + uint16_t tag_mark = get_uint16(exif_data, endianness, 2); + if (tag_mark == REQ_EXIF_TAG_MARK) { + auto offset = get_uint32(exif_data, endianness, 4); + size_t num_entry = get_uint16(exif_data, endianness, offset); + offset += 2; // go to start of tag fields + constexpr size_t tiff_field_size = 12; + for (size_t entry = 0; entry < num_entry; entry++) { + // Here we just search for orientation tag and parse it + auto tag_num = get_uint16(exif_data, endianness, offset); + if (tag_num == INCORRECT_TAG) { + break; + } + if (tag_num == ORIENTATION_EXIF_TAG) { + exif_orientation = get_uint16(exif_data, endianness, offset + 8); + break; + } + offset += tiff_field_size; + } + } + return exif_orientation; +} + +#if JPEG_FOUND +inline int fetch_jpeg_exif_orientation(j_decompress_ptr cinfo) { + // Check for Exif marker APP1 + jpeg_saved_marker_ptr exif_marker = 0; + jpeg_saved_marker_ptr cmarker = cinfo->marker_list; + while (cmarker && exif_marker == 0) { + if (cmarker->marker == APP1) { + exif_marker = cmarker; + } + cmarker = cmarker->next; + } + + if (!exif_marker) { + return -1; + } + + constexpr size_t start_offset = 6; + if (exif_marker->data_length <= start_offset) { + return -1; + } + + auto* exif_data_ptr = exif_marker->data + start_offset; 
+ auto size = exif_marker->data_length - start_offset; + + return fetch_exif_orientation(exif_data_ptr, size); +} +#endif // #if JPEG_FOUND + +#if PNG_FOUND && defined(PNG_eXIf_SUPPORTED) +inline int fetch_png_exif_orientation(png_structp png_ptr, png_infop info_ptr) { + png_uint_32 num_exif = 0; + png_bytep exif = 0; + + // Exif info could be in info_ptr + if (png_get_valid(png_ptr, info_ptr, PNG_INFO_eXIf)) { + png_get_eXIf_1(png_ptr, info_ptr, &num_exif, &exif); + } + + if (exif && num_exif > 0) { + return fetch_exif_orientation(exif, num_exif); + } + return -1; +} +#endif // #if PNG_FOUND && defined(PNG_eXIf_SUPPORTED) + +constexpr uint16_t IMAGE_ORIENTATION_TL = 1; // normal orientation +constexpr uint16_t IMAGE_ORIENTATION_TR = 2; // needs horizontal flip +constexpr uint16_t IMAGE_ORIENTATION_BR = 3; // needs 180 rotation +constexpr uint16_t IMAGE_ORIENTATION_BL = 4; // needs vertical flip +constexpr uint16_t IMAGE_ORIENTATION_LT = + 5; // mirrored horizontal & rotate 270 CW +constexpr uint16_t IMAGE_ORIENTATION_RT = 6; // rotate 90 CW +constexpr uint16_t IMAGE_ORIENTATION_RB = + 7; // mirrored horizontal & rotate 90 CW +constexpr uint16_t IMAGE_ORIENTATION_LB = 8; // needs 270 CW rotation + +inline torch::Tensor exif_orientation_transform( + const torch::Tensor& image, + int orientation) { + if (orientation == IMAGE_ORIENTATION_TL) { + return image; + } else if (orientation == IMAGE_ORIENTATION_TR) { + return image.flip(-1); + } else if (orientation == IMAGE_ORIENTATION_BR) { + // needs 180 rotation equivalent to + // flip both horizontally and vertically + return image.flip({-2, -1}); + } else if (orientation == IMAGE_ORIENTATION_BL) { + return image.flip(-2); + } else if (orientation == IMAGE_ORIENTATION_LT) { + return image.transpose(-1, -2); + } else if (orientation == IMAGE_ORIENTATION_RT) { + return image.transpose(-1, -2).flip(-1); + } else if (orientation == IMAGE_ORIENTATION_RB) { + return image.transpose(-1, -2).flip({-2, -1}); + } else if 
(orientation == IMAGE_ORIENTATION_LB) { + return image.transpose(-1, -2).flip(-2); + } + return image; +} + +} // namespace exif_private +} // namespace image +} // namespace vision diff --git a/travis-scripts/run-clang-format/LICENSE b/torchvision/csrc/io/image/cpu/giflib/README similarity index 59% rename from travis-scripts/run-clang-format/LICENSE rename to torchvision/csrc/io/image/cpu/giflib/README index e728f248889..7353453e32e 100644 --- a/travis-scripts/run-clang-format/LICENSE +++ b/torchvision/csrc/io/image/cpu/giflib/README @@ -1,6 +1,13 @@ -MIT License +These files come from the GIFLIB project (https://giflib.sourceforge.net/) and +are licensed under the MIT license. -Copyright (c) 2017 Guillaume Papin +Some modifications have been made to the original files: +- Remove use of "register" keyword in gifalloc.c for C++17 compatibility. +- Declare loop variable i in DGifGetImageHeader as int instead of unsigned int. + +Below is the original license text from the COPYING file of the GIFLIB project: + += MIT LICENSE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -9,13 +16,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/torchvision/csrc/io/image/cpu/giflib/dgif_lib.c b/torchvision/csrc/io/image/cpu/giflib/dgif_lib.c new file mode 100644 index 00000000000..7d35fff87ee --- /dev/null +++ b/torchvision/csrc/io/image/cpu/giflib/dgif_lib.c @@ -0,0 +1,1313 @@ +// @nolint (improperly imported third-party code) +/****************************************************************************** + +dgif_lib.c - GIF decoding + +The functions here and in egif_lib.c are partitioned carefully so that +if you only require one of read and write capability, only one of these +two modules will be linked. Preserve this property! + +*****************************************************************************/ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright (C) Eric S. Raymond + +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif /* _WIN32 */ + +#include "gif_lib.h" +#include "gif_lib_private.h" + +/* compose unsigned little endian value */ +#define UNSIGNED_LITTLE_ENDIAN(lo, hi) ((lo) | ((hi) << 8)) + +/* avoid extra function call in case we use fread (TVT) */ +static int InternalRead(GifFileType *gif, GifByteType *buf, int len) { + // fprintf(stderr, "### Read: %d\n", len); + return (((GifFilePrivateType *)gif->Private)->Read + ? 
((GifFilePrivateType *)gif->Private)->Read(gif, buf, len) + : fread(buf, 1, len, + ((GifFilePrivateType *)gif->Private)->File)); +} + +static int DGifGetWord(GifFileType *GifFile, GifWord *Word); +static int DGifSetupDecompress(GifFileType *GifFile); +static int DGifDecompressLine(GifFileType *GifFile, GifPixelType *Line, + int LineLen); +static int DGifGetPrefixChar(const GifPrefixType *Prefix, int Code, + int ClearCode); +static int DGifDecompressInput(GifFileType *GifFile, int *Code); +static int DGifBufferedInput(GifFileType *GifFile, GifByteType *Buf, + GifByteType *NextByte); + +/****************************************************************************** + Open a new GIF file for read, given by its name. + Returns dynamically allocated GifFileType pointer which serves as the GIF + info record. +******************************************************************************/ +GifFileType *DGifOpenFileName(const char *FileName, int *Error) { + int FileHandle; + GifFileType *GifFile; + + if ((FileHandle = open(FileName, O_RDONLY)) == -1) { + if (Error != NULL) { + *Error = D_GIF_ERR_OPEN_FAILED; + } + return NULL; + } + + GifFile = DGifOpenFileHandle(FileHandle, Error); + return GifFile; +} + +/****************************************************************************** + Update a new GIF file, given its file handle. + Returns dynamically allocated GifFileType pointer which serves as the GIF + info record. 
+******************************************************************************/ +GifFileType *DGifOpenFileHandle(int FileHandle, int *Error) { + char Buf[GIF_STAMP_LEN + 1]; + GifFileType *GifFile; + GifFilePrivateType *Private; + FILE *f; + + GifFile = (GifFileType *)malloc(sizeof(GifFileType)); + if (GifFile == NULL) { + if (Error != NULL) { + *Error = D_GIF_ERR_NOT_ENOUGH_MEM; + } + (void)close(FileHandle); + return NULL; + } + + /*@i1@*/ memset(GifFile, '\0', sizeof(GifFileType)); + + /* Belt and suspenders, in case the null pointer isn't zero */ + GifFile->SavedImages = NULL; + GifFile->SColorMap = NULL; + + Private = (GifFilePrivateType *)calloc(1, sizeof(GifFilePrivateType)); + if (Private == NULL) { + if (Error != NULL) { + *Error = D_GIF_ERR_NOT_ENOUGH_MEM; + } + (void)close(FileHandle); + free((char *)GifFile); + return NULL; + } + + /*@i1@*/ memset(Private, '\0', sizeof(GifFilePrivateType)); + +#ifdef _WIN32 + _setmode(FileHandle, O_BINARY); /* Make sure it is in binary mode. 
*/ +#endif /* _WIN32 */ + + f = fdopen(FileHandle, "rb"); /* Make it into a stream: */ + + /*@-mustfreeonly@*/ + GifFile->Private = (void *)Private; + Private->FileHandle = FileHandle; + Private->File = f; + Private->FileState = FILE_STATE_READ; + Private->Read = NULL; /* don't use alternate input method (TVT) */ + GifFile->UserData = NULL; /* TVT */ + /*@=mustfreeonly@*/ + + /* Let's see if this is a GIF file: */ + /* coverity[check_return] */ + if (InternalRead(GifFile, (unsigned char *)Buf, GIF_STAMP_LEN) != + GIF_STAMP_LEN) { + if (Error != NULL) { + *Error = D_GIF_ERR_READ_FAILED; + } + (void)fclose(f); + free((char *)Private); + free((char *)GifFile); + return NULL; + } + + /* Check for GIF prefix at start of file */ + Buf[GIF_STAMP_LEN] = 0; + if (strncmp(GIF_STAMP, Buf, GIF_VERSION_POS) != 0) { + if (Error != NULL) { + *Error = D_GIF_ERR_NOT_GIF_FILE; + } + (void)fclose(f); + free((char *)Private); + free((char *)GifFile); + return NULL; + } + + if (DGifGetScreenDesc(GifFile) == GIF_ERROR) { + (void)fclose(f); + free((char *)Private); + free((char *)GifFile); + return NULL; + } + + GifFile->Error = 0; + + /* What version of GIF? 
*/ + Private->gif89 = (Buf[GIF_VERSION_POS + 1] == '9'); + + return GifFile; +} + +/****************************************************************************** + GifFileType constructor with user supplied input function (TVT) +******************************************************************************/ +GifFileType *DGifOpen(void *userData, InputFunc readFunc, int *Error) { + char Buf[GIF_STAMP_LEN + 1]; + GifFileType *GifFile; + GifFilePrivateType *Private; + + GifFile = (GifFileType *)malloc(sizeof(GifFileType)); + if (GifFile == NULL) { + if (Error != NULL) { + *Error = D_GIF_ERR_NOT_ENOUGH_MEM; + } + return NULL; + } + + memset(GifFile, '\0', sizeof(GifFileType)); + + /* Belt and suspenders, in case the null pointer isn't zero */ + GifFile->SavedImages = NULL; + GifFile->SColorMap = NULL; + + Private = (GifFilePrivateType *)calloc(1, sizeof(GifFilePrivateType)); + if (!Private) { + if (Error != NULL) { + *Error = D_GIF_ERR_NOT_ENOUGH_MEM; + } + free((char *)GifFile); + return NULL; + } + /*@i1@*/ memset(Private, '\0', sizeof(GifFilePrivateType)); + + GifFile->Private = (void *)Private; + Private->FileHandle = 0; + Private->File = NULL; + Private->FileState = FILE_STATE_READ; + + Private->Read = readFunc; /* TVT */ + GifFile->UserData = userData; /* TVT */ + + /* Lets see if this is a GIF file: */ + /* coverity[check_return] */ + if (InternalRead(GifFile, (unsigned char *)Buf, GIF_STAMP_LEN) != + GIF_STAMP_LEN) { + if (Error != NULL) { + *Error = D_GIF_ERR_READ_FAILED; + } + free((char *)Private); + free((char *)GifFile); + return NULL; + } + + /* Check for GIF prefix at start of file */ + Buf[GIF_STAMP_LEN] = '\0'; + if (strncmp(GIF_STAMP, Buf, GIF_VERSION_POS) != 0) { + if (Error != NULL) { + *Error = D_GIF_ERR_NOT_GIF_FILE; + } + free((char *)Private); + free((char *)GifFile); + return NULL; + } + + if (DGifGetScreenDesc(GifFile) == GIF_ERROR) { + free((char *)Private); + free((char *)GifFile); + if (Error != NULL) { + *Error = D_GIF_ERR_NO_SCRN_DSCR; + 
} + return NULL; + } + + GifFile->Error = 0; + + /* What version of GIF? */ + Private->gif89 = (Buf[GIF_VERSION_POS + 1] == '9'); + + return GifFile; +} + +/****************************************************************************** + This routine should be called before any other DGif calls. Note that + this routine is called automatically from DGif file open routines. +******************************************************************************/ +int DGifGetScreenDesc(GifFileType *GifFile) { + int BitsPerPixel; + bool SortFlag; + GifByteType Buf[3]; + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + GifFile->Error = D_GIF_ERR_NOT_READABLE; + return GIF_ERROR; + } + + /* Put the screen descriptor into the file: */ + if (DGifGetWord(GifFile, &GifFile->SWidth) == GIF_ERROR || + DGifGetWord(GifFile, &GifFile->SHeight) == GIF_ERROR) { + return GIF_ERROR; + } + + if (InternalRead(GifFile, Buf, 3) != 3) { + GifFile->Error = D_GIF_ERR_READ_FAILED; + GifFreeMapObject(GifFile->SColorMap); + GifFile->SColorMap = NULL; + return GIF_ERROR; + } + GifFile->SColorResolution = (((Buf[0] & 0x70) + 1) >> 4) + 1; + SortFlag = (Buf[0] & 0x08) != 0; + BitsPerPixel = (Buf[0] & 0x07) + 1; + GifFile->SBackGroundColor = Buf[1]; + GifFile->AspectByte = Buf[2]; + if (Buf[0] & 0x80) { /* Do we have global color map? 
*/ + int i; + + GifFile->SColorMap = GifMakeMapObject(1 << BitsPerPixel, NULL); + if (GifFile->SColorMap == NULL) { + GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM; + return GIF_ERROR; + } + + /* Get the global color map: */ + GifFile->SColorMap->SortFlag = SortFlag; + for (i = 0; i < GifFile->SColorMap->ColorCount; i++) { + /* coverity[check_return] */ + if (InternalRead(GifFile, Buf, 3) != 3) { + GifFreeMapObject(GifFile->SColorMap); + GifFile->SColorMap = NULL; + GifFile->Error = D_GIF_ERR_READ_FAILED; + return GIF_ERROR; + } + GifFile->SColorMap->Colors[i].Red = Buf[0]; + GifFile->SColorMap->Colors[i].Green = Buf[1]; + GifFile->SColorMap->Colors[i].Blue = Buf[2]; + } + } else { + GifFile->SColorMap = NULL; + } + + /* + * No check here for whether the background color is in range for the + * screen color map. Possibly there should be. + */ + + return GIF_OK; +} + +const char *DGifGetGifVersion(GifFileType *GifFile) { + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + if (Private->gif89) { + return GIF89_STAMP; + } else { + return GIF87_STAMP; + } +} + +/****************************************************************************** + This routine should be called before any attempt to read an image. 
+******************************************************************************/ +int DGifGetRecordType(GifFileType *GifFile, GifRecordType *Type) { + GifByteType Buf; + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + GifFile->Error = D_GIF_ERR_NOT_READABLE; + return GIF_ERROR; + } + + /* coverity[check_return] */ + if (InternalRead(GifFile, &Buf, 1) != 1) { + GifFile->Error = D_GIF_ERR_READ_FAILED; + return GIF_ERROR; + } + + // fprintf(stderr, "### DGifGetRecordType: %02x\n", Buf); + switch (Buf) { + case DESCRIPTOR_INTRODUCER: + *Type = IMAGE_DESC_RECORD_TYPE; + break; + case EXTENSION_INTRODUCER: + *Type = EXTENSION_RECORD_TYPE; + break; + case TERMINATOR_INTRODUCER: + *Type = TERMINATE_RECORD_TYPE; + break; + default: + *Type = UNDEFINED_RECORD_TYPE; + GifFile->Error = D_GIF_ERR_WRONG_RECORD; + return GIF_ERROR; + } + + return GIF_OK; +} + +int DGifGetImageHeader(GifFileType *GifFile) { + unsigned int BitsPerPixel; + GifByteType Buf[3]; + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + GifFile->Error = D_GIF_ERR_NOT_READABLE; + return GIF_ERROR; + } + + if (DGifGetWord(GifFile, &GifFile->Image.Left) == GIF_ERROR || + DGifGetWord(GifFile, &GifFile->Image.Top) == GIF_ERROR || + DGifGetWord(GifFile, &GifFile->Image.Width) == GIF_ERROR || + DGifGetWord(GifFile, &GifFile->Image.Height) == GIF_ERROR) { + return GIF_ERROR; + } + if (InternalRead(GifFile, Buf, 1) != 1) { + GifFile->Error = D_GIF_ERR_READ_FAILED; + GifFreeMapObject(GifFile->Image.ColorMap); + GifFile->Image.ColorMap = NULL; + return GIF_ERROR; + } + BitsPerPixel = (Buf[0] & 0x07) + 1; + GifFile->Image.Interlace = (Buf[0] & 0x40) ? 
true : false; + + /* Setup the colormap */ + if (GifFile->Image.ColorMap) { + GifFreeMapObject(GifFile->Image.ColorMap); + GifFile->Image.ColorMap = NULL; + } + /* Does this image have local color map? */ + if (Buf[0] & 0x80) { + int i; + + GifFile->Image.ColorMap = + GifMakeMapObject(1 << BitsPerPixel, NULL); + if (GifFile->Image.ColorMap == NULL) { + GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM; + return GIF_ERROR; + } + + /* Get the image local color map: */ + for (i = 0; i < GifFile->Image.ColorMap->ColorCount; i++) { + /* coverity[check_return] */ + if (InternalRead(GifFile, Buf, 3) != 3) { + GifFreeMapObject(GifFile->Image.ColorMap); + GifFile->Error = D_GIF_ERR_READ_FAILED; + GifFile->Image.ColorMap = NULL; + return GIF_ERROR; + } + GifFile->Image.ColorMap->Colors[i].Red = Buf[0]; + GifFile->Image.ColorMap->Colors[i].Green = Buf[1]; + GifFile->Image.ColorMap->Colors[i].Blue = Buf[2]; + } + } + + Private->PixelCount = + (long)GifFile->Image.Width * (long)GifFile->Image.Height; + + /* Reset decompress algorithm parameters. */ + return DGifSetupDecompress(GifFile); +} + +/****************************************************************************** + This routine should be called before any attempt to read an image. + Note it is assumed the Image desc. header has been read. 
+******************************************************************************/
+/*
+ * Reads the next image descriptor through DGifGetImageHeader() and appends a
+ * snapshot of GifFile->Image to GifFile->SavedImages, enlarging that array by
+ * one entry.  When the current image carries a color map, the new entry gets
+ * its own map object built by GifMakeMapObject() from the current map's
+ * ColorCount and Colors.  The new entry starts with no raster bits and no
+ * extension blocks.
+ *
+ * Returns GIF_OK after incrementing GifFile->ImageCount.  Returns GIF_ERROR
+ * when the file is not readable, when DGifGetImageHeader() fails, or when an
+ * allocation fails; the not-readable and allocation cases store a code in
+ * GifFile->Error here.
+ */
+int DGifGetImageDesc(GifFileType *GifFile) {
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+    SavedImage *Grown;
+    SavedImage *Slot;
+
+    if (!IS_READABLE(Private)) {
+        /* The handle was not opened for reading. */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (DGifGetImageHeader(GifFile) == GIF_ERROR) {
+        return GIF_ERROR;
+    }
+
+    /* First image: allocate a single entry.  Later images: grow by one. */
+    if (GifFile->SavedImages == NULL) {
+        Grown = (SavedImage *)malloc(sizeof(SavedImage));
+    } else {
+        Grown = (SavedImage *)reallocarray(GifFile->SavedImages,
+                                           (GifFile->ImageCount + 1),
+                                           sizeof(SavedImage));
+    }
+    if (Grown == NULL) {
+        /* On failure the existing array pointer is left untouched. */
+        GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+        return GIF_ERROR;
+    }
+    GifFile->SavedImages = Grown;
+
+    Slot = &GifFile->SavedImages[GifFile->ImageCount];
+    memcpy(&Slot->ImageDesc, &GifFile->Image, sizeof(GifImageDesc));
+
+    /* The memcpy above copied the ColorMap pointer; replace it with a
+     * separately created map object. */
+    if (GifFile->Image.ColorMap != NULL) {
+        Slot->ImageDesc.ColorMap =
+            GifMakeMapObject(GifFile->Image.ColorMap->ColorCount,
+                             GifFile->Image.ColorMap->Colors);
+        if (Slot->ImageDesc.ColorMap == NULL) {
+            GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+            return GIF_ERROR;
+        }
+    }
+
+    Slot->RasterBits = (unsigned char *)NULL;
+    Slot->ExtensionBlockCount = 0;
+    Slot->ExtensionBlocks = (ExtensionBlock *)NULL;
+
+    GifFile->ImageCount++;
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Get one full scanned line (Line) of length LineLen from GIF file.
+******************************************************************************/ +int DGifGetLine(GifFileType *GifFile, GifPixelType *Line, int LineLen) { + GifByteType *Dummy; + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + GifFile->Error = D_GIF_ERR_NOT_READABLE; + return GIF_ERROR; + } + + if (!LineLen) { + LineLen = GifFile->Image.Width; + } + + if ((Private->PixelCount -= LineLen) > 0xffff0000UL) { + GifFile->Error = D_GIF_ERR_DATA_TOO_BIG; + return GIF_ERROR; + } + + if (DGifDecompressLine(GifFile, Line, LineLen) == GIF_OK) { + if (Private->PixelCount == 0) { + /* We probably won't be called any more, so let's clean + * up everything before we return: need to flush out all + * the rest of image until an empty block (size 0) + * detected. We use GetCodeNext. + */ + do { + if (DGifGetCodeNext(GifFile, &Dummy) == + GIF_ERROR) { + return GIF_ERROR; + } + } while (Dummy != NULL); + } + return GIF_OK; + } else { + return GIF_ERROR; + } +} + +/****************************************************************************** + Put one pixel (Pixel) into GIF file. +******************************************************************************/ +int DGifGetPixel(GifFileType *GifFile, GifPixelType Pixel) { + GifByteType *Dummy; + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + GifFile->Error = D_GIF_ERR_NOT_READABLE; + return GIF_ERROR; + } + if (--Private->PixelCount > 0xffff0000UL) { + GifFile->Error = D_GIF_ERR_DATA_TOO_BIG; + return GIF_ERROR; + } + + if (DGifDecompressLine(GifFile, &Pixel, 1) == GIF_OK) { + if (Private->PixelCount == 0) { + /* We probably won't be called any more, so let's clean + * up everything before we return: need to flush out all + * the rest of image until an empty block (size 0) + * detected. We use GetCodeNext. 
+ */ + do { + if (DGifGetCodeNext(GifFile, &Dummy) == + GIF_ERROR) { + return GIF_ERROR; + } + } while (Dummy != NULL); + } + return GIF_OK; + } else { + return GIF_ERROR; + } +} + +/****************************************************************************** + Get an extension block (see GIF manual) from GIF file. This routine only + returns the first data block, and DGifGetExtensionNext should be called + after this one until NULL extension is returned. + The Extension should NOT be freed by the user (not dynamically allocated). + Note it is assumed the Extension description header has been read. +******************************************************************************/ +int DGifGetExtension(GifFileType *GifFile, int *ExtCode, + GifByteType **Extension) { + GifByteType Buf; + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + // fprintf(stderr, "### -> DGifGetExtension:\n"); + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + GifFile->Error = D_GIF_ERR_NOT_READABLE; + return GIF_ERROR; + } + + /* coverity[check_return] */ + if (InternalRead(GifFile, &Buf, 1) != 1) { + GifFile->Error = D_GIF_ERR_READ_FAILED; + return GIF_ERROR; + } + *ExtCode = Buf; + // fprintf(stderr, "### <- DGifGetExtension: %02x, about to call + // next\n", Buf); + + return DGifGetExtensionNext(GifFile, Extension); +} + +/****************************************************************************** + Get a following extension block (see GIF manual) from GIF file. This + routine should be called until NULL Extension is returned. + The Extension should NOT be freed by the user (not dynamically allocated). 
+******************************************************************************/ +int DGifGetExtensionNext(GifFileType *GifFile, GifByteType **Extension) { + GifByteType Buf; + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + // fprintf(stderr, "### -> DGifGetExtensionNext\n"); + if (InternalRead(GifFile, &Buf, 1) != 1) { + GifFile->Error = D_GIF_ERR_READ_FAILED; + return GIF_ERROR; + } + // fprintf(stderr, "### DGifGetExtensionNext sees %d\n", Buf); + + if (Buf > 0) { + *Extension = Private->Buf; /* Use private unused buffer. */ + (*Extension)[0] = + Buf; /* Pascal strings notation (pos. 0 is len.). */ + /* coverity[tainted_data,check_return] */ + if (InternalRead(GifFile, &((*Extension)[1]), Buf) != Buf) { + GifFile->Error = D_GIF_ERR_READ_FAILED; + return GIF_ERROR; + } + } else { + *Extension = NULL; + } + // fprintf(stderr, "### <- DGifGetExtensionNext: %p\n", Extension); + + return GIF_OK; +} + +/****************************************************************************** + Extract a Graphics Control Block from raw extension data +******************************************************************************/ + +int DGifExtensionToGCB(const size_t GifExtensionLength, + const GifByteType *GifExtension, + GraphicsControlBlock *GCB) { + if (GifExtensionLength != 4) { + return GIF_ERROR; + } + + GCB->DisposalMode = (GifExtension[0] >> 2) & 0x07; + GCB->UserInputFlag = (GifExtension[0] & 0x02) != 0; + GCB->DelayTime = + UNSIGNED_LITTLE_ENDIAN(GifExtension[1], GifExtension[2]); + if (GifExtension[0] & 0x01) { + GCB->TransparentColor = (int)GifExtension[3]; + } else { + GCB->TransparentColor = NO_TRANSPARENT_COLOR; + } + + return GIF_OK; +} + +/****************************************************************************** + Extract the Graphics Control Block for a saved image, if it exists. 
+******************************************************************************/ + +int DGifSavedExtensionToGCB(GifFileType *GifFile, int ImageIndex, + GraphicsControlBlock *GCB) { + int i; + + if (ImageIndex < 0 || ImageIndex > GifFile->ImageCount - 1) { + return GIF_ERROR; + } + + GCB->DisposalMode = DISPOSAL_UNSPECIFIED; + GCB->UserInputFlag = false; + GCB->DelayTime = 0; + GCB->TransparentColor = NO_TRANSPARENT_COLOR; + + for (i = 0; i < GifFile->SavedImages[ImageIndex].ExtensionBlockCount; + i++) { + ExtensionBlock *ep = + &GifFile->SavedImages[ImageIndex].ExtensionBlocks[i]; + if (ep->Function == GRAPHICS_EXT_FUNC_CODE) { + return DGifExtensionToGCB(ep->ByteCount, ep->Bytes, + GCB); + } + } + + return GIF_ERROR; +} + +/****************************************************************************** + This routine should be called last, to close the GIF file. +******************************************************************************/ +int DGifCloseFile(GifFileType *GifFile, int *ErrorCode) { + GifFilePrivateType *Private; + + if (GifFile == NULL || GifFile->Private == NULL) { + return GIF_ERROR; + } + + if (GifFile->Image.ColorMap) { + GifFreeMapObject(GifFile->Image.ColorMap); + GifFile->Image.ColorMap = NULL; + } + + if (GifFile->SColorMap) { + GifFreeMapObject(GifFile->SColorMap); + GifFile->SColorMap = NULL; + } + + if (GifFile->SavedImages) { + GifFreeSavedImages(GifFile); + GifFile->SavedImages = NULL; + } + + GifFreeExtensions(&GifFile->ExtensionBlockCount, + &GifFile->ExtensionBlocks); + + Private = (GifFilePrivateType *)GifFile->Private; + + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + if (ErrorCode != NULL) { + *ErrorCode = D_GIF_ERR_NOT_READABLE; + } + free((char *)GifFile->Private); + free(GifFile); + return GIF_ERROR; + } + + if (Private->File && (fclose(Private->File) != 0)) { + if (ErrorCode != NULL) { + *ErrorCode = D_GIF_ERR_CLOSE_FAILED; + } + free((char *)GifFile->Private); + free(GifFile); + return 
GIF_ERROR; + } + + free((char *)GifFile->Private); + free(GifFile); + if (ErrorCode != NULL) { + *ErrorCode = D_GIF_SUCCEEDED; + } + return GIF_OK; +} + +/****************************************************************************** + Get 2 bytes (word) from the given file: +******************************************************************************/ +static int DGifGetWord(GifFileType *GifFile, GifWord *Word) { + unsigned char c[2]; + + /* coverity[check_return] */ + if (InternalRead(GifFile, c, 2) != 2) { + GifFile->Error = D_GIF_ERR_READ_FAILED; + return GIF_ERROR; + } + + *Word = (GifWord)UNSIGNED_LITTLE_ENDIAN(c[0], c[1]); + return GIF_OK; +} + +/****************************************************************************** + Get the image code in compressed form. This routine can be called if the + information needed to be piped out as is. Obviously this is much faster + than decoding and encoding again. This routine should be followed by calls + to DGifGetCodeNext, until NULL block is returned. + The block should NOT be freed by the user (not dynamically allocated). +******************************************************************************/ +int DGifGetCode(GifFileType *GifFile, int *CodeSize, GifByteType **CodeBlock) { + GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private; + + if (!IS_READABLE(Private)) { + /* This file was NOT open for reading: */ + GifFile->Error = D_GIF_ERR_NOT_READABLE; + return GIF_ERROR; + } + + *CodeSize = Private->BitsPerPixel; + + return DGifGetCodeNext(GifFile, CodeBlock); +} + +/****************************************************************************** + Continue to get the image code in compressed form. This routine should be + called until NULL block is returned. + The block should NOT be freed by the user (not dynamically allocated). 
+******************************************************************************/
+int DGifGetCodeNext(GifFileType *GifFile, GifByteType **CodeBlock) {
+    GifByteType Buf;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    /* Read the one-byte length that introduces the next data block. */
+    /* coverity[tainted_data_argument] */
+    /* coverity[check_return] */
+    if (InternalRead(GifFile, &Buf, 1) != 1) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+        return GIF_ERROR;
+    }
+
+    /* coverity[lower_bounds] */
+    if (Buf > 0) {
+        *CodeBlock = Private->Buf; /* Use private unused buffer. */
+        (*CodeBlock)[0] = Buf; /* Pascal strings notation (pos. 0 is len.). */
+        /* coverity[tainted_data] */
+        if (InternalRead(GifFile, &((*CodeBlock)[1]), Buf) != Buf) {
+            GifFile->Error = D_GIF_ERR_READ_FAILED;
+            return GIF_ERROR;
+        }
+    } else {
+        /* Zero-length block: report NULL and mark the image as consumed. */
+        *CodeBlock = NULL;
+        Private->Buf[0] = 0;     /* Make sure the buffer is empty! */
+        Private->PixelCount = 0; /* And local info. indicate image read. */
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Setup the LZ decompression for this image:
+ reads the one-byte code size from the input, rejects values above 8, and
+ resets the decoder state kept in the private structure (code widths,
+ clear/EOF codes, pixel stack, bit accumulator and the prefix table).
+ Returns GIF_OK, or GIF_ERROR with GifFile->Error set.
+******************************************************************************/
+static int DGifSetupDecompress(GifFileType *GifFile) {
+    int i, BitsPerPixel;
+    GifByteType CodeSize;
+    GifPrefixType *Prefix;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    /* coverity[check_return] */
+    if (InternalRead(GifFile, &CodeSize, 1) < 1) { /* Read Code size from file. */
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+        return GIF_ERROR; /* Failed to read Code size. */
+    }
+    BitsPerPixel = CodeSize;
+
+    /* this can only happen on a severely malformed GIF */
+    if (BitsPerPixel > 8) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED; /* somewhat bogus error code */
+        return GIF_ERROR; /* Failed to read Code size. */
+    }
+
+    Private->Buf[0] = 0; /* Input Buffer empty. */
+    Private->BitsPerPixel = BitsPerPixel;
+    Private->ClearCode = (1 << BitsPerPixel);
+    Private->EOFCode = Private->ClearCode + 1;
+    Private->RunningCode = Private->EOFCode + 1;
+    Private->RunningBits = BitsPerPixel + 1; /* Number of bits per code. */
+    Private->MaxCode1 = 1 << Private->RunningBits; /* Max. code + 1. */
+    Private->StackPtr = 0; /* No pixels on the pixel stack. */
+    Private->LastCode = NO_SUCH_CODE;
+    Private->CrntShiftState = 0; /* No information in CrntShiftDWord. */
+    Private->CrntShiftDWord = 0;
+
+    /* Mark every prefix table entry as unused. */
+    Prefix = Private->Prefix;
+    for (i = 0; i <= LZ_MAX_CODE; i++) {
+        Prefix[i] = NO_SUCH_CODE;
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ The LZ decompression routine:
+ This version decompresses the given GIF file into Line of length LineLen.
+ This routine can be called a few times (one per scan line, for example), in
+ order to complete the whole image. Decoder state (pixel stack depth and the
+ last code seen) is loaded from the private structure on entry and stored
+ back on successful return, which is what makes the calls resumable.
+ Returns GIF_OK once LineLen pixels were written, GIF_ERROR otherwise.
+******************************************************************************/
+static int DGifDecompressLine(GifFileType *GifFile, GifPixelType *Line,
+                              int LineLen) {
+    int i = 0; /* Number of pixels written to Line so far. */
+    int j, CrntCode, EOFCode, ClearCode, CrntPrefix, LastCode, StackPtr;
+    GifByteType *Stack, *Suffix;
+    GifPrefixType *Prefix;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    StackPtr = Private->StackPtr;
+    Prefix = Private->Prefix;
+    Suffix = Private->Suffix;
+    Stack = Private->Stack;
+    EOFCode = Private->EOFCode;
+    ClearCode = Private->ClearCode;
+    LastCode = Private->LastCode;
+
+    /* Refuse a saved stack depth that is out of range (no error code is
+     * set on this path). */
+    if (StackPtr > LZ_MAX_CODE) {
+        return GIF_ERROR;
+    }
+
+    if (StackPtr != 0) {
+        /* Let pop the stack off before continuing to read the GIF
+         * file: */
+        while (StackPtr != 0 && i < LineLen) {
+            Line[i++] = Stack[--StackPtr];
+        }
+    }
+
+    while (i < LineLen) { /* Decode LineLen items. */
+        if (DGifDecompressInput(GifFile, &CrntCode) == GIF_ERROR) {
+            return GIF_ERROR;
+        }
+
+        if (CrntCode == EOFCode) {
+            /* Note however that usually we will not be here as we
+             * will stop decoding as soon as we got all the pixel,
+             * or EOF code will not be read at all, and
+             * DGifGetLine/Pixel clean everything. */
+            GifFile->Error = D_GIF_ERR_EOF_TOO_SOON;
+            return GIF_ERROR;
+        } else if (CrntCode == ClearCode) {
+            /* We need to start over again: */
+            for (j = 0; j <= LZ_MAX_CODE; j++) {
+                Prefix[j] = NO_SUCH_CODE;
+            }
+            Private->RunningCode = Private->EOFCode + 1;
+            Private->RunningBits = Private->BitsPerPixel + 1;
+            Private->MaxCode1 = 1 << Private->RunningBits;
+            LastCode = Private->LastCode = NO_SUCH_CODE;
+        } else {
+            /* It's a regular code - if in pixel range simply add it to
+             * output stream, otherwise trace to codes linked list
+             * until the prefix is in pixel range: */
+            if (CrntCode < ClearCode) {
+                /* This is simple - it's a pixel scalar, so add it
+                 * to output: */
+                Line[i++] = CrntCode;
+            } else {
+                /* It's a code that needs to be traced: trace the
+                 * linked list until the prefix is a pixel,
+                 * while pushing the suffix pixels on our stack.
+                 * If we are done, pop the stack in reverse (that's
+                 * what a stack is good for!) order to output. */
+                if (Prefix[CrntCode] == NO_SUCH_CODE) {
+                    CrntPrefix = LastCode;
+
+                    /* Only allowed if CrntCode is exactly
+                     * the running code: In that case
+                     * CrntCode = XXXCode, CrntCode or the
+                     * prefix code is last code and the
+                     * suffix char is exactly the prefix of
+                     * last code! */
+                    if (CrntCode == Private->RunningCode - 2) {
+                        Suffix[Private->RunningCode - 2] = Stack[StackPtr++] =
+                            DGifGetPrefixChar(Prefix, LastCode, ClearCode);
+                    } else {
+                        Suffix[Private->RunningCode - 2] = Stack[StackPtr++] =
+                            DGifGetPrefixChar(Prefix, CrntCode, ClearCode);
+                    }
+                } else {
+                    CrntPrefix = CrntCode;
+                }
+
+                /* Now (if image is O.K.) we should not get a
+                 * NO_SUCH_CODE during the trace. As we might
+                 * loop forever, in case of defective image, we
+                 * use StackPtr as loop counter and stop before
+                 * overflowing Stack[]. */
+                while (StackPtr < LZ_MAX_CODE &&
+                       CrntPrefix > ClearCode &&
+                       CrntPrefix <= LZ_MAX_CODE) {
+                    Stack[StackPtr++] = Suffix[CrntPrefix];
+                    CrntPrefix = Prefix[CrntPrefix];
+                }
+                if (StackPtr >= LZ_MAX_CODE ||
+                    CrntPrefix > LZ_MAX_CODE) {
+                    GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+                    return GIF_ERROR;
+                }
+                /* Push the last character on stack: */
+                Stack[StackPtr++] = CrntPrefix;
+
+                /* Now lets pop all the stack into output: */
+                while (StackPtr != 0 && i < LineLen) {
+                    Line[i++] = Stack[--StackPtr];
+                }
+            }
+            /* Record a new table entry at index RunningCode - 2, but only
+             * when there is a previous code, the index is within the
+             * table and the slot is still unused. */
+            if (LastCode != NO_SUCH_CODE &&
+                Private->RunningCode - 2 < (LZ_MAX_CODE + 1) &&
+                Prefix[Private->RunningCode - 2] == NO_SUCH_CODE) {
+                Prefix[Private->RunningCode - 2] = LastCode;
+
+                if (CrntCode == Private->RunningCode - 2) {
+                    /* Only allowed if CrntCode is exactly
+                     * the running code: In that case
+                     * CrntCode = XXXCode, CrntCode or the
+                     * prefix code is last code and the
+                     * suffix char is exactly the prefix of
+                     * last code! */
+                    Suffix[Private->RunningCode - 2] =
+                        DGifGetPrefixChar(Prefix, LastCode, ClearCode);
+                } else {
+                    Suffix[Private->RunningCode - 2] =
+                        DGifGetPrefixChar(Prefix, CrntCode, ClearCode);
+                }
+            }
+            LastCode = CrntCode;
+        }
+    }
+
+    /* Save the resumable state for the next call. */
+    Private->LastCode = LastCode;
+    Private->StackPtr = StackPtr;
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Routine to trace the Prefixes linked list until we get a prefix which is
+ not code, but a pixel value (less than ClearCode). Returns that pixel value.
+ If image is defective, we might loop here forever, so we limit the loops to
+ the maximum possible if image O.k. - LZ_MAX_CODE times.
+******************************************************************************/
+static int DGifGetPrefixChar(const GifPrefixType *Prefix, int Code,
+                             int ClearCode) {
+    int Steps;
+
+    /* Follow the prefix chain while Code is above ClearCode, giving up
+     * after LZ_MAX_CODE + 1 hops so a cyclic table cannot hang us. */
+    for (Steps = 0; Code > ClearCode && Steps <= LZ_MAX_CODE; Steps++) {
+        if (Code > LZ_MAX_CODE) {
+            /* The chain left the table. */
+            return NO_SUCH_CODE;
+        }
+        Code = Prefix[Code];
+    }
+    return Code;
+}
+
+/******************************************************************************
+ Direct access to the LZ code stream. *Code receives the next code; when
+ that code is the EOF code the remaining data blocks are skipped and *Code
+ is set to -1 instead.
+******************************************************************************/
+int DGifGetLZCodes(GifFileType *GifFile, int *Code) {
+    GifFilePrivateType *State = (GifFilePrivateType *)GifFile->Private;
+    GifByteType *Skipped;
+
+    if (!IS_READABLE(State)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (DGifDecompressInput(GifFile, Code) == GIF_ERROR) {
+        return GIF_ERROR;
+    }
+
+    if (*Code == State->EOFCode) {
+        /* Drain whatever blocks remain (hopefully only the terminating
+         * NULL block). */
+        for (;;) {
+            if (DGifGetCodeNext(GifFile, &Skipped) == GIF_ERROR) {
+                return GIF_ERROR;
+            }
+            if (Skipped == NULL) {
+                break;
+            }
+        }
+        *Code = -1;
+        return GIF_OK;
+    }
+
+    if (*Code == State->ClearCode) {
+        /* Clear code: code width and running code restart. */
+        State->RunningCode = State->EOFCode + 1;
+        State->RunningBits = State->BitsPerPixel + 1;
+        State->MaxCode1 = 1 << State->RunningBits;
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ The LZ decompression input routine:
+ This routine is responsible for the decompression of the bit stream from
+ 8 bits (bytes) packets, into the real codes.
+ Returns GIF_OK if read successfully.
+******************************************************************************/
+static int DGifDecompressInput(GifFileType *GifFile, int *Code) {
+    /* CodeMasks[n] keeps the low n bits, for n = 0..12. */
+    static const unsigned short CodeMasks[] = {
+        0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f,
+        0x007f, 0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff};
+
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    GifByteType NextByte;
+
+    /* The image can't contain more than LZ_BITS per code. */
+    if (Private->RunningBits > LZ_BITS) {
+        GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+        return GIF_ERROR;
+    }
+
+    /* Top up the bit accumulator one byte at a time; new bytes are placed
+     * above the bits that are already pending. */
+    while (Private->CrntShiftState < Private->RunningBits) {
+        /* Needs to get more bytes from input stream for next code: */
+        if (DGifBufferedInput(GifFile, Private->Buf, &NextByte) ==
+            GIF_ERROR) {
+            return GIF_ERROR;
+        }
+        Private->CrntShiftDWord |= ((unsigned long)NextByte)
+                                   << Private->CrntShiftState;
+        Private->CrntShiftState += 8;
+    }
+    /* The code is the low RunningBits bits of the accumulator. */
+    *Code = Private->CrntShiftDWord & CodeMasks[Private->RunningBits];
+
+    /* Drop the bits just consumed. */
+    Private->CrntShiftDWord >>= Private->RunningBits;
+    Private->CrntShiftState -= Private->RunningBits;
+
+    /* If code cannot fit into RunningBits bits, must raise its size. Note
+     * however that codes above 4095 are used for special signaling.
+     * If we're using LZ_BITS bits already and we're at the max code, just
+     * keep using the table as it is, don't increment Private->RunningCode.
+     */
+    if (Private->RunningCode < LZ_MAX_CODE + 2 &&
+        ++Private->RunningCode > Private->MaxCode1 &&
+        Private->RunningBits < LZ_BITS) {
+        Private->MaxCode1 <<= 1;
+        Private->RunningBits++;
+    }
+    return GIF_OK;
+}
+
+/******************************************************************************
+ This routine reads one GIF data block at a time and buffers it internally
+ so that the decompression routine could access it.
+ The routine returns the next byte from its internal buffer (or read next
+ block in if buffer empty) and returns GIF_OK if successful.
+******************************************************************************/
+static int DGifBufferedInput(GifFileType *GifFile, GifByteType *Buf,
+                             GifByteType *NextByte) {
+    /* Buf[0] counts the unread bytes left in the buffered block. */
+    if (Buf[0] == 0) {
+        /* Needs to read the next buffer - this one is empty: */
+        /* coverity[check_return] */
+        if (InternalRead(GifFile, Buf, 1) != 1) {
+            GifFile->Error = D_GIF_ERR_READ_FAILED;
+            return GIF_ERROR;
+        }
+        /* There shouldn't be any empty data blocks here as the LZW spec
+         * says the LZW termination code should come first. Therefore
+         * we shouldn't be inside this routine at that point.
+         */
+        if (Buf[0] == 0) {
+            GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+            return GIF_ERROR;
+        }
+        if (InternalRead(GifFile, &Buf[1], Buf[0]) != Buf[0]) {
+            GifFile->Error = D_GIF_ERR_READ_FAILED;
+            return GIF_ERROR;
+        }
+        *NextByte = Buf[1];
+        Buf[1] = 2; /* We use now the second place as last char read! */
+        Buf[0]--;
+    } else {
+        /* Buf[1] is the index of the next unread byte. */
+        *NextByte = Buf[Buf[1]++];
+        Buf[0]--;
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ This routine is called in case of error during parsing image. We need to
+ decrease image counter and reallocate memory for saved images. Not decreasing
+ ImageCount may lead to null pointer dereference, because the last element in
+ SavedImages may point to the spoilt image and null pointer buffers.
+
+ When the last remaining image is dropped the SavedImages array is released
+ and set to NULL rather than being resized to zero elements.
+*******************************************************************************/
+void DGifDecreaseImageCounter(GifFileType *GifFile) {
+    SavedImage *Shrunk;
+
+    /* Nothing to roll back; this also keeps us from indexing
+     * SavedImages[-1] or dereferencing a NULL array. */
+    if (GifFile->SavedImages == NULL || GifFile->ImageCount <= 0) {
+        return;
+    }
+
+    GifFile->ImageCount--;
+
+    /* free(NULL) is a no-op. The pointer is cleared so that the slot holds
+     * no dangling pointer if the shrink below fails and the old array is
+     * kept. */
+    free(GifFile->SavedImages[GifFile->ImageCount].RasterBits);
+    GifFile->SavedImages[GifFile->ImageCount].RasterBits = NULL;
+    /* NOTE(review): the dropped entry's ImageDesc.ColorMap and
+     * ExtensionBlocks are not released here - confirm whether they can be
+     * non-NULL at this point. */
+
+    if (GifFile->ImageCount == 0) {
+        /* Resizing to zero elements is implementation-defined: some
+         * allocators free the block and return NULL, which would leave
+         * SavedImages dangling for DGifCloseFile() to free again. Release
+         * the array explicitly instead. */
+        free(GifFile->SavedImages);
+        GifFile->SavedImages = NULL;
+        return;
+    }
+
+    /* Realloc array according to the new image counter. On failure the
+     * old, larger array is still valid and is simply kept. */
+    Shrunk = (SavedImage *)reallocarray(
+        GifFile->SavedImages, GifFile->ImageCount, sizeof(SavedImage));
+    if (Shrunk != NULL) {
+        GifFile->SavedImages = Shrunk;
+    }
+}
+
+/******************************************************************************
+ This routine reads an entire GIF into core, hanging all its state info off
+ the GifFileType pointer. Call DGifOpenFileName() or DGifOpenFileHandle()
+ first to initialize I/O. Its inverse is EGifSpew().
+
+ Extension blocks read before an image are attached to that image; blocks
+ read after the last image stay on GifFile. Returns GIF_ERROR on any read
+ or allocation failure, or when the file contained no image at all.
+*******************************************************************************/
+int DGifSlurp(GifFileType *GifFile) {
+    size_t ImageSize;
+    GifRecordType RecordType;
+    SavedImage *sp;
+    GifByteType *ExtData;
+    int ExtFunction;
+
+    GifFile->ExtensionBlocks = NULL;
+    GifFile->ExtensionBlockCount = 0;
+
+    do {
+        if (DGifGetRecordType(GifFile, &RecordType) == GIF_ERROR) {
+            return (GIF_ERROR);
+        }
+
+        switch (RecordType) {
+        case IMAGE_DESC_RECORD_TYPE:
+            if (DGifGetImageDesc(GifFile) == GIF_ERROR) {
+                return (GIF_ERROR);
+            }
+
+            sp = &GifFile->SavedImages[GifFile->ImageCount - 1];
+            /* Allocate memory for the image */
+            /* Reject empty images and sizes whose product would not fit
+             * in an int before multiplying. */
+            if (sp->ImageDesc.Width <= 0 ||
+                sp->ImageDesc.Height <= 0 ||
+                sp->ImageDesc.Width >
+                    (INT_MAX / sp->ImageDesc.Height)) {
+                DGifDecreaseImageCounter(GifFile);
+                return GIF_ERROR;
+            }
+            ImageSize = sp->ImageDesc.Width * sp->ImageDesc.Height;
+
+            if (ImageSize > (SIZE_MAX / sizeof(GifPixelType))) {
+                DGifDecreaseImageCounter(GifFile);
+                return GIF_ERROR;
+            }
+            sp->RasterBits = (unsigned char *)reallocarray(
+                NULL, ImageSize, sizeof(GifPixelType));
+
+            if (sp->RasterBits == NULL) {
+                DGifDecreaseImageCounter(GifFile);
+                return GIF_ERROR;
+            }
+
+            if (sp->ImageDesc.Interlace) {
+                int i, j;
+                /*
+                 * The way an interlaced image should be read -
+                 * offsets and jumps...
+                 */
+                static const int InterlacedOffset[] = {0, 4, 2, 1};
+                static const int InterlacedJumps[] = {8, 8, 4, 2};
+                /* Need to perform 4 passes on the image */
+                for (i = 0; i < 4; i++) {
+                    for (j = InterlacedOffset[i];
+                         j < sp->ImageDesc.Height;
+                         j += InterlacedJumps[i]) {
+                        if (DGifGetLine(
+                                GifFile,
+                                sp->RasterBits +
+                                    j * sp->ImageDesc.Width,
+                                sp->ImageDesc.Width) ==
+                            GIF_ERROR) {
+                            DGifDecreaseImageCounter(GifFile);
+                            return GIF_ERROR;
+                        }
+                    }
+                }
+            } else {
+                /* Non-interlaced: the whole raster is read in one call. */
+                if (DGifGetLine(GifFile, sp->RasterBits,
+                                ImageSize) == GIF_ERROR) {
+                    DGifDecreaseImageCounter(GifFile);
+                    return GIF_ERROR;
+                }
+            }
+
+            /* Hand the extension blocks collected so far to this image. */
+            if (GifFile->ExtensionBlocks) {
+                sp->ExtensionBlocks = GifFile->ExtensionBlocks;
+                sp->ExtensionBlockCount =
+                    GifFile->ExtensionBlockCount;
+
+                GifFile->ExtensionBlocks = NULL;
+                GifFile->ExtensionBlockCount = 0;
+            }
+            break;
+
+        case EXTENSION_RECORD_TYPE:
+            if (DGifGetExtension(GifFile, &ExtFunction, &ExtData) ==
+                GIF_ERROR) {
+                return (GIF_ERROR);
+            }
+            /* Create an extension block with our data */
+            if (ExtData != NULL) {
+                if (GifAddExtensionBlock(
+                        &GifFile->ExtensionBlockCount,
+                        &GifFile->ExtensionBlocks, ExtFunction,
+                        ExtData[0], &ExtData[1]) == GIF_ERROR) {
+                    return (GIF_ERROR);
+                }
+            }
+            for (;;) {
+                if (DGifGetExtensionNext(GifFile, &ExtData) ==
+                    GIF_ERROR) {
+                    return (GIF_ERROR);
+                }
+                if (ExtData == NULL) {
+                    break;
+                }
+                /* Continue the extension block */
+                if (GifAddExtensionBlock(
+                        &GifFile->ExtensionBlockCount,
+                        &GifFile->ExtensionBlocks,
+                        CONTINUE_EXT_FUNC_CODE, ExtData[0],
+                        &ExtData[1]) == GIF_ERROR) {
+                    return (GIF_ERROR);
+                }
+            }
+            break;
+
+        case TERMINATE_RECORD_TYPE:
+            break;
+
+        default: /* Should be trapped by DGifGetRecordType */
+            break;
+        }
+    } while (RecordType != TERMINATE_RECORD_TYPE);
+
+    /* Sanity check for corrupted file */
+    if (GifFile->ImageCount == 0) {
+        GifFile->Error = D_GIF_ERR_NO_IMAG_DSCR;
+        return (GIF_ERROR);
+    }
+
+    return (GIF_OK);
+}
+
+/* end */
diff --git
a/torchvision/csrc/io/image/cpu/giflib/gif_hash.c b/torchvision/csrc/io/image/cpu/giflib/gif_hash.c new file mode 100644 index 00000000000..42efbe8de68 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/giflib/gif_hash.c @@ -0,0 +1,129 @@ +// @nolint (improperly imported third-party code) +/***************************************************************************** + +gif_hash.c -- module to support the following operations: + +1. InitHashTable - initialize hash table. +2. ClearHashTable - clear the hash table to an empty state. +2. InsertHashTable - insert one item into data structure. +3. ExistsHashTable - test if item exists in data structure. + +This module is used to hash the GIF codes during encoding. + +*****************************************************************************/ +// SPDX-License-Identifier: MIT +// SPDX-File-Copyright-Txt: (C) Copyright 1989 Gershon Elber + +#include +#include +#include +#include +#include + +#include "gif_hash.h" +#include "gif_lib.h" +#include "gif_lib_private.h" + +/* #define DEBUG_HIT_RATE Debug number of misses per hash Insert/Exists. */ + +#ifdef DEBUG_HIT_RATE +static long NumberOfTests = 0, NumberOfMisses = 0; +#endif /* DEBUG_HIT_RATE */ + +static int KeyItem(uint32_t Item); + +/****************************************************************************** + Initialize HashTable - allocate the memory needed and clear it. * +******************************************************************************/ +GifHashTableType *_InitHashTable(void) { + GifHashTableType *HashTable; + + if ((HashTable = (GifHashTableType *)malloc( + sizeof(GifHashTableType))) == NULL) { + return NULL; + } + + _ClearHashTable(HashTable); + + return HashTable; +} + +/****************************************************************************** + Routine to clear the HashTable to an empty state. * + This part is a little machine depended. Use the commented part otherwise. 
* +******************************************************************************/ +void _ClearHashTable(GifHashTableType *HashTable) { + memset(HashTable->HTable, 0xFF, HT_SIZE * sizeof(uint32_t)); +} + +/****************************************************************************** + Routine to insert a new Item into the HashTable. The data is assumed to be * + new one. * +******************************************************************************/ +void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code) { + int HKey = KeyItem(Key); + uint32_t *HTable = HashTable->HTable; + +#ifdef DEBUG_HIT_RATE + NumberOfTests++; + NumberOfMisses++; +#endif /* DEBUG_HIT_RATE */ + + while (HT_GET_KEY(HTable[HKey]) != 0xFFFFFL) { +#ifdef DEBUG_HIT_RATE + NumberOfMisses++; +#endif /* DEBUG_HIT_RATE */ + HKey = (HKey + 1) & HT_KEY_MASK; + } + HTable[HKey] = HT_PUT_KEY(Key) | HT_PUT_CODE(Code); +} + +/****************************************************************************** + Routine to test if given Key exists in HashTable and if so returns its code * + Returns the Code if key was found, -1 if not. * +******************************************************************************/ +int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key) { + int HKey = KeyItem(Key); + uint32_t *HTable = HashTable->HTable, HTKey; + +#ifdef DEBUG_HIT_RATE + NumberOfTests++; + NumberOfMisses++; +#endif /* DEBUG_HIT_RATE */ + + while ((HTKey = HT_GET_KEY(HTable[HKey])) != 0xFFFFFL) { +#ifdef DEBUG_HIT_RATE + NumberOfMisses++; +#endif /* DEBUG_HIT_RATE */ + if (Key == HTKey) { + return HT_GET_CODE(HTable[HKey]); + } + HKey = (HKey + 1) & HT_KEY_MASK; + } + + return -1; +} + +/****************************************************************************** + Routine to generate an HKey for the hashtable out of the given unique key. 
* + The given Key is assumed to be 20 bits as follows: lower 8 bits are the * + new postfix character, while the upper 12 bits are the prefix code. * + Because the average hit ratio is only 2 (2 hash references per entry), * + evaluating more complex keys (such as twin prime keys) does not worth it! * +******************************************************************************/ +static int KeyItem(uint32_t Item) { + return ((Item >> 12) ^ Item) & HT_KEY_MASK; +} + +#ifdef DEBUG_HIT_RATE +/****************************************************************************** + Debugging routine to print the hit ratio - number of times the hash table * + was tested per operation. This routine was used to test the KeyItem routine * +******************************************************************************/ +void HashTablePrintHitRatio(void) { + printf("Hash Table Hit Ratio is %ld/%ld = %ld%%.\n", NumberOfMisses, + NumberOfTests, NumberOfMisses * 100 / NumberOfTests); +} +#endif /* DEBUG_HIT_RATE */ + +/* end */ diff --git a/torchvision/csrc/io/image/cpu/giflib/gif_hash.h b/torchvision/csrc/io/image/cpu/giflib/gif_hash.h new file mode 100644 index 00000000000..3066fb14592 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/giflib/gif_hash.h @@ -0,0 +1,43 @@ +// @nolint (improperly imported third-party code) +/****************************************************************************** + +gif_hash.h - magfic constants and declarations for GIF LZW + +******************************************************************************/ +// SPDX-License-Identifier: MIT + +#ifndef _GIF_HASH_H_ +#define _GIF_HASH_H_ + +#ifndef _WIN32 +#include +#endif /* _WIN32 */ +#include + +#define HT_SIZE 8192 /* 12bits = 4096 or twice as big! */ +#define HT_KEY_MASK 0x1FFF /* 13bits keys */ +#define HT_KEY_NUM_BITS 13 /* 13bits keys */ +#define HT_MAX_KEY 8191 /* 13bits - 1, maximal code possible */ +#define HT_MAX_CODE 4095 /* Biggest code possible in 12 bits. 
*/ + +/* The 32 bits of the long are divided into two parts for the key & code: */ +/* 1. The code is 12 bits as our compression algorithm is limited to 12bits */ +/* 2. The key is 12 bits Prefix code + 8 bit new char or 20 bits. */ +/* The key is the upper 20 bits. The code is the lower 12. */ +#define HT_GET_KEY(l) (l >> 12) +#define HT_GET_CODE(l) (l & 0x0FFF) +#define HT_PUT_KEY(l) (l << 12) +#define HT_PUT_CODE(l) (l & 0x0FFF) + +typedef struct GifHashTableType { + uint32_t HTable[HT_SIZE]; +} GifHashTableType; + +GifHashTableType *_InitHashTable(void); +void _ClearHashTable(GifHashTableType *HashTable); +void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code); +int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key); + +#endif /* _GIF_HASH_H_ */ + +/* end */ diff --git a/torchvision/csrc/io/image/cpu/giflib/gif_lib.h b/torchvision/csrc/io/image/cpu/giflib/gif_lib.h new file mode 100644 index 00000000000..7bed0430450 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/giflib/gif_lib.h @@ -0,0 +1,292 @@ +// @nolint (improperly imported third-party code) +/****************************************************************************** + +gif_lib.h - service library for decoding and encoding GIF images + +SPDX-License-Identifier: MIT + +*****************************************************************************/ + +#ifndef _GIF_LIB_H_ +#define _GIF_LIB_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#define GIFLIB_MAJOR 5 +#define GIFLIB_MINOR 2 +#define GIFLIB_RELEASE 2 + +#define GIF_ERROR 0 +#define GIF_OK 1 + +#include +#include + +#define GIF_STAMP "GIFVER" /* First chars in file - GIF stamp. */ +#define GIF_STAMP_LEN sizeof(GIF_STAMP) - 1 +#define GIF_VERSION_POS 3 /* Version first character in stamp. */ +#define GIF87_STAMP "GIF87a" /* First chars in file - GIF stamp. */ +#define GIF89_STAMP "GIF89a" /* First chars in file - GIF stamp. 
*/ + +typedef unsigned char GifPixelType; +typedef unsigned char *GifRowType; +typedef unsigned char GifByteType; +typedef unsigned int GifPrefixType; +typedef int GifWord; + +typedef struct GifColorType { + GifByteType Red, Green, Blue; +} GifColorType; + +typedef struct ColorMapObject { + int ColorCount; + int BitsPerPixel; + bool SortFlag; + GifColorType *Colors; /* on malloc(3) heap */ +} ColorMapObject; + +typedef struct GifImageDesc { + GifWord Left, Top, Width, Height; /* Current image dimensions. */ + bool Interlace; /* Sequential/Interlaced lines. */ + ColorMapObject *ColorMap; /* The local color map */ +} GifImageDesc; + +typedef struct ExtensionBlock { + int ByteCount; + GifByteType *Bytes; /* on malloc(3) heap */ + int Function; /* The block function code */ +#define CONTINUE_EXT_FUNC_CODE 0x00 /* continuation subblock */ +#define COMMENT_EXT_FUNC_CODE 0xfe /* comment */ +#define GRAPHICS_EXT_FUNC_CODE 0xf9 /* graphics control (GIF89) */ +#define PLAINTEXT_EXT_FUNC_CODE 0x01 /* plaintext */ +#define APPLICATION_EXT_FUNC_CODE 0xff /* application block (GIF89) */ +} ExtensionBlock; + +typedef struct SavedImage { + GifImageDesc ImageDesc; + GifByteType *RasterBits; /* on malloc(3) heap */ + int ExtensionBlockCount; /* Count of extensions before image */ + ExtensionBlock *ExtensionBlocks; /* Extensions before image */ +} SavedImage; + +typedef struct GifFileType { + GifWord SWidth, SHeight; /* Size of virtual canvas */ + GifWord SColorResolution; /* How many colors can we generate? */ + GifWord SBackGroundColor; /* Background color for virtual canvas */ + GifByteType AspectByte; /* Used to compute pixel aspect ratio */ + ColorMapObject *SColorMap; /* Global colormap, NULL if nonexistent. 
*/ + int ImageCount; /* Number of current image (both APIs) */ + GifImageDesc Image; /* Current image (low-level API) */ + SavedImage *SavedImages; /* Image sequence (high-level API) */ + int ExtensionBlockCount; /* Count extensions past last image */ + ExtensionBlock *ExtensionBlocks; /* Extensions past last image */ + int Error; /* Last error condition reported */ + void *UserData; /* hook to attach user data (TVT) */ + void *Private; /* Don't mess with this! */ +} GifFileType; + +#define GIF_ASPECT_RATIO(n) ((n) + 15.0 / 64.0) + +typedef enum { + UNDEFINED_RECORD_TYPE, + SCREEN_DESC_RECORD_TYPE, + IMAGE_DESC_RECORD_TYPE, /* Begin with ',' */ + EXTENSION_RECORD_TYPE, /* Begin with '!' */ + TERMINATE_RECORD_TYPE /* Begin with ';' */ +} GifRecordType; + +/* func type to read gif data from arbitrary sources (TVT) */ +typedef int (*InputFunc)(GifFileType *, GifByteType *, int); + +/* func type to write gif data to arbitrary targets. + * Returns count of bytes written. (MRB) + */ +typedef int (*OutputFunc)(GifFileType *, const GifByteType *, int); + +/****************************************************************************** + GIF89 structures +******************************************************************************/ + +typedef struct GraphicsControlBlock { + int DisposalMode; +#define DISPOSAL_UNSPECIFIED 0 /* No disposal specified. 
*/ +#define DISPOSE_DO_NOT 1 /* Leave image in place */ +#define DISPOSE_BACKGROUND 2 /* Set area too background color */ +#define DISPOSE_PREVIOUS 3 /* Restore to previous content */ + bool UserInputFlag; /* User confirmation required before disposal */ + int DelayTime; /* pre-display delay in 0.01sec units */ + int TransparentColor; /* Palette index for transparency, -1 if none */ +#define NO_TRANSPARENT_COLOR -1 +} GraphicsControlBlock; + +/****************************************************************************** + GIF encoding routines +******************************************************************************/ + +/* Main entry points */ +GifFileType *EGifOpenFileName(const char *GifFileName, + const bool GifTestExistence, int *Error); +GifFileType *EGifOpenFileHandle(const int GifFileHandle, int *Error); +GifFileType *EGifOpen(void *userPtr, OutputFunc writeFunc, int *Error); +int EGifSpew(GifFileType *GifFile); +const char *EGifGetGifVersion(GifFileType *GifFile); /* new in 5.x */ +int EGifCloseFile(GifFileType *GifFile, int *ErrorCode); + +#define E_GIF_SUCCEEDED 0 +#define E_GIF_ERR_OPEN_FAILED 1 /* And EGif possible errors. */ +#define E_GIF_ERR_WRITE_FAILED 2 +#define E_GIF_ERR_HAS_SCRN_DSCR 3 +#define E_GIF_ERR_HAS_IMAG_DSCR 4 +#define E_GIF_ERR_NO_COLOR_MAP 5 +#define E_GIF_ERR_DATA_TOO_BIG 6 +#define E_GIF_ERR_NOT_ENOUGH_MEM 7 +#define E_GIF_ERR_DISK_IS_FULL 8 +#define E_GIF_ERR_CLOSE_FAILED 9 +#define E_GIF_ERR_NOT_WRITEABLE 10 + +/* These are legacy. 
You probably do not want to call them directly */ +int EGifPutScreenDesc(GifFileType *GifFile, const int GifWidth, + const int GifHeight, const int GifColorRes, + const int GifBackGround, + const ColorMapObject *GifColorMap); +int EGifPutImageDesc(GifFileType *GifFile, const int GifLeft, const int GifTop, + const int GifWidth, const int GifHeight, + const bool GifInterlace, + const ColorMapObject *GifColorMap); +void EGifSetGifVersion(GifFileType *GifFile, const bool gif89); +int EGifPutLine(GifFileType *GifFile, GifPixelType *GifLine, int GifLineLen); +int EGifPutPixel(GifFileType *GifFile, const GifPixelType GifPixel); +int EGifPutComment(GifFileType *GifFile, const char *GifComment); +int EGifPutExtensionLeader(GifFileType *GifFile, const int GifExtCode); +int EGifPutExtensionBlock(GifFileType *GifFile, const int GifExtLen, + const void *GifExtension); +int EGifPutExtensionTrailer(GifFileType *GifFile); +int EGifPutExtension(GifFileType *GifFile, const int GifExtCode, + const int GifExtLen, const void *GifExtension); +int EGifPutCode(GifFileType *GifFile, int GifCodeSize, + const GifByteType *GifCodeBlock); +int EGifPutCodeNext(GifFileType *GifFile, const GifByteType *GifCodeBlock); + +/****************************************************************************** + GIF decoding routines +******************************************************************************/ + +/* Main entry points */ +GifFileType *DGifOpenFileName(const char *GifFileName, int *Error); +GifFileType *DGifOpenFileHandle(int GifFileHandle, int *Error); +int DGifSlurp(GifFileType *GifFile); +GifFileType *DGifOpen(void *userPtr, InputFunc readFunc, + int *Error); /* new one (TVT) */ +int DGifCloseFile(GifFileType *GifFile, int *ErrorCode); + +#define D_GIF_SUCCEEDED 0 +#define D_GIF_ERR_OPEN_FAILED 101 /* And DGif possible errors. 
*/ +#define D_GIF_ERR_READ_FAILED 102 +#define D_GIF_ERR_NOT_GIF_FILE 103 +#define D_GIF_ERR_NO_SCRN_DSCR 104 +#define D_GIF_ERR_NO_IMAG_DSCR 105 +#define D_GIF_ERR_NO_COLOR_MAP 106 +#define D_GIF_ERR_WRONG_RECORD 107 +#define D_GIF_ERR_DATA_TOO_BIG 108 +#define D_GIF_ERR_NOT_ENOUGH_MEM 109 +#define D_GIF_ERR_CLOSE_FAILED 110 +#define D_GIF_ERR_NOT_READABLE 111 +#define D_GIF_ERR_IMAGE_DEFECT 112 +#define D_GIF_ERR_EOF_TOO_SOON 113 + +/* These are legacy. You probably do not want to call them directly */ +int DGifGetScreenDesc(GifFileType *GifFile); +int DGifGetRecordType(GifFileType *GifFile, GifRecordType *GifType); +int DGifGetImageHeader(GifFileType *GifFile); +int DGifGetImageDesc(GifFileType *GifFile); +int DGifGetLine(GifFileType *GifFile, GifPixelType *GifLine, int GifLineLen); +int DGifGetPixel(GifFileType *GifFile, GifPixelType GifPixel); +int DGifGetExtension(GifFileType *GifFile, int *GifExtCode, + GifByteType **GifExtension); +int DGifGetExtensionNext(GifFileType *GifFile, GifByteType **GifExtension); +int DGifGetCode(GifFileType *GifFile, int *GifCodeSize, + GifByteType **GifCodeBlock); +int DGifGetCodeNext(GifFileType *GifFile, GifByteType **GifCodeBlock); +int DGifGetLZCodes(GifFileType *GifFile, int *GifCode); +const char *DGifGetGifVersion(GifFileType *GifFile); + +/****************************************************************************** + Error handling and reporting. +******************************************************************************/ +extern const char *GifErrorString(int ErrorCode); /* new in 2012 - ESR */ + +/***************************************************************************** + it g in core. 
+******************************************************************************/ + +/****************************************************************************** + Color map handling from gif_alloc.c +******************************************************************************/ + +extern ColorMapObject *GifMakeMapObject(int ColorCount, + const GifColorType *ColorMap); +extern void GifFreeMapObject(ColorMapObject *Object); +extern ColorMapObject *GifUnionColorMap(const ColorMapObject *ColorIn1, + const ColorMapObject *ColorIn2, + GifPixelType ColorTransIn2[]); +extern int GifBitSize(int n); + +/****************************************************************************** + Support for the in-core structures allocation (slurp mode). +******************************************************************************/ + +extern void GifApplyTranslation(SavedImage *Image, + const GifPixelType Translation[]); +extern int GifAddExtensionBlock(int *ExtensionBlock_Count, + ExtensionBlock **ExtensionBlocks, int Function, + unsigned int Len, unsigned char ExtData[]); +extern void GifFreeExtensions(int *ExtensionBlock_Count, + ExtensionBlock **ExtensionBlocks); +extern SavedImage *GifMakeSavedImage(GifFileType *GifFile, + const SavedImage *CopyFrom); +extern void GifFreeSavedImages(GifFileType *GifFile); + +/****************************************************************************** + 5.x functions for GIF89 graphics control blocks +******************************************************************************/ + +int DGifExtensionToGCB(const size_t GifExtensionLength, + const GifByteType *GifExtension, + GraphicsControlBlock *GCB); +size_t EGifGCBToExtension(const GraphicsControlBlock *GCB, + GifByteType *GifExtension); + +int DGifSavedExtensionToGCB(GifFileType *GifFile, int ImageIndex, + GraphicsControlBlock *GCB); +int EGifGCBToSavedExtension(const GraphicsControlBlock *GCB, + GifFileType *GifFile, int ImageIndex); + 
+/****************************************************************************** + The library's internal utility font +******************************************************************************/ + +#define GIF_FONT_WIDTH 8 +#define GIF_FONT_HEIGHT 8 +extern const unsigned char GifAsciiTable8x8[][GIF_FONT_WIDTH]; + +extern void GifDrawText8x8(SavedImage *Image, const int x, const int y, + const char *legend, const int color); + +extern void GifDrawBox(SavedImage *Image, const int x, const int y, const int w, + const int d, const int color); + +extern void GifDrawRectangle(SavedImage *Image, const int x, const int y, + const int w, const int d, const int color); + +extern void GifDrawBoxedText8x8(SavedImage *Image, const int x, const int y, + const char *legend, const int border, + const int bg, const int fg); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _GIF_LIB_H */ + +/* end */ diff --git a/torchvision/csrc/io/image/cpu/giflib/gif_lib_private.h b/torchvision/csrc/io/image/cpu/giflib/gif_lib_private.h new file mode 100644 index 00000000000..04987150321 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/giflib/gif_lib_private.h @@ -0,0 +1,73 @@ +// @nolint (improperly imported third-party code) +/**************************************************************************** + +gif_lib_private.h - internal giflib routines and structures + +SPDX-License-Identifier: MIT + +****************************************************************************/ + +#ifndef _GIF_LIB_PRIVATE_H +#define _GIF_LIB_PRIVATE_H + +#include "gif_hash.h" +#include "gif_lib.h" + +#ifndef SIZE_MAX +#define SIZE_MAX UINTPTR_MAX +#endif + +#define EXTENSION_INTRODUCER 0x21 +#define DESCRIPTOR_INTRODUCER 0x2c +#define TERMINATOR_INTRODUCER 0x3b + +#define LZ_MAX_CODE 4095 /* Biggest code possible in 12 bits. */ +#define LZ_BITS 12 + +#define FLUSH_OUTPUT 4096 /* Impossible code, to signal flush. */ +#define FIRST_CODE 4097 /* Impossible code, to signal first. 
*/ +#define NO_SUCH_CODE 4098 /* Impossible code, to signal empty. */ + +#define FILE_STATE_WRITE 0x01 +#define FILE_STATE_SCREEN 0x02 +#define FILE_STATE_IMAGE 0x04 +#define FILE_STATE_READ 0x08 + +#define IS_READABLE(Private) (Private->FileState & FILE_STATE_READ) +#define IS_WRITEABLE(Private) (Private->FileState & FILE_STATE_WRITE) + +typedef struct GifFilePrivateType { + GifWord FileState, FileHandle, /* Where all this data goes to! */ + BitsPerPixel, /* Bits per pixel (Codes uses at least this + 1). */ + ClearCode, /* The CLEAR LZ code. */ + EOFCode, /* The EOF LZ code. */ + RunningCode, /* The next code algorithm can generate. */ + RunningBits, /* The number of bits required to represent + RunningCode. */ + MaxCode1, /* 1 bigger than max. possible code, in RunningBits bits. + */ + LastCode, /* The code before the current code. */ + CrntCode, /* Current algorithm code. */ + StackPtr, /* For character stack (see below). */ + CrntShiftState; /* Number of bits in CrntShiftDWord. */ + unsigned long CrntShiftDWord; /* For bytes decomposition into codes. */ + unsigned long PixelCount; /* Number of pixels in image. */ + FILE *File; /* File as stream. */ + InputFunc Read; /* function to read gif input (TVT) */ + OutputFunc Write; /* function to write gif output (MRB) */ + GifByteType Buf[256]; /* Compressed input is buffered here. */ + GifByteType Stack[LZ_MAX_CODE]; /* Decoded pixels are stacked here. */ + GifByteType Suffix[LZ_MAX_CODE + 1]; /* So we can trace the codes. 
*/ + GifPrefixType Prefix[LZ_MAX_CODE + 1]; + GifHashTableType *HashTable; + bool gif89; +} GifFilePrivateType; + +#ifndef HAVE_REALLOCARRAY +extern void *openbsd_reallocarray(void *optr, size_t nmemb, size_t size); +#define reallocarray openbsd_reallocarray +#endif + +#endif /* _GIF_LIB_PRIVATE_H */ + +/* end */ diff --git a/torchvision/csrc/io/image/cpu/giflib/gifalloc.c b/torchvision/csrc/io/image/cpu/giflib/gifalloc.c new file mode 100644 index 00000000000..65679d22804 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/giflib/gifalloc.c @@ -0,0 +1,426 @@ +// @nolint (improperly imported third-party code) +/***************************************************************************** + + GIF construction tools + +****************************************************************************/ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright (C) Eric S. Raymond + +#include +#include +#include + +#include "gif_lib.h" +#include "gif_lib_private.h" + +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) + +/****************************************************************************** + Miscellaneous utility functions +******************************************************************************/ + +/* return smallest bitfield size n will fit in */ +int GifBitSize(int n) { + int i; + + for (i = 1; i <= 8; i++) { + if ((1 << i) >= n) { + break; + } + } + return (i); +} + +/****************************************************************************** + Color map object functions +******************************************************************************/ + +/* + * Allocate a color map of given size; initialize with contents of + * ColorMap if that pointer is non-NULL. + */ +ColorMapObject *GifMakeMapObject(int ColorCount, const GifColorType *ColorMap) { + ColorMapObject *Object; + + /*** FIXME: Our ColorCount has to be a power of two. Is it necessary to + * make the user know that or should we automatically round up instead? 
+ */ + if (ColorCount != (1 << GifBitSize(ColorCount))) { + return ((ColorMapObject *)NULL); + } + + Object = (ColorMapObject *)malloc(sizeof(ColorMapObject)); + if (Object == (ColorMapObject *)NULL) { + return ((ColorMapObject *)NULL); + } + + Object->Colors = + (GifColorType *)calloc(ColorCount, sizeof(GifColorType)); + if (Object->Colors == (GifColorType *)NULL) { + free(Object); + return ((ColorMapObject *)NULL); + } + + Object->ColorCount = ColorCount; + Object->BitsPerPixel = GifBitSize(ColorCount); + Object->SortFlag = false; + + if (ColorMap != NULL) { + memcpy((char *)Object->Colors, (char *)ColorMap, + ColorCount * sizeof(GifColorType)); + } + + return (Object); +} + +/******************************************************************************* + Free a color map object +*******************************************************************************/ +void GifFreeMapObject(ColorMapObject *Object) { + if (Object != NULL) { + (void)free(Object->Colors); + (void)free(Object); + } +} + +#ifdef DEBUG +void DumpColorMap(ColorMapObject *Object, FILE *fp) { + if (Object != NULL) { + int i, j, Len = Object->ColorCount; + + for (i = 0; i < Len; i += 4) { + for (j = 0; j < 4 && j < Len; j++) { + (void)fprintf(fp, "%3d: %02x %02x %02x ", + i + j, Object->Colors[i + j].Red, + Object->Colors[i + j].Green, + Object->Colors[i + j].Blue); + } + (void)fprintf(fp, "\n"); + } + } +} +#endif /* DEBUG */ + +/******************************************************************************* + Compute the union of two given color maps and return it. If result can't + fit into 256 colors, NULL is returned, the allocated union otherwise. + ColorIn1 is copied as is to ColorUnion, while colors from ColorIn2 are + copied iff they didn't exist before. 
ColorTransIn2 maps the old + ColorIn2 into the ColorUnion color map table./ +*******************************************************************************/ +ColorMapObject *GifUnionColorMap(const ColorMapObject *ColorIn1, + const ColorMapObject *ColorIn2, + GifPixelType ColorTransIn2[]) { + int i, j, CrntSlot, RoundUpTo, NewGifBitSize; + ColorMapObject *ColorUnion; + + /* + * We don't worry about duplicates within either color map; if + * the caller wants to resolve those, he can perform unions + * with an empty color map. + */ + + /* Allocate table which will hold the result for sure. */ + ColorUnion = GifMakeMapObject( + MAX(ColorIn1->ColorCount, ColorIn2->ColorCount) * 2, NULL); + + if (ColorUnion == NULL) { + return (NULL); + } + + /* + * Copy ColorIn1 to ColorUnion. + */ + for (i = 0; i < ColorIn1->ColorCount; i++) { + ColorUnion->Colors[i] = ColorIn1->Colors[i]; + } + CrntSlot = ColorIn1->ColorCount; + + /* + * Potentially obnoxious hack: + * + * Back CrntSlot down past all contiguous {0, 0, 0} slots at the end + * of table 1. This is very useful if your display is limited to + * 16 colors. 
+ */ + while (ColorIn1->Colors[CrntSlot - 1].Red == 0 && + ColorIn1->Colors[CrntSlot - 1].Green == 0 && + ColorIn1->Colors[CrntSlot - 1].Blue == 0) { + CrntSlot--; + } + + /* Copy ColorIn2 to ColorUnion (use old colors if they exist): */ + for (i = 0; i < ColorIn2->ColorCount && CrntSlot <= 256; i++) { + /* Let's see if this color already exists: */ + for (j = 0; j < ColorIn1->ColorCount; j++) { + if (memcmp(&ColorIn1->Colors[j], &ColorIn2->Colors[i], + sizeof(GifColorType)) == 0) { + break; + } + } + + if (j < ColorIn1->ColorCount) { + ColorTransIn2[i] = j; /* color exists in Color1 */ + } else { + /* Color is new - copy it to a new slot: */ + ColorUnion->Colors[CrntSlot] = ColorIn2->Colors[i]; + ColorTransIn2[i] = CrntSlot++; + } + } + + if (CrntSlot > 256) { + GifFreeMapObject(ColorUnion); + return ((ColorMapObject *)NULL); + } + + NewGifBitSize = GifBitSize(CrntSlot); + RoundUpTo = (1 << NewGifBitSize); + + if (RoundUpTo != ColorUnion->ColorCount) { + GifColorType *Map = ColorUnion->Colors; + + /* + * Zero out slots up to next power of 2. + * We know these slots exist because of the way ColorUnion's + * start dimension was computed. + */ + for (j = CrntSlot; j < RoundUpTo; j++) { + Map[j].Red = Map[j].Green = Map[j].Blue = 0; + } + + /* perhaps we can shrink the map? 
*/ + if (RoundUpTo < ColorUnion->ColorCount) { + GifColorType *new_map = (GifColorType *)reallocarray( + Map, RoundUpTo, sizeof(GifColorType)); + if (new_map == NULL) { + GifFreeMapObject(ColorUnion); + return ((ColorMapObject *)NULL); + } + ColorUnion->Colors = new_map; + } + } + + ColorUnion->ColorCount = RoundUpTo; + ColorUnion->BitsPerPixel = NewGifBitSize; + + return (ColorUnion); +} + +/******************************************************************************* + Apply a given color translation to the raster bits of an image +*******************************************************************************/ +void GifApplyTranslation(SavedImage *Image, const GifPixelType Translation[]) { + int i; + int RasterSize = + Image->ImageDesc.Height * Image->ImageDesc.Width; + + for (i = 0; i < RasterSize; i++) { + Image->RasterBits[i] = Translation[Image->RasterBits[i]]; + } +} + +/****************************************************************************** + Extension record functions +******************************************************************************/ +int GifAddExtensionBlock(int *ExtensionBlockCount, + ExtensionBlock **ExtensionBlocks, int Function, + unsigned int Len, unsigned char ExtData[]) { + ExtensionBlock *ep; + + if (*ExtensionBlocks == NULL) { + *ExtensionBlocks = + (ExtensionBlock *)malloc(sizeof(ExtensionBlock)); + } else { + ExtensionBlock *ep_new = (ExtensionBlock *)reallocarray( + *ExtensionBlocks, (*ExtensionBlockCount + 1), + sizeof(ExtensionBlock)); + if (ep_new == NULL) { + return (GIF_ERROR); + } + *ExtensionBlocks = ep_new; + } + + if (*ExtensionBlocks == NULL) { + return (GIF_ERROR); + } + + ep = &(*ExtensionBlocks)[(*ExtensionBlockCount)++]; + + ep->Function = Function; + ep->ByteCount = Len; + ep->Bytes = (GifByteType *)malloc(ep->ByteCount); + if (ep->Bytes == NULL) { + return (GIF_ERROR); + } + + if (ExtData != NULL) { + memcpy(ep->Bytes, ExtData, Len); + } + + return (GIF_OK); +} + +void GifFreeExtensions(int 
*ExtensionBlockCount, + ExtensionBlock **ExtensionBlocks) { + ExtensionBlock *ep; + + if (*ExtensionBlocks == NULL) { + return; + } + + for (ep = *ExtensionBlocks; + ep < (*ExtensionBlocks + *ExtensionBlockCount); ep++) { + (void)free((char *)ep->Bytes); + } + (void)free((char *)*ExtensionBlocks); + *ExtensionBlocks = NULL; + *ExtensionBlockCount = 0; +} + +/****************************************************************************** + Image block allocation functions +******************************************************************************/ + +/* Private Function: + * Frees the last image in the GifFile->SavedImages array + */ +void FreeLastSavedImage(GifFileType *GifFile) { + SavedImage *sp; + + if ((GifFile == NULL) || (GifFile->SavedImages == NULL)) { + return; + } + + /* Remove one SavedImage from the GifFile */ + GifFile->ImageCount--; + sp = &GifFile->SavedImages[GifFile->ImageCount]; + + /* Deallocate its Colormap */ + if (sp->ImageDesc.ColorMap != NULL) { + GifFreeMapObject(sp->ImageDesc.ColorMap); + sp->ImageDesc.ColorMap = NULL; + } + + /* Deallocate the image data */ + if (sp->RasterBits != NULL) { + free((char *)sp->RasterBits); + } + + /* Deallocate any extensions */ + GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks); + + /*** FIXME: We could realloc the GifFile->SavedImages structure but is + * there a point to it? Saves some memory but we'd have to do it every + * time. If this is used in GifFreeSavedImages then it would be + * inefficient (The whole array is going to be deallocated.) If we just + * use it when we want to free the last Image it's convenient to do it + * here. 
+ */ +} + +/* + * Append an image block to the SavedImages array + */ +SavedImage *GifMakeSavedImage(GifFileType *GifFile, + const SavedImage *CopyFrom) { + // cppcheck-suppress ctunullpointer + if (GifFile->SavedImages == NULL) { + GifFile->SavedImages = (SavedImage *)malloc(sizeof(SavedImage)); + } else { + SavedImage *newSavedImages = (SavedImage *)reallocarray( + GifFile->SavedImages, (GifFile->ImageCount + 1), + sizeof(SavedImage)); + if (newSavedImages == NULL) { + return ((SavedImage *)NULL); + } + GifFile->SavedImages = newSavedImages; + } + if (GifFile->SavedImages == NULL) { + return ((SavedImage *)NULL); + } else { + SavedImage *sp = &GifFile->SavedImages[GifFile->ImageCount++]; + + if (CopyFrom != NULL) { + memcpy((char *)sp, CopyFrom, sizeof(SavedImage)); + + /* + * Make our own allocated copies of the heap fields in + * the copied record. This guards against potential + * aliasing problems. + */ + + /* first, the local color map */ + if (CopyFrom->ImageDesc.ColorMap != NULL) { + sp->ImageDesc.ColorMap = GifMakeMapObject( + CopyFrom->ImageDesc.ColorMap->ColorCount, + CopyFrom->ImageDesc.ColorMap->Colors); + if (sp->ImageDesc.ColorMap == NULL) { + FreeLastSavedImage(GifFile); + return (SavedImage *)(NULL); + } + } + + /* next, the raster */ + sp->RasterBits = (unsigned char *)reallocarray( + NULL, + (CopyFrom->ImageDesc.Height * + CopyFrom->ImageDesc.Width), + sizeof(GifPixelType)); + if (sp->RasterBits == NULL) { + FreeLastSavedImage(GifFile); + return (SavedImage *)(NULL); + } + memcpy(sp->RasterBits, CopyFrom->RasterBits, + sizeof(GifPixelType) * + CopyFrom->ImageDesc.Height * + CopyFrom->ImageDesc.Width); + + /* finally, the extension blocks */ + if (CopyFrom->ExtensionBlocks != NULL) { + sp->ExtensionBlocks = + (ExtensionBlock *)reallocarray( + NULL, CopyFrom->ExtensionBlockCount, + sizeof(ExtensionBlock)); + if (sp->ExtensionBlocks == NULL) { + FreeLastSavedImage(GifFile); + return (SavedImage *)(NULL); + } + memcpy(sp->ExtensionBlocks, + 
CopyFrom->ExtensionBlocks, + sizeof(ExtensionBlock) * + CopyFrom->ExtensionBlockCount); + } + } else { + memset((char *)sp, '\0', sizeof(SavedImage)); + } + + return (sp); + } +} + +void GifFreeSavedImages(GifFileType *GifFile) { + SavedImage *sp; + + if ((GifFile == NULL) || (GifFile->SavedImages == NULL)) { + return; + } + for (sp = GifFile->SavedImages; + sp < GifFile->SavedImages + GifFile->ImageCount; sp++) { + if (sp->ImageDesc.ColorMap != NULL) { + GifFreeMapObject(sp->ImageDesc.ColorMap); + sp->ImageDesc.ColorMap = NULL; + } + + if (sp->RasterBits != NULL) { + free((char *)sp->RasterBits); + } + + GifFreeExtensions(&sp->ExtensionBlockCount, + &sp->ExtensionBlocks); + } + free((char *)GifFile->SavedImages); + GifFile->SavedImages = NULL; +} + +/* end */ diff --git a/torchvision/csrc/io/image/cpu/giflib/openbsd-reallocarray.c b/torchvision/csrc/io/image/cpu/giflib/openbsd-reallocarray.c new file mode 100644 index 00000000000..7d5f1e73a7d --- /dev/null +++ b/torchvision/csrc/io/image/cpu/giflib/openbsd-reallocarray.c @@ -0,0 +1,74 @@ +// @nolint (improperly imported third-party code) +/* + * SPDX-FileCopyrightText: Copyright (C) 2008 Otto Moerbeek + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include + +#ifndef SIZE_MAX +#define SIZE_MAX UINTPTR_MAX +#endif + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW ((size_t)1 << (sizeof(size_t) * 4)) + +void *openbsd_reallocarray(void *optr, size_t nmemb, size_t size) { + if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + nmemb > 0 && SIZE_MAX / nmemb < size) { + errno = ENOMEM; + return NULL; + } + /* + * Head off variations in realloc behavior on different + * platforms (reported by MarkR ) + * + * The behaviour of reallocarray is implementation-defined if + * nmemb or size is zero. It can return NULL or non-NULL + * depending on the platform. 
+ * https://www.securecoding.cert.org/confluence/display/c/MEM04-C.Beware+of+zero-lengthallocations + * + * Here are some extracts from realloc man pages on different platforms. + * + * void realloc( void memblock, size_t size ); + * + * Windows: + * + * If there is not enough available memory to expand the block + * to the given size, the original block is left unchanged, + * and NULL is returned. If size is zero, then the block + * pointed to by memblock is freed; the return value is NULL, + * and memblock is left pointing at a freed block. + * + * OpenBSD: + * + * If size or nmemb is equal to 0, a unique pointer to an + * access protected, zero sized object is returned. Access via + * this pointer will generate a SIGSEGV exception. + * + * Linux: + * + * If size was equal to 0, either NULL or a pointer suitable + * to be passed to free() is returned. + * + * OS X: + * + * If size is zero and ptr is not NULL, a new, minimum sized + * object is allocated and the original object is freed. + * + * It looks like images with zero width or height can trigger + * this, and fuzzing behaviour will differ by platform, so + * fuzzing on one platform may not detect zero-size allocation + * problems on other platforms. 
+ */ + if (size == 0 || nmemb == 0) { + return NULL; + } + return realloc(optr, size * nmemb); +} diff --git a/torchvision/csrc/io/image/cpu/read_write_file.cpp b/torchvision/csrc/io/image/cpu/read_write_file.cpp new file mode 100644 index 00000000000..06de72a5053 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/read_write_file.cpp @@ -0,0 +1,108 @@ +#include "read_write_file.h" + +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + +namespace vision { +namespace image { + +#ifdef _WIN32 +namespace { +std::wstring utf8_decode(const std::string& str) { + if (str.empty()) { + return std::wstring(); + } + int size_needed = MultiByteToWideChar( + CP_UTF8, 0, str.c_str(), static_cast(str.size()), nullptr, 0); + TORCH_CHECK(size_needed > 0, "Error converting the content to Unicode"); + std::wstring wstrTo(size_needed, 0); + MultiByteToWideChar( + CP_UTF8, + 0, + str.c_str(), + static_cast(str.size()), + &wstrTo[0], + size_needed); + return wstrTo; +} +} // namespace +#endif + +torch::Tensor read_file(const std::string& filename) { + C10_LOG_API_USAGE_ONCE( + "torchvision.csrc.io.image.cpu.read_write_file.read_file"); +#ifdef _WIN32 + // According to + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-functions?view=vs-2019, + // we should use struct __stat64 and _wstat64 for 64-bit file size on Windows. + struct __stat64 stat_buf; + auto fileW = utf8_decode(filename); + int rc = _wstat64(fileW.c_str(), &stat_buf); +#else + struct stat stat_buf; + int rc = stat(filename.c_str(), &stat_buf); +#endif + // errno is a variable defined in errno.h + TORCH_CHECK( + rc == 0, "[Errno ", errno, "] ", strerror(errno), ": '", filename, "'"); + + int64_t size = stat_buf.st_size; + + TORCH_CHECK(size > 0, "Expected a non empty file"); + +#ifdef _WIN32 + // TODO: Once torch::from_file handles UTF-8 paths correctly, we should move + // back to use the following implementation since it uses file mapping. 
+ // auto data = + // torch::from_file(filename, /*shared=*/false, /*size=*/size, + // torch::kU8).clone() + FILE* infile = _wfopen(fileW.c_str(), L"rb"); + + TORCH_CHECK(infile != nullptr, "Error opening input file"); + + auto data = torch::empty({size}, torch::kU8); + auto dataBytes = data.data_ptr(); + + fread(dataBytes, sizeof(uint8_t), size, infile); + fclose(infile); +#else + auto data = + torch::from_file(filename, /*shared=*/false, /*size=*/size, torch::kU8); +#endif + + return data; +} + +void write_file(const std::string& filename, torch::Tensor& data) { + C10_LOG_API_USAGE_ONCE( + "torchvision.csrc.io.image.cpu.read_write_file.write_file"); + // Check that the input tensor is on CPU + TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + + // Check that the input tensor is 3-dimensional + TORCH_CHECK(data.dim() == 1, "Input data should be a 1-dimensional tensor"); + + auto fileBytes = data.data_ptr(); + auto fileCStr = filename.c_str(); +#ifdef _WIN32 + auto fileW = utf8_decode(filename); + FILE* outfile = _wfopen(fileW.c_str(), L"wb"); +#else + FILE* outfile = fopen(fileCStr, "wb"); +#endif + + TORCH_CHECK(outfile != nullptr, "Error opening output file"); + + fwrite(fileBytes, sizeof(uint8_t), data.numel(), outfile); + fclose(outfile); +} + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/read_write_file.h b/torchvision/csrc/io/image/cpu/read_write_file.h new file mode 100644 index 00000000000..a5a712dd8e2 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/read_write_file.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor read_file(const std::string& filename); + +C10_EXPORT void write_file(const std::string& filename, torch::Tensor& data); + +} // namespace image +} // namespace vision diff --git 
a/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp new file mode 100644 index 00000000000..2079ca5f919 --- /dev/null +++ b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp @@ -0,0 +1,603 @@ +#include "decode_jpegs_cuda.h" +#if !NVJPEG_FOUND +namespace vision { +namespace image { +std::vector decode_jpegs_cuda( + const std::vector& encoded_images, + vision::image::ImageReadMode mode, + torch::Device device) { + TORCH_CHECK( + false, "decode_jpegs_cuda: torchvision not compiled with nvJPEG support"); +} +} // namespace image +} // namespace vision + +#else +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace vision { +namespace image { + +std::mutex decoderMutex; +std::unique_ptr cudaJpegDecoder; + +std::vector decode_jpegs_cuda( + const std::vector& encoded_images, + vision::image::ImageReadMode mode, + torch::Device device) { + C10_LOG_API_USAGE_ONCE( + "torchvision.csrc.io.image.cuda.decode_jpegs_cuda.decode_jpegs_cuda"); + + std::lock_guard lock(decoderMutex); + std::vector contig_images; + contig_images.reserve(encoded_images.size()); + + TORCH_CHECK( + device.is_cuda(), "Expected the device parameter to be a cuda device"); + + for (auto& encoded_image : encoded_images) { + TORCH_CHECK( + encoded_image.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); + + TORCH_CHECK( + !encoded_image.is_cuda(), + "The input tensor must be on CPU when decoding with nvjpeg") + + TORCH_CHECK( + encoded_image.dim() == 1 && encoded_image.numel() > 0, + "Expected a non empty 1-dimensional tensor"); + + // nvjpeg requires images to be contiguous + if (encoded_image.is_contiguous()) { + contig_images.push_back(encoded_image); + } else { + contig_images.push_back(encoded_image.contiguous()); + } + } + + int major_version; + int minor_version; + nvjpegStatus_t get_major_property_status = + nvjpegGetProperty(MAJOR_VERSION, 
&major_version); + nvjpegStatus_t get_minor_property_status = + nvjpegGetProperty(MINOR_VERSION, &minor_version); + + TORCH_CHECK( + get_major_property_status == NVJPEG_STATUS_SUCCESS, + "nvjpegGetProperty failed: ", + get_major_property_status); + TORCH_CHECK( + get_minor_property_status == NVJPEG_STATUS_SUCCESS, + "nvjpegGetProperty failed: ", + get_minor_property_status); + if ((major_version < 11) || ((major_version == 11) && (minor_version < 6))) { + TORCH_WARN_ONCE( + "There is a memory leak issue in the nvjpeg library for CUDA versions < 11.6. " + "Make sure to rely on CUDA 11.6 or above before using decode_jpeg(..., device='cuda')."); + } + + at::cuda::CUDAGuard device_guard(device); + + if (cudaJpegDecoder == nullptr || device != cudaJpegDecoder->target_device) { + if (cudaJpegDecoder != nullptr) + cudaJpegDecoder.reset(new CUDAJpegDecoder(device)); + else { + cudaJpegDecoder = std::make_unique(device); + std::atexit([]() { cudaJpegDecoder.reset(); }); + } + } + + nvjpegOutputFormat_t output_format; + + switch (mode) { + case vision::image::IMAGE_READ_MODE_UNCHANGED: + // Using NVJPEG_OUTPUT_UNCHANGED causes differently sized output channels + // which is related to the subsampling used I'm not sure why this is the + // case, but for now we're just using RGB and later removing channels from + // grayscale images. + output_format = NVJPEG_OUTPUT_UNCHANGED; + break; + case vision::image::IMAGE_READ_MODE_GRAY: + output_format = NVJPEG_OUTPUT_Y; + break; + case vision::image::IMAGE_READ_MODE_RGB: + output_format = NVJPEG_OUTPUT_RGB; + break; + default: + TORCH_CHECK( + false, "The provided mode is not supported for JPEG decoding on GPU"); + } + + try { + at::cuda::CUDAEvent event; + auto result = cudaJpegDecoder->decode_images(contig_images, output_format); + auto current_stream{ + device.has_index() ? 
at::cuda::getCurrentCUDAStream( + cudaJpegDecoder->original_device.index()) + : at::cuda::getCurrentCUDAStream()}; + event.record(cudaJpegDecoder->stream); + event.block(current_stream); + return result; + } catch (const std::exception& e) { + if (typeid(e) != typeid(std::runtime_error)) { + TORCH_CHECK(false, "Error while decoding JPEG images: ", e.what()); + } else { + throw; + } + } +} + +CUDAJpegDecoder::CUDAJpegDecoder(const torch::Device& target_device) + : original_device{torch::kCUDA, c10::cuda::current_device()}, + target_device{target_device}, + stream{ + target_device.has_index() + ? at::cuda::getStreamFromPool(false, target_device.index()) + : at::cuda::getStreamFromPool(false)} { + nvjpegStatus_t status; + + hw_decode_available = true; + status = nvjpegCreateEx( + NVJPEG_BACKEND_HARDWARE, + NULL, + NULL, + NVJPEG_FLAGS_DEFAULT, + &nvjpeg_handle); + if (status == NVJPEG_STATUS_ARCH_MISMATCH) { + status = nvjpegCreateEx( + NVJPEG_BACKEND_DEFAULT, + NULL, + NULL, + NVJPEG_FLAGS_DEFAULT, + &nvjpeg_handle); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to initialize nvjpeg with default backend: ", + status); + hw_decode_available = false; + } else { + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to initialize nvjpeg with hardware backend: ", + status); + } + + status = nvjpegJpegStateCreate(nvjpeg_handle, &nvjpeg_state); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create nvjpeg state: ", + status); + + status = nvjpegDecoderCreate( + nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, &nvjpeg_decoder); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create nvjpeg decoder: ", + status); + + status = nvjpegDecoderStateCreate( + nvjpeg_handle, nvjpeg_decoder, &nvjpeg_decoupled_state); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create nvjpeg decoder state: ", + status); + + status = nvjpegBufferPinnedCreate(nvjpeg_handle, NULL, &pinned_buffers[0]); + TORCH_CHECK( + status == 
NVJPEG_STATUS_SUCCESS, + "Failed to create pinned buffer: ", + status); + + status = nvjpegBufferPinnedCreate(nvjpeg_handle, NULL, &pinned_buffers[1]); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create pinned buffer: ", + status); + + status = nvjpegBufferDeviceCreate(nvjpeg_handle, NULL, &device_buffer); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create device buffer: ", + status); + + status = nvjpegJpegStreamCreate(nvjpeg_handle, &jpeg_streams[0]); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create jpeg stream: ", + status); + + status = nvjpegJpegStreamCreate(nvjpeg_handle, &jpeg_streams[1]); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create jpeg stream: ", + status); + + status = nvjpegDecodeParamsCreate(nvjpeg_handle, &nvjpeg_decode_params); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create decode params: ", + status); +} + +CUDAJpegDecoder::~CUDAJpegDecoder() { + /* + The below code works on Mac and Linux, but fails on Windows. + This is because on Windows, the atexit hook which calls this + destructor executes after cuda is already shut down causing SIGSEGV. + We do not have a solution to this problem at the moment, so we'll + just leak the libnvjpeg & cuda variables for the time being and hope + that the CUDA runtime handles cleanup for us. + Please send a PR if you have a solution for this problem. 
+ */ + + // nvjpegStatus_t status; + + // status = nvjpegDecodeParamsDestroy(nvjpeg_decode_params); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy nvjpeg decode params: ", + // status); + + // status = nvjpegJpegStreamDestroy(jpeg_streams[0]); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy jpeg stream: ", + // status); + + // status = nvjpegJpegStreamDestroy(jpeg_streams[1]); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy jpeg stream: ", + // status); + + // status = nvjpegBufferPinnedDestroy(pinned_buffers[0]); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy pinned buffer[0]: ", + // status); + + // status = nvjpegBufferPinnedDestroy(pinned_buffers[1]); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy pinned buffer[1]: ", + // status); + + // status = nvjpegBufferDeviceDestroy(device_buffer); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy device buffer: ", + // status); + + // status = nvjpegJpegStateDestroy(nvjpeg_decoupled_state); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy nvjpeg decoupled state: ", + // status); + + // status = nvjpegDecoderDestroy(nvjpeg_decoder); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy nvjpeg decoder: ", + // status); + + // status = nvjpegJpegStateDestroy(nvjpeg_state); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy nvjpeg state: ", + // status); + + // status = nvjpegDestroy(nvjpeg_handle); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, "nvjpegDestroy failed: ", status); +} + +std::tuple< + std::vector, + std::vector, + std::vector> +CUDAJpegDecoder::prepare_buffers( + const std::vector& encoded_images, + const nvjpegOutputFormat_t& output_format) { + /* + This function scans the encoded images' jpeg headers and + allocates decoding 
buffers based on the metadata found + + Args: + - encoded_images (std::vector): a vector of tensors + containing the jpeg bitstreams to be decoded. Each tensor must have dtype + torch.uint8 and device cpu + - output_format (nvjpegOutputFormat_t): NVJPEG_OUTPUT_RGB, NVJPEG_OUTPUT_Y + or NVJPEG_OUTPUT_UNCHANGED + + Returns: + - decoded_images (std::vector): a vector of nvjpegImages + containing pointers to the memory of the decoded images + - output_tensors (std::vector): a vector of Tensors + containing the decoded images. `decoded_images` points to the memory of + output_tensors + - channels (std::vector): a vector of ints containing the number of + output image channels for every image + */ + + int width[NVJPEG_MAX_COMPONENT]; + int height[NVJPEG_MAX_COMPONENT]; + std::vector channels(encoded_images.size()); + nvjpegChromaSubsampling_t subsampling; + nvjpegStatus_t status; + + std::vector output_tensors{encoded_images.size()}; + std::vector decoded_images{encoded_images.size()}; + + for (std::vector::size_type i = 0; i < encoded_images.size(); + i++) { + // extract bitstream meta data to figure out the number of channels, height, + // width for every image + status = nvjpegGetImageInfo( + nvjpeg_handle, + (unsigned char*)encoded_images[i].data_ptr(), + encoded_images[i].numel(), + &channels[i], + &subsampling, + width, + height); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, "Failed to get image info: ", status); + + TORCH_CHECK( + subsampling != NVJPEG_CSS_UNKNOWN, "Unknown chroma subsampling"); + + // output channels may be different from the actual number of channels in + // the image, e.g. 
we decode a grayscale image as RGB and slice off the + // extra channels later + int output_channels = 3; + if (output_format == NVJPEG_OUTPUT_RGB || + output_format == NVJPEG_OUTPUT_UNCHANGED) { + output_channels = 3; + } else if (output_format == NVJPEG_OUTPUT_Y) { + output_channels = 1; + } + + // reserve output buffer + auto output_tensor = torch::empty( + {int64_t(output_channels), int64_t(height[0]), int64_t(width[0])}, + torch::dtype(torch::kU8).device(target_device)); + output_tensors[i] = output_tensor; + + // fill nvjpegImage_t struct + for (int c = 0; c < output_channels; c++) { + decoded_images[i].channel[c] = output_tensor[c].data_ptr(); + decoded_images[i].pitch[c] = width[0]; + } + for (int c = output_channels; c < NVJPEG_MAX_COMPONENT; c++) { + decoded_images[i].channel[c] = NULL; + decoded_images[i].pitch[c] = 0; + } + } + return {decoded_images, output_tensors, channels}; +} + +std::vector CUDAJpegDecoder::decode_images( + const std::vector& encoded_images, + const nvjpegOutputFormat_t& output_format) { + /* + This function decodes a batch of jpeg bitstreams. + We scan all encoded bitstreams and sort them into two groups: + 1. Baseline JPEGs: Can be decoded with hardware support on A100+ GPUs. + 2. Other JPEGs (e.g. progressive JPEGs): Can also be decoded on the + GPU (albeit with software support only) but need some preprocessing on the + host first. + + See + https://github.com/NVIDIA/CUDALibrarySamples/blob/f17940ac4e705bf47a8c39f5365925c1665f6c98/nvJPEG/nvJPEG-Decoder/nvjpegDecoder.cpp#L33 + for reference. 
+ + Args: + - encoded_images (std::vector): a vector of tensors + containing the jpeg bitstreams to be decoded + - output_format (nvjpegOutputFormat_t): NVJPEG_OUTPUT_RGB, NVJPEG_OUTPUT_Y + or NVJPEG_OUTPUT_UNCHANGED + - device (torch::Device): The desired CUDA device for the returned Tensors + + Returns: + - output_tensors (std::vector): a vector of Tensors + containing the decoded images + */ + + auto [decoded_imgs_buf, output_tensors, channels] = + prepare_buffers(encoded_images, output_format); + + nvjpegStatus_t status; + cudaError_t cudaStatus; + + cudaStatus = cudaStreamSynchronize(stream); + TORCH_CHECK( + cudaStatus == cudaSuccess, + "Failed to synchronize CUDA stream: ", + cudaStatus); + + // baseline JPEGs can be batch decoded with hardware support on A100+ GPUs + // ultra fast! + std::vector hw_input_buffer; + std::vector hw_input_buffer_size; + std::vector hw_output_buffer; + + // other JPEG types such as progressive JPEGs can be decoded one-by-one in + // software slow :( + std::vector sw_input_buffer; + std::vector sw_input_buffer_size; + std::vector sw_output_buffer; + + if (hw_decode_available) { + for (std::vector::size_type i = 0; i < encoded_images.size(); + ++i) { + // extract bitstream meta data to figure out whether a bit-stream can be + // decoded + nvjpegJpegStreamParseHeader( + nvjpeg_handle, + encoded_images[i].data_ptr(), + encoded_images[i].numel(), + jpeg_streams[0]); + int isSupported = -1; + nvjpegDecodeBatchedSupported( + nvjpeg_handle, jpeg_streams[0], &isSupported); + + if (isSupported == 0) { + hw_input_buffer.push_back(encoded_images[i].data_ptr()); + hw_input_buffer_size.push_back(encoded_images[i].numel()); + hw_output_buffer.push_back(decoded_imgs_buf[i]); + } else { + sw_input_buffer.push_back(encoded_images[i].data_ptr()); + sw_input_buffer_size.push_back(encoded_images[i].numel()); + sw_output_buffer.push_back(decoded_imgs_buf[i]); + } + } + } else { + for (std::vector::size_type i = 0; i < encoded_images.size(); + ++i) { 
+ sw_input_buffer.push_back(encoded_images[i].data_ptr()); + sw_input_buffer_size.push_back(encoded_images[i].numel()); + sw_output_buffer.push_back(decoded_imgs_buf[i]); + } + } + + if (hw_input_buffer.size() > 0) { + // UNCHANGED behaves weird, so we use RGB instead + status = nvjpegDecodeBatchedInitialize( + nvjpeg_handle, + nvjpeg_state, + hw_input_buffer.size(), + 1, + output_format == NVJPEG_OUTPUT_UNCHANGED ? NVJPEG_OUTPUT_RGB + : output_format); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to initialize batch decoding: ", + status); + + status = nvjpegDecodeBatched( + nvjpeg_handle, + nvjpeg_state, + hw_input_buffer.data(), + hw_input_buffer_size.data(), + hw_output_buffer.data(), + stream); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, "Failed to decode batch: ", status); + } + + if (sw_input_buffer.size() > 0) { + status = + nvjpegStateAttachDeviceBuffer(nvjpeg_decoupled_state, device_buffer); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to attach device buffer: ", + status); + int buffer_index = 0; + // UNCHANGED behaves weird, so we use RGB instead + status = nvjpegDecodeParamsSetOutputFormat( + nvjpeg_decode_params, + output_format == NVJPEG_OUTPUT_UNCHANGED ? 
NVJPEG_OUTPUT_RGB + : output_format); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to set output format: ", + status); + for (std::vector::size_type i = 0; i < sw_input_buffer.size(); + ++i) { + status = nvjpegJpegStreamParse( + nvjpeg_handle, + sw_input_buffer[i], + sw_input_buffer_size[i], + 0, + 0, + jpeg_streams[buffer_index]); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to parse jpeg stream: ", + status); + + status = nvjpegStateAttachPinnedBuffer( + nvjpeg_decoupled_state, pinned_buffers[buffer_index]); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to attach pinned buffer: ", + status); + + status = nvjpegDecodeJpegHost( + nvjpeg_handle, + nvjpeg_decoder, + nvjpeg_decoupled_state, + nvjpeg_decode_params, + jpeg_streams[buffer_index]); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to decode jpeg stream: ", + status); + + cudaStatus = cudaStreamSynchronize(stream); + TORCH_CHECK( + cudaStatus == cudaSuccess, + "Failed to synchronize CUDA stream: ", + cudaStatus); + + status = nvjpegDecodeJpegTransferToDevice( + nvjpeg_handle, + nvjpeg_decoder, + nvjpeg_decoupled_state, + jpeg_streams[buffer_index], + stream); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to transfer jpeg to device: ", + status); + + buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode + // to avoid an extra sync + + status = nvjpegDecodeJpegDevice( + nvjpeg_handle, + nvjpeg_decoder, + nvjpeg_decoupled_state, + &sw_output_buffer[i], + stream); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to decode jpeg stream: ", + status); + } + } + + cudaStatus = cudaStreamSynchronize(stream); + TORCH_CHECK( + cudaStatus == cudaSuccess, + "Failed to synchronize CUDA stream: ", + cudaStatus); + + // prune extraneous channels from single channel images + if (output_format == NVJPEG_OUTPUT_UNCHANGED) { + for (std::vector::size_type i = 0; i < output_tensors.size(); + ++i) { + if (channels[i] == 1) { + 
output_tensors[i] = output_tensors[i][0].unsqueeze(0).clone(); + } + } + } + + return output_tensors; +} + +} // namespace image +} // namespace vision + +#endif diff --git a/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.h b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.h new file mode 100644 index 00000000000..6f72d9e35b2 --- /dev/null +++ b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.h @@ -0,0 +1,45 @@ +#pragma once +#include +#include +#include "../common.h" + +#if NVJPEG_FOUND +#include +#include + +namespace vision { +namespace image { +class CUDAJpegDecoder { + public: + CUDAJpegDecoder(const torch::Device& target_device); + ~CUDAJpegDecoder(); + + std::vector decode_images( + const std::vector& encoded_images, + const nvjpegOutputFormat_t& output_format); + + const torch::Device original_device; + const torch::Device target_device; + const c10::cuda::CUDAStream stream; + + private: + std::tuple< + std::vector, + std::vector, + std::vector> + prepare_buffers( + const std::vector& encoded_images, + const nvjpegOutputFormat_t& output_format); + nvjpegJpegState_t nvjpeg_state; + nvjpegJpegState_t nvjpeg_decoupled_state; + nvjpegBufferPinned_t pinned_buffers[2]; + nvjpegBufferDevice_t device_buffer; + nvjpegJpegStream_t jpeg_streams[2]; + nvjpegDecodeParams_t nvjpeg_decode_params; + nvjpegJpegDecoder_t nvjpeg_decoder; + bool hw_decode_available{false}; + nvjpegHandle_t nvjpeg_handle; +}; +} // namespace image +} // namespace vision +#endif diff --git a/torchvision/csrc/io/image/cuda/encode_decode_jpegs_cuda.h b/torchvision/csrc/io/image/cuda/encode_decode_jpegs_cuda.h new file mode 100644 index 00000000000..8c3ad8f9a9d --- /dev/null +++ b/torchvision/csrc/io/image/cuda/encode_decode_jpegs_cuda.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include "../common.h" +#include "decode_jpegs_cuda.h" +#include "encode_jpegs_cuda.h" + +namespace vision { +namespace image { + +/* + +Fast jpeg decoding with CUDA. 
+A100+ GPUs have dedicated hardware support for jpeg decoding. + +Args: + - encoded_images (const std::vector&): a vector of tensors + containing the jpeg bitstreams to be decoded. Each tensor must have dtype + torch.uint8 and device cpu + - mode (ImageReadMode): IMAGE_READ_MODE_UNCHANGED, IMAGE_READ_MODE_GRAY and +IMAGE_READ_MODE_RGB are supported + - device (torch::Device): The desired CUDA device to run the decoding on and +which will contain the output tensors + +Returns: + - decoded_images (std::vector): a vector of torch::Tensors of +dtype torch.uint8 on the specified containing the decoded images + +Notes: + - If a single image fails, the whole batch fails. + - This function is thread-safe +*/ +C10_EXPORT std::vector decode_jpegs_cuda( + const std::vector& encoded_images, + vision::image::ImageReadMode mode, + torch::Device device); + +/* +Fast jpeg encoding with CUDA. + +Args: + - decoded_images (const std::vector&): a vector of contiguous +CUDA tensors of dtype torch.uint8 to be encoded. + - quality (int64_t): 0-100, 75 is the default + +Returns: + - encoded_images (std::vector): a vector of CUDA +torch::Tensors of dtype torch.uint8 containing the encoded images + +Notes: + - If a single image fails, the whole batch fails. 
+ - This function is thread-safe +*/ +C10_EXPORT std::vector encode_jpegs_cuda( + const std::vector& decoded_images, + const int64_t quality); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp new file mode 100644 index 00000000000..1f10327ddbf --- /dev/null +++ b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp @@ -0,0 +1,274 @@ +#include "encode_jpegs_cuda.h" +#if !NVJPEG_FOUND +namespace vision { +namespace image { +std::vector encode_jpegs_cuda( + const std::vector& decoded_images, + const int64_t quality) { + TORCH_CHECK( + false, "encode_jpegs_cuda: torchvision not compiled with nvJPEG support"); +} +} // namespace image +} // namespace vision +#else + +#include +#include +#include +#include +#include +#include +#include +#include +#include "c10/core/ScalarType.h" + +namespace vision { +namespace image { + +// We use global variables to cache the encoder and decoder instances and +// reuse them across calls to the corresponding pytorch functions +std::mutex encoderMutex; +std::unique_ptr cudaJpegEncoder; + +std::vector encode_jpegs_cuda( + const std::vector& decoded_images, + const int64_t quality) { + C10_LOG_API_USAGE_ONCE( + "torchvision.csrc.io.image.cuda.encode_jpegs_cuda.encode_jpegs_cuda"); + + // Some nvjpeg structures are not thread safe so we're keeping it single + // threaded for now. In the future this may be an opportunity to unlock + // further speedups + std::lock_guard lock(encoderMutex); + TORCH_CHECK(decoded_images.size() > 0, "Empty input tensor list"); + torch::Device device = decoded_images[0].device(); + at::cuda::CUDAGuard device_guard(device); + + // lazy init of the encoder class + // the encoder object holds on to a lot of state and is expensive to create, + // so we reuse it across calls. 
NB: the cached structures are device specific + // and cannot be reused across devices + if (cudaJpegEncoder == nullptr || device != cudaJpegEncoder->target_device) { + if (cudaJpegEncoder != nullptr) + delete cudaJpegEncoder.release(); + + cudaJpegEncoder = std::make_unique(device); + + // Unfortunately, we cannot rely on the smart pointer releasing the encoder + // object correctly upon program exit. This is because, when cudaJpegEncoder + // gets destroyed, the CUDA runtime may already be shut down, rendering all + // destroy* calls in the encoder destructor invalid. Instead, we use an + // atexit hook which executes after main() finishes, but hopefully before + // CUDA shuts down when the program exits. If CUDA is already shut down the + // destructor will detect this and will not attempt to destroy any encoder + // structures. + std::atexit([]() { delete cudaJpegEncoder.release(); }); + } + + std::vector contig_images; + contig_images.reserve(decoded_images.size()); + for (const auto& image : decoded_images) { + TORCH_CHECK( + image.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + + TORCH_CHECK( + image.device() == device, + "All input tensors must be on the same CUDA device when encoding with nvjpeg") + + TORCH_CHECK( + image.dim() == 3 && image.numel() > 0, + "Input data should be a 3-dimensional tensor"); + + TORCH_CHECK( + image.size(0) == 3, + "The number of channels should be 3, got: ", + image.size(0)); + + // nvjpeg requires images to be contiguous + if (image.is_contiguous()) { + contig_images.push_back(image); + } else { + contig_images.push_back(image.contiguous()); + } + } + + cudaJpegEncoder->set_quality(quality); + std::vector encoded_images; + at::cuda::CUDAEvent event; + event.record(cudaJpegEncoder->stream); + for (const auto& image : contig_images) { + auto encoded_image = cudaJpegEncoder->encode_jpeg(image); + encoded_images.push_back(encoded_image); + } + + // We use a dedicated stream to do the encoding and even though the 
results + // may be ready on that stream we cannot assume that they are also available + // on the current stream of the calling context when this function returns. We + // use a blocking event to ensure that this is indeed the case. Crucially, we + // do not want to block the host at this particular point + // (which is what cudaStreamSynchronize would do.) Events allow us to + // synchronize the streams without blocking the host. + event.block(at::cuda::getCurrentCUDAStream( + cudaJpegEncoder->original_device.has_index() + ? cudaJpegEncoder->original_device.index() + : 0)); + return encoded_images; +} + +CUDAJpegEncoder::CUDAJpegEncoder(const torch::Device& target_device) + : original_device{torch::kCUDA, torch::cuda::current_device()}, + target_device{target_device}, + stream{ + target_device.has_index() + ? at::cuda::getStreamFromPool(false, target_device.index()) + : at::cuda::getStreamFromPool(false)} { + nvjpegStatus_t status; + status = nvjpegCreateSimple(&nvjpeg_handle); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create nvjpeg handle: ", + status); + + status = nvjpegEncoderStateCreate(nvjpeg_handle, &nv_enc_state, stream); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create nvjpeg encoder state: ", + status); + + status = nvjpegEncoderParamsCreate(nvjpeg_handle, &nv_enc_params, stream); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to create nvjpeg encoder params: ", + status); +} + +CUDAJpegEncoder::~CUDAJpegEncoder() { + /* + The below code works on Mac and Linux, but fails on Windows. + This is because on Windows, the atexit hook which calls this + destructor executes after cuda is already shut down causing SIGSEGV. + We do not have a solution to this problem at the moment, so we'll + just leak the libnvjpeg & cuda variables for the time being and hope + that the CUDA runtime handles cleanup for us. + Please send a PR if you have a solution for this problem. 
+ */ + + // // We run cudaGetDeviceCount as a dummy to test if the CUDA runtime is + // still + // // initialized. If it is not, we can skip the rest of this function as it + // is + // // unsafe to execute. + // int deviceCount = 0; + // cudaError_t error = cudaGetDeviceCount(&deviceCount); + // if (error != cudaSuccess) + // return; // CUDA runtime has already shut down. There's nothing we can do + // // now. + + // nvjpegStatus_t status; + + // status = nvjpegEncoderParamsDestroy(nv_enc_params); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy nvjpeg encoder params: ", + // status); + + // status = nvjpegEncoderStateDestroy(nv_enc_state); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, + // "Failed to destroy nvjpeg encoder state: ", + // status); + + // cudaStreamSynchronize(stream); + + // status = nvjpegDestroy(nvjpeg_handle); + // TORCH_CHECK( + // status == NVJPEG_STATUS_SUCCESS, "nvjpegDestroy failed: ", status); +} + +torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) { + int channels = src_image.size(0); + int height = src_image.size(1); + int width = src_image.size(2); + + nvjpegStatus_t status; + cudaError_t cudaStatus; + status = nvjpegEncoderParamsSetSamplingFactors( + nv_enc_params, NVJPEG_CSS_444, stream); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to set nvjpeg encoder params sampling factors: ", + status); + + nvjpegImage_t target_image; + for (int c = 0; c < channels; c++) { + target_image.channel[c] = src_image[c].data_ptr(); + // this is why we need contiguous tensors + target_image.pitch[c] = width; + } + for (int c = channels; c < NVJPEG_MAX_COMPONENT; c++) { + target_image.channel[c] = nullptr; + target_image.pitch[c] = 0; + } + // Encode the image + status = nvjpegEncodeImage( + nvjpeg_handle, + nv_enc_state, + nv_enc_params, + &target_image, + NVJPEG_INPUT_RGB, + width, + height, + stream); + + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, "image encoding 
failed: ", status); + // Retrieve length of the encoded image + size_t length; + status = nvjpegEncodeRetrieveBitstreamDevice( + nvjpeg_handle, nv_enc_state, NULL, &length, stream); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to retrieve encoded image stream state: ", + status); + + // Synchronize the stream to ensure that the encoded image is ready + cudaStatus = cudaStreamSynchronize(stream); + TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); + + // Reserve buffer for the encoded image + torch::Tensor encoded_image = torch::empty( + {static_cast(length)}, + torch::TensorOptions() + .dtype(torch::kByte) + .layout(torch::kStrided) + .device(target_device) + .requires_grad(false)); + cudaStatus = cudaStreamSynchronize(stream); + TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); + // Retrieve the encoded image + status = nvjpegEncodeRetrieveBitstreamDevice( + nvjpeg_handle, + nv_enc_state, + encoded_image.data_ptr(), + &length, + 0); + TORCH_CHECK( + status == NVJPEG_STATUS_SUCCESS, + "Failed to retrieve encoded image: ", + status); + return encoded_image; +} + +void CUDAJpegEncoder::set_quality(const int64_t quality) { + nvjpegStatus_t paramsQualityStatus = + nvjpegEncoderParamsSetQuality(nv_enc_params, quality, stream); + TORCH_CHECK( + paramsQualityStatus == NVJPEG_STATUS_SUCCESS, + "Failed to set nvjpeg encoder params quality: ", + paramsQualityStatus); +} + +} // namespace image +} // namespace vision + +#endif // NVJPEG_FOUND diff --git a/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.h b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.h new file mode 100644 index 00000000000..543940f1585 --- /dev/null +++ b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.h @@ -0,0 +1,33 @@ +#pragma once +#include +#include +#if NVJPEG_FOUND + +#include +#include +#include + +namespace vision { +namespace image { + +class CUDAJpegEncoder { + public: + CUDAJpegEncoder(const torch::Device& device); + ~CUDAJpegEncoder(); + + 
torch::Tensor encode_jpeg(const torch::Tensor& src_image); + + void set_quality(const int64_t quality); + + const torch::Device original_device; + const torch::Device target_device; + const c10::cuda::CUDAStream stream; + + protected: + nvjpegEncoderState_t nv_enc_state; + nvjpegEncoderParams_t nv_enc_params; + nvjpegHandle_t nvjpeg_handle; +}; +} // namespace image +} // namespace vision +#endif diff --git a/torchvision/csrc/io/image/image.cpp b/torchvision/csrc/io/image/image.cpp new file mode 100644 index 00000000000..2ac29e6b1ee --- /dev/null +++ b/torchvision/csrc/io/image/image.cpp @@ -0,0 +1,37 @@ +#include "image.h" + +#include + +// If we are in a Windows environment, we need to define +// initialization functions for the _custom_ops extension +#ifdef _WIN32 +void* PyInit_image(void) { + return nullptr; +} +#endif + +namespace vision { +namespace image { + +static auto registry = + torch::RegisterOperators() + .op("image::decode_gif", &decode_gif) + .op("image::decode_png(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor", + &decode_png) + .op("image::encode_png", &encode_png) + .op("image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor", + &decode_jpeg) + .op("image::decode_webp(Tensor encoded_data, int mode) -> Tensor", + &decode_webp) + .op("image::encode_jpeg", &encode_jpeg) + .op("image::read_file", &read_file) + .op("image::write_file", &write_file) + .op("image::decode_image(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor", + &decode_image) + .op("image::decode_jpegs_cuda", &decode_jpegs_cuda) + .op("image::encode_jpegs_cuda", &encode_jpegs_cuda) + .op("image::_jpeg_version", &_jpeg_version) + .op("image::_is_compiled_against_turbo", &_is_compiled_against_turbo); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/image.h b/torchvision/csrc/io/image/image.h new file mode 100644 index 00000000000..3f47fdec65c --- /dev/null +++ 
b/torchvision/csrc/io/image/image.h @@ -0,0 +1,11 @@ +#pragma once + +#include "cpu/decode_gif.h" +#include "cpu/decode_image.h" +#include "cpu/decode_jpeg.h" +#include "cpu/decode_png.h" +#include "cpu/decode_webp.h" +#include "cpu/encode_jpeg.h" +#include "cpu/encode_png.h" +#include "cpu/read_write_file.h" +#include "cuda/encode_decode_jpegs_cuda.h" diff --git a/torchvision/csrc/io/video/video.cpp b/torchvision/csrc/io/video/video.cpp new file mode 100644 index 00000000000..0340c97794d --- /dev/null +++ b/torchvision/csrc/io/video/video.cpp @@ -0,0 +1,387 @@ +#include "video.h" + +#include + +using namespace ffmpeg; + +namespace vision { +namespace video { + +namespace { + +const size_t decoderTimeoutMs = 600000; +const AVPixelFormat defaultVideoPixelFormat = AV_PIX_FMT_RGB24; + +// returns number of written bytes +template +size_t fillTensorList(DecoderOutputMessage& msgs, torch::Tensor& frame) { + const auto& msg = msgs; + T* frameData = frame.numel() > 0 ? frame.data_ptr() : nullptr; + if (frameData) { + auto sizeInBytes = msg.payload->length(); + memcpy(frameData, msg.payload->data(), sizeInBytes); + } + return sizeof(T); +} + +size_t fillVideoTensor(DecoderOutputMessage& msgs, torch::Tensor& videoFrame) { + return fillTensorList(msgs, videoFrame); +} + +size_t fillAudioTensor(DecoderOutputMessage& msgs, torch::Tensor& audioFrame) { + return fillTensorList(msgs, audioFrame); +} + +std::array, 4>::const_iterator +_parse_type(const std::string& stream_string) { + static const std::array, 4> types = {{ + {"video", TYPE_VIDEO}, + {"audio", TYPE_AUDIO}, + {"subtitle", TYPE_SUBTITLE}, + {"cc", TYPE_CC}, + }}; + auto device = std::find_if( + types.begin(), + types.end(), + [stream_string](const std::pair& p) { + return p.first == stream_string; + }); + if (device != types.end()) { + return device; + } + TORCH_CHECK( + false, "Expected one of [audio, video, subtitle, cc] ", stream_string); +} + +std::string parse_type_to_string(const std::string& stream_string) { + 
auto device = _parse_type(stream_string); + return device->first; +} + +MediaType parse_type_to_mt(const std::string& stream_string) { + auto device = _parse_type(stream_string); + return device->second; +} + +std::tuple _parseStream(const std::string& streamString) { + TORCH_CHECK(!streamString.empty(), "Stream string must not be empty"); + static const std::regex regex("([a-zA-Z_]+)(?::([1-9]\\d*|0))?"); + std::smatch match; + + TORCH_CHECK( + std::regex_match(streamString, match, regex), + "Invalid stream string: '", + streamString, + "'"); + + std::string type_ = "video"; + type_ = parse_type_to_string(match[1].str()); + long index_ = -1; + if (match[2].matched) { + try { + index_ = std::stoi(match[2].str()); + } catch (const std::exception&) { + TORCH_CHECK( + false, + "Could not parse device index '", + match[2].str(), + "' in device string '", + streamString, + "'"); + } + } + return std::make_tuple(type_, index_); +} + +} // namespace + +void Video::_getDecoderParams( + double videoStartS, + int64_t getPtsOnly, + std::string stream, + long stream_id = -1, + bool fastSeek = true, + bool all_streams = false, + int64_t num_threads = 1, + double seekFrameMarginUs = 10) { + int64_t videoStartUs = int64_t(videoStartS * 1e6); + + params.timeoutMs = decoderTimeoutMs; + params.startOffset = videoStartUs; + params.seekAccuracy = seekFrameMarginUs; + params.fastSeek = fastSeek; + params.headerOnly = false; + params.numThreads = num_threads; + + params.preventStaleness = false; // not sure what this is about + + if (all_streams == true) { + MediaFormat format; + format.stream = -2; + format.type = TYPE_AUDIO; + params.formats.insert(format); + + format.type = TYPE_VIDEO; + format.stream = -2; + format.format.video.width = 0; + format.format.video.height = 0; + format.format.video.cropImage = 0; + format.format.video.format = defaultVideoPixelFormat; + params.formats.insert(format); + + format.type = TYPE_SUBTITLE; + format.stream = -2; + params.formats.insert(format); 
+ + format.type = TYPE_CC; + format.stream = -2; + params.formats.insert(format); + } else { + // parse stream type + MediaType stream_type = parse_type_to_mt(stream); + + // TODO: reset params.formats + std::set formats; + params.formats = formats; + // Define new format + MediaFormat format; + format.type = stream_type; + format.stream = stream_id; + if (stream_type == TYPE_VIDEO) { + format.format.video.width = 0; + format.format.video.height = 0; + format.format.video.cropImage = 0; + format.format.video.format = defaultVideoPixelFormat; + } + params.formats.insert(format); + } + +} // _get decoder params + +void Video::initFromFile( + std::string videoPath, + std::string stream, + int64_t numThreads) { + TORCH_CHECK(!initialized, "Video object can only be initialized once"); + initialized = true; + params.uri = videoPath; + _init(stream, numThreads); +} + +void Video::initFromMemory( + torch::Tensor videoTensor, + std::string stream, + int64_t numThreads) { + TORCH_CHECK(!initialized, "Video object can only be initialized once"); + initialized = true; + callback = MemoryBuffer::getCallback( + videoTensor.data_ptr(), videoTensor.size(0)); + _init(stream, numThreads); +} + +void Video::_init(std::string stream, int64_t numThreads) { + // set number of threads global + numThreads_ = numThreads; + // parse stream information + current_stream = _parseStream(stream); + // note that in the initial call we want to get all streams + _getDecoderParams( + 0, // video start + 0, // headerOnly + std::get<0>(current_stream), // stream info - remove that + long(-1), // stream_id parsed from info above change to -2 + false, // fastseek: we're using the default param here + true, // read all streams + numThreads_ // global number of Threads for decoding + ); + + std::string logMessage, logType; + + // locals + std::vector audioFPS, videoFPS; + std::vector audioDuration, videoDuration, ccDuration, subsDuration; + std::vector audioTB, videoTB, ccTB, subsTB; + c10::Dict> 
audioMetadata; + c10::Dict> videoMetadata; + c10::Dict> ccMetadata; + c10::Dict> subsMetadata; + + // callback and metadata defined in struct + DecoderInCallback tmp_callback = callback; + succeeded = decoder.init(params, std::move(tmp_callback), &metadata); + if (succeeded) { + for (const auto& header : metadata) { + double fps = double(header.fps); + double duration = double(header.duration) * 1e-6; // * timeBase; + + if (header.format.type == TYPE_VIDEO) { + videoFPS.push_back(fps); + videoDuration.push_back(duration); + } else if (header.format.type == TYPE_AUDIO) { + audioFPS.push_back(fps); + audioDuration.push_back(duration); + } else if (header.format.type == TYPE_CC) { + ccDuration.push_back(duration); + } else if (header.format.type == TYPE_SUBTITLE) { + subsDuration.push_back(duration); + }; + } + } + // audio + audioMetadata.insert("duration", audioDuration); + audioMetadata.insert("framerate", audioFPS); + // video + videoMetadata.insert("duration", videoDuration); + videoMetadata.insert("fps", videoFPS); + // subs + subsMetadata.insert("duration", subsDuration); + // cc + ccMetadata.insert("duration", ccDuration); + // put all to a data + streamsMetadata.insert("video", videoMetadata); + streamsMetadata.insert("audio", audioMetadata); + streamsMetadata.insert("subtitles", subsMetadata); + streamsMetadata.insert("cc", ccMetadata); + + succeeded = setCurrentStream(stream); + if (std::get<1>(current_stream) != -1) { + LOG(INFO) + << "Stream index set to " << std::get<1>(current_stream) + << ". If you encounter trouble, consider switching it to automatic stream discovery. 
\n"; + } +} + +Video::Video(std::string videoPath, std::string stream, int64_t numThreads) { + C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.video.video.Video"); + if (!videoPath.empty()) { + initFromFile(videoPath, stream, numThreads); + } +} // video + +bool Video::setCurrentStream(std::string stream = "video") { + TORCH_CHECK(initialized, "Video object has to be initialized first"); + if ((!stream.empty()) && (_parseStream(stream) != current_stream)) { + current_stream = _parseStream(stream); + } + + double ts = 0; + if (seekTS > 0) { + ts = seekTS; + } + + _getDecoderParams( + ts, // video start + 0, // headerOnly + std::get<0>(current_stream), // stream + long(std::get<1>( + current_stream)), // stream_id parsed from info above change to -2 + false, // fastseek param set to 0 false by default (changed in seek) + false, // read all streams + numThreads_ // global number of threads + ); + + // callback and metadata defined in Video.h + DecoderInCallback tmp_callback = callback; + return (decoder.init(params, std::move(tmp_callback), &metadata)); +} + +std::tuple Video::getCurrentStream() const { + TORCH_CHECK(initialized, "Video object has to be initialized first"); + return current_stream; +} + +c10::Dict>> +Video::getStreamMetadata() const { + TORCH_CHECK(initialized, "Video object has to be initialized first"); + return streamsMetadata; +} + +void Video::Seek(double ts, bool fastSeek = false) { + TORCH_CHECK(initialized, "Video object has to be initialized first"); + // initialize the class variables used for seeking and retrurn + _getDecoderParams( + ts, // video start + 0, // headerOnly + std::get<0>(current_stream), // stream + long(std::get<1>( + current_stream)), // stream_id parsed from info above change to -2 + fastSeek, // fastseek + false, // read all streams + numThreads_ // global number of threads + ); + + // callback and metadata defined in Video.h + DecoderInCallback tmp_callback = callback; + succeeded = decoder.init(params, 
std::move(tmp_callback), &metadata); +} + +std::tuple Video::Next() { + TORCH_CHECK(initialized, "Video object has to be initialized first"); + // if failing to decode simply return a null tensor (note, should we + // raise an exception?) + double frame_pts_s; + torch::Tensor outFrame = torch::zeros({0}, torch::kByte); + + // decode single frame + DecoderOutputMessage out; + int64_t res = decoder.decode(&out, decoderTimeoutMs); + // if successful + if (res == 0) { + frame_pts_s = double(double(out.header.pts) * 1e-6); + + auto header = out.header; + const auto& format = header.format; + + // initialize the output variables based on type + + if (format.type == TYPE_VIDEO) { + // note: this can potentially be optimized + // by having the global tensor that we fill at decode time + // (would avoid allocations) + int outHeight = format.format.video.height; + int outWidth = format.format.video.width; + int numChannels = 3; + outFrame = torch::zeros({outHeight, outWidth, numChannels}, torch::kByte); + fillVideoTensor(out, outFrame); + outFrame = outFrame.permute({2, 0, 1}); + + } else if (format.type == TYPE_AUDIO) { + int outAudioChannels = format.format.audio.channels; + int bytesPerSample = av_get_bytes_per_sample( + static_cast(format.format.audio.format)); + int frameSizeTotal = out.payload->length(); + + TORCH_CHECK_EQ(frameSizeTotal % (outAudioChannels * bytesPerSample), 0); + int numAudioSamples = + frameSizeTotal / (outAudioChannels * bytesPerSample); + + outFrame = + torch::zeros({numAudioSamples, outAudioChannels}, torch::kFloat); + + fillAudioTensor(out, outFrame); + } + // currently not supporting other formats (will do soon) + + out.payload.reset(); + } else if (res == ENODATA) { + LOG(INFO) << "Decoder ran out of frames (ENODATA)\n"; + } else { + LOG(ERROR) << "Decoder failed with ERROR_CODE " << res; + } + + return std::make_tuple(outFrame, frame_pts_s); +} + +static auto registerVideo = + torch::class_