add demo code (leoxiaobin#161)

alex9311 · web-flow · commit e00377a256e1 · 2020-02-01T16:37:21.000+08:00
* add demo code

* demo code review update: use BGR for pose model

* demo code review update 2
diff --git a/demo/.gitignore b/demo/.gitignore
@@ -0,0 +1,3 @@
+# Local working directories that the README bind-mounts into the container
+# (inference results, downloaded model weights, input videos); kept out of git.
+output
+models
+videos
diff --git a/demo/Dockerfile b/demo/Dockerfile
@@ -0,0 +1,112 @@
+# CUDA 10.2 + cuDNN 7 development image on Ubuntu 16.04; the "devel" variant is
+# used because OpenCV is compiled with CUDA support further down.
+FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04
+
+# Version of the OpenCV and opencv_contrib source archives fetched by the build step below.
+ENV OPENCV_VERSION="3.4.6"
+
+# Basic toolchain: compilers, fetch/archive tools, Python 3 and the x264 headers
+# needed by the FFmpeg build (--enable-libx264).
+# - `python` is made an alias of python3 because later steps invoke `python`.
+# - pip is upgraded but capped below 21: the base image is Ubuntu 16.04
+#   (Python 3.5), and pip 21+ no longer supports Python 3.5.
+# - The apt package index is removed in the same layer so it is not baked into
+#   the image; every later apt step runs its own `apt-get update`.
+RUN apt-get update && apt-get install -y \
+        apt-utils \
+        build-essential \
+        git \
+        wget \
+        unzip \
+        yasm \
+        pkg-config \
+        libcurl4-openssl-dev \
+        zlib1g-dev \
+        htop \
+        cmake \
+        nano \
+        python3-pip \
+        python3-dev \
+        python3-tk \
+        libx264-dev \
+    && ln -s /usr/bin/python3 /usr/local/bin/python \
+    && pip3 install --upgrade "pip<21" \
+    && apt-get autoremove -y \
+    && rm -rf /var/lib/apt/lists/*
+
+# OpenCV build dependencies that are available from apt (image codecs, Eigen,
+# and the libav* / swscale headers used for video I/O).
+# The package index is deleted in the same layer to keep the image smaller.
+RUN apt-get update && apt-get install -y \
+        libeigen3-dev \
+        libjpeg-dev \
+        libpng-dev \
+        libtiff-dev \
+        libjasper-dev \
+        libswscale-dev \
+        libavcodec-dev \
+        libavformat-dev \
+    && apt-get autoremove -y \
+    && rm -rf /var/lib/apt/lists/*
+
+# Remaining apt dependencies (analysis/documentation tools, 7zip and dlib).
+# The package index is deleted in the same layer to keep the image smaller.
+RUN apt-get update && apt-get install -y \
+        cppcheck \
+        graphviz \
+        doxygen \
+        p7zip-full \
+        libdlib18 \
+        libdlib-dev \
+    && apt-get autoremove -y \
+    && rm -rf /var/lib/apt/lists/*
+
+
+# Build and install OpenCV + opencv_contrib ${OPENCV_VERSION} from source with
+# CUDA enabled (this is the slowest step of the image build).
+# - Sources are downloaded over HTTPS with certificate verification left on,
+#   so a tampered or intercepted archive fails the build instead of being compiled.
+# - Archives and build trees are deleted in the same layer so they do not
+#   end up in the image.
+RUN cd /tmp && \
+    wget -O opencv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \
+    wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \
+    unzip opencv.zip && \
+    unzip opencv_contrib.zip && \
+    mkdir opencv-${OPENCV_VERSION}/build && \
+    cd opencv-${OPENCV_VERSION}/build && \
+    cmake -D CMAKE_BUILD_TYPE=RELEASE \
+        -D CMAKE_INSTALL_PREFIX=/usr/local \
+        -D WITH_CUDA=ON \
+        -D CUDA_FAST_MATH=1 \
+        -D WITH_CUBLAS=1 \
+        -D WITH_FFMPEG=ON \
+        -D WITH_OPENCL=ON \
+        -D WITH_V4L=ON \
+        -D WITH_OPENGL=ON \
+        -D OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \
+        .. && \
+    make -j"$(nproc)" && \
+    make install && \
+    # Register /usr/local/lib with the dynamic linker so the new libraries are found.
+    echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf && \
+    ldconfig && \
+    cd /tmp && \
+    rm -rf opencv-${OPENCV_VERSION} opencv.zip opencv_contrib-${OPENCV_VERSION} opencv_contrib.zip
+
+# Compile and install FFmpeg from source (static build with libx264).
+# - FFMPEG_VERSION is a git tag or branch; pinning it keeps rebuilds reproducible
+#   instead of tracking whatever master happens to be. Override with
+#   `--build-arg FFMPEG_VERSION=<ref>` if a different release is needed.
+# - A shallow clone avoids downloading the full history, the build uses all
+#   available cores, and the source tree is removed in the same layer after
+#   `make install`.
+ARG FFMPEG_VERSION=n4.2.2
+RUN git clone --depth 1 --branch ${FFMPEG_VERSION} https://github.com/FFmpeg/FFmpeg /root/ffmpeg && \
+    cd /root/ffmpeg && \
+    ./configure --enable-gpl --enable-libx264 --enable-nonfree --disable-shared --extra-cflags=-I/usr/local/include && \
+    make -j"$(nproc)" && \
+    make install && \
+    cd / && \
+    rm -rf /root/ffmpeg
+
+# Fetch the deep-high-resolution-net sources into POSE_ROOT and work from there.
+ARG POSE_ROOT=/pose_root
+RUN git clone https://github.com/leoxiaobin/deep-high-resolution-net.pytorch.git "${POSE_ROOT}"
+WORKDIR ${POSE_ROOT}
+# Directory names match OUTPUT_DIR / LOG_DIR in inference-config.yaml.
+RUN mkdir output log
+
+# Python dependencies: the repository's own requirements, then the framework
+# versions the demo is pinned to. --no-cache-dir keeps pip's download cache
+# out of the image layer.
+RUN pip3 install --no-cache-dir -r requirements.txt && \
+    pip3 install --no-cache-dir \
+        torch==1.1.0 \
+        torchvision==0.3.0 \
+        opencv-python \
+        pillow==6.2.1
+
+# Build deep-high-resolution-net's lib directory by running its Makefile.
+# NOTE(review): presumably this compiles the repo's native extensions — confirm
+# against lib/Makefile.
+WORKDIR $POSE_ROOT/lib
+RUN make
+
+# Install the COCO API: clone it to COCOAPI and install its Python bindings.
+ARG COCOAPI=/cocoapi
+RUN git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
+WORKDIR $COCOAPI/PythonAPI
+# Install into global site-packages
+RUN make install
+
+# Download the pretrained Faster R-CNN model used for person detection.
+# Instantiating it with pretrained=True during the build fetches the weights now,
+# presumably so they are already cached in the image when inference first runs.
+# `python` here is the /usr/local/bin/python -> python3 symlink created in the toolchain step.
+RUN python -c "import torchvision; model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True); model.eval()"
+
+# Add the demo script and its config to the cloned repository.
+# The trailing slash marks the destination as a directory, so inference.py is
+# always placed inside tools/ and never written as a file named "tools".
+COPY inference.py $POSE_ROOT/tools/
+COPY inference-config.yaml $POSE_ROOT/
diff --git a/demo/README.md b/demo/README.md
@@ -0,0 +1,41 @@
+This demo code is meant to be run on a video and includes a person detector.
+[Nvidia-docker](https://github.com/NVIDIA/nvidia-docker) and GPUs are required.
+It only expects there to be one person in each frame of video, though the code could easily be extended to support multiple people.
+
+### Prep
+1. Download the researchers' pretrained pose estimator from [Google Drive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) into the `models/` folder in this directory
+2. Put the video file you'd like to run inference on into the `videos/` folder in this directory
+3. Build the Docker image in this directory with `./build-docker.sh` (this can take a while because it involves compiling OpenCV)
+4. Update the `inference-config.yaml` file to reflect the number of GPUs you have available
+
+### Running the Model
+Start your docker container with:
+```
+nvidia-docker run --rm -it \
+  -v $(pwd)/output:/output \
+  -v $(pwd)/videos:/videos \
+  -v $(pwd)/models:/models \
+  -w /pose_root \
+  hrnet_demo_inference \
+  /bin/bash
+```
+
+Once the container is running, you can run inference with:
+```
+python tools/inference.py \
+  --cfg inference-config.yaml \
+  --videoFile /videos/my-video.mp4 \
+  --inferenceFps 10 \
+  --writeBoxFrames \
+  TEST.MODEL_FILE \
+  /models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth
+```
+
+The command above will output frames with boxes,
+frames with poses,
+a video with poses,
+and a csv with the keypoint coordinates for each frame.
+
+![](hrnet-demo.gif)
+
+The original source of the demo video above is licensed as `Free for commercial use No attribution required` by [Pixabay](https://pixabay.com/service/license/).
diff --git a/demo/build-docker.sh b/demo/build-docker.sh
@@ -0,0 +1 @@
+docker build -t hrnet_demo_inference .
diff --git a/demo/hrnet-demo.gif b/demo/hrnet-demo.gif
diff --git a/demo/inference-config.yaml b/demo/inference-config.yaml
@@ -0,0 +1,127 @@
+# Configuration passed to tools/inference.py via --cfg (see the README).
+AUTO_RESUME: true
+CUDNN:
+  BENCHMARK: true
+  DETERMINISTIC: false
+  ENABLED: true
+DATA_DIR: ''
+# Edit to reflect the GPUs available on your machine (README, prep step 4).
+GPUS: (0,)
+# Directory names match the `output` and `log` directories the Dockerfile creates in the repo root.
+OUTPUT_DIR: 'output'
+LOG_DIR: 'log'
+# NOTE(review): presumably a data-loader worker count — confirm it is used by the demo and suits the host.
+WORKERS: 24
+PRINT_FREQ: 100
+
+# Dataset settings.
+# NOTE(review): these look carried over from a COCO training/evaluation config;
+# confirm which of them tools/inference.py actually reads.
+DATASET:
+  COLOR_RGB: true
+  DATASET: 'coco'
+  DATA_FORMAT: jpg
+  FLIP: true
+  NUM_JOINTS_HALF_BODY: 8
+  PROB_HALF_BODY: 0.3
+  ROOT: 'data/coco/'
+  ROT_FACTOR: 45
+  SCALE_FACTOR: 0.35
+  TEST_SET: 'val2017'
+  TRAIN_SET: 'train2017'
+# Pose network definition (17 joints).
+MODEL:
+  INIT_WEIGHTS: true
+  NAME: pose_hrnet
+  NUM_JOINTS: 17
+  # NOTE(review): ImageNet backbone weights path; presumably only needed when
+  # initialising for training — confirm it is not required for the demo.
+  PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
+  TARGET_TYPE: gaussian
+  # 288 and 384 correspond to the 384x288 checkpoint named in the README example;
+  # must agree with the model file passed as TEST.MODEL_FILE.
+  IMAGE_SIZE:
+  - 288
+  - 384
+  # Each heatmap dimension is one quarter of the matching IMAGE_SIZE entry.
+  HEATMAP_SIZE:
+  - 72
+  - 96
+  SIGMA: 3
+  EXTRA:
+    PRETRAINED_LAYERS:
+    - 'conv1'
+    - 'bn1'
+    - 'conv2'
+    - 'bn2'
+    - 'layer1'
+    - 'transition1'
+    - 'stage2'
+    - 'transition2'
+    - 'stage3'
+    - 'transition3'
+    - 'stage4'
+    FINAL_CONV_KERNEL: 1
+    # Stages 2-4 add one branch each (2, 3, 4); channel widths double per
+    # branch starting from 32.
+    STAGE2:
+      NUM_MODULES: 1
+      NUM_BRANCHES: 2
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 32
+      - 64
+      FUSE_METHOD: SUM
+    STAGE3:
+      NUM_MODULES: 4
+      NUM_BRANCHES: 3
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 32
+      - 64
+      - 128
+      FUSE_METHOD: SUM
+    STAGE4:
+      NUM_MODULES: 3
+      NUM_BRANCHES: 4
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 32
+      - 64
+      - 128
+      - 256
+      FUSE_METHOD: SUM
+# Loss and training hyper-parameters.
+# NOTE(review): presumably not used when only running inference — confirm
+# before relying on or removing them.
+LOSS:
+  USE_TARGET_WEIGHT: true
+TRAIN:
+  BATCH_SIZE_PER_GPU: 32
+  SHUFFLE: true
+  BEGIN_EPOCH: 0
+  END_EPOCH: 210
+  OPTIMIZER: adam
+  LR: 0.001
+  LR_FACTOR: 0.1
+  LR_STEP:
+  - 170
+  - 200
+  WD: 0.0001
+  GAMMA1: 0.99
+  GAMMA2: 0.0
+  MOMENTUM: 0.9
+  NESTEROV: false
+# Test/inference-time settings.
+TEST:
+  BATCH_SIZE_PER_GPU: 32
+  COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
+  BBOX_THRE: 1.0
+  IMAGE_THRE: 0.0
+  IN_VIS_THRE: 0.2
+  # Left empty here; the README's example command supplies TEST.MODEL_FILE on
+  # the command line with the path to the downloaded checkpoint.
+  MODEL_FILE: ''
+  NMS_THRE: 1.0
+  OKS_THRE: 0.9
+  USE_GT_BBOX: true
+  FLIP_TEST: true
+  POST_PROCESS: true
+  SHIFT_HEATMAP: true
+# Debug switches.
+# NOTE(review): all debug image/heatmap saving is enabled; confirm whether the
+# demo script honours these flags and whether the extra output is wanted.
+DEBUG:
+  DEBUG: true
+  SAVE_BATCH_IMAGES_GT: true
+  SAVE_BATCH_IMAGES_PRED: true
+  SAVE_HEATMAPS_GT: true
+  SAVE_HEATMAPS_PRED: true
diff --git a/demo/inference.py b/demo/inference.py