From 767abf937c82ab7252aa245d7be8d68a337e3caf Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 17 Apr 2023 22:54:02 -0700 Subject: [PATCH 01/30] add infinity dataset --- .gitignore | 2 + demo/demo.py | 467 +- demo/inference-config.yaml | 4 +- .../coco/hrnet/w32_256x192_adam_lr1e-3.yaml | 6 +- .../coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 8 +- .../hrnet/w48_256x192_adam_lr1e-3.yaml | 127 + .../hrnet/w48_384x288_adam_lr1e-3.yaml | 127 + .../hrnet/w48_384x288_adam_lr1e-3.yaml | 127 + lib/dataset/__init__.py | 8 +- lib/dataset/coco.py | 8 +- lib/dataset/infinity.py | 493 ++ lib/dataset/infinity_coco.py | 562 ++ lib/nms/gpu_nms.cpp | 7357 +++++++++++++++++ lib/utils/vis.py | 111 +- requirements.txt | 4 +- tools/train.py | 223 +- 16 files changed, 9313 insertions(+), 321 deletions(-) create mode 100644 experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml create mode 100644 experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml create mode 100644 experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml create mode 100644 lib/dataset/infinity.py create mode 100644 lib/dataset/infinity_coco.py create mode 100644 lib/nms/gpu_nms.cpp diff --git a/.gitignore b/.gitignore index c1b85b62..42b5d470 100644 --- a/.gitignore +++ b/.gitignore @@ -85,8 +85,10 @@ lib/pycocotools/_mask.c lib/nms/cpu_nms.c output/* +output_infinity_coco/* models/* log/* +log_infinity_coco/* data/* external/ diff --git a/demo/demo.py b/demo/demo.py index d482e838..5eae5b4b 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -1,118 +1,244 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import argparse import csv import os import shutil +import time -from PIL import Image +import _init_paths +import cv2 +import numpy as np import torch -import torch.nn.parallel import torch.backends.cudnn as cudnn +import torch.nn.parallel import torch.optim import torch.utils.data 
import torch.utils.data.distributed -import torchvision.transforms as transforms import torchvision -import cv2 -import numpy as np -import time - - -import _init_paths -import models -from config import cfg -from config import update_config +import torchvision.transforms as transforms +from config import cfg, update_config from core.function import get_final_preds +from PIL import Image +from pycocotools.coco import COCO from utils.transforms import get_affine_transform +import models + COCO_KEYPOINT_INDEXES = { - 0: 'nose', - 1: 'left_eye', - 2: 'right_eye', - 3: 'left_ear', - 4: 'right_ear', - 5: 'left_shoulder', - 6: 'right_shoulder', - 7: 'left_elbow', - 8: 'right_elbow', - 9: 'left_wrist', - 10: 'right_wrist', - 11: 'left_hip', - 12: 'right_hip', - 13: 'left_knee', - 14: 'right_knee', - 15: 'left_ankle', - 16: 'right_ankle' + 0: "nose", + 1: "left_eye", + 2: "right_eye", + 3: "left_ear", + 4: "right_ear", + 5: "left_shoulder", + 6: "right_shoulder", + 7: "left_elbow", + 8: "right_elbow", + 9: "left_wrist", + 10: "right_wrist", + 11: "left_hip", + 12: "right_hip", + 13: "left_knee", + 14: "right_knee", + 15: "left_ankle", + 16: "right_ankle", } COCO_INSTANCE_CATEGORY_NAMES = [ - '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', - 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', - 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', - 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', - 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', - 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 
'remote', 'keyboard', 'cell phone', - 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', - 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' + "__background__", + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "N/A", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "N/A", + "backpack", + "umbrella", + "N/A", + "N/A", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "N/A", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "N/A", + "dining table", + "N/A", + "N/A", + "toilet", + "N/A", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "N/A", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", ] SKELETON = [ - [1,3],[1,0],[2,4],[2,0],[0,5],[0,6],[5,7],[7,9],[6,8],[8,10],[5,11],[6,12],[11,12],[11,13],[13,15],[12,14],[14,16] + [1, 3], + [1, 0], + [2, 4], + [2, 0], + [0, 5], + [0, 6], + [5, 7], + [7, 9], + [6, 8], + [8, 10], + [5, 11], + [6, 12], + [11, 12], + [11, 13], + [13, 15], + [12, 14], + [14, 16], ] -CocoColors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], - [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], - [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] +CocoColors = [ + [255, 0, 0], + [255, 85, 0], + [255, 170, 0], + [255, 255, 0], + [170, 255, 
0], + [85, 255, 0], + [0, 255, 0], + [0, 255, 85], + [0, 255, 170], + [0, 255, 255], + [0, 170, 255], + [0, 85, 255], + [0, 0, 255], + [85, 0, 255], + [170, 0, 255], + [255, 0, 255], + [255, 0, 170], + [255, 0, 85], +] NUM_KPTS = 17 +NUM_KPTS_INFINITY = 41 + +CTX = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") -CTX = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') -def draw_pose(keypoints,img): +def draw_pose(keypoints, img): """draw the keypoints and the skeletons. :params keypoints: the shape should be equal to [17,2] :params img: """ - assert keypoints.shape == (NUM_KPTS,2) + assert keypoints.shape == (NUM_KPTS, 2) for i in range(len(SKELETON)): kpt_a, kpt_b = SKELETON[i][0], SKELETON[i][1] - x_a, y_a = keypoints[kpt_a][0],keypoints[kpt_a][1] - x_b, y_b = keypoints[kpt_b][0],keypoints[kpt_b][1] + x_a, y_a = keypoints[kpt_a][0], keypoints[kpt_a][1] + x_b, y_b = keypoints[kpt_b][0], keypoints[kpt_b][1] cv2.circle(img, (int(x_a), int(y_a)), 6, CocoColors[i], -1) cv2.circle(img, (int(x_b), int(y_b)), 6, CocoColors[i], -1) cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), CocoColors[i], 2) -def draw_bbox(box,img): + +def draw_pose_infinity(keypoints, img): + """draw the keypoints and the skeletons. + :params keypoints: the shape should be equal to [41,2] + :params img: + """ + assert keypoints.shape == (NUM_KPTS_INFINITY, 2) + for i in range(len(keypoints)): + x, y = keypoints[i][0], keypoints[i][1] + cv2.circle(img, (int(x), int(y)), 2, CocoColors[i % len(CocoColors)], -1) + + +def draw_bbox(box, img): """draw the detected bounding box on the image.
:param img: """ - cv2.rectangle(img, box[0], box[1], color=(0, 255, 0),thickness=3) + cv2.rectangle(img, box[0], box[1], color=(0, 255, 0), thickness=3) def get_person_detection_boxes(model, img, threshold=0.5): pred = model(img) - pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i] - for i in list(pred[0]['labels'].cpu().numpy())] # Get the Prediction Score - pred_boxes = [[(i[0], i[1]), (i[2], i[3])] - for i in list(pred[0]['boxes'].detach().cpu().numpy())] # Bounding boxes - pred_score = list(pred[0]['scores'].detach().cpu().numpy()) - if not pred_score or max(pred_score)<threshold: - return [] - # Get list of index with score greater than threshold - pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1] - pred_boxes = pred_boxes[:pred_t+1] - pred_classes = pred_classes[:pred_t+1] + pred_classes = [ + COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]["labels"].cpu().numpy()) + ] # Get the Prediction Score + pred_boxes = [ + [(i[0], i[1]), (i[2], i[3])] + for i in list(pred[0]["boxes"].detach().cpu().numpy()) + ] # Bounding boxes + pred_score = list(pred[0]["scores"].detach().cpu().numpy()) + if not pred_score or max(pred_score) < threshold: + return [] + # Get list of index with score greater than threshold + pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1] + pred_boxes = pred_boxes[: pred_t + 1] + pred_classes = pred_classes[: pred_t + 1] person_boxes = [] for idx, box in enumerate(pred_boxes): - if pred_classes[idx] == 'person': + if pred_classes[idx] == "person": person_boxes.append(box) return person_boxes @@ -127,12 +253,14 @@ def get_pose_estimation_prediction(pose_model, image, center, scale): image, trans, (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])), - flags=cv2.INTER_LINEAR) - transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]), - ]) + flags=cv2.INTER_LINEAR, + ) + transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) # pose estimation inference model_input = transform(model_input).unsqueeze(0) @@ -142,10 +270,8 @@ def get_pose_estimation_prediction(pose_model, image, center, scale): # compute output heatmap output = pose_model(model_input) preds, _ = get_final_preds( - cfg, - output.clone().cpu().numpy(), - np.asarray([center]), - np.asarray([scale])) + cfg, output.clone().cpu().numpy(), np.asarray([center]), np.asarray([scale]) + ) return preds @@ -169,8 +295,8 @@ def box_to_center_scale(box, model_image_width, model_image_height): bottom_left_corner =
box[0] top_right_corner = box[1] - box_width = top_right_corner[0]-bottom_left_corner[0] - box_height = top_right_corner[1]-bottom_left_corner[1] + box_width = top_right_corner[0] - bottom_left_corner[0] + box_height = top_right_corner[1] - bottom_left_corner[1] bottom_left_x = bottom_left_corner[0] bottom_left_y = bottom_left_corner[1] center[0] = bottom_left_x + box_width * 0.5 @@ -184,38 +310,65 @@ def box_to_center_scale(box, model_image_width, model_image_height): elif box_width < aspect_ratio * box_height: box_width = box_height * aspect_ratio scale = np.array( - [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std], - dtype=np.float32) + [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std], dtype=np.float32 + ) if center[0] != -1: scale = scale * 1.25 return center, scale + def parse_args(): - parser = argparse.ArgumentParser(description='Train keypoints network') + parser = argparse.ArgumentParser(description="Train keypoints network") # general - parser.add_argument('--cfg', type=str, default='demo/inference-config.yaml') - parser.add_argument('--video', type=str) - parser.add_argument('--webcam',action='store_true') - parser.add_argument('--image',type=str) - parser.add_argument('--write',action='store_true') - parser.add_argument('--showFps',action='store_true') - - parser.add_argument('opts', - help='Modify config options using the command-line', - default=None, - nargs=argparse.REMAINDER) + parser.add_argument("--cfg", type=str, default="demo/inference-config.yaml") + parser.add_argument("--video", type=str) + parser.add_argument("--webcam", action="store_true") + parser.add_argument("--image", type=str) + parser.add_argument("--write", action="store_true") + parser.add_argument("--showFps", action="store_true") + + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) args = parser.parse_args() - # args expected by supporting codebase - args.modelDir = '' - 
args.logDir = '' - args.dataDir = '' - args.prevModelDir = '' + # args expected by supporting codebase + args.modelDir = "" + args.logDir = "" + args.dataDir = "" + args.prevModelDir = "" return args +def load_infinity_data(video_path): + json_path = video_path[:-4] + ".json" + coco = COCO(json_path) + + return coco + + +def load_infinity_boxes(coco, index_frame): + img_data = list(coco.imgs.values())[index_frame] + img_id = img_data["id"] + ann_ids = coco.getAnnIds(imgIds=img_id) + anns = coco.loadAnns(ann_ids) + boxes = [] + for ann in anns: + if ann["category_id"] == 0: + boxes.append(ann["bbox"]) + + return [convert_coco_bbox(box) for box in boxes] + + +def convert_coco_bbox(bbox): + return [(bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3])] + + def main(): # cudnn related setting cudnn.benchmark = cfg.CUDNN.BENCHMARK @@ -229,21 +382,19 @@ def main(): box_model.to(CTX) box_model.eval() - pose_model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( - cfg, is_train=False - ) + pose_model = eval("models." 
+ cfg.MODEL.NAME + ".get_pose_net")(cfg, is_train=False) if cfg.TEST.MODEL_FILE: - print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + print("=> loading model from {}".format(cfg.TEST.MODEL_FILE)) pose_model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) else: - print('expected model defined in config at TEST.MODEL_FILE') + print("expected model defined in config at TEST.MODEL_FILE") pose_model = torch.nn.DataParallel(pose_model, device_ids=cfg.GPUS) pose_model.to(CTX) pose_model.eval() - # Loading an video or an image or webcam + # Loading an video or an image or webcam if args.webcam: vidcap = cv2.VideoCapture(0) elif args.video: @@ -251,14 +402,18 @@ def main(): elif args.image: image_bgr = cv2.imread(args.image) else: - print('please use --video or --webcam or --image to define the input.') - return + print("please use --video or --webcam or --image to define the input.") + return if args.webcam or args.video: if args.write: - save_path = 'output.avi' - fourcc = cv2.VideoWriter_fourcc(*'XVID') - out = cv2.VideoWriter(save_path,fourcc, 24.0, (int(vidcap.get(3)),int(vidcap.get(4)))) + save_path = "output.avi" + fourcc = cv2.VideoWriter_fourcc(*"XVID") + out = cv2.VideoWriter( + save_path, fourcc, 12.0, (int(vidcap.get(3)), int(vidcap.get(4))) + ) + index_frame = 0 + coco = load_infinity_data(args.video) while True: ret, image_bgr = vidcap.read() if ret: @@ -267,40 +422,62 @@ def main(): input = [] img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) - img_tensor = torch.from_numpy(img/255.).permute(2,0,1).float().to(CTX) + img_tensor = ( + torch.from_numpy(img / 255.0).permute(2, 0, 1).float().to(CTX) + ) input.append(img_tensor) # object detection box - pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.9) + # pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.9) + pred_boxes = load_infinity_boxes(coco, index_frame) # pose estimation if len(pred_boxes) >= 1: for box in pred_boxes: - center, scale = 
box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) - image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() - pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) - if len(pose_preds)>=1: + center, scale = box_to_center_scale( + box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1] + ) + image_pose = ( + image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() + ) + pose_preds = get_pose_estimation_prediction( + pose_model, image_pose, center, scale + ) + if len(pose_preds) >= 1: for kpt in pose_preds: - draw_pose(kpt,image_bgr) # draw the poses + if len(kpt) == 41: + draw_pose_infinity(kpt, image_bgr) + else: + draw_pose(kpt, image_bgr) # draw the poses if args.showFps: - fps = 1/(time.time()-last_time) - img = cv2.putText(image_bgr, 'fps: '+ "%.2f"%(fps), (25, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2) + fps = 1 / (time.time() - last_time) + img = cv2.putText( + image_bgr, + "fps: " + "%.2f" % (fps), + (25, 40), + cv2.FONT_HERSHEY_SIMPLEX, + 1.2, + (0, 255, 0), + 2, + ) if args.write: out.write(image_bgr) - cv2.imshow('demo',image_bgr) - if cv2.waitKey(1) & 0XFF==ord('q'): + cv2.imshow("demo", image_bgr) + if cv2.waitKey(1) & 0xFF == ord("q"): break else: - print('cannot load the video.') + print("cannot load the video.") break + index_frame += 1 + cv2.destroyAllWindows() vidcap.release() if args.write: - print('video has been saved as {}'.format(save_path)) + print("video has been saved as {}".format(save_path)) out.release() else: @@ -310,7 +487,7 @@ def main(): input = [] img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) - img_tensor = torch.from_numpy(img/255.).permute(2,0,1).float().to(CTX) + img_tensor = torch.from_numpy(img / 255.0).permute(2, 0, 1).float().to(CTX) input.append(img_tensor) # object detection box @@ -319,25 +496,41 @@ def main(): # pose estimation if len(pred_boxes) >= 1: for box in pred_boxes: - center, scale = box_to_center_scale(box, 
cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) + center, scale = box_to_center_scale( + box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1] + ) image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() - pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) - if len(pose_preds)>=1: + pose_preds = get_pose_estimation_prediction( + pose_model, image_pose, center, scale + ) + if len(pose_preds) >= 1: for kpt in pose_preds: - draw_pose(kpt,image_bgr) # draw the poses - + if len(kpt) == 41: + draw_pose_infinity(kpt, image_bgr) + else: + draw_pose(kpt, image_bgr) if args.showFps: - fps = 1/(time.time()-last_time) - img = cv2.putText(image_bgr, 'fps: '+ "%.2f"%(fps), (25, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2) - + fps = 1 / (time.time() - last_time) + img = cv2.putText( + image_bgr, + "fps: " + "%.2f" % (fps), + (25, 40), + cv2.FONT_HERSHEY_SIMPLEX, + 1.2, + (0, 255, 0), + 2, + ) + if args.write: - save_path = 'output.jpg' - cv2.imwrite(save_path,image_bgr) - print('the result image has been saved as {}'.format(save_path)) - - cv2.imshow('demo',image_bgr) - if cv2.waitKey(0) & 0XFF==ord('q'): - cv2.destroyAllWindows() - -if __name__ == '__main__': + save_path = f"../output_HRNet_augmented/{args.image.split('/')[-1].split('.')[0]}_output.jpg" + cv2.imwrite(save_path, image_bgr) + print("the result image has been saved as {}".format(save_path)) + + cv2.imshow("demo", image_bgr) + # if cv2.waitKey(0) & 0xFF == ord("q"): + # cv2.destroyAllWindows() + # cv2.destroyAllWindows() + + +if __name__ == "__main__": main() diff --git a/demo/inference-config.yaml b/demo/inference-config.yaml index 14bce176..eaae0244 100644 --- a/demo/inference-config.yaml +++ b/demo/inference-config.yaml @@ -26,7 +26,7 @@ MODEL: INIT_WEIGHTS: true NAME: pose_hrnet NUM_JOINTS: 17 - PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth' TARGET_TYPE: gaussian 
IMAGE_SIZE: - 288 @@ -112,7 +112,7 @@ TEST: BBOX_THRE: 1.0 IMAGE_THRE: 0.0 IN_VIS_THRE: 0.2 - MODEL_FILE: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + MODEL_FILE: 'models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth' NMS_THRE: 1.0 OKS_THRE: 0.9 USE_GT_BBOX: true diff --git a/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml b/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml index 16854cf2..d345bb7e 100644 --- a/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml +++ b/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml @@ -4,11 +4,11 @@ CUDNN: DETERMINISTIC: false ENABLED: true DATA_DIR: '' -GPUS: (0,1,2,3) +GPUS: (0,) OUTPUT_DIR: 'output' LOG_DIR: 'log' -WORKERS: 24 -PRINT_FREQ: 100 +WORKERS: 12 +PRINT_FREQ: 10 DATASET: COLOR_RGB: true diff --git a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 2844ff61..0ffcd6d2 100644 --- a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -4,11 +4,11 @@ CUDNN: DETERMINISTIC: false ENABLED: true DATA_DIR: '' -GPUS: (0,1,2,3) +GPUS: (0,) OUTPUT_DIR: 'output' LOG_DIR: 'log' -WORKERS: 24 -PRINT_FREQ: 100 +WORKERS: 12 +PRINT_FREQ: 10 DATASET: COLOR_RGB: true @@ -112,7 +112,7 @@ TEST: BBOX_THRE: 1.0 IMAGE_THRE: 0.0 IN_VIS_THRE: 0.2 - MODEL_FILE: '' + MODEL_FILE: 'models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth' NMS_THRE: 1.0 OKS_THRE: 0.9 USE_GT_BBOX: true diff --git a/experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml b/experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml new file mode 100644 index 00000000..5a301a61 --- /dev/null +++ b/experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml @@ -0,0 +1,127 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: "" +GPUS: (0,) +OUTPUT_DIR: "output" +LOG_DIR: "log" +WORKERS: 4 +PRINT_FREQ: 10 + +DATASET: + COLOR_RGB: true + DATASET: "infinity" + DATA_FORMAT: png + FLIP: true + 
NUM_JOINTS_HALF_BODY: 19 + PROB_HALF_BODY: 0.3 + ROOT: "../infinity_dataset_medium" + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + TEST_SET: "test" + TRAIN_SET: "train" +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 41 + PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 192 + - 256 + HEATMAP_SIZE: + - 48 + - 64 + SIGMA: 2 + EXTRA: + PRETRAINED_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + - "stage4" + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + - 384 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 4 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 50 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 10 + COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: "models/pytorch/infinity/model_best_256.pth" + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml new file mode 100644 index 00000000..c7cd54d5 --- /dev/null +++ 
b/experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -0,0 +1,127 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: "" +GPUS: (0,) +OUTPUT_DIR: "output" +LOG_DIR: "log" +WORKERS: 4 +PRINT_FREQ: 10 + +DATASET: + COLOR_RGB: true + DATASET: "infinity" + DATA_FORMAT: png + FLIP: true + NUM_JOINTS_HALF_BODY: 19 + PROB_HALF_BODY: 0.3 + ROOT: "../infinity_dataset_medium_512" + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + TEST_SET: "test" + TRAIN_SET: "train" +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 41 + PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + EXTRA: + PRETRAINED_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + - "stage4" + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + - 384 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 2 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 50 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 10 + COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: "models/pytorch/infinity/model_best.pth" + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true 
+DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml new file mode 100644 index 00000000..978da652 --- /dev/null +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -0,0 +1,127 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: "" +GPUS: (0,) +OUTPUT_DIR: "output_infinity_coco" +LOG_DIR: "log_infinity_coco" +WORKERS: 1 +PRINT_FREQ: 10 + +DATASET: + COLOR_RGB: true + DATASET: "infinity_coco" + DATA_FORMAT: png + FLIP: true + NUM_JOINTS_HALF_BODY: 27 + PROB_HALF_BODY: 0.3 + ROOT: "../infinity_dataset_medium_512" + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + TEST_SET: "test" + TRAIN_SET: "train" +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 58 + PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + EXTRA: + PRETRAINED_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + - "stage4" + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + - 384 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 2 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 50 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + 
NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 2 + COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: "models/pytorch/infinity_coco/final_state.pth" + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/lib/dataset/__init__.py b/lib/dataset/__init__.py index 16e2413b..89ed057b 100644 --- a/lib/dataset/__init__.py +++ b/lib/dataset/__init__.py @@ -4,9 +4,9 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -from .mpii import MPIIDataset as mpii from .coco import COCODataset as coco +from .infinity import InfinityDataset as infinity +from .infinity_coco import InfinityCocoDataset as infinity_coco +from .mpii import MPIIDataset as mpii diff --git a/lib/dataset/coco.py b/lib/dataset/coco.py index b8bad408..98d9c2db 100755 --- a/lib/dataset/coco.py +++ b/lib/dataset/coco.py @@ -181,8 +181,8 @@ def _load_coco_keypoint_annotation_kernal(self, index): if max(obj['keypoints']) == 0: continue - joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) - joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) + joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32) + joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float32) for ipt in range(self.num_joints): joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0] joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1] @@ -270,9 +270,9 @@ def _load_coco_person_detection_results(self): num_boxes = num_boxes + 1 center, scale = self._box2cs(box) - 
class InfinityDataset(JointsDataset):
    """Infinity synthetic-human keypoint dataset (41 augmented joints).

    Annotations follow the COCO layout: one ``annotations.json`` per image
    set under ``<root>/<image_set>/``, images stored as
    ``<root>/<image_set>/images/<image_id>.png``.  Unlike stock COCO, each
    annotation stores keypoints per-name as
    ``obj["keypoints"][name] = {"x": ..., "y": ..., "v": ...}``; the ordered
    name list is read from the category-0 metadata field
    ``augmented_keypoints``.
    """

    def __init__(self, cfg, root, image_set, is_train, transform=None):
        """Load the annotation file and build the ground-truth/detection db.

        :param cfg: experiment config node (yacs-style; read, not mutated)
        :param root: dataset root directory
        :param image_set: subdirectory name, e.g. ``train`` / ``val`` / ``test``
        :param is_train: whether to build the training db (GT boxes)
        :param transform: optional torchvision transform applied per sample
        """
        super().__init__(cfg, root, image_set, is_train, transform)
        # Test-time thresholds and NMS configuration.
        self.nms_thre = cfg.TEST.NMS_THRE
        self.image_thre = cfg.TEST.IMAGE_THRE
        self.soft_nms = cfg.TEST.SOFT_NMS
        self.oks_thre = cfg.TEST.OKS_THRE
        self.in_vis_thre = cfg.TEST.IN_VIS_THRE
        self.bbox_file = cfg.TEST.COCO_BBOX_FILE
        self.use_gt_bbox = cfg.TEST.USE_GT_BBOX
        self.image_width = cfg.MODEL.IMAGE_SIZE[0]
        self.image_height = cfg.MODEL.IMAGE_SIZE[1]
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200  # COCO convention: scale is in units of 200 px

        self.coco = COCO(self._get_ann_file_keypoint())

        # Deal with class names: category ids double as class labels here.
        cats = [cat["id"] for cat in self.coco.loadCats(self.coco.getCatIds())]
        # Ordered keypoint-name list lives in category 0's metadata.
        self.keypoints_name = self.coco.loadCats(0)[0]["augmented_keypoints"]
        self.classes = ["__background__"] + cats
        logger.info("=> classes: {}".format(self.classes))
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:]
        )

        # Load image ids.
        self.image_set_index = self._load_image_set_index()
        self.num_images = len(self.image_set_index)
        logger.info("=> num_images: {}".format(self.num_images))

        self.num_joints = 41
        # Left/right joint index pairs swapped under horizontal flip.
        self.flip_pairs = [
            [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [12, 13], [14, 15],
            [16, 17], [18, 19], [20, 21], [22, 23], [24, 25], [26, 27],
            [28, 29], [30, 31], [32, 33], [34, 35], [39, 40],
        ]
        self.parent_ids = None
        self.upper_body_ids = (
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
            28, 29, 30, 31, 32, 33, 34, 35, 36, 38,
        )
        self.lower_body_ids = (
            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
            23, 24, 25, 26, 27, 37, 39, 40,
        )

        # Uniform per-joint loss weights (column vector, shape (41, 1)).
        self.joints_weight = np.ones(self.num_joints).reshape((self.num_joints, 1))

        self.db = self._get_db()

        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)

        logger.info("=> load {} samples".format(len(self.db)))

    def _get_ann_file_keypoint(self):
        """Return the path of the keypoint annotation json for this split."""
        return os.path.join(self.root, self.image_set, "annotations.json")

    def _load_image_set_index(self):
        """Return all image ids (ints) in the annotation file."""
        return self.coco.getImgIds()

    def _get_db(self):
        """Build the sample db: GT boxes for training, detections otherwise."""
        if self.is_train or self.use_gt_bbox:
            # Use ground-truth bboxes.
            return self._load_coco_keypoint_annotations()
        # Use bboxes from a person detector.
        return self._load_coco_person_detection_results()

    def _load_coco_keypoint_annotations(self):
        """Collect ground-truth bbox + keypoint entries for every image."""
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernal(self, index):
        """Build db entries for one image.

        coco ann keys: segmentation, area, iscrowd, image_id, bbox,
        category_id, id.  Crowd annotations are excluded via
        ``iscrowd=False``.  bbox is [x1, y1, w, h].

        :param index: coco image id
        :return: list of db entries (possibly empty)
        """
        im_ann = self.coco.loadImgs(index)[0]
        width = im_ann["width"]
        height = im_ann["height"]

        annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(annIds)

        # Sanitize bboxes: clip to the image and drop degenerate boxes.
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj["bbox"]
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            # NOTE: unlike stock COCO loaders, the area > 0 check is
            # intentionally disabled for this synthetic dataset.
            if x2 >= x1 and y2 >= y1:
                obj["clean_bbox"] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        for obj in objs:
            cls = self._coco_ind_to_class_ind[obj["category_id"]]
            if cls != 1:
                continue

            # Ignore objects without any keypoint annotation.
            if max(obj["keypoints"]) == 0:
                continue

            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float32)
            # Keypoints are keyed by name; iterate in canonical order.
            for ipt, name in enumerate(self.keypoints_name):
                joints_3d[ipt, 0] = obj["keypoints"][name]["x"]
                joints_3d[ipt, 1] = obj["keypoints"][name]["y"]
                joints_3d[ipt, 2] = 0
                # Clamp COCO visibility (0/1/2) to a binary flag.
                t_vis = min(obj["keypoints"][name]["v"], 1)
                joints_3d_vis[ipt, 0] = t_vis
                joints_3d_vis[ipt, 1] = t_vis
                joints_3d_vis[ipt, 2] = 0

            center, scale = self._box2cs(obj["clean_bbox"][:4])
            rec.append(
                {
                    "image": self.image_path_from_index(index),
                    "center": center,
                    "scale": scale,
                    "joints_3d": joints_3d,
                    "joints_3d_vis": joints_3d_vis,
                    "filename": "",
                    "imgnum": 0,
                }
            )

        return rec

    def _box2cs(self, box):
        """Convert an [x, y, w, h] box to (center, scale)."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Convert xywh to center + scale, padding to the model aspect ratio.

        Scale is expressed in multiples of ``pixel_std`` (200 px) and
        inflated by 25% to include some context around the box.
        """
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        # Grow the smaller dimension so w/h matches the network input.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], dtype=np.float32
        )
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def image_path_from_index(self, index):
        """Return ``<root>/<image_set>/images/<index>.png``."""
        return os.path.join(self.root, self.image_set, "images", f"{index}.png")

    def _load_coco_person_detection_results(self):
        """Build db entries from an external person-detector result file.

        Returns ``None`` (and logs an error) when the bbox file is empty
        or unreadable; keypoints are zero-initialized placeholders.
        """
        with open(self.bbox_file, "r") as f:
            all_boxes = json.load(f)

        if not all_boxes:
            logger.error("=> Load %s fail!" % self.bbox_file)
            return None

        logger.info("=> Total boxes: {}".format(len(all_boxes)))

        kpt_db = []
        num_boxes = 0
        for det_res in all_boxes:
            # Keep only person detections above the score threshold.
            if det_res["category_id"] != 1:
                continue
            img_name = self.image_path_from_index(det_res["image_id"])
            box = det_res["bbox"]
            score = det_res["score"]

            if score < self.image_thre:
                continue

            num_boxes += 1

            center, scale = self._box2cs(box)
            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.ones((self.num_joints, 3), dtype=np.float32)
            kpt_db.append(
                {
                    "image": img_name,
                    "center": center,
                    "scale": scale,
                    "score": score,
                    "joints_3d": joints_3d,
                    "joints_3d_vis": joints_3d_vis,
                }
            )

        logger.info(
            "=> Total boxes after filter low score@{}: {}".format(
                self.image_thre, num_boxes
            )
        )
        return kpt_db

    def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, *args, **kwargs):
        """Rescore predictions, run OKS-NMS, dump a COCO results json and
        (for non-test splits) run the COCO keypoint evaluation.

        :param preds: predicted keypoints, one (num_joints, 3) array per box
        :param all_boxes: per-box [center(2), scale(2), area, score]
        :param img_path: image path per box; the numeric stem is the image id
        :return: (OrderedDict of metric name -> value, primary AP) or
            ({"Null": 0}, 0) for test splits with no annotations
        """
        rank = cfg.RANK

        res_folder = os.path.join(output_dir, "results")
        if not os.path.exists(res_folder):
            try:
                os.makedirs(res_folder)
            except Exception:
                logger.error("Fail to make {}".format(res_folder))

        res_file = os.path.join(
            res_folder, "keypoints_{}_results_{}.json".format(self.image_set, rank)
        )

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append(
                {
                    "keypoints": kpt,
                    "center": all_boxes[idx][0:2],
                    "scale": all_boxes[idx][2:4],
                    "area": all_boxes[idx][4],
                    "score": all_boxes[idx][5],
                    # Image id parsed from the filename stem.
                    "image": int(img_path[idx].split("/")[-1].split(".")[0]),
                }
            )
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt["image"]].append(kpt)

        # Rescoring and OKS-NMS per image.
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p["score"]
                kpt_score = 0
                valid_num = 0
                # Average confidence over joints above the visibility threshold.
                for n_jt in range(0, num_joints):
                    t_s = n_p["keypoints"][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # Rescore: detector confidence x mean keypoint confidence.
                n_p["score"] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(
                    [img_kpts[i] for i in range(len(img_kpts))], oks_thre
                )
            else:
                keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))], oks_thre)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        if "test" not in self.image_set:
            info_str = self._do_python_keypoint_eval(res_file, res_folder)
            name_value = OrderedDict(info_str)
            return name_value, name_value["AP"]
        else:
            return {"Null": 0}, 0

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Serialize NMS'd keypoints to a COCO-format results json."""
        data_pack = [
            {
                "cat_id": self._class_to_coco_ind[cls],
                "cls_ind": cls_ind,
                "cls": cls,
                "ann_type": "keypoints",
                "keypoints": keypoints,
            }
            for cls_ind, cls in enumerate(self.classes)
            if not cls == "__background__"
        ]

        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        logger.info("=> writing results json to %s" % res_file)
        with open(res_file, "w") as f:
            json.dump(results, f, sort_keys=True, indent=4)
        # Defensive repair: if the written json is truncated (e.g. a crash
        # mid-dump), force-close the top-level array on the last line.
        try:
            with open(res_file) as f:
                json.load(f)
        except Exception:
            content = []
            with open(res_file, "r") as f:
                for line in f:
                    content.append(line)
            content[-1] = "]"
            with open(res_file, "w") as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Flatten per-image keypoint dicts into COCO result records."""
        cat_id = data_pack["cat_id"]
        keypoints = data_pack["keypoints"]
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]["keypoints"] for k in range(len(img_kpts))]
            )
            # np.float32, not the removed np.float alias (NumPy >= 1.24).
            key_points = np.zeros(
                (_key_points.shape[0], self.num_joints * 3), dtype=np.float32
            )

            for ipt in range(self.num_joints):
                key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
                key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
                key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2]  # keypoint score

            result = [
                {
                    "image_id": img_kpts[k]["image"],
                    "category_id": cat_id,
                    "keypoints": list(key_points[k]),
                    "score": img_kpts[k]["score"],
                    "center": list(img_kpts[k]["center"]),
                    "scale": list(img_kpts[k]["scale"]),
                }
                for k in range(len(img_kpts))
            ]
            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file, res_folder):
        """Run COCOeval on the results file and return (name, value) pairs."""
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, "keypoints")
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            "AP", "Ap .5", "AP .75", "AP (M)", "AP (L)",
            "AR", "AR .5", "AR .75", "AR (M)", "AR (L)",
        ]

        return [(name, coco_eval.stats[ind]) for ind, name in enumerate(stats_names)]
class InfinityCocoDataset(JointsDataset):
    """Infinity dataset with joint COCO + augmented keypoint supervision.

    Each sample carries 17 COCO keypoints (from ``obj["coco_keypoints"]``,
    flat [x, y, v] triplets) followed by 41 Infinity augmented keypoints
    (from ``obj["keypoints"][name]`` dicts), for 58 joints total.  All
    Infinity joint indices are offset by ``num_joints_coco`` (17).
    """

    def __init__(self, cfg, root, image_set, is_train, transform=None):
        """Load annotations and build the ground-truth/detection db.

        :param cfg: experiment config node (yacs-style; read, not mutated)
        :param root: dataset root directory
        :param image_set: subdirectory name, e.g. ``train`` / ``val`` / ``test``
        :param is_train: whether to build the training db (GT boxes)
        :param transform: optional torchvision transform applied per sample
        """
        super().__init__(cfg, root, image_set, is_train, transform)
        # Test-time thresholds and NMS configuration.
        self.nms_thre = cfg.TEST.NMS_THRE
        self.image_thre = cfg.TEST.IMAGE_THRE
        self.soft_nms = cfg.TEST.SOFT_NMS
        self.oks_thre = cfg.TEST.OKS_THRE
        self.in_vis_thre = cfg.TEST.IN_VIS_THRE
        self.bbox_file = cfg.TEST.COCO_BBOX_FILE
        self.use_gt_bbox = cfg.TEST.USE_GT_BBOX
        self.image_width = cfg.MODEL.IMAGE_SIZE[0]
        self.image_height = cfg.MODEL.IMAGE_SIZE[1]
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200  # COCO convention: scale is in units of 200 px

        self.coco = COCO(self._get_ann_file_keypoint())

        # Deal with class names: category ids double as class labels here.
        cats = [cat["id"] for cat in self.coco.loadCats(self.coco.getCatIds())]
        # Ordered augmented-keypoint-name list lives in category 0's metadata.
        self.keypoints_name = self.coco.loadCats(0)[0]["augmented_keypoints"]
        self.classes = ["__background__"] + cats
        logger.info("=> classes: {}".format(self.classes))
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:]
        )

        # Load image ids.
        self.image_set_index = self._load_image_set_index()
        self.num_images = len(self.image_set_index)
        logger.info("=> num_images: {}".format(self.num_images))

        self.num_joints_infinity = 41
        self.num_joints_coco = 17
        self.num_joints = self.num_joints_infinity + self.num_joints_coco

        # Left/right joint pairs swapped under horizontal flip, in each
        # dataset's local indexing.
        self.flip_pairs_infinity = [
            [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [12, 13], [14, 15],
            [16, 17], [18, 19], [20, 21], [22, 23], [24, 25], [26, 27],
            [28, 29], [30, 31], [32, 33], [34, 35], [39, 40],
        ]
        self.flip_pairs_coco = [
            [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16],
        ]
        # BUGFIX: the offset infinity pairs were previously assigned to a
        # typo'd attribute (``flip_pairs__infinity``), so the combined
        # ``flip_pairs`` used UN-offset infinity indices and horizontal
        # flips swapped the wrong joints.  Shift in place, mirroring how
        # the upper/lower body-id tuples below are shifted.
        self.flip_pairs_infinity = [
            [x + self.num_joints_coco, y + self.num_joints_coco]
            for x, y in self.flip_pairs_infinity
        ]
        self.flip_pairs = self.flip_pairs_coco + self.flip_pairs_infinity

        self.parent_ids = None
        self.upper_body_ids_infinity = (
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
            28, 29, 30, 31, 32, 33, 34, 35, 36, 38,
        )
        self.lower_body_ids_infinity = (
            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
            23, 24, 25, 26, 27, 37, 39, 40,
        )

        self.upper_body_ids_coco = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.lower_body_ids_coco = (11, 12, 13, 14, 15, 16)

        # Shift infinity body-part ids past the 17 COCO joints.
        self.upper_body_ids_infinity = tuple(
            x + self.num_joints_coco for x in self.upper_body_ids_infinity
        )
        self.lower_body_ids_infinity = tuple(
            x + self.num_joints_coco for x in self.lower_body_ids_infinity
        )

        self.upper_body_ids = self.upper_body_ids_coco + self.upper_body_ids_infinity
        self.lower_body_ids = self.lower_body_ids_coco + self.lower_body_ids_infinity

        # Loss weights: uniform for infinity joints; standard COCO weights
        # (heavier wrists/ankles) for the first 17.  Shape (58, 1).
        self.joints_weight_infinity = np.ones(self.num_joints_infinity).reshape(
            (self.num_joints_infinity, 1)
        )
        self.joints_weight_coco = np.array(
            [
                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2,
                1.5, 1.5, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5,
            ],
            dtype=np.float32,
        ).reshape((self.num_joints_coco, 1))
        self.joints_weight = np.vstack(
            (self.joints_weight_coco, self.joints_weight_infinity)
        )
        self.db = self._get_db()

        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)

        logger.info("=> load {} samples".format(len(self.db)))

    def _get_ann_file_keypoint(self):
        """Return the path of the keypoint annotation json for this split."""
        return os.path.join(self.root, self.image_set, "annotations.json")

    def _load_image_set_index(self):
        """Return all image ids (ints) in the annotation file."""
        return self.coco.getImgIds()

    def _get_db(self):
        """Build the sample db: GT boxes for training, detections otherwise."""
        if self.is_train or self.use_gt_bbox:
            # Use ground-truth bboxes.
            return self._load_coco_keypoint_annotations()
        # Use bboxes from a person detector.
        return self._load_coco_person_detection_results()

    def _load_coco_keypoint_annotations(self):
        """Collect ground-truth bbox + keypoint entries for every image."""
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernal(self, index):
        """Build db entries for one image.

        coco ann keys: segmentation, area, iscrowd, image_id, bbox,
        category_id, id.  Crowd annotations are excluded via
        ``iscrowd=False``.  bbox is [x1, y1, w, h].  Joint layout of each
        entry: indices 0..16 COCO keypoints, 17..57 Infinity keypoints.

        :param index: coco image id
        :return: list of db entries (possibly empty)
        """
        im_ann = self.coco.loadImgs(index)[0]
        width = im_ann["width"]
        height = im_ann["height"]

        annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(annIds)

        # Sanitize bboxes: clip to the image and drop degenerate boxes.
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj["bbox"]
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            # NOTE: unlike stock COCO loaders, the area > 0 check is
            # intentionally disabled for this synthetic dataset.
            if x2 >= x1 and y2 >= y1:
                obj["clean_bbox"] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        for obj in objs:
            cls = self._coco_ind_to_class_ind[obj["category_id"]]
            if cls != 1:
                continue

            # Ignore objects without any keypoint annotation.
            if max(obj["keypoints"]) == 0:
                continue

            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float32)

            # COCO keypoints: flat [x, y, v] triplets.
            for ipt in range(self.num_joints_coco):
                joints_3d[ipt, 0] = obj["coco_keypoints"][3 * ipt]
                joints_3d[ipt, 1] = obj["coco_keypoints"][3 * ipt + 1]
                # Clamp COCO visibility (0/1/2) to a binary flag.
                t_vis = min(obj["coco_keypoints"][3 * ipt + 2], 1)
                joints_3d_vis[ipt, 0] = t_vis
                joints_3d_vis[ipt, 1] = t_vis
                joints_3d_vis[ipt, 2] = 0

            # Infinity keypoints: keyed by name, offset past the COCO block.
            for ipt, name in enumerate(self.keypoints_name):
                joints_3d[self.num_joints_coco + ipt, 0] = obj["keypoints"][name]["x"]
                joints_3d[self.num_joints_coco + ipt, 1] = obj["keypoints"][name]["y"]
                joints_3d[self.num_joints_coco + ipt, 2] = 0
                t_vis = min(obj["keypoints"][name]["v"], 1)
                joints_3d_vis[self.num_joints_coco + ipt, 0] = t_vis
                joints_3d_vis[self.num_joints_coco + ipt, 1] = t_vis
                joints_3d_vis[self.num_joints_coco + ipt, 2] = 0

            center, scale = self._box2cs(obj["clean_bbox"][:4])
            rec.append(
                {
                    "image": self.image_path_from_index(index),
                    "center": center,
                    "scale": scale,
                    "joints_3d": joints_3d,
                    "joints_3d_vis": joints_3d_vis,
                    "filename": "",
                    "imgnum": 0,
                }
            )

        return rec

    def _box2cs(self, box):
        """Convert an [x, y, w, h] box to (center, scale)."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Convert xywh to center + scale, padding to the model aspect ratio.

        Scale is expressed in multiples of ``pixel_std`` (200 px) and
        inflated by 25% to include some context around the box.
        """
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        # Grow the smaller dimension so w/h matches the network input.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], dtype=np.float32
        )
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def image_path_from_index(self, index):
        """Return ``<root>/<image_set>/images/<index>.png``."""
        return os.path.join(self.root, self.image_set, "images", f"{index}.png")

    def _load_coco_person_detection_results(self):
        """Build db entries from an external person-detector result file.

        Returns ``None`` (and logs an error) when the bbox file is empty
        or unreadable; keypoints are zero-initialized placeholders.
        """
        with open(self.bbox_file, "r") as f:
            all_boxes = json.load(f)

        if not all_boxes:
            logger.error("=> Load %s fail!" % self.bbox_file)
            return None

        logger.info("=> Total boxes: {}".format(len(all_boxes)))

        kpt_db = []
        num_boxes = 0
        for det_res in all_boxes:
            # Keep only person detections above the score threshold.
            if det_res["category_id"] != 1:
                continue
            img_name = self.image_path_from_index(det_res["image_id"])
            box = det_res["bbox"]
            score = det_res["score"]

            if score < self.image_thre:
                continue

            num_boxes += 1

            center, scale = self._box2cs(box)
            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.ones((self.num_joints, 3), dtype=np.float32)
            kpt_db.append(
                {
                    "image": img_name,
                    "center": center,
                    "scale": scale,
                    "score": score,
                    "joints_3d": joints_3d,
                    "joints_3d_vis": joints_3d_vis,
                }
            )

        logger.info(
            "=> Total boxes after filter low score@{}: {}".format(
                self.image_thre, num_boxes
            )
        )
        return kpt_db

    def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, *args, **kwargs):
        """Rescore predictions, run OKS-NMS, dump a COCO results json and
        (for non-test splits) run the COCO keypoint evaluation.

        :param preds: predicted keypoints, one (num_joints, 3) array per box
        :param all_boxes: per-box [center(2), scale(2), area, score]
        :param img_path: image path per box; the numeric stem is the image id
        :return: (OrderedDict of metric name -> value, primary AP) or
            ({"Null": 0}, 0) for test splits with no annotations
        """
        rank = cfg.RANK

        res_folder = os.path.join(output_dir, "results")
        if not os.path.exists(res_folder):
            try:
                os.makedirs(res_folder)
            except Exception:
                logger.error("Fail to make {}".format(res_folder))

        res_file = os.path.join(
            res_folder, "keypoints_{}_results_{}.json".format(self.image_set, rank)
        )

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append(
                {
                    "keypoints": kpt,
                    "center": all_boxes[idx][0:2],
                    "scale": all_boxes[idx][2:4],
                    "area": all_boxes[idx][4],
                    "score": all_boxes[idx][5],
                    # Image id parsed from the filename stem.
                    "image": int(img_path[idx].split("/")[-1].split(".")[0]),
                }
            )
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt["image"]].append(kpt)

        # Rescoring and OKS-NMS per image.
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p["score"]
                kpt_score = 0
                valid_num = 0
                # Average confidence over joints above the visibility threshold.
                for n_jt in range(0, num_joints):
                    t_s = n_p["keypoints"][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # Rescore: detector confidence x mean keypoint confidence.
                n_p["score"] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(
                    [img_kpts[i] for i in range(len(img_kpts))], oks_thre
                )
            else:
                keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))], oks_thre)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        if "test" not in self.image_set:
            info_str = self._do_python_keypoint_eval(res_file, res_folder)
            name_value = OrderedDict(info_str)
            return name_value, name_value["AP"]
        else:
            return {"Null": 0}, 0

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Serialize NMS'd keypoints to a COCO-format results json."""
        data_pack = [
            {
                "cat_id": self._class_to_coco_ind[cls],
                "cls_ind": cls_ind,
                "cls": cls,
                "ann_type": "keypoints",
                "keypoints": keypoints,
            }
            for cls_ind, cls in enumerate(self.classes)
            if not cls == "__background__"
        ]

        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        logger.info("=> writing results json to %s" % res_file)
        with open(res_file, "w") as f:
            json.dump(results, f, sort_keys=True, indent=4)
        # Defensive repair: if the written json is truncated (e.g. a crash
        # mid-dump), force-close the top-level array on the last line.
        try:
            with open(res_file) as f:
                json.load(f)
        except Exception:
            content = []
            with open(res_file, "r") as f:
                for line in f:
                    content.append(line)
            content[-1] = "]"
            with open(res_file, "w") as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Flatten per-image keypoint dicts into COCO result records."""
        cat_id = data_pack["cat_id"]
        keypoints = data_pack["keypoints"]
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]["keypoints"] for k in range(len(img_kpts))]
            )
            # np.float32, not the removed np.float alias (NumPy >= 1.24).
            key_points = np.zeros(
                (_key_points.shape[0], self.num_joints * 3), dtype=np.float32
            )

            for ipt in range(self.num_joints):
                key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
                key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
                key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2]  # keypoint score

            result = [
                {
                    "image_id": img_kpts[k]["image"],
                    "category_id": cat_id,
                    "keypoints": list(key_points[k]),
                    "score": img_kpts[k]["score"],
                    "center": list(img_kpts[k]["center"]),
                    "scale": list(img_kpts[k]["scale"]),
                }
                for k in range(len(img_kpts))
            ]
            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file, res_folder):
        """Run COCOeval on the results file and return (name, value) pairs."""
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, "keypoints")
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            "AP", "Ap .5", "AP .75", "AP (M)", "AP (L)",
            "AR", "AR .5", "AR .75", "AR (M)", "AR (L)",
        ]

        return [(name, coco_eval.stats[ind]) for ind, name in enumerate(stats_names)]
PY_SSIZE_T_CLEAN */ +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.6+ or Python 3.3+. +#else +#define CYTHON_ABI "0_29_33" +#define CYTHON_HEX_VERSION 0x001D21F0 +#define CYTHON_FUTURE_DIVISION 1 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #if PY_VERSION_HEX >= 0x02070000 + #define HAVE_LONG_LONG + #endif +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#ifdef PYPY_VERSION + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + 
#define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif +#elif defined(PYSTON_VERSION) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + 
#ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif +#elif defined(PY_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #elif !defined(CYTHON_USE_PYTYPE_LOOKUP) + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define 
CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #elif !defined(CYTHON_USE_PYLONG_INTERNALS) + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #if PY_VERSION_HEX >= 0x030B00A4 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #elif !defined(CYTHON_FAST_THREAD_STATE) + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL (PY_VERSION_HEX < 0x030A0000) + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT (PY_VERSION_HEX >= 0x03050000) + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1) + #endif + #ifndef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX >= 0x030600B1) + #endif + #if PY_VERSION_HEX >= 0x030B00A4 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #elif !defined(CYTHON_USE_EXC_INFO_STACK) + #define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3) + #endif + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL 
(CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_MAJOR_VERSION < 3 + #include "longintrepr.h" + #endif + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_MAYBE_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 1300 + typedef unsigned char uint8_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + #endif + #endif +#else + #include +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) && __cplusplus >= 201103L + #if __has_cpp_attribute(fallthrough) 
+ #define CYTHON_FALLTHROUGH [[fallthrough]] + #elif __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__ ) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif + +#ifndef __cplusplus + #error "Cython files generated with the C++ option must be compiled with a C++ compiler." +#endif +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #else + #define CYTHON_INLINE inline + #endif +#endif +template +void __Pyx_call_destructor(T& x) { + x.~T(); +} +template +class __Pyx_FakeReference { + public: + __Pyx_FakeReference() : ptr(NULL) { } + __Pyx_FakeReference(const T& ref) : ptr(const_cast(&ref)) { } + T *operator->() { return ptr; } + T *operator&() { return ptr; } + operator T&() { return *ptr; } + template bool operator ==(U other) { return *ptr == other; } + template bool operator !=(U other) { return *ptr != other; } + private: + T *ptr; +}; + +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) + #define Py_OptimizeFlag 0 +#endif +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyClass_Type +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_DefaultClassType PyType_Type +#if PY_VERSION_HEX >= 0x030B00A1 + 
static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyObject *kwds=NULL, *argcount=NULL, *posonlyargcount=NULL, *kwonlyargcount=NULL; + PyObject *nlocals=NULL, *stacksize=NULL, *flags=NULL, *replace=NULL, *call_result=NULL, *empty=NULL; + const char *fn_cstr=NULL; + const char *name_cstr=NULL; + PyCodeObject* co=NULL; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + if (!(kwds=PyDict_New())) goto end; + if (!(argcount=PyLong_FromLong(a))) goto end; + if (PyDict_SetItemString(kwds, "co_argcount", argcount) != 0) goto end; + if (!(posonlyargcount=PyLong_FromLong(0))) goto end; + if (PyDict_SetItemString(kwds, "co_posonlyargcount", posonlyargcount) != 0) goto end; + if (!(kwonlyargcount=PyLong_FromLong(k))) goto end; + if (PyDict_SetItemString(kwds, "co_kwonlyargcount", kwonlyargcount) != 0) goto end; + if (!(nlocals=PyLong_FromLong(l))) goto end; + if (PyDict_SetItemString(kwds, "co_nlocals", nlocals) != 0) goto end; + if (!(stacksize=PyLong_FromLong(s))) goto end; + if (PyDict_SetItemString(kwds, "co_stacksize", stacksize) != 0) goto end; + if (!(flags=PyLong_FromLong(f))) goto end; + if (PyDict_SetItemString(kwds, "co_flags", flags) != 0) goto end; + if (PyDict_SetItemString(kwds, "co_code", code) != 0) goto end; + if (PyDict_SetItemString(kwds, "co_consts", c) != 0) goto end; + if (PyDict_SetItemString(kwds, "co_names", n) != 0) goto end; + if (PyDict_SetItemString(kwds, "co_varnames", v) != 0) goto end; + if (PyDict_SetItemString(kwds, "co_freevars", fv) != 0) goto end; + if (PyDict_SetItemString(kwds, "co_cellvars", cell) != 0) goto end; + if (PyDict_SetItemString(kwds, "co_linetable", lnos) != 0) goto end; + if (!(fn_cstr=PyUnicode_AsUTF8AndSize(fn, NULL))) goto end; + if (!(name_cstr=PyUnicode_AsUTF8AndSize(name, NULL))) goto end; + if (!(co = 
PyCode_NewEmpty(fn_cstr, name_cstr, fline))) goto end; + if (!(replace = PyObject_GetAttrString((PyObject*)co, "replace"))) goto cleanup_code_too; + if (!(empty = PyTuple_New(0))) goto cleanup_code_too; // unfortunately __pyx_empty_tuple isn't available here + if (!(call_result = PyObject_Call(replace, empty, kwds))) goto cleanup_code_too; + Py_XDECREF((PyObject*)co); + co = (PyCodeObject*)call_result; + call_result = NULL; + if (0) { + cleanup_code_too: + Py_XDECREF((PyObject*)co); + co = NULL; + } + end: + Py_XDECREF(kwds); + Py_XDECREF(argcount); + Py_XDECREF(posonlyargcount); + Py_XDECREF(kwonlyargcount); + Py_XDECREF(nlocals); + Py_XDECREF(stacksize); + Py_XDECREF(replace); + Py_XDECREF(call_result); + Py_XDECREF(empty); + if (type) { + PyErr_Restore(type, value, traceback); + } + return co; + } +#else + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif + #define __Pyx_DefaultClassType PyType_Type +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #define __Pyx_PyCFunctionFast _PyCFunctionFast + #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords +#endif +#if CYTHON_FAST_PYCCALL +#define __Pyx_PyFastCFunction_Check(func)\ + 
((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS))))) +#else +#define __Pyx_PyFastCFunction_Check(func) 0 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030400A1 + #define PyMem_RawMalloc(n) PyMem_Malloc(n) + #define PyMem_RawRealloc(p, n) PyMem_Realloc(p, n) + #define PyMem_RawFree(p) PyMem_Free(p) +#endif +#if CYTHON_COMPILING_IN_PYSTON + #define __Pyx_PyCode_HasFreeVars(co) PyCode_HasFreeVars(co) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + *key = Py_tss_NEEDS_INIT; + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != 
Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} +#endif +#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStr(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +#else +#define __Pyx_PyDict_GetItemStr(dict, name) PyDict_GetItem(dict, name) +#endif +#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_READY(op) (0) + #else + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch) + #if PY_VERSION_HEX 
>= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #else + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111) + #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) 
PyObject_CallMethod(obj, "__format__", "O", fmt) +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) +#else + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type 
PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyMethod_New(func, self, klass) ((self) ? 
((void)(klass), PyMethod_New(func, self)) : __Pyx_NewRef(func)) +#else + #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS) + #if !defined(_USE_MATH_DEFINES) + #define _USE_MATH_DEFINES + #endif +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifndef __PYX_EXTERN_C + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__gpu_nms +#define __PYX_HAVE_API__gpu_nms +/* Early includes */ +#include +#include +#include "numpy/arrayobject.h" +#include "numpy/ndarrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/arrayscalars.h" +#include "numpy/ufuncobject.h" + + /* NumPy API declarations from "numpy/__init__.pxd" */ + +#include "gpu_nms.hpp" +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if 
defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? 
-value : value) +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define 
__Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) { + const Py_UNICODE *u_end = u; + while (*u_end++) ; + return (size_t)(u_end - u - 1); +} +#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) +#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Float(x)) +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +static char* 
__PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +static PyObject *__pyx_m = NULL; +static PyObject *__pyx_d; +static PyObject *__pyx_b; +static PyObject *__pyx_cython_runtime = NULL; +static PyObject *__pyx_empty_tuple; +static PyObject *__pyx_empty_bytes; +static PyObject *__pyx_empty_unicode; +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm= __FILE__; +static const char *__pyx_filename; + +/* Header.proto */ +#if !defined(CYTHON_CCOMPLEX) + #if defined(__cplusplus) + #define CYTHON_CCOMPLEX 1 + #elif defined(_Complex_I) + #define CYTHON_CCOMPLEX 1 + #else + #define CYTHON_CCOMPLEX 0 + #endif +#endif +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #include + #else + #include + #endif +#endif +#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__) + #undef _Complex_I + #define 
_Complex_I 1.0fj +#endif + + +static const char *__pyx_f[] = { + "gpu_nms.pyx", + "__init__.pxd", + "type.pxd", +}; +/* BufferFormatStructs.proto */ +#define IS_UNSIGNED(type) (((type) -1) > 0) +struct __Pyx_StructField_; +#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0) +typedef struct { + const char* name; + struct __Pyx_StructField_* fields; + size_t size; + size_t arraysize[8]; + int ndim; + char typegroup; + char is_unsigned; + int flags; +} __Pyx_TypeInfo; +typedef struct __Pyx_StructField_ { + __Pyx_TypeInfo* type; + const char* name; + size_t offset; +} __Pyx_StructField; +typedef struct { + __Pyx_StructField* field; + size_t parent_offset; +} __Pyx_BufFmt_StackElem; +typedef struct { + __Pyx_StructField root; + __Pyx_BufFmt_StackElem* head; + size_t fmt_offset; + size_t new_count, enc_count; + size_t struct_alignment; + int is_complex; + char enc_type; + char new_packmode; + char enc_packmode; + char is_valid_array; +} __Pyx_BufFmt_Context; + + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":690 + * # in Cython to enable them only on the right systems. 
+ * + * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + */ +typedef npy_int8 __pyx_t_5numpy_int8_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":691 + * + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t + */ +typedef npy_int16 __pyx_t_5numpy_int16_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":692 + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< + * ctypedef npy_int64 int64_t + * #ctypedef npy_int96 int96_t + */ +typedef npy_int32 __pyx_t_5numpy_int32_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":693 + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< + * #ctypedef npy_int96 int96_t + * #ctypedef npy_int128 int128_t + */ +typedef npy_int64 __pyx_t_5numpy_int64_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":697 + * #ctypedef npy_int128 int128_t + * + * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + */ +typedef npy_uint8 __pyx_t_5numpy_uint8_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":698 + * + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t + */ +typedef npy_uint16 __pyx_t_5numpy_uint16_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":699 + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< + * ctypedef npy_uint64 uint64_t + * #ctypedef npy_uint96 uint96_t + */ +typedef npy_uint32 __pyx_t_5numpy_uint32_t; + +/* 
"../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":700 + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< + * #ctypedef npy_uint96 uint96_t + * #ctypedef npy_uint128 uint128_t + */ +typedef npy_uint64 __pyx_t_5numpy_uint64_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":704 + * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< + * ctypedef npy_float64 float64_t + * #ctypedef npy_float80 float80_t + */ +typedef npy_float32 __pyx_t_5numpy_float32_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":705 + * + * ctypedef npy_float32 float32_t + * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< + * #ctypedef npy_float80 float80_t + * #ctypedef npy_float128 float128_t + */ +typedef npy_float64 __pyx_t_5numpy_float64_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":714 + * # The int types are mapped a bit surprising -- + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t + */ +typedef npy_long __pyx_t_5numpy_int_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":715 + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong longlong_t + * + */ +typedef npy_longlong __pyx_t_5numpy_long_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":716 + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_ulong uint_t + */ +typedef npy_longlong __pyx_t_5numpy_longlong_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":718 + * ctypedef npy_longlong longlong_t + * + * 
ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t + */ +typedef npy_ulong __pyx_t_5numpy_uint_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":719 + * + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulonglong_t + * + */ +typedef npy_ulonglong __pyx_t_5numpy_ulong_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":720 + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_intp intp_t + */ +typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":722 + * ctypedef npy_ulonglong ulonglong_t + * + * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< + * ctypedef npy_uintp uintp_t + * + */ +typedef npy_intp __pyx_t_5numpy_intp_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":723 + * + * ctypedef npy_intp intp_t + * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< + * + * ctypedef npy_double float_t + */ +typedef npy_uintp __pyx_t_5numpy_uintp_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":725 + * ctypedef npy_uintp uintp_t + * + * ctypedef npy_double float_t # <<<<<<<<<<<<<< + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t + */ +typedef npy_double __pyx_t_5numpy_float_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":726 + * + * ctypedef npy_double float_t + * ctypedef npy_double double_t # <<<<<<<<<<<<<< + * ctypedef npy_longdouble longdouble_t + * + */ +typedef npy_double __pyx_t_5numpy_double_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":727 + * ctypedef npy_double float_t + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< + * 
+ * ctypedef npy_cfloat cfloat_t + */ +typedef npy_longdouble __pyx_t_5numpy_longdouble_t; +/* Declarations.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< float > __pyx_t_float_complex; + #else + typedef float _Complex __pyx_t_float_complex; + #endif +#else + typedef struct { float real, imag; } __pyx_t_float_complex; +#endif +static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); + +/* Declarations.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< double > __pyx_t_double_complex; + #else + typedef double _Complex __pyx_t_double_complex; + #endif +#else + typedef struct { double real, imag; } __pyx_t_double_complex; +#endif +static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); + + +/*--- Type declarations ---*/ + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":729 + * ctypedef npy_longdouble longdouble_t + * + * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t + */ +typedef npy_cfloat __pyx_t_5numpy_cfloat_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":730 + * + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< + * ctypedef npy_clongdouble clongdouble_t + * + */ +typedef npy_cdouble __pyx_t_5numpy_cdouble_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":731 + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cdouble complex_t + */ +typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":733 + * ctypedef npy_clongdouble clongdouble_t + * + * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew1(a): + */ +typedef 
npy_cdouble __pyx_t_5numpy_complex_t; + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, int); + void (*DECREF)(void*, PyObject*, int); + void (*GOTREF)(void*, PyObject*, int); + void (*GIVEREF)(void*, PyObject*, int); + void* (*SetupContext)(const char*, int, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) +#endif + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) +#else + 
#define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ + const char* function_name); + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 
1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* IsLittleEndian.proto */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void); + +/* BufferFormatCheck.proto */ +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type); + +/* BufferGetAndValidate.proto */ +#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\ + ((obj == Py_None || obj == NULL) ?\ + (__Pyx_ZeroBuffer(buf), 0) :\ + __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)) +static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj, + __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack); +static void __Pyx_ZeroBuffer(Py_buffer* buf); +static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); +static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if 
(likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* GetModuleGlobalName.proto */ +#if CYTHON_USE_DICT_VERSIONS +#define __Pyx_GetModuleGlobalName(var, name) do {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ + (likely(__pyx_dict_cached_value) ? 
__Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ + __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ + PY_UINT64_T __pyx_dict_version;\ + PyObject *__pyx_dict_cached_value;\ + (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); +#else +#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) +#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); +#endif + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* ExtTypeTest.proto */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); + +/* GetItemInt.proto */ +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int wraparound, int boundscheck); + +/* ObjectGetItem.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key); +#else +#define __Pyx_PyObject_GetItem(obj, key) PyObject_GetItem(obj, key) +#endif + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +#if 1 || PY_VERSION_HEX < 0x030600B1 +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#else +#define __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs) _PyFunction_FastCallDict(func, args, nargs, kwargs) +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define 
Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif +#if CYTHON_FAST_PYCALL + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" +#if PY_VERSION_HEX >= 0x030b00a6 + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif + #define __Pxy_PyFrame_Initialize_Offsets()\ + ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif // CYTHON_FAST_PYCALL +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectCallNoArg.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); +#else +#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL) +#endif + +/* PyCFunctionFastCall.proto */ +#if CYTHON_FAST_PYCCALL +static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObject **args, Py_ssize_t nargs); +#else +#define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL) +#endif + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* PyObjectCall2Args.proto */ +static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2); + +/* BufferIndexError.proto */ +static void __Pyx_RaiseBufferIndexError(int axis); + +#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0) +#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + 
i0 * s0 + i1 * s1) +/* BufferFallbackError.proto */ +static void __Pyx_RaiseBufferFallbackError(void); + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) 
+#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* GetTopmostException.proto */ +#if CYTHON_USE_EXC_INFO_STACK +static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); +#endif + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* GetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* TypeImport.proto */ +#ifndef __PYX_HAVE_RT_ImportType_proto +#define __PYX_HAVE_RT_ImportType_proto +enum __Pyx_ImportType_CheckSize { + __Pyx_ImportType_CheckSize_Error = 0, + __Pyx_ImportType_CheckSize_Warn = 1, + __Pyx_ImportType_CheckSize_Ignore 
= 2 +}; +static PyTypeObject *__Pyx_ImportType(PyObject* module, const char *module_name, const char *class_name, size_t size, enum __Pyx_ImportType_CheckSize check_size); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* BufferStructDeclare.proto */ +typedef struct { + Py_ssize_t shape, strides, suboffsets; +} __Pyx_Buf_DimInfo; +typedef struct { + size_t refcount; + Py_buffer pybuffer; +} __Pyx_Buffer; +typedef struct { + __Pyx_Buffer *rcbuffer; + char *data; + __Pyx_Buf_DimInfo diminfo[8]; +} __Pyx_LocalBuf_ND; + +#if PY_MAJOR_VERSION < 3 + static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); + static void __Pyx_ReleaseBuffer(Py_buffer *view); +#else + #define __Pyx_GetBuffer PyObject_GetBuffer + #define __Pyx_ReleaseBuffer PyBuffer_Release +#endif + + +/* GCCDiagnostics.proto */ +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* RealImag.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + 
#define __Pyx_CREAL(z) ((z).real()) + #define __Pyx_CIMAG(z) ((z).imag()) + #else + #define __Pyx_CREAL(z) (__real__(z)) + #define __Pyx_CIMAG(z) (__imag__(z)) + #endif +#else + #define __Pyx_CREAL(z) ((z).real) + #define __Pyx_CIMAG(z) ((z).imag) +#endif +#if defined(__cplusplus) && CYTHON_CCOMPLEX\ + && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) + #define __Pyx_SET_CREAL(z,x) ((z).real(x)) + #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) +#else + #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) + #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eq_float(a, b) ((a)==(b)) + #define __Pyx_c_sum_float(a, b) ((a)+(b)) + #define __Pyx_c_diff_float(a, b) ((a)-(b)) + #define __Pyx_c_prod_float(a, b) ((a)*(b)) + #define __Pyx_c_quot_float(a, b) ((a)/(b)) + #define __Pyx_c_neg_float(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_float(z) ((z)==(float)0) + #define __Pyx_c_conj_float(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_float(z) (::std::abs(z)) + #define __Pyx_c_pow_float(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_float(z) ((z)==0) + #define __Pyx_c_conj_float(z) (conjf(z)) + #if 1 + #define __Pyx_c_abs_float(z) (cabsf(z)) + #define __Pyx_c_pow_float(a, b) (cpowf(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE 
__pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex); + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex); + #endif +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eq_double(a, b) ((a)==(b)) + #define __Pyx_c_sum_double(a, b) ((a)+(b)) + #define __Pyx_c_diff_double(a, b) ((a)-(b)) + #define __Pyx_c_prod_double(a, b) ((a)*(b)) + #define __Pyx_c_quot_double(a, b) ((a)/(b)) + #define __Pyx_c_neg_double(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_double(z) ((z)==(double)0) + #define __Pyx_c_conj_double(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (::std::abs(z)) + #define __Pyx_c_pow_double(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_double(z) ((z)==0) + #define __Pyx_c_conj_double(z) (conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (cabs(z)) + #define __Pyx_c_pow_double(a, b) (cpow(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex 
__Pyx_c_conj_double(__pyx_t_double_complex); + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex); + #endif +#endif + +/* CIntFromPy.proto */ +static CYTHON_INLINE npy_int32 __Pyx_PyInt_As_npy_int32(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* CheckBinaryVersion.proto */ +static int __Pyx_check_binary_version(void); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + + +/* Module declarations from 'cpython.buffer' */ + +/* Module declarations from 'libc.string' */ + +/* Module declarations from 'libc.stdio' */ + +/* Module declarations from '__builtin__' */ + +/* Module declarations from 'cpython.type' */ +static PyTypeObject 
*__pyx_ptype_7cpython_4type_type = 0; + +/* Module declarations from 'cpython' */ + +/* Module declarations from 'cpython.object' */ + +/* Module declarations from 'cpython.ref' */ + +/* Module declarations from 'cpython.mem' */ + +/* Module declarations from 'numpy' */ + +/* Module declarations from 'numpy' */ +static PyTypeObject *__pyx_ptype_5numpy_dtype = 0; +static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0; +static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0; +static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0; +static PyTypeObject *__pyx_ptype_5numpy_generic = 0; +static PyTypeObject *__pyx_ptype_5numpy_number = 0; +static PyTypeObject *__pyx_ptype_5numpy_integer = 0; +static PyTypeObject *__pyx_ptype_5numpy_signedinteger = 0; +static PyTypeObject *__pyx_ptype_5numpy_unsignedinteger = 0; +static PyTypeObject *__pyx_ptype_5numpy_inexact = 0; +static PyTypeObject *__pyx_ptype_5numpy_floating = 0; +static PyTypeObject *__pyx_ptype_5numpy_complexfloating = 0; +static PyTypeObject *__pyx_ptype_5numpy_flexible = 0; +static PyTypeObject *__pyx_ptype_5numpy_character = 0; +static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0; + +/* Module declarations from 'gpu_nms' */ +static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t = { "float32_t", NULL, sizeof(__pyx_t_5numpy_float32_t), { 0 }, 0, 'R', 0, 0 }; +static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t = { "int32_t", NULL, sizeof(__pyx_t_5numpy_int32_t), { 0 }, 0, IS_UNSIGNED(__pyx_t_5numpy_int32_t) ? 
'U' : 'I', IS_UNSIGNED(__pyx_t_5numpy_int32_t), 0 }; +#define __Pyx_MODULE_NAME "gpu_nms" +extern int __pyx_module_is_main_gpu_nms; +int __pyx_module_is_main_gpu_nms = 0; + +/* Implementation of 'gpu_nms' */ +static PyObject *__pyx_builtin_ImportError; +static const char __pyx_k_np[] = "np"; +static const char __pyx_k_dets[] = "dets"; +static const char __pyx_k_keep[] = "keep"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_dtype[] = "dtype"; +static const char __pyx_k_int32[] = "int32"; +static const char __pyx_k_numpy[] = "numpy"; +static const char __pyx_k_order[] = "order"; +static const char __pyx_k_zeros[] = "zeros"; +static const char __pyx_k_astype[] = "astype"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_scores[] = "scores"; +static const char __pyx_k_thresh[] = "thresh"; +static const char __pyx_k_argsort[] = "argsort"; +static const char __pyx_k_gpu_nms[] = "gpu_nms"; +static const char __pyx_k_num_out[] = "num_out"; +static const char __pyx_k_boxes_dim[] = "boxes_dim"; +static const char __pyx_k_boxes_num[] = "boxes_num"; +static const char __pyx_k_device_id[] = "device_id"; +static const char __pyx_k_ImportError[] = "ImportError"; +static const char __pyx_k_gpu_nms_pyx[] = "gpu_nms.pyx"; +static const char __pyx_k_sorted_dets[] = "sorted_dets"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; +static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; +static PyObject *__pyx_n_s_ImportError; +static PyObject *__pyx_n_s_argsort; +static PyObject *__pyx_n_s_astype; +static PyObject *__pyx_n_s_boxes_dim; +static PyObject *__pyx_n_s_boxes_num; +static PyObject *__pyx_n_s_cline_in_traceback; +static PyObject *__pyx_n_s_dets; +static 
PyObject *__pyx_n_s_device_id; +static PyObject *__pyx_n_s_dtype; +static PyObject *__pyx_n_s_gpu_nms; +static PyObject *__pyx_kp_s_gpu_nms_pyx; +static PyObject *__pyx_n_s_import; +static PyObject *__pyx_n_s_int32; +static PyObject *__pyx_n_s_keep; +static PyObject *__pyx_n_s_main; +static PyObject *__pyx_n_s_name; +static PyObject *__pyx_n_s_np; +static PyObject *__pyx_n_s_num_out; +static PyObject *__pyx_n_s_numpy; +static PyObject *__pyx_kp_s_numpy_core_multiarray_failed_to; +static PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor; +static PyObject *__pyx_n_s_order; +static PyObject *__pyx_n_s_scores; +static PyObject *__pyx_n_s_sorted_dets; +static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_thresh; +static PyObject *__pyx_n_s_zeros; +static PyObject *__pyx_pf_7gpu_nms_gpu_nms(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets, PyObject *__pyx_v_thresh, __pyx_t_5numpy_int32_t __pyx_v_device_id); /* proto */ +static PyObject *__pyx_int_4; +static PyObject *__pyx_int_neg_1; +static PyObject *__pyx_slice_; +static PyObject *__pyx_slice__3; +static PyObject *__pyx_tuple__2; +static PyObject *__pyx_tuple__4; +static PyObject *__pyx_tuple__5; +static PyObject *__pyx_tuple__6; +static PyObject *__pyx_codeobj__7; +/* Late includes */ + +/* "gpu_nms.pyx":19 + * void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_7gpu_nms_1gpu_nms(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_7gpu_nms_1gpu_nms = {"gpu_nms", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_7gpu_nms_1gpu_nms, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_7gpu_nms_1gpu_nms(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyArrayObject *__pyx_v_dets = 0; 
+ PyObject *__pyx_v_thresh = 0; + __pyx_t_5numpy_int32_t __pyx_v_device_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("gpu_nms (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_dets,&__pyx_n_s_thresh,&__pyx_n_s_device_id,0}; + PyObject* values[3] = {0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_dets)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_thresh)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("gpu_nms", 0, 2, 3, 1); __PYX_ERR(0, 19, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (kw_args > 0) { + PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_device_id); + if (value) { values[2] = value; kw_args--; } + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "gpu_nms") < 0)) __PYX_ERR(0, 19, __pyx_L3_error) + } + } else { + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_dets = ((PyArrayObject *)values[0]); + __pyx_v_thresh = 
((PyObject*)values[1]); + if (values[2]) { + __pyx_v_device_id = __Pyx_PyInt_As_npy_int32(values[2]); if (unlikely((__pyx_v_device_id == ((npy_int32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error) + } else { + __pyx_v_device_id = ((__pyx_t_5numpy_int32_t)0); + } + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("gpu_nms", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 19, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("gpu_nms.gpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_dets), __pyx_ptype_5numpy_ndarray, 1, "dets", 0))) __PYX_ERR(0, 19, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_thresh), (&PyFloat_Type), 1, "thresh", 1))) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_r = __pyx_pf_7gpu_nms_gpu_nms(__pyx_self, __pyx_v_dets, __pyx_v_thresh, __pyx_v_device_id); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_7gpu_nms_gpu_nms(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets, PyObject *__pyx_v_thresh, __pyx_t_5numpy_int32_t __pyx_v_device_id) { + int __pyx_v_boxes_num; + int __pyx_v_boxes_dim; + int __pyx_v_num_out; + PyArrayObject *__pyx_v_keep = 0; + PyArrayObject *__pyx_v_scores = 0; + PyArrayObject *__pyx_v_order = 0; + PyArrayObject *__pyx_v_sorted_dets = 0; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets; + __Pyx_Buffer __pyx_pybuffer_dets; + __Pyx_LocalBuf_ND __pyx_pybuffernd_keep; + __Pyx_Buffer __pyx_pybuffer_keep; + __Pyx_LocalBuf_ND __pyx_pybuffernd_order; + __Pyx_Buffer __pyx_pybuffer_order; + __Pyx_LocalBuf_ND __pyx_pybuffernd_scores; + __Pyx_Buffer __pyx_pybuffer_scores; + __Pyx_LocalBuf_ND __pyx_pybuffernd_sorted_dets; + __Pyx_Buffer __pyx_pybuffer_sorted_dets; + 
PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + PyArrayObject *__pyx_t_7 = NULL; + PyArrayObject *__pyx_t_8 = NULL; + PyArrayObject *__pyx_t_9 = NULL; + Py_ssize_t __pyx_t_10; + int __pyx_t_11; + Py_ssize_t __pyx_t_12; + Py_ssize_t __pyx_t_13; + float __pyx_t_14; + PyObject *__pyx_t_15 = NULL; + PyObject *__pyx_t_16 = NULL; + PyObject *__pyx_t_17 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("gpu_nms", 0); + __pyx_pybuffer_keep.pybuffer.buf = NULL; + __pyx_pybuffer_keep.refcount = 0; + __pyx_pybuffernd_keep.data = NULL; + __pyx_pybuffernd_keep.rcbuffer = &__pyx_pybuffer_keep; + __pyx_pybuffer_scores.pybuffer.buf = NULL; + __pyx_pybuffer_scores.refcount = 0; + __pyx_pybuffernd_scores.data = NULL; + __pyx_pybuffernd_scores.rcbuffer = &__pyx_pybuffer_scores; + __pyx_pybuffer_order.pybuffer.buf = NULL; + __pyx_pybuffer_order.refcount = 0; + __pyx_pybuffernd_order.data = NULL; + __pyx_pybuffernd_order.rcbuffer = &__pyx_pybuffer_order; + __pyx_pybuffer_sorted_dets.pybuffer.buf = NULL; + __pyx_pybuffer_sorted_dets.refcount = 0; + __pyx_pybuffernd_sorted_dets.data = NULL; + __pyx_pybuffernd_sorted_dets.rcbuffer = &__pyx_pybuffer_sorted_dets; + __pyx_pybuffer_dets.pybuffer.buf = NULL; + __pyx_pybuffer_dets.refcount = 0; + __pyx_pybuffernd_dets.data = NULL; + __pyx_pybuffernd_dets.rcbuffer = &__pyx_pybuffer_dets; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets.rcbuffer->pybuffer, (PyObject*)__pyx_v_dets, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 19, __pyx_L1_error) + } + __pyx_pybuffernd_dets.diminfo[0].strides = __pyx_pybuffernd_dets.rcbuffer->pybuffer.strides[0]; 
__pyx_pybuffernd_dets.diminfo[0].shape = __pyx_pybuffernd_dets.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets.diminfo[1].strides = __pyx_pybuffernd_dets.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets.diminfo[1].shape = __pyx_pybuffernd_dets.rcbuffer->pybuffer.shape[1]; + + /* "gpu_nms.pyx":21 + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] # <<<<<<<<<<<<<< + * cdef int boxes_dim = dets.shape[1] + * cdef int num_out + */ + __pyx_v_boxes_num = (__pyx_v_dets->dimensions[0]); + + /* "gpu_nms.pyx":22 + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + * cdef int boxes_dim = dets.shape[1] # <<<<<<<<<<<<<< + * cdef int num_out + * cdef np.ndarray[np.int32_t, ndim=1] \ + */ + __pyx_v_boxes_dim = (__pyx_v_dets->dimensions[1]); + + /* "gpu_nms.pyx":25 + * cdef int num_out + * cdef np.ndarray[np.int32_t, ndim=1] \ + * keep = np.zeros(boxes_num, dtype=np.int32) # <<<<<<<<<<<<<< + * cdef np.ndarray[np.float32_t, ndim=1] \ + * scores = dets[:, 4] + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_boxes_num); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); + __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 25, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_int32); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_5) < 0) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 25, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_keep.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_keep = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_keep.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 24, __pyx_L1_error) + } else {__pyx_pybuffernd_keep.diminfo[0].strides = __pyx_pybuffernd_keep.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_keep.diminfo[0].shape = __pyx_pybuffernd_keep.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_6 = 0; + __pyx_v_keep = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "gpu_nms.pyx":27 + * keep = np.zeros(boxes_num, dtype=np.int32) + * cdef np.ndarray[np.float32_t, ndim=1] \ + * scores = dets[:, 4] # <<<<<<<<<<<<<< + * cdef np.ndarray[np.int32_t, ndim=1] \ + * order = scores.argsort()[::-1].astype(np.int32) + */ + __pyx_t_5 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_tuple__2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 27, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + if (!(likely(((__pyx_t_5) == Py_None) || 
likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 27, __pyx_L1_error) + __pyx_t_7 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_scores.rcbuffer->pybuffer, (PyObject*)__pyx_t_7, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_scores = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_scores.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 26, __pyx_L1_error) + } else {__pyx_pybuffernd_scores.diminfo[0].strides = __pyx_pybuffernd_scores.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_scores.diminfo[0].shape = __pyx_pybuffernd_scores.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_7 = 0; + __pyx_v_scores = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "gpu_nms.pyx":29 + * scores = dets[:, 4] + * cdef np.ndarray[np.int32_t, ndim=1] \ + * order = scores.argsort()[::-1].astype(np.int32) # <<<<<<<<<<<<<< + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_scores), __pyx_n_s_argsort); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_3))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_3, function); + } + } + __pyx_t_1 = (__pyx_t_2) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2) : __Pyx_PyObject_CallNoArg(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetItem(__pyx_t_1, __pyx_slice__3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_astype); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_int32); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + } + } + __pyx_t_5 = (__pyx_t_3) ? 
__Pyx_PyObject_Call2Args(__pyx_t_1, __pyx_t_3, __pyx_t_2) : __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 29, __pyx_L1_error) + __pyx_t_8 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_order.rcbuffer->pybuffer, (PyObject*)__pyx_t_8, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_order = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_order.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 28, __pyx_L1_error) + } else {__pyx_pybuffernd_order.diminfo[0].strides = __pyx_pybuffernd_order.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_order.diminfo[0].shape = __pyx_pybuffernd_order.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_8 = 0; + __pyx_v_order = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "gpu_nms.pyx":31 + * order = scores.argsort()[::-1].astype(np.int32) + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] # <<<<<<<<<<<<<< + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + * keep = keep[:num_out] + */ + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_INCREF(((PyObject *)__pyx_v_order)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_order)); + PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_v_order)); + __Pyx_INCREF(__pyx_slice_); + __Pyx_GIVEREF(__pyx_slice_); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_slice_); + __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_t_5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 
31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 31, __pyx_L1_error) + __pyx_t_9 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer, (PyObject*)__pyx_t_9, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) { + __pyx_v_sorted_dets = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 30, __pyx_L1_error) + } else {__pyx_pybuffernd_sorted_dets.diminfo[0].strides = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sorted_dets.diminfo[0].shape = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_sorted_dets.diminfo[1].strides = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_sorted_dets.diminfo[1].shape = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_9 = 0; + __pyx_v_sorted_dets = ((PyArrayObject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "gpu_nms.pyx":32 + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) # <<<<<<<<<<<<<< + * keep = keep[:num_out] + * return list(order[keep]) + */ + __pyx_t_10 = 0; + __pyx_t_11 = -1; + if (__pyx_t_10 < 0) { + __pyx_t_10 += __pyx_pybuffernd_keep.diminfo[0].shape; + if (unlikely(__pyx_t_10 < 0)) __pyx_t_11 = 0; + } else if (unlikely(__pyx_t_10 >= __pyx_pybuffernd_keep.diminfo[0].shape)) __pyx_t_11 = 0; + if (unlikely(__pyx_t_11 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_11); + __PYX_ERR(0, 32, __pyx_L1_error) + } + __pyx_t_12 = 0; + __pyx_t_13 = 0; + __pyx_t_11 = -1; + if (__pyx_t_12 < 0) { + __pyx_t_12 += 
__pyx_pybuffernd_sorted_dets.diminfo[0].shape; + if (unlikely(__pyx_t_12 < 0)) __pyx_t_11 = 0; + } else if (unlikely(__pyx_t_12 >= __pyx_pybuffernd_sorted_dets.diminfo[0].shape)) __pyx_t_11 = 0; + if (__pyx_t_13 < 0) { + __pyx_t_13 += __pyx_pybuffernd_sorted_dets.diminfo[1].shape; + if (unlikely(__pyx_t_13 < 0)) __pyx_t_11 = 1; + } else if (unlikely(__pyx_t_13 >= __pyx_pybuffernd_sorted_dets.diminfo[1].shape)) __pyx_t_11 = 1; + if (unlikely(__pyx_t_11 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_11); + __PYX_ERR(0, 32, __pyx_L1_error) + } + __pyx_t_14 = __pyx_PyFloat_AsFloat(__pyx_v_thresh); if (unlikely((__pyx_t_14 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 32, __pyx_L1_error) + _nms((&(*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_keep.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_keep.diminfo[0].strides))), (&__pyx_v_num_out), (&(*__Pyx_BufPtrStrided2d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_sorted_dets.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_sorted_dets.diminfo[1].strides))), __pyx_v_boxes_num, __pyx_v_boxes_dim, __pyx_t_14, __pyx_v_device_id); + + /* "gpu_nms.pyx":33 + * sorted_dets = dets[order, :] + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + * keep = keep[:num_out] # <<<<<<<<<<<<<< + * return list(order[keep]) + */ + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_num_out); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = PySlice_New(Py_None, __pyx_t_1, Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_keep), __pyx_t_5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_1) == Py_None) || 
likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 33, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_keep.rcbuffer->pybuffer); + __pyx_t_11 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_keep.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_11 < 0)) { + PyErr_Fetch(&__pyx_t_15, &__pyx_t_16, &__pyx_t_17); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_keep.rcbuffer->pybuffer, (PyObject*)__pyx_v_keep, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_15); Py_XDECREF(__pyx_t_16); Py_XDECREF(__pyx_t_17); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_15, __pyx_t_16, __pyx_t_17); + } + __pyx_t_15 = __pyx_t_16 = __pyx_t_17 = 0; + } + __pyx_pybuffernd_keep.diminfo[0].strides = __pyx_pybuffernd_keep.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_keep.diminfo[0].shape = __pyx_pybuffernd_keep.rcbuffer->pybuffer.shape[0]; + if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 33, __pyx_L1_error) + } + __pyx_t_6 = 0; + __Pyx_DECREF_SET(__pyx_v_keep, ((PyArrayObject *)__pyx_t_1)); + __pyx_t_1 = 0; + + /* "gpu_nms.pyx":34 + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + * keep = keep[:num_out] + * return list(order[keep]) # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_order), ((PyObject *)__pyx_v_keep)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 34, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = PySequence_List(__pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 34, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_5; + __pyx_t_5 = 0; + goto __pyx_L0; + + /* "gpu_nms.pyx":19 + * void 
_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_keep.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_order.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_scores.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("gpu_nms.gpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_keep.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_order.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_scores.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_keep); + __Pyx_XDECREF((PyObject *)__pyx_v_scores); + __Pyx_XDECREF((PyObject *)__pyx_v_order); + __Pyx_XDECREF((PyObject *)__pyx_v_sorted_dets); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":735 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * 
+ */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":736 + * + * cdef inline object PyArray_MultiIterNew1(a): + * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew2(a, b): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 736, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":735 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":738 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); + + /* 
"../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":739 + * + * cdef inline object PyArray_MultiIterNew2(a, b): + * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 739, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":738 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":741 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":742 + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(3, 
((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 742, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":741 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":744 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":745 + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 745, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* 
"../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":744 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":747 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":748 + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 748, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":747 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, 
b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":750 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":751 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0); + if (__pyx_t_1) { + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":752 + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape # <<<<<<<<<<<<<< + * else: + * return () + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape)); + __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape); + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":751 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + } + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":754 + * return d.subarray.shape + * else: + * return () # <<<<<<<<<<<<<< + * + * + */ + /*else*/ { + 
__Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_empty_tuple); + __pyx_r = __pyx_empty_tuple; + goto __pyx_L0; + } + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":750 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":929 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) + */ + +static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("set_array_base", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":930 + * + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<< + * PyArray_SetBaseObject(arr, base) + * + */ + Py_INCREF(__pyx_v_base); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":931 + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! 
+ * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< + * + * cdef inline object get_array_base(ndarray arr): + */ + (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base)); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":929 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) + */ + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":933 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { + PyObject *__pyx_v_base; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("get_array_base", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":934 + * + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< + * if base is NULL: + * return None + */ + __pyx_v_base = PyArray_BASE(__pyx_v_arr); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":935 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + __pyx_t_1 = ((__pyx_v_base == NULL) != 0); + if (__pyx_t_1) { + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":936 + * base = PyArray_BASE(arr) + * if base is NULL: + * return None # <<<<<<<<<<<<<< + * return base + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + + /* 
"../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":935 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + } + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":937 + * if base is NULL: + * return None + * return base # <<<<<<<<<<<<<< + * + * # Versions of the import_* functions which are more suitable for + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_base)); + __pyx_r = ((PyObject *)__pyx_v_base); + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":933 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":941 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_array", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":942 + * # Cython code. 
+ * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":943 + * cdef inline int import_array() except -1: + * try: + * __pyx_import_array() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") + */ + __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 943, __pyx_L3_error) + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":942 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":944 + * try: + * __pyx_import_array() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.multiarray failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 944, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":945 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int 
import_umath() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 945, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 945, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":942 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":941 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":947 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = 
NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_umath", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":948 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":949 + * cdef inline int import_umath() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 949, __pyx_L3_error) + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":948 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":950 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 950, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* 
"../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":951 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 951, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 951, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":948 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":947 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":953 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject 
*__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_ufunc", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":954 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":955 + * cdef inline int import_ufunc() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 955, __pyx_L3_error) + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":954 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":956 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, 
&__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 956, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":957 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef extern from *: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 957, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 957, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":954 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":953 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":967 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + 
+static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_timedelta64_object", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":979 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyTimedeltaArrType_Type)); + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":967 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":982 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_datetime64_object", 0); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":994 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":982 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":997 + * + * + * cdef inline npy_datetime get_datetime64_value(object 
obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + +static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { + npy_datetime __pyx_r; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1004 + * also needed. That can be found using `get_datetime64_unit`. + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":997 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1007 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + +static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { + npy_timedelta __pyx_r; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1011 + * returns the int64 value underlying scalar numpy timedelta64 object + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1007 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1014 + * + * + * cdef inline NPY_DATETIMEUNIT 
get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + +static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { + NPY_DATETIMEUNIT __pyx_r; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1018 + * returns the unit part of the dtype for a numpy datetime64 object. + * """ + * return (obj).obmeta.base # <<<<<<<<<<<<<< + */ + __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); + goto __pyx_L0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1014 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_gpu_nms(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_gpu_nms}, + {0, NULL} +}; +#endif + +static struct PyModuleDef __pyx_moduledef = { + PyModuleDef_HEAD_INIT, + "gpu_nms", + 0, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; +#endif +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE 
__attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif + +static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, + {&__pyx_n_s_argsort, __pyx_k_argsort, sizeof(__pyx_k_argsort), 0, 0, 1, 1}, + {&__pyx_n_s_astype, __pyx_k_astype, sizeof(__pyx_k_astype), 0, 0, 1, 1}, + {&__pyx_n_s_boxes_dim, __pyx_k_boxes_dim, sizeof(__pyx_k_boxes_dim), 0, 0, 1, 1}, + {&__pyx_n_s_boxes_num, __pyx_k_boxes_num, sizeof(__pyx_k_boxes_num), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_dets, __pyx_k_dets, sizeof(__pyx_k_dets), 0, 0, 1, 1}, + {&__pyx_n_s_device_id, __pyx_k_device_id, sizeof(__pyx_k_device_id), 0, 0, 1, 1}, + {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, + {&__pyx_n_s_gpu_nms, __pyx_k_gpu_nms, sizeof(__pyx_k_gpu_nms), 0, 0, 1, 1}, + {&__pyx_kp_s_gpu_nms_pyx, __pyx_k_gpu_nms_pyx, sizeof(__pyx_k_gpu_nms_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_int32, __pyx_k_int32, sizeof(__pyx_k_int32), 0, 0, 1, 1}, + {&__pyx_n_s_keep, __pyx_k_keep, sizeof(__pyx_k_keep), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_num_out, __pyx_k_num_out, sizeof(__pyx_k_num_out), 0, 0, 1, 1}, + {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, + {&__pyx_kp_s_numpy_core_multiarray_failed_to, __pyx_k_numpy_core_multiarray_failed_to, sizeof(__pyx_k_numpy_core_multiarray_failed_to), 0, 0, 1, 0}, + {&__pyx_kp_s_numpy_core_umath_failed_to_impor, __pyx_k_numpy_core_umath_failed_to_impor, sizeof(__pyx_k_numpy_core_umath_failed_to_impor), 0, 0, 1, 0}, + {&__pyx_n_s_order, __pyx_k_order, sizeof(__pyx_k_order), 0, 0, 1, 1}, + 
{&__pyx_n_s_scores, __pyx_k_scores, sizeof(__pyx_k_scores), 0, 0, 1, 1}, + {&__pyx_n_s_sorted_dets, __pyx_k_sorted_dets, sizeof(__pyx_k_sorted_dets), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_thresh, __pyx_k_thresh, sizeof(__pyx_k_thresh), 0, 0, 1, 1}, + {&__pyx_n_s_zeros, __pyx_k_zeros, sizeof(__pyx_k_zeros), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} +}; +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(1, 945, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "gpu_nms.pyx":27 + * keep = np.zeros(boxes_num, dtype=np.int32) + * cdef np.ndarray[np.float32_t, ndim=1] \ + * scores = dets[:, 4] # <<<<<<<<<<<<<< + * cdef np.ndarray[np.int32_t, ndim=1] \ + * order = scores.argsort()[::-1].astype(np.int32) + */ + __pyx_slice_ = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice_)) __PYX_ERR(0, 27, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice_); + __Pyx_GIVEREF(__pyx_slice_); + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_slice_, __pyx_int_4); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 27, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + + /* "gpu_nms.pyx":29 + * scores = dets[:, 4] + * cdef np.ndarray[np.int32_t, ndim=1] \ + * order = scores.argsort()[::-1].astype(np.int32) # <<<<<<<<<<<<<< + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] + */ + __pyx_slice__3 = PySlice_New(Py_None, Py_None, __pyx_int_neg_1); if (unlikely(!__pyx_slice__3)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":945 + * __pyx_import_array() + * 
except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_tuple__4 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(1, 945, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__4); + __Pyx_GIVEREF(__pyx_tuple__4); + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":951 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_tuple__5 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(1, 951, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__5); + __Pyx_GIVEREF(__pyx_tuple__5); + + /* "gpu_nms.pyx":19 + * void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + __pyx_tuple__6 = PyTuple_Pack(10, __pyx_n_s_dets, __pyx_n_s_thresh, __pyx_n_s_device_id, __pyx_n_s_boxes_num, __pyx_n_s_boxes_dim, __pyx_n_s_num_out, __pyx_n_s_keep, __pyx_n_s_scores, __pyx_n_s_order, __pyx_n_s_sorted_dets); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__6); + __Pyx_GIVEREF(__pyx_tuple__6); + __pyx_codeobj__7 = (PyObject*)__Pyx_PyCode_New(3, 0, 10, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__6, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gpu_nms_pyx, __pyx_n_s_gpu_nms, 19, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__7)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + if (__Pyx_InitStrings(__pyx_string_tab) < 
0) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_4 = PyInt_FromLong(4); if (unlikely(!__pyx_int_4)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- Type init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + 
__Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code ---*/ + __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", + #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 + sizeof(PyTypeObject), + #else + sizeof(PyHeapTypeObject), + #endif + __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 200, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_5numpy_dtype = __Pyx_ImportType(__pyx_t_1, "numpy", "dtype", sizeof(PyArray_Descr), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_dtype) __PYX_ERR(1, 200, __pyx_L1_error) + __pyx_ptype_5numpy_flatiter = __Pyx_ImportType(__pyx_t_1, "numpy", "flatiter", sizeof(PyArrayIterObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_flatiter) __PYX_ERR(1, 223, __pyx_L1_error) + __pyx_ptype_5numpy_broadcast = __Pyx_ImportType(__pyx_t_1, "numpy", "broadcast", sizeof(PyArrayMultiIterObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_broadcast) __PYX_ERR(1, 227, __pyx_L1_error) + __pyx_ptype_5numpy_ndarray = __Pyx_ImportType(__pyx_t_1, "numpy", "ndarray", sizeof(PyArrayObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_ndarray) __PYX_ERR(1, 239, __pyx_L1_error) + __pyx_ptype_5numpy_generic = __Pyx_ImportType(__pyx_t_1, "numpy", "generic", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_generic) __PYX_ERR(1, 771, __pyx_L1_error) + __pyx_ptype_5numpy_number = __Pyx_ImportType(__pyx_t_1, "numpy", "number", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_number) __PYX_ERR(1, 773, 
__pyx_L1_error) + __pyx_ptype_5numpy_integer = __Pyx_ImportType(__pyx_t_1, "numpy", "integer", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_integer) __PYX_ERR(1, 775, __pyx_L1_error) + __pyx_ptype_5numpy_signedinteger = __Pyx_ImportType(__pyx_t_1, "numpy", "signedinteger", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_signedinteger) __PYX_ERR(1, 777, __pyx_L1_error) + __pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType(__pyx_t_1, "numpy", "unsignedinteger", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(1, 779, __pyx_L1_error) + __pyx_ptype_5numpy_inexact = __Pyx_ImportType(__pyx_t_1, "numpy", "inexact", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_inexact) __PYX_ERR(1, 781, __pyx_L1_error) + __pyx_ptype_5numpy_floating = __Pyx_ImportType(__pyx_t_1, "numpy", "floating", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_floating) __PYX_ERR(1, 783, __pyx_L1_error) + __pyx_ptype_5numpy_complexfloating = __Pyx_ImportType(__pyx_t_1, "numpy", "complexfloating", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_complexfloating) __PYX_ERR(1, 785, __pyx_L1_error) + __pyx_ptype_5numpy_flexible = __Pyx_ImportType(__pyx_t_1, "numpy", "flexible", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_flexible) __PYX_ERR(1, 787, __pyx_L1_error) + __pyx_ptype_5numpy_character = __Pyx_ImportType(__pyx_t_1, "numpy", "character", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_character) __PYX_ERR(1, 789, __pyx_L1_error) + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType(__pyx_t_1, "numpy", "ufunc", sizeof(PyUFuncObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_ufunc) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_RefNannyFinishContext(); + return 0; + 
__pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif + + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC initgpu_nms(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC initgpu_nms(void) +#else +__Pyx_PyMODINIT_FUNC PyInit_gpu_nms(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit_gpu_nms(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + return (unlikely(current_id == -1)) ? 
-1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) { + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { + result = PyDict_SetItemString(moddict, to_name, value); + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, CYTHON_UNUSED PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + 
return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec_gpu_nms(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module 'gpu_nms' has already been imported. Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_gpu_nms(void)", 0); + if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif 
+ #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("gpu_nms", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + #endif + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_b); + __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_cython_runtime); + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Initialize various global constants etc. 
---*/ + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_gpu_nms) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "gpu_nms")) { + if (unlikely(PyDict_SetItemString(modules, "gpu_nms", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + (void)__Pyx_modinit_type_init_code(); + if (unlikely(__Pyx_modinit_type_import_code() < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "gpu_nms.pyx":11 + * from __future__ import print_function + * + * import numpy as np # <<<<<<<<<<<<<< + * cimport numpy as np + * + */ + __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gpu_nms.pyx":14 + * cimport numpy as np + * + * assert sizeof(int) == 
sizeof(np.int32_t) # <<<<<<<<<<<<<< + * + * cdef extern from "gpu_nms.hpp": + */ + #ifndef CYTHON_WITHOUT_ASSERTIONS + if (unlikely(!Py_OptimizeFlag)) { + if (unlikely(!(((sizeof(int)) == (sizeof(__pyx_t_5numpy_int32_t))) != 0))) { + PyErr_SetNone(PyExc_AssertionError); + __PYX_ERR(0, 14, __pyx_L1_error) + } + } + #endif + + /* "gpu_nms.pyx":19 + * void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_7gpu_nms_1gpu_nms, NULL, __pyx_n_s_gpu_nms); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_gpu_nms, __pyx_t_1) < 0) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gpu_nms.pyx":1 + * # ------------------------------------------------------------------------------ # <<<<<<<<<<<<<< + * # Copyright (c) Microsoft + * # Licensed under the MIT License. + */ + __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "../../../../../../../../usr/lib/python3/dist-packages/numpy/__init__.pxd":1014 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. 
+ */ + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + if (__pyx_m) { + if (__pyx_d) { + __Pyx_AddTraceback("init gpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + Py_CLEAR(__pyx_m); + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init gpu_nms"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? 
"" : "s", num_found); +} + +/* RaiseDoubleKeywords */ +static void __Pyx_RaiseDoubleKeywordsError( + const char* func_name, + PyObject* kw_name) +{ + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION >= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + while (PyDict_Next(kwds, &pos, &key, &value)) { + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; + continue; + } + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = (**name == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : + #endif + PyUnicode_Compare(**name, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION < 3 + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + return -1; +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", + name, type->tp_name, Py_TYPE(obj)->tp_name); + return 0; +} + +/* IsLittleEndian */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void) +{ + union { + uint32_t u32; + 
uint8_t u8[4]; + } S; + S.u32 = 0x01020304; + return S.u8[0] == 4; +} + +/* BufferFormatCheck */ +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type) { + stack[0].field = &ctx->root; + stack[0].parent_offset = 0; + ctx->root.type = type; + ctx->root.name = "buffer dtype"; + ctx->root.offset = 0; + ctx->head = stack; + ctx->head->field = &ctx->root; + ctx->fmt_offset = 0; + ctx->head->parent_offset = 0; + ctx->new_packmode = '@'; + ctx->enc_packmode = '@'; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->is_complex = 0; + ctx->is_valid_array = 0; + ctx->struct_alignment = 0; + while (type->typegroup == 'S') { + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = 0; + type = type->fields->type; + } +} +static int __Pyx_BufFmt_ParseNumber(const char** ts) { + int count; + const char* t = *ts; + if (*t < '0' || *t > '9') { + return -1; + } else { + count = *t++ - '0'; + while (*t >= '0' && *t <= '9') { + count *= 10; + count += *t++ - '0'; + } + } + *ts = t; + return count; +} +static int __Pyx_BufFmt_ExpectNumber(const char **ts) { + int number = __Pyx_BufFmt_ParseNumber(ts); + if (number == -1) + PyErr_Format(PyExc_ValueError,\ + "Does not understand character buffer dtype format string ('%c')", **ts); + return number; +} +static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { + PyErr_Format(PyExc_ValueError, + "Unexpected format string character: '%c'", ch); +} +static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { + switch (ch) { + case '?': return "'bool'"; + case 'c': return "'char'"; + case 'b': return "'signed char'"; + case 'B': return "'unsigned char'"; + case 'h': return "'short'"; + case 'H': return "'unsigned short'"; + case 'i': return "'int'"; + case 'I': return "'unsigned int'"; + case 'l': return "'long'"; + case 'L': return "'unsigned long'"; + case 'q': return "'long long'"; + case 'Q': return "'unsigned long long'"; + 
case 'f': return (is_complex ? "'complex float'" : "'float'"); + case 'd': return (is_complex ? "'complex double'" : "'double'"); + case 'g': return (is_complex ? "'complex long double'" : "'long double'"); + case 'T': return "a struct"; + case 'O': return "Python object"; + case 'P': return "a pointer"; + case 's': case 'p': return "a string"; + case 0: return "end"; + default: return "unparseable format string"; + } +} +static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return 2; + case 'i': case 'I': case 'l': case 'L': return 4; + case 'q': case 'Q': return 8; + case 'f': return (is_complex ? 8 : 4); + case 'd': return (is_complex ? 16 : 8); + case 'g': { + PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); + return 0; + } + case 'O': case 'P': return sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(short); + case 'i': case 'I': return sizeof(int); + case 'l': case 'L': return sizeof(long); + #ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(PY_LONG_LONG); + #endif + case 'f': return sizeof(float) * (is_complex ? 2 : 1); + case 'd': return sizeof(double) * (is_complex ? 2 : 1); + case 'g': return sizeof(long double) * (is_complex ? 
2 : 1); + case 'O': case 'P': return sizeof(void*); + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +typedef struct { char c; short x; } __Pyx_st_short; +typedef struct { char c; int x; } __Pyx_st_int; +typedef struct { char c; long x; } __Pyx_st_long; +typedef struct { char c; float x; } __Pyx_st_float; +typedef struct { char c; double x; } __Pyx_st_double; +typedef struct { char c; long double x; } __Pyx_st_longdouble; +typedef struct { char c; void *x; } __Pyx_st_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, CYTHON_UNUSED int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_st_float) - sizeof(float); + case 'd': return sizeof(__Pyx_st_double) - sizeof(double); + case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +/* These are for computing the padding at the end of the struct to align + on the first member of the struct. This will probably the same as above, + but we don't have any guarantees. 
+ */ +typedef struct { short x; char c; } __Pyx_pad_short; +typedef struct { int x; char c; } __Pyx_pad_int; +typedef struct { long x; char c; } __Pyx_pad_long; +typedef struct { float x; char c; } __Pyx_pad_float; +typedef struct { double x; char c; } __Pyx_pad_double; +typedef struct { long double x; char c; } __Pyx_pad_longdouble; +typedef struct { void *x; char c; } __Pyx_pad_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, CYTHON_UNUSED int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); + case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); + case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { + switch (ch) { + case 'c': + return 'H'; + case 'b': case 'h': case 'i': + case 'l': case 'q': case 's': case 'p': + return 'I'; + case '?': case 'B': case 'H': case 'I': case 'L': case 'Q': + return 'U'; + case 'f': case 'd': case 'g': + return (is_complex ? 
'C' : 'R'); + case 'O': + return 'O'; + case 'P': + return 'P'; + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { + if (ctx->head == NULL || ctx->head->field == &ctx->root) { + const char* expected; + const char* quote; + if (ctx->head == NULL) { + expected = "end"; + quote = ""; + } else { + expected = ctx->head->field->type->name; + quote = "'"; + } + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected %s%s%s but got %s", + quote, expected, quote, + __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); + } else { + __Pyx_StructField* field = ctx->head->field; + __Pyx_StructField* parent = (ctx->head - 1)->field; + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", + field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), + parent->type->name, field->name); + } +} +static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { + char group; + size_t size, offset, arraysize = 1; + if (ctx->enc_type == 0) return 0; + if (ctx->head->field->type->arraysize[0]) { + int i, ndim = 0; + if (ctx->enc_type == 's' || ctx->enc_type == 'p') { + ctx->is_valid_array = ctx->head->field->type->ndim == 1; + ndim = 1; + if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %zu", + ctx->head->field->type->arraysize[0], ctx->enc_count); + return -1; + } + } + if (!ctx->is_valid_array) { + PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", + ctx->head->field->type->ndim, ndim); + return -1; + } + for (i = 0; i < ctx->head->field->type->ndim; i++) { + arraysize *= ctx->head->field->type->arraysize[i]; + } + ctx->is_valid_array = 0; + ctx->enc_count = 1; + } + group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex); + do { + __Pyx_StructField* field = ctx->head->field; + __Pyx_TypeInfo* 
type = field->type; + if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { + size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); + } else { + size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); + } + if (ctx->enc_packmode == '@') { + size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); + size_t align_mod_offset; + if (align_at == 0) return -1; + align_mod_offset = ctx->fmt_offset % align_at; + if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; + if (ctx->struct_alignment == 0) + ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, + ctx->is_complex); + } + if (type->size != size || type->typegroup != group) { + if (type->typegroup == 'C' && type->fields != NULL) { + size_t parent_offset = ctx->head->parent_offset + field->offset; + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = parent_offset; + continue; + } + if ((type->typegroup == 'H' || group == 'H') && type->size == size) { + } else { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + } + offset = ctx->head->parent_offset + field->offset; + if (ctx->fmt_offset != offset) { + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", + (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); + return -1; + } + ctx->fmt_offset += size; + if (arraysize) + ctx->fmt_offset += (arraysize - 1) * size; + --ctx->enc_count; + while (1) { + if (field == &ctx->root) { + ctx->head = NULL; + if (ctx->enc_count != 0) { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + break; + } + ctx->head->field = ++field; + if (field->type == NULL) { + --ctx->head; + field = ctx->head->field; + continue; + } else if (field->type->typegroup == 'S') { + size_t parent_offset = ctx->head->parent_offset + field->offset; + if (field->type->fields->type == NULL) continue; + field = field->type->fields; 
+ ++ctx->head; + ctx->head->field = field; + ctx->head->parent_offset = parent_offset; + break; + } else { + break; + } + } + } while (ctx->enc_count); + ctx->enc_type = 0; + ctx->is_complex = 0; + return 0; +} +static PyObject * +__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) +{ + const char *ts = *tsp; + int i = 0, number, ndim; + ++ts; + if (ctx->new_count != 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot handle repeated arrays in format string"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ndim = ctx->head->field->type->ndim; + while (*ts && *ts != ')') { + switch (*ts) { + case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; + default: break; + } + number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) + return PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %d", + ctx->head->field->type->arraysize[i], number); + if (*ts != ',' && *ts != ')') + return PyErr_Format(PyExc_ValueError, + "Expected a comma in format string, got '%c'", *ts); + if (*ts == ',') ts++; + i++; + } + if (i != ndim) + return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", + ctx->head->field->type->ndim, i); + if (!*ts) { + PyErr_SetString(PyExc_ValueError, + "Unexpected end of format string, expected ')'"); + return NULL; + } + ctx->is_valid_array = 1; + ctx->new_count = 1; + *tsp = ++ts; + return Py_None; +} +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { + int got_Z = 0; + while (1) { + switch(*ts) { + case 0: + if (ctx->enc_type != 0 && ctx->head == NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + if (ctx->head != NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + return ts; + case ' ': + case '\r': + case '\n': + ++ts; + break; + case 
'<': + if (!__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '>': + case '!': + if (__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '=': + case '@': + case '^': + ctx->new_packmode = *ts++; + break; + case 'T': + { + const char* ts_after_sub; + size_t i, struct_count = ctx->new_count; + size_t struct_alignment = ctx->struct_alignment; + ctx->new_count = 1; + ++ts; + if (*ts != '{') { + PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + ctx->enc_count = 0; + ctx->struct_alignment = 0; + ++ts; + ts_after_sub = ts; + for (i = 0; i != struct_count; ++i) { + ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); + if (!ts_after_sub) return NULL; + } + ts = ts_after_sub; + if (struct_alignment) ctx->struct_alignment = struct_alignment; + } + break; + case '}': + { + size_t alignment = ctx->struct_alignment; + ++ts; + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + if (alignment && ctx->fmt_offset % alignment) { + ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); + } + } + return ts; + case 'x': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->fmt_offset += ctx->new_count; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->enc_packmode = ctx->new_packmode; + ++ts; + break; + case 'Z': + got_Z = 1; + ++ts; + if (*ts != 'f' && *ts != 'd' && *ts != 'g') { + __Pyx_BufFmt_RaiseUnexpectedChar('Z'); + return NULL; + } + CYTHON_FALLTHROUGH; + case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': + case 'l': case 'L': case 'q': case 'Q': + case 'f': case 
'd': case 'g': + case 'O': case 'p': + if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) && + (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) { + ctx->enc_count += ctx->new_count; + ctx->new_count = 1; + got_Z = 0; + ++ts; + break; + } + CYTHON_FALLTHROUGH; + case 's': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_count = ctx->new_count; + ctx->enc_packmode = ctx->new_packmode; + ctx->enc_type = *ts; + ctx->is_complex = got_Z; + ++ts; + ctx->new_count = 1; + got_Z = 0; + break; + case ':': + ++ts; + while(*ts != ':') ++ts; + ++ts; + break; + case '(': + if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL; + break; + default: + { + int number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + ctx->new_count = (size_t)number; + } + } + } +} + +/* BufferGetAndValidate */ + static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) { + if (unlikely(info->buf == NULL)) return; + if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL; + __Pyx_ReleaseBuffer(info); +} +static void __Pyx_ZeroBuffer(Py_buffer* buf) { + buf->buf = NULL; + buf->obj = NULL; + buf->strides = __Pyx_zeros; + buf->shape = __Pyx_zeros; + buf->suboffsets = __Pyx_minusones; +} +static int __Pyx__GetBufferAndValidate( + Py_buffer* buf, PyObject* obj, __Pyx_TypeInfo* dtype, int flags, + int nd, int cast, __Pyx_BufFmt_StackElem* stack) +{ + buf->buf = NULL; + if (unlikely(__Pyx_GetBuffer(obj, buf, flags) == -1)) { + __Pyx_ZeroBuffer(buf); + return -1; + } + if (unlikely(buf->ndim != nd)) { + PyErr_Format(PyExc_ValueError, + "Buffer has wrong number of dimensions (expected %d, got %d)", + nd, buf->ndim); + goto fail; + } + if (!cast) { + __Pyx_BufFmt_Context ctx; + __Pyx_BufFmt_Init(&ctx, stack, dtype); + if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) goto fail; + } + if (unlikely((size_t)buf->itemsize != dtype->size)) { + PyErr_Format(PyExc_ValueError, + "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "d byte%s) 
does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "d byte%s)", + buf->itemsize, (buf->itemsize > 1) ? "s" : "", + dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : ""); + goto fail; + } + if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones; + return 0; +fail:; + __Pyx_SafeReleaseBuffer(buf); + return -1; +} + +/* PyObjectGetAttrStr */ + #if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* GetBuiltinName */ + static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); + if (unlikely(!result)) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* PyDictVersioning */ + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? 
__PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* GetModuleGlobalName */ + #if CYTHON_USE_DICT_VERSIONS +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) +#else +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) +#endif +{ + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 + result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } else if (unlikely(PyErr_Occurred())) { + return NULL; + } +#else + result = PyDict_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } +#endif +#else + result = PyObject_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } + PyErr_Clear(); +#endif + return __Pyx_GetBuiltinName(name); +} + +/* PyObjectCall */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && 
unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* ExtTypeTest */ + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (likely(__Pyx_TypeCheck(obj, type))) + return 1; + PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s", + Py_TYPE(obj)->tp_name, type->tp_name); + return 0; +} + +/* GetItemInt */ + static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (!j) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyList_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyTuple_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject 
*__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; + if (likely(m && m->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { + Py_ssize_t l = m->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return NULL; + PyErr_Clear(); + } + } + return m->sq_item(o, i); + } + } +#else + if (is_list || PySequence_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +/* ObjectGetItem */ + #if CYTHON_USE_TYPE_SLOTS +static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) { + PyObject *runerr = NULL; + Py_ssize_t key_value; + PySequenceMethods *m = Py_TYPE(obj)->tp_as_sequence; + if (unlikely(!(m && m->sq_item))) { + PyErr_Format(PyExc_TypeError, "'%.200s' object is not subscriptable", Py_TYPE(obj)->tp_name); + return NULL; + } + key_value = __Pyx_PyIndex_AsSsize_t(index); + if (likely(key_value != -1 || !(runerr = PyErr_Occurred()))) { + return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1); + } + if (PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) { + PyErr_Clear(); + PyErr_Format(PyExc_IndexError, "cannot fit '%.200s' into an 
index-sized integer", Py_TYPE(index)->tp_name); + } + return NULL; +} +static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key) { + PyMappingMethods *m = Py_TYPE(obj)->tp_as_mapping; + if (likely(m && m->mp_subscript)) { + return m->mp_subscript(obj, key); + } + return __Pyx_PyObject_GetIndex(obj, key); +} +#endif + +/* PyFunctionFastCall */ + #if CYTHON_FAST_PYCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +#if 1 || PY_VERSION_HEX < 0x030600B1 +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? 
PyDict_Size(kwargs) : 0; + if (Py_EnterRecursiveCall((char*)" while calling a Python object")) { + return NULL; + } + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif +#endif + +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = 
PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCallNoArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { +#if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCall(func, NULL, 0); + } +#endif +#if defined(__Pyx_CyFunction_USED) && defined(NDEBUG) + if (likely(PyCFunction_Check(func) || __Pyx_CyFunction_Check(func))) +#else + if (likely(PyCFunction_Check(func))) +#endif + { + if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) { + return __Pyx_PyObject_CallMethO(func, NULL); + } + } + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL); +} +#endif + +/* PyCFunctionFastCall */ + #if CYTHON_FAST_PYCCALL +static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, PyObject **args, Py_ssize_t nargs) { + PyCFunctionObject *func = (PyCFunctionObject*)func_obj; + PyCFunction meth = PyCFunction_GET_FUNCTION(func); + PyObject *self = PyCFunction_GET_SELF(func); + int flags = PyCFunction_GET_FLAGS(func); + assert(PyCFunction_Check(func)); + assert(METH_FASTCALL == (flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS))); + assert(nargs >= 0); + assert(nargs == 0 || args != NULL); + /* _PyCFunction_FastCallDict() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller loses its exception */ + assert(!PyErr_Occurred()); + if ((PY_VERSION_HEX < 0x030700A0) || unlikely(flags & METH_KEYWORDS)) { + return (*((__Pyx_PyCFunctionFastWithKeywords)(void*)meth)) (self, args, nargs, NULL); + } else { + return 
(*((__Pyx_PyCFunctionFast)(void*)meth)) (self, args, nargs); + } +} +#endif + +/* PyObjectCallOneArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCall(func, &arg, 1); + } +#endif + if (likely(PyCFunction_Check(func))) { + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); +#if CYTHON_FAST_PYCCALL + } else if (__Pyx_PyFastCFunction_Check(func)) { + return __Pyx_PyCFunction_FastCall(func, &arg, 1); +#endif + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_Pack(1, arg); + if (unlikely(!args)) return NULL; + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +#endif + +/* PyObjectCall2Args */ + static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) { + PyObject *args, *result = NULL; + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(function)) { + PyObject *args[2] = {arg1, arg2}; + return __Pyx_PyFunction_FastCall(function, args, 2); + } + #endif + #if CYTHON_FAST_PYCCALL + if (__Pyx_PyFastCFunction_Check(function)) { + PyObject *args[2] = {arg1, arg2}; + return __Pyx_PyCFunction_FastCall(function, args, 2); + } + #endif + args = PyTuple_New(2); + if (unlikely(!args)) goto done; + Py_INCREF(arg1); + PyTuple_SET_ITEM(args, 0, arg1); + Py_INCREF(arg2); + PyTuple_SET_ITEM(args, 1, arg2); + Py_INCREF(function); + result 
= __Pyx_PyObject_Call(function, args, NULL); + Py_DECREF(args); + Py_DECREF(function); +done: + return result; +} + +/* BufferIndexError */ + static void __Pyx_RaiseBufferIndexError(int axis) { + PyErr_Format(PyExc_IndexError, + "Out of bounds on buffer access (axis %d)", axis); +} + +/* BufferFallbackError */ + static void __Pyx_RaiseBufferFallbackError(void) { + PyErr_SetString(PyExc_ValueError, + "Buffer acquisition failed on assignment; and then reacquiring the old buffer failed too!"); +} + +/* PyErrFetchRestore */ + #if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +} +#endif + +/* GetTopmostException */ + #if CYTHON_USE_EXC_INFO_STACK +static _PyErr_StackItem * +__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) +{ + _PyErr_StackItem *exc_info = tstate->exc_info; + while ((exc_info->exc_type == NULL || exc_info->exc_type == Py_None) && + exc_info->previous_item != NULL) + { + exc_info = exc_info->previous_item; + } + return exc_info; +} +#endif + +/* SaveResetException */ + #if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + *type = 
exc_info->exc_type; + *value = exc_info->exc_value; + *tb = exc_info->exc_traceback; + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + #endif + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = type; + exc_info->exc_value = value; + exc_info->exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +#endif + +/* PyErrExceptionMatches */ + #if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; icurexc_type; + if (exc_type == err) return 1; + if (unlikely(!exc_type)) return 0; + if (unlikely(PyTuple_Check(err))) + return __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + return __Pyx_PyErr_GivenExceptionMatches(exc_type, err); +} +#endif + +/* GetException */ + #if CYTHON_FAST_THREAD_STATE +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) +#endif +{ + PyObject *local_type, *local_value, *local_tb; +#if CYTHON_FAST_THREAD_STATE + PyObject *tmp_type, *tmp_value, *tmp_tb; + local_type = tstate->curexc_type; + local_value = tstate->curexc_value; + local_tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + 
tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#else + PyErr_Fetch(&local_type, &local_value, &local_tb); +#endif + PyErr_NormalizeException(&local_type, &local_value, &local_tb); +#if CYTHON_FAST_THREAD_STATE + if (unlikely(tstate->curexc_type)) +#else + if (unlikely(PyErr_Occurred())) +#endif + goto bad; + #if PY_MAJOR_VERSION >= 3 + if (local_tb) { + if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) + goto bad; + } + #endif + Py_XINCREF(local_tb); + Py_XINCREF(local_type); + Py_XINCREF(local_value); + *type = local_type; + *value = local_value; + *tb = local_tb; +#if CYTHON_FAST_THREAD_STATE + #if CYTHON_USE_EXC_INFO_STACK + { + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = local_type; + exc_info->exc_value = local_value; + exc_info->exc_traceback = local_tb; + } + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = local_type; + tstate->exc_value = local_value; + tstate->exc_traceback = local_tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#else + PyErr_SetExcInfo(local_type, local_value, local_tb); +#endif + return 0; +bad: + *type = 0; + *value = 0; + *tb = 0; + Py_XDECREF(local_type); + Py_XDECREF(local_value); + Py_XDECREF(local_tb); + return -1; +} + +/* RaiseException */ + #if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, + CYTHON_UNUSED PyObject *cause) { + __Pyx_PyThreadState_declare + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if 
(!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + 
goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto bad; + } + if (cause) { + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { +#if CYTHON_COMPILING_IN_PYPY + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#else + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* TypeImport */ + #ifndef __PYX_HAVE_RT_ImportType +#define __PYX_HAVE_RT_ImportType +static PyTypeObject *__Pyx_ImportType(PyObject *module, const char *module_name, const char *class_name, + size_t size, enum __Pyx_ImportType_CheckSize check_size) +{ + PyObject *result = 0; + char warning[200]; + Py_ssize_t basicsize; +#ifdef Py_LIMITED_API + PyObject *py_basicsize; +#endif + result = PyObject_GetAttrString(module, class_name); + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#ifndef 
Py_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if ((size_t)basicsize < size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + goto bad; + } + if (check_size == __Pyx_ImportType_CheckSize_Error && (size_t)basicsize != size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + goto bad; + } + else if (check_size == __Pyx_ImportType_CheckSize_Warn && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility. 
" + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + return (PyTypeObject *)result; +bad: + Py_XDECREF(result); + return NULL; +} +#endif + +/* Import */ + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *empty_list = 0; + PyObject *module = 0; + PyObject *global_dict = 0; + PyObject *empty_dict = 0; + PyObject *list; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (!py_import) + goto bad; + #endif + if (from_list) + list = from_list; + else { + empty_list = PyList_New(0); + if (!empty_list) + goto bad; + list = empty_list; + } + global_dict = PyModule_GetDict(__pyx_m); + if (!global_dict) + goto bad; + empty_dict = PyDict_New(); + if (!empty_dict) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) { + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, 1); + if (!module) { + if (!PyErr_ExceptionMatches(PyExc_ImportError)) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, level); + #endif + } + } +bad: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + Py_XDECREF(empty_list); + Py_XDECREF(empty_dict); + return module; +} + +/* CLineInTraceback */ + #ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(CYTHON_UNUSED PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject 
**cython_runtime_dict; +#endif + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + __PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ + static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || 
unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} + +/* AddTraceback */ + #include "compile.h" +#include "frameobject.h" +#include "traceback.h" +#if PY_VERSION_HEX >= 0x030b00a6 + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char 
*funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = NULL; + PyObject *py_funcname = NULL; + #if PY_MAJOR_VERSION < 3 + PyObject *py_srcfile = NULL; + py_srcfile = PyString_FromString(filename); + if (!py_srcfile) goto bad; + #endif + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + funcname = PyUnicode_AsUTF8(py_funcname); + if (!funcname) goto bad; + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + #endif + } + #if PY_MAJOR_VERSION < 3 + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + #else + py_code = PyCode_NewEmpty(filename, funcname, py_line); + #endif + Py_XDECREF(py_funcname); // XDECREF since it's only set on Py3 if cline + return py_code; +bad: + Py_XDECREF(py_funcname); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_srcfile); + #endif + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject *ptype, *pvalue, *ptraceback; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? 
-c_line : py_line); + if (!py_code) { + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) { + /* If the code object creation fails, then we should clear the + fetched exception references and propagate the new exception */ + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + goto bad; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} + +#if PY_MAJOR_VERSION < 3 +static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { + if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); + PyErr_Format(PyExc_TypeError, "'%.200s' does not have the buffer interface", Py_TYPE(obj)->tp_name); + return -1; +} +static void __Pyx_ReleaseBuffer(Py_buffer *view) { + PyObject *obj = view->obj; + if (!obj) return; + if (PyObject_CheckBuffer(obj)) { + PyBuffer_Release(view); + return; + } + if ((0)) {} + view->obj = NULL; + Py_DECREF(obj); +} +#endif + + + /* CIntFromPyVerify */ + #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value 
== (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* Declarations */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return ::std::complex< float >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return x + y*(__pyx_t_float_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + __pyx_t_float_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabsf(b.real) >= fabsf(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return 
__pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + float r = b.imag / b.real; + float s = (float)(1.0) / (b.real + b.imag * r); + return __pyx_t_float_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + float r = b.real / b.imag; + float s = (float)(1.0) / (b.imag + b.real * r); + return __pyx_t_float_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + float denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_float_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtf(z.real*z.real + z.imag*z.imag); + #else + return hypotf(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + float denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + 
case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_float(a, a); + case 3: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, a); + case 4: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = powf(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2f(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_float(a); + theta = atan2f(a.imag, a.real); + } + lnr = logf(r); + z_r = expf(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosf(z_theta); + z.imag = z_r * sinf(z_theta); + return z; + } + #endif +#endif + +/* Declarations */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return ::std::complex< double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return x + y*(__pyx_t_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + __pyx_t_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real - 
b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabs(b.real) >= fabs(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + double r = b.imag / b.real; + double s = (double)(1.0) / (b.real + b.imag * r); + return __pyx_t_double_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + double r = b.real / b.imag; + double s = (double)(1.0) / (b.imag + b.real * r); + return __pyx_t_double_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + double denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_double_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = a.real; + z.imag = 
-a.imag; + return z; + } + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrt(z.real*z.real + z.imag*z.imag); + #else + return hypot(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_double(a, a); + case 3: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, a); + case 4: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = pow(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_double(a); + theta = atan2(a.imag, a.real); + } + lnr = log(r); + z_r = exp(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cos(z_theta); + z.imag = z_r * sin(z_theta); + return z; + } + #endif +#endif + +/* CIntFromPy */ + static CYTHON_INLINE npy_int32 __Pyx_PyInt_As_npy_int32(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const npy_int32 neg_one = (npy_int32) -1, const_zero = (npy_int32) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(npy_int32) < sizeof(long)) { + 
__PYX_VERIFY_RETURN_INT(npy_int32, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (npy_int32) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (npy_int32) 0; + case 1: __PYX_VERIFY_RETURN_INT(npy_int32, digit, digits[0]) + case 2: + if (8 * sizeof(npy_int32) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) >= 2 * PyLong_SHIFT) { + return (npy_int32) (((((npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(npy_int32) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) >= 3 * PyLong_SHIFT) { + return (npy_int32) (((((((npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(npy_int32) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) >= 4 * PyLong_SHIFT) { + return (npy_int32) (((((((((npy_int32)digits[3]) << PyLong_SHIFT) | (npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0])); + } + } + break; + } +#endif +#if 
CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (npy_int32) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(npy_int32) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(npy_int32) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (npy_int32) 0; + case -1: __PYX_VERIFY_RETURN_INT(npy_int32, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(npy_int32, digit, +digits[0]) + case -2: + if (8 * sizeof(npy_int32) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 2 * PyLong_SHIFT) { + return (npy_int32) (((npy_int32)-1)*(((((npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(npy_int32) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 2 * PyLong_SHIFT) { + return (npy_int32) ((((((npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(npy_int32) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned 
long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 3 * PyLong_SHIFT) { + return (npy_int32) (((npy_int32)-1)*(((((((npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(npy_int32) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 3 * PyLong_SHIFT) { + return (npy_int32) ((((((((npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(npy_int32) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 4 * PyLong_SHIFT) { + return (npy_int32) (((npy_int32)-1)*(((((((((npy_int32)digits[3]) << PyLong_SHIFT) | (npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(npy_int32) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 4 * PyLong_SHIFT) { + return (npy_int32) ((((((((((npy_int32)digits[3]) << PyLong_SHIFT) | (npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + 
break; + } +#endif + if (sizeof(npy_int32) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(npy_int32) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + npy_int32 val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (npy_int32) -1; + } + } else { + npy_int32 val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (npy_int32) -1; + val = __Pyx_PyInt_As_npy_int32(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to npy_int32"); + return (npy_int32) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to npy_int32"); + return (npy_int32) -1; +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return 
PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); + } +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); + } +} + +/* CIntFromPy */ + static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored 
"-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(long) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { + return (long) 
(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(long) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) + case -2: + if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) 
(((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } +#endif + if (sizeof(long) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, 
long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + long val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (long) -1; + } + } else { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } else +#endif + if 
(likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned 
long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) + case -2: + if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | 
(unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } +#endif + if (sizeof(int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + 
int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int) -1; + } + } else { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* FastTypeChecks */ + #if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = a->tp_base; + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? 
PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + int res = exc_type1 ? __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type1) : 0; + if (!res) { + res = __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } + return res; +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i '9'); + break; + } + if (rt_from_call[i] != ctversion[i]) { + same = 0; + break; + } + } + if (!same) { + char rtversion[5] = {'\0'}; + char message[200]; + for (i=0; i<4; ++i) { + if (rt_from_call[i] == '.') { + if (found_dot) break; + found_dot = 1; + } else if (rt_from_call[i] < '0' || rt_from_call[i] > '9') { + break; + } + rtversion[i] = rt_from_call[i]; + } + PyOS_snprintf(message, sizeof(message), + "compiletime version %s of module '%.100s' " + "does not match runtime version %s", + ctversion, __Pyx_MODULE_NAME, rtversion); + return PyErr_WarnEx(NULL, message, 1); + } + return 0; +} + +/* InitStrings */ + static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION < 3 + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + #else + if (t->is_unicode | t->is_str) { + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else if (t->encoding) { + *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, 
NULL); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } + } else { + *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); + } + #endif + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + ++t; + } + return 0; +} + +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && 
+#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type %.200s). 
" + "The ability to return an instance of a strict subclass of int " + "is deprecated, and may be removed in a future version of Python.", + Py_TYPE(result)->tp_name)) { + Py_DECREF(result); + return NULL; + } + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type %.200s)", + type_name, type_name, Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)b)->ob_digit; + const Py_ssize_t size = Py_SIZE(b); + if (likely(__Pyx_sst_abs(size) <= 1)) { + 
ival = likely(size) ? digits[0] : 0; + if (size == -1) ival = -ival; + return ival; + } else { + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { + if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { + return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); +#if PY_MAJOR_VERSION < 3 + } else if (likely(PyInt_CheckExact(o))) { + return PyInt_AS_LONG(o); +#endif + } else { + Py_ssize_t ival; + PyObject *x; + x = PyNumber_Index(o); + if (!x) return -1; + ival = PyInt_AsLong(x); + Py_DECREF(x); + return ival; + } +} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? 
__Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +#endif /* Py_PYTHON_H */ diff --git a/lib/utils/vis.py b/lib/utils/vis.py index adc0947a..39f68101 100755 --- a/lib/utils/vis.py +++ b/lib/utils/vis.py @@ -4,27 +4,25 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import math +import cv2 import numpy as np import torchvision -import cv2 - from core.inference import get_max_preds -def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, - file_name, nrow=8, padding=2): - ''' +def save_batch_image_with_joints( + batch_image, batch_joints, batch_joints_vis, file_name, nrow=8, padding=2 +): + """ batch_image: [batch_size, channel, height, width] batch_joints: [batch_size, num_joints, 3], batch_joints_vis: [batch_size, num_joints, 1], } - ''' + """ grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() ndarr = ndarr.copy() @@ -42,22 +40,31 @@ def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, joints = batch_joints[k] joints_vis = batch_joints_vis[k] + i_joint = 0 for joint, joint_vis in zip(joints, joints_vis): joint[0] = x * width + padding + joint[0] joint[1] = y * height + padding + joint[1] if joint_vis[0]: - cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) + if i_joint < 17: + cv2.circle( + ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2 + ) + else: + cv2.circle( + ndarr, (int(joint[0]), int(joint[1])), 2, [0, 0, 255], 2 + ) + i_joint += 1 k = k + 1 - cv2.imwrite(file_name, ndarr) + img = cv2.cvtColor(ndarr, cv2.COLOR_BGR2RGB) + 
cv2.imwrite(file_name, img) -def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, - normalize=True): - ''' +def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, normalize=True): + """ batch_image: [batch_size, channel, height, width] batch_heatmaps: ['batch_size, num_joints, height, width] file_name: saved file name - ''' + """ if normalize: batch_image = batch_image.clone() min = float(batch_image.min()) @@ -70,44 +77,45 @@ def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, heatmap_height = batch_heatmaps.size(2) heatmap_width = batch_heatmaps.size(3) - grid_image = np.zeros((batch_size*heatmap_height, - (num_joints+1)*heatmap_width, - 3), - dtype=np.uint8) + grid_image = np.zeros( + (batch_size * heatmap_height, (num_joints + 1) * heatmap_width, 3), + dtype=np.uint8, + ) preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) for i in range(batch_size): - image = batch_image[i].mul(255)\ - .clamp(0, 255)\ - .byte()\ - .permute(1, 2, 0)\ - .cpu().numpy() - heatmaps = batch_heatmaps[i].mul(255)\ - .clamp(0, 255)\ - .byte()\ - .cpu().numpy() - - resized_image = cv2.resize(image, - (int(heatmap_width), int(heatmap_height))) + image = ( + batch_image[i].mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() + ) + heatmaps = batch_heatmaps[i].mul(255).clamp(0, 255).byte().cpu().numpy() + + resized_image = cv2.resize(image, (int(heatmap_width), int(heatmap_height))) height_begin = heatmap_height * i height_end = heatmap_height * (i + 1) for j in range(num_joints): - cv2.circle(resized_image, - (int(preds[i][j][0]), int(preds[i][j][1])), - 1, [0, 0, 255], 1) + cv2.circle( + resized_image, + (int(preds[i][j][0]), int(preds[i][j][1])), + 1, + [0, 0, 255], + 1, + ) heatmap = heatmaps[j, :, :] colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) - masked_image = colored_heatmap*0.7 + resized_image*0.3 - cv2.circle(masked_image, - (int(preds[i][j][0]), int(preds[i][j][1])), - 1, [0, 0, 255], 1) - - width_begin = 
heatmap_width * (j+1) - width_end = heatmap_width * (j+2) - grid_image[height_begin:height_end, width_begin:width_end, :] = \ - masked_image + masked_image = colored_heatmap * 0.7 + resized_image * 0.3 + cv2.circle( + masked_image, + (int(preds[i][j][0]), int(preds[i][j][1])), + 1, + [0, 0, 255], + 1, + ) + + width_begin = heatmap_width * (j + 1) + width_end = heatmap_width * (j + 2) + grid_image[height_begin:height_end, width_begin:width_end, :] = masked_image # grid_image[height_begin:height_end, width_begin:width_end, :] = \ # colored_heatmap*0.7 + resized_image*0.3 @@ -116,26 +124,19 @@ def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, cv2.imwrite(file_name, grid_image) -def save_debug_images(config, input, meta, target, joints_pred, output, - prefix): +def save_debug_images(config, input, meta, target, joints_pred, output, prefix): if not config.DEBUG.DEBUG: return if config.DEBUG.SAVE_BATCH_IMAGES_GT: save_batch_image_with_joints( - input, meta['joints'], meta['joints_vis'], - '{}_gt.jpg'.format(prefix) + input, meta["joints"], meta["joints_vis"], "{}_gt.jpg".format(prefix) ) if config.DEBUG.SAVE_BATCH_IMAGES_PRED: save_batch_image_with_joints( - input, joints_pred, meta['joints_vis'], - '{}_pred.jpg'.format(prefix) + input, joints_pred, meta["joints_vis"], "{}_pred.jpg".format(prefix) ) if config.DEBUG.SAVE_HEATMAPS_GT: - save_batch_heatmaps( - input, target, '{}_hm_gt.jpg'.format(prefix) - ) + save_batch_heatmaps(input, target, "{}_hm_gt.jpg".format(prefix)) if config.DEBUG.SAVE_HEATMAPS_PRED: - save_batch_heatmaps( - input, output, '{}_hm_pred.jpg'.format(prefix) - ) + save_batch_heatmaps(input, output, "{}_hm_pred.jpg".format(prefix)) diff --git a/requirements.txt b/requirements.txt index 14f225c7..da42bc53 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ EasyDict==1.7 -opencv-python==3.4.1.15 -shapely==1.6.4 +opencv-python +shapely==2.0.0 Cython scipy pandas diff --git a/tools/train.py b/tools/train.py index 
039c5487..e89594ea 100755 --- a/tools/train.py +++ b/tools/train.py @@ -4,69 +4,52 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import argparse import os import pprint import shutil +import _init_paths +import dataset import torch -import torch.nn.parallel import torch.backends.cudnn as cudnn +import torch.nn.parallel import torch.optim import torch.utils.data import torch.utils.data.distributed import torchvision.transforms as transforms -from tensorboardX import SummaryWriter - -import _init_paths -from config import cfg -from config import update_config +from config import cfg, update_config +from core.function import train, validate from core.loss import JointsMSELoss -from core.function import train -from core.function import validate -from utils.utils import get_optimizer -from utils.utils import save_checkpoint -from utils.utils import create_logger -from utils.utils import get_model_summary +from torch.utils.tensorboard import SummaryWriter +from utils.utils import create_logger, get_model_summary, get_optimizer, save_checkpoint -import dataset import models def parse_args(): - parser = argparse.ArgumentParser(description='Train keypoints network') + parser = argparse.ArgumentParser(description="Train keypoints network") # general - parser.add_argument('--cfg', - help='experiment configure file name', - required=True, - type=str) + parser.add_argument( + "--cfg", help="experiment configure file name", required=True, type=str + ) - parser.add_argument('opts', - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) # philly 
- parser.add_argument('--modelDir', - help='model directory', - type=str, - default='') - parser.add_argument('--logDir', - help='log directory', - type=str, - default='') - parser.add_argument('--dataDir', - help='data directory', - type=str, - default='') - parser.add_argument('--prevModelDir', - help='prev Model directory', - type=str, - default='') + parser.add_argument("--modelDir", help="model directory", type=str, default="") + parser.add_argument("--logDir", help="log directory", type=str, default="") + parser.add_argument("--dataDir", help="data directory", type=str, default="") + parser.add_argument( + "--prevModelDir", help="prev Model directory", type=str, default="" + ) args = parser.parse_args() @@ -77,8 +60,9 @@ def main(): args = parse_args() update_config(cfg, args) - logger, final_output_dir, tb_log_dir = create_logger( - cfg, args.cfg, 'train') + logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, "train") + + print("final_output_dir: ", final_output_dir) logger.info(pprint.pformat(args)) logger.info(cfg) @@ -88,69 +72,74 @@ def main(): torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED - model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( - cfg, is_train=True - ) + model = eval("models." 
+ cfg.MODEL.NAME + ".get_pose_net")(cfg, is_train=True) # copy model file this_dir = os.path.dirname(__file__) shutil.copy2( - os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), - final_output_dir) + os.path.join(this_dir, "../lib/models", cfg.MODEL.NAME + ".py"), + final_output_dir, + ) # logger.info(pprint.pformat(model)) writer_dict = { - 'writer': SummaryWriter(log_dir=tb_log_dir), - 'train_global_steps': 0, - 'valid_global_steps': 0, + "writer": SummaryWriter(log_dir=tb_log_dir), + "train_global_steps": 0, + "valid_global_steps": 0, } - dump_input = torch.rand( - (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]) - ) - writer_dict['writer'].add_graph(model, (dump_input, )) + dump_input = torch.rand((1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])) + writer_dict["writer"].add_graph(model, (dump_input,)) logger.info(get_model_summary(model, dump_input)) model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() # define loss function (criterion) and optimizer - criterion = JointsMSELoss( - use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT - ).cuda() + criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda() # Data loading code normalize = transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) - train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( - cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True, - transforms.Compose([ - transforms.ToTensor(), - normalize, - ]) + train_dataset = eval("dataset." + cfg.DATASET.DATASET)( + cfg, + cfg.DATASET.ROOT, + cfg.DATASET.TRAIN_SET, + True, + transforms.Compose( + [ + transforms.ToTensor(), + normalize, + ] + ), ) - valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( - cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, - transforms.Compose([ - transforms.ToTensor(), - normalize, - ]) + valid_dataset = eval("dataset." 
+ cfg.DATASET.DATASET)( + cfg, + cfg.DATASET.ROOT, + cfg.DATASET.TEST_SET, + False, + transforms.Compose( + [ + transforms.ToTensor(), + normalize, + ] + ), ) train_loader = torch.utils.data.DataLoader( train_dataset, - batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), + batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS), shuffle=cfg.TRAIN.SHUFFLE, num_workers=cfg.WORKERS, - pin_memory=cfg.PIN_MEMORY + pin_memory=cfg.PIN_MEMORY, ) valid_loader = torch.utils.data.DataLoader( valid_dataset, - batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), + batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS), shuffle=False, num_workers=cfg.WORKERS, - pin_memory=cfg.PIN_MEMORY + pin_memory=cfg.PIN_MEMORY, ) best_perf = 0.0 @@ -158,39 +147,53 @@ def main(): last_epoch = -1 optimizer = get_optimizer(cfg, model) begin_epoch = cfg.TRAIN.BEGIN_EPOCH - checkpoint_file = os.path.join( - final_output_dir, 'checkpoint.pth' - ) + checkpoint_file = os.path.join(final_output_dir, "checkpoint.pth") if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) checkpoint = torch.load(checkpoint_file) - begin_epoch = checkpoint['epoch'] - best_perf = checkpoint['perf'] - last_epoch = checkpoint['epoch'] - model.load_state_dict(checkpoint['state_dict']) - - optimizer.load_state_dict(checkpoint['optimizer']) - logger.info("=> loaded checkpoint '{}' (epoch {})".format( - checkpoint_file, checkpoint['epoch'])) + begin_epoch = checkpoint["epoch"] + best_perf = checkpoint["perf"] + last_epoch = checkpoint["epoch"] + model.load_state_dict(checkpoint["state_dict"]) + + optimizer.load_state_dict(checkpoint["optimizer"]) + logger.info( + "=> loaded checkpoint '{}' (epoch {})".format( + checkpoint_file, checkpoint["epoch"] + ) + ) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( - optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, - last_epoch=last_epoch + optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, last_epoch=last_epoch 
) for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): lr_scheduler.step() # train for one epoch - train(cfg, train_loader, model, criterion, optimizer, epoch, - final_output_dir, tb_log_dir, writer_dict) - + train( + cfg, + train_loader, + model, + criterion, + optimizer, + epoch, + final_output_dir, + tb_log_dir, + writer_dict, + ) # evaluate on validation set perf_indicator = validate( - cfg, valid_loader, valid_dataset, model, criterion, - final_output_dir, tb_log_dir, writer_dict + cfg, + valid_loader, + valid_dataset, + model, + criterion, + final_output_dir, + tb_log_dir, + writer_dict, ) if perf_indicator >= best_perf: @@ -199,25 +202,25 @@ def main(): else: best_model = False - logger.info('=> saving checkpoint to {}'.format(final_output_dir)) - save_checkpoint({ - 'epoch': epoch + 1, - 'model': cfg.MODEL.NAME, - 'state_dict': model.state_dict(), - 'best_state_dict': model.module.state_dict(), - 'perf': perf_indicator, - 'optimizer': optimizer.state_dict(), - }, best_model, final_output_dir) - - final_model_state_file = os.path.join( - final_output_dir, 'final_state.pth' - ) - logger.info('=> saving final model state to {}'.format( - final_model_state_file) - ) + logger.info("=> saving checkpoint to {}".format(final_output_dir)) + save_checkpoint( + { + "epoch": epoch + 1, + "model": cfg.MODEL.NAME, + "state_dict": model.state_dict(), + "best_state_dict": model.module.state_dict(), + "perf": perf_indicator, + "optimizer": optimizer.state_dict(), + }, + best_model, + final_output_dir, + ) + + final_model_state_file = os.path.join(final_output_dir, "final_state.pth") + logger.info("=> saving final model state to {}".format(final_model_state_file)) torch.save(model.module.state_dict(), final_model_state_file) - writer_dict['writer'].close() + writer_dict["writer"].close() -if __name__ == '__main__': +if __name__ == "__main__": main() From 0ca9a37f3a2168bec02c2ec8bdbfc7c6bb45d2c1 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Tue, 18 Apr 2023 00:00:55 -0700 
Subject: [PATCH 02/30] change sm for nvcc --- lib/nms/setup_linux.py | 87 +++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/lib/nms/setup_linux.py b/lib/nms/setup_linux.py index 9120a93b..b6b09f49 100644 --- a/lib/nms/setup_linux.py +++ b/lib/nms/setup_linux.py @@ -6,11 +6,12 @@ # -------------------------------------------------------- import os -from os.path import join as pjoin -from setuptools import setup from distutils.extension import Extension -from Cython.Distutils import build_ext +from os.path import join as pjoin + import numpy as np +from Cython.Distutils import build_ext +from setuptools import setup def find_in_path(name, path): @@ -33,26 +34,35 @@ def locate_cuda(): """ # first check if the CUDAHOME env variable is in use - if 'CUDAHOME' in os.environ: - home = os.environ['CUDAHOME'] - nvcc = pjoin(home, 'bin', 'nvcc') + if "CUDAHOME" in os.environ: + home = os.environ["CUDAHOME"] + nvcc = pjoin(home, "bin", "nvcc") else: # otherwise, search the PATH for NVCC - default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') - nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) + default_path = pjoin(os.sep, "usr", "local", "cuda", "bin") + nvcc = find_in_path("nvcc", os.environ["PATH"] + os.pathsep + default_path) if nvcc is None: - raise EnvironmentError('The nvcc binary could not be ' - 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + raise EnvironmentError( + "The nvcc binary could not be " + "located in your $PATH. 
Either add it to your path, or set $CUDAHOME" + ) home = os.path.dirname(os.path.dirname(nvcc)) - cudaconfig = {'home':home, 'nvcc':nvcc, - 'include': pjoin(home, 'include'), - 'lib64': pjoin(home, 'lib64')} + cudaconfig = { + "home": home, + "nvcc": nvcc, + "include": pjoin(home, "include"), + "lib64": pjoin(home, "lib64"), + } for k, v in cudaconfig.items(): if not os.path.exists(v): - raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + raise EnvironmentError( + "The CUDA %s path could not be located in %s" % (k, v) + ) return cudaconfig + + CUDA = locate_cuda() @@ -73,7 +83,7 @@ def customize_compiler_for_nvcc(self): subclassing going on.""" # tell the compiler it can processes .cu - self.src_extensions.append('.cu') + self.src_extensions.append(".cu") # save references to the default compiler_so and _comple methods default_compiler_so = self.compiler_so @@ -83,14 +93,14 @@ def customize_compiler_for_nvcc(self): # object but distutils doesn't have the ability to change compilers # based on source extension: we add it. 
def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - if os.path.splitext(src)[1] == '.cu': + if os.path.splitext(src)[1] == ".cu": # use the cuda for .cu files - self.set_executable('compiler_so', CUDA['nvcc']) + self.set_executable("compiler_so", CUDA["nvcc"]) # use only a subset of the extra_postargs, which are 1-1 translated # from the extra_compile_args in the Extension class - postargs = extra_postargs['nvcc'] + postargs = extra_postargs["nvcc"] else: - postargs = extra_postargs['gcc'] + postargs = extra_postargs["gcc"] super(obj, src, ext, cc_args, postargs, pp_opts) # reset the default compiler_so, which we might have changed for cuda @@ -111,31 +121,36 @@ def build_extensions(self): Extension( "cpu_nms", ["cpu_nms.pyx"], - extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, - include_dirs = [numpy_include] + extra_compile_args={"gcc": ["-Wno-cpp", "-Wno-unused-function"]}, + include_dirs=[numpy_include], ), - Extension('gpu_nms', - ['nms_kernel.cu', 'gpu_nms.pyx'], - library_dirs=[CUDA['lib64']], - libraries=['cudart'], - language='c++', - runtime_library_dirs=[CUDA['lib64']], + Extension( + "gpu_nms", + ["nms_kernel.cu", "gpu_nms.pyx"], + library_dirs=[CUDA["lib64"]], + libraries=["cudart"], + language="c++", + runtime_library_dirs=[CUDA["lib64"]], # this syntax is specific to this build system # we're only going to use certain compiler args with nvcc and not with # gcc the implementation of this trick is in customize_compiler() below - extra_compile_args={'gcc': ["-Wno-unused-function"], - 'nvcc': ['-arch=sm_35', - '--ptxas-options=-v', - '-c', - '--compiler-options', - "'-fPIC'"]}, - include_dirs = [numpy_include, CUDA['include']] + extra_compile_args={ + "gcc": ["-Wno-unused-function"], + "nvcc": [ + "-arch=sm_86", + "--ptxas-options=-v", + "-c", + "--compiler-options", + "'-fPIC'", + ], + }, + include_dirs=[numpy_include, CUDA["include"]], ), ] setup( - name='nms', + name="nms", ext_modules=ext_modules, # inject our custom 
trigger - cmdclass={'build_ext': custom_build_ext}, + cmdclass={"build_ext": custom_build_ext}, ) From 0069a8ed3f71d12ee1200f590c33b70009da8f93 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Tue, 18 Apr 2023 00:38:25 -0700 Subject: [PATCH 03/30] change batch_size --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 978da652..c2e676ef 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -91,7 +91,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 2 + BATCH_SIZE_PER_GPU: 10 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 50 @@ -107,7 +107,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 2 + BATCH_SIZE_PER_GPU: 10 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 From 852747a0932a73cb31b360924a736ac1b8b5ca3c Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Tue, 18 Apr 2023 00:42:26 -0700 Subject: [PATCH 04/30] changed nb epochs --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index c2e676ef..7f8750b7 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -94,7 +94,7 @@ TRAIN: BATCH_SIZE_PER_GPU: 10 SHUFFLE: true BEGIN_EPOCH: 0 - END_EPOCH: 50 + END_EPOCH: 200 OPTIMIZER: adam LR: 0.001 LR_FACTOR: 0.1 From bd99863df113db95529e220f8c99af207b2511b3 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Fri, 21 Apr 2023 15:59:23 -0700 Subject: [PATCH 05/30] added coco val data for 
finetuning infinity_coco --- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 6 +- lib/dataset/coco.py | 306 ++++++++++-------- lib/dataset/infinity_coco.py | 33 +- lib/nms/nms.py | 62 +++- lib/utils/vis.py | 1 - 5 files changed, 247 insertions(+), 161 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 7f8750b7..25027e16 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -26,7 +26,7 @@ MODEL: INIT_WEIGHTS: true NAME: pose_hrnet NUM_JOINTS: 58 - PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" + PRETRAINED: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" TARGET_TYPE: gaussian IMAGE_SIZE: - 288 @@ -91,7 +91,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 10 + BATCH_SIZE_PER_GPU: 2 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -107,7 +107,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 10 + BATCH_SIZE_PER_GPU: 5 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 diff --git a/lib/dataset/coco.py b/lib/dataset/coco.py index 98d9c2db..23a3e9d1 100755 --- a/lib/dataset/coco.py +++ b/lib/dataset/coco.py @@ -4,30 +4,24 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -from collections import defaultdict -from collections import OrderedDict import logging import os +from collections import OrderedDict, defaultdict -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval import json_tricks as json import numpy as np - from dataset.JointsDataset import JointsDataset -from 
nms.nms import oks_nms -from nms.nms import soft_oks_nms - +from nms.nms import oks_nms, soft_oks_nms +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval logger = logging.getLogger(__name__) class COCODataset(JointsDataset): - ''' + """ "keypoints": { 0: "nose", 1: "left_eye", @@ -47,12 +41,17 @@ class COCODataset(JointsDataset): 15: "left_ankle", 16: "right_ankle" }, - "skeleton": [ + "skeleton": [ [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13], [6,7],[6,8], [7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]] - ''' - def __init__(self, cfg, root, image_set, is_train, transform=None): + """ + + def __init__(self, cfg, root, image_set, is_train, transform=None, infinity=False): super().__init__(cfg, root, image_set, is_train, transform) + if infinity: + self.root = "data/coco/" + self.image_set = "val2017" + self.nms_thre = cfg.TEST.NMS_THRE self.image_thre = cfg.TEST.IMAGE_THRE self.soft_nms = cfg.TEST.SOFT_NMS @@ -68,10 +67,9 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): self.coco = COCO(self._get_ann_file_keypoint()) # deal with class names - cats = [cat['name'] - for cat in self.coco.loadCats(self.coco.getCatIds())] - self.classes = ['__background__'] + cats - logger.info('=> classes: {}'.format(self.classes)) + cats = [cat["name"] for cat in self.coco.loadCats(self.coco.getCatIds())] + self.classes = ["__background__"] + cats + logger.info("=> classes: {}".format(self.classes)) self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) @@ -85,21 +83,44 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): # load image file names self.image_set_index = self._load_image_set_index() self.num_images = len(self.image_set_index) - logger.info('=> num_images: {}'.format(self.num_images)) + logger.info("=> num_images: {}".format(self.num_images)) self.num_joints = 17 - 
self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], - [9, 10], [11, 12], [13, 14], [15, 16]] + self.flip_pairs = [ + [1, 2], + [3, 4], + [5, 6], + [7, 8], + [9, 10], + [11, 12], + [13, 14], + [15, 16], + ] self.parent_ids = None self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) self.lower_body_ids = (11, 12, 13, 14, 15, 16) self.joints_weight = np.array( [ - 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, - 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5 + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.2, + 1.2, + 1.5, + 1.5, + 1.0, + 1.0, + 1.2, + 1.2, + 1.5, + 1.5, ], - dtype=np.float32 + dtype=np.float32, ).reshape((self.num_joints, 1)) self.db = self._get_db() @@ -107,20 +128,17 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): if is_train and cfg.DATASET.SELECT_DATA: self.db = self.select_data(self.db) - logger.info('=> load {} samples'.format(len(self.db))) + logger.info("=> load {} samples".format(len(self.db))) def _get_ann_file_keypoint(self): - """ self.root / annotations / person_keypoints_train2017.json """ - prefix = 'person_keypoints' \ - if 'test' not in self.image_set else 'image_info' + """self.root / annotations / person_keypoints_train2017.json""" + prefix = "person_keypoints" if "test" not in self.image_set else "image_info" return os.path.join( - self.root, - 'annotations', - prefix + '_' + self.image_set + '.json' + self.root, "annotations", prefix + "_" + self.image_set + ".json" ) def _load_image_set_index(self): - """ image id: int """ + """image id: int""" image_ids = self.coco.getImgIds() return image_ids @@ -134,7 +152,7 @@ def _get_db(self): return gt_db def _load_coco_keypoint_annotations(self): - """ ground truth bbox and keypoints """ + """ground truth bbox and keypoints""" gt_db = [] for index in self.image_set_index: gt_db.extend(self._load_coco_keypoint_annotation_kernal(index)) @@ -152,8 +170,8 @@ def _load_coco_keypoint_annotation_kernal(self, index): :return: db entry """ im_ann = self.coco.loadImgs(index)[0] - width 
= im_ann['width'] - height = im_ann['height'] + width = im_ann["width"] + height = im_ann["height"] annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False) objs = self.coco.loadAnns(annIds) @@ -161,49 +179,51 @@ def _load_coco_keypoint_annotation_kernal(self, index): # sanitize bboxes valid_objs = [] for obj in objs: - x, y, w, h = obj['bbox'] + x, y, w, h = obj["bbox"] x1 = np.max((0, x)) y1 = np.max((0, y)) x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) - if obj['area'] > 0 and x2 >= x1 and y2 >= y1: - obj['clean_bbox'] = [x1, y1, x2-x1, y2-y1] + if obj["area"] > 0 and x2 >= x1 and y2 >= y1: + obj["clean_bbox"] = [x1, y1, x2 - x1, y2 - y1] valid_objs.append(obj) objs = valid_objs rec = [] for obj in objs: - cls = self._coco_ind_to_class_ind[obj['category_id']] + cls = self._coco_ind_to_class_ind[obj["category_id"]] if cls != 1: continue # ignore objs without keypoints annotation - if max(obj['keypoints']) == 0: + if max(obj["keypoints"]) == 0: continue joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32) joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float32) for ipt in range(self.num_joints): - joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0] - joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1] + joints_3d[ipt, 0] = obj["keypoints"][ipt * 3 + 0] + joints_3d[ipt, 1] = obj["keypoints"][ipt * 3 + 1] joints_3d[ipt, 2] = 0 - t_vis = obj['keypoints'][ipt * 3 + 2] + t_vis = obj["keypoints"][ipt * 3 + 2] if t_vis > 1: t_vis = 1 joints_3d_vis[ipt, 0] = t_vis joints_3d_vis[ipt, 1] = t_vis joints_3d_vis[ipt, 2] = 0 - center, scale = self._box2cs(obj['clean_bbox'][:4]) - rec.append({ - 'image': self.image_path_from_index(index), - 'center': center, - 'scale': scale, - 'joints_3d': joints_3d, - 'joints_3d_vis': joints_3d_vis, - 'filename': '', - 'imgnum': 0, - }) + center, scale = self._box2cs(obj["clean_bbox"][:4]) + rec.append( + { + "image": self.image_path_from_index(index), + "center": center, + 
"scale": scale, + "joints_3d": joints_3d, + "joints_3d_vis": joints_3d_vis, + "filename": "", + "imgnum": 0, + } + ) return rec @@ -221,48 +241,47 @@ def _xywh2cs(self, x, y, w, h): elif w < self.aspect_ratio * h: w = h * self.aspect_ratio scale = np.array( - [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], - dtype=np.float32) + [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], dtype=np.float32 + ) if center[0] != -1: scale = scale * 1.25 return center, scale def image_path_from_index(self, index): - """ example: images / train2017 / 000000119993.jpg """ - file_name = '%012d.jpg' % index - if '2014' in self.image_set: - file_name = 'COCO_%s_' % self.image_set + file_name + """example: images / train2017 / 000000119993.jpg""" + file_name = "%012d.jpg" % index + if "2014" in self.image_set: + file_name = "COCO_%s_" % self.image_set + file_name - prefix = 'test2017' if 'test' in self.image_set else self.image_set + prefix = "test2017" if "test" in self.image_set else self.image_set - data_name = prefix + '.zip@' if self.data_format == 'zip' else prefix + data_name = prefix + ".zip@" if self.data_format == "zip" else prefix - image_path = os.path.join( - self.root, 'images', data_name, file_name) + image_path = os.path.join(self.root, "images", data_name, file_name) return image_path def _load_coco_person_detection_results(self): all_boxes = None - with open(self.bbox_file, 'r') as f: + with open(self.bbox_file, "r") as f: all_boxes = json.load(f) if not all_boxes: - logger.error('=> Load %s fail!' % self.bbox_file) + logger.error("=> Load %s fail!" 
% self.bbox_file) return None - logger.info('=> Total boxes: {}'.format(len(all_boxes))) + logger.info("=> Total boxes: {}".format(len(all_boxes))) kpt_db = [] num_boxes = 0 for n_img in range(0, len(all_boxes)): det_res = all_boxes[n_img] - if det_res['category_id'] != 1: + if det_res["category_id"] != 1: continue - img_name = self.image_path_from_index(det_res['image_id']) - box = det_res['bbox'] - score = det_res['score'] + img_name = self.image_path_from_index(det_res["image_id"]) + box = det_res["bbox"] + score = det_res["score"] if score < self.image_thre: continue @@ -271,52 +290,56 @@ def _load_coco_person_detection_results(self): center, scale = self._box2cs(box) joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32) - joints_3d_vis = np.ones( - (self.num_joints, 3), dtype=np.float32) - kpt_db.append({ - 'image': img_name, - 'center': center, - 'scale': scale, - 'score': score, - 'joints_3d': joints_3d, - 'joints_3d_vis': joints_3d_vis, - }) - - logger.info('=> Total boxes after fliter low score@{}: {}'.format( - self.image_thre, num_boxes)) + joints_3d_vis = np.ones((self.num_joints, 3), dtype=np.float32) + kpt_db.append( + { + "image": img_name, + "center": center, + "scale": scale, + "score": score, + "joints_3d": joints_3d, + "joints_3d_vis": joints_3d_vis, + } + ) + + logger.info( + "=> Total boxes after fliter low score@{}: {}".format( + self.image_thre, num_boxes + ) + ) return kpt_db - def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, - *args, **kwargs): + def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, *args, **kwargs): rank = cfg.RANK - res_folder = os.path.join(output_dir, 'results') + res_folder = os.path.join(output_dir, "results") if not os.path.exists(res_folder): try: os.makedirs(res_folder) except Exception: - logger.error('Fail to make {}'.format(res_folder)) + logger.error("Fail to make {}".format(res_folder)) res_file = os.path.join( - res_folder, 'keypoints_{}_results_{}.json'.format( - 
self.image_set, rank) + res_folder, "keypoints_{}_results_{}.json".format(self.image_set, rank) ) # person x (keypoints) _kpts = [] for idx, kpt in enumerate(preds): - _kpts.append({ - 'keypoints': kpt, - 'center': all_boxes[idx][0:2], - 'scale': all_boxes[idx][2:4], - 'area': all_boxes[idx][4], - 'score': all_boxes[idx][5], - 'image': int(img_path[idx][-16:-4]) - }) + _kpts.append( + { + "keypoints": kpt, + "center": all_boxes[idx][0:2], + "scale": all_boxes[idx][2:4], + "area": all_boxes[idx][4], + "score": all_boxes[idx][5], + "image": int(img_path[idx][-16:-4]), + } + ) # image x person x (keypoints) kpts = defaultdict(list) for kpt in _kpts: - kpts[kpt['image']].append(kpt) + kpts[kpt["image"]].append(kpt) # rescoring and oks nms num_joints = self.num_joints @@ -326,84 +349,80 @@ def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, for img in kpts.keys(): img_kpts = kpts[img] for n_p in img_kpts: - box_score = n_p['score'] + box_score = n_p["score"] kpt_score = 0 valid_num = 0 for n_jt in range(0, num_joints): - t_s = n_p['keypoints'][n_jt][2] + t_s = n_p["keypoints"][n_jt][2] if t_s > in_vis_thre: kpt_score = kpt_score + t_s valid_num = valid_num + 1 if valid_num != 0: kpt_score = kpt_score / valid_num # rescoring - n_p['score'] = kpt_score * box_score + n_p["score"] = kpt_score * box_score if self.soft_nms: keep = soft_oks_nms( - [img_kpts[i] for i in range(len(img_kpts))], - oks_thre + [img_kpts[i] for i in range(len(img_kpts))], oks_thre ) else: - keep = oks_nms( - [img_kpts[i] for i in range(len(img_kpts))], - oks_thre - ) + keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))], oks_thre) if len(keep) == 0: oks_nmsed_kpts.append(img_kpts) else: oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) - self._write_coco_keypoint_results( - oks_nmsed_kpts, res_file) - if 'test' not in self.image_set: - info_str = self._do_python_keypoint_eval( - res_file, res_folder) + self._write_coco_keypoint_results(oks_nmsed_kpts, res_file) + if "test" 
not in self.image_set: + info_str = self._do_python_keypoint_eval(res_file, res_folder) name_value = OrderedDict(info_str) - return name_value, name_value['AP'] + return name_value, name_value["AP"] else: - return {'Null': 0}, 0 + return {"Null": 0}, 0 def _write_coco_keypoint_results(self, keypoints, res_file): data_pack = [ { - 'cat_id': self._class_to_coco_ind[cls], - 'cls_ind': cls_ind, - 'cls': cls, - 'ann_type': 'keypoints', - 'keypoints': keypoints + "cat_id": self._class_to_coco_ind[cls], + "cls_ind": cls_ind, + "cls": cls, + "ann_type": "keypoints", + "keypoints": keypoints, } - for cls_ind, cls in enumerate(self.classes) if not cls == '__background__' + for cls_ind, cls in enumerate(self.classes) + if not cls == "__background__" ] results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) - logger.info('=> writing results json to %s' % res_file) - with open(res_file, 'w') as f: + logger.info("=> writing results json to %s" % res_file) + with open(res_file, "w") as f: json.dump(results, f, sort_keys=True, indent=4) try: json.load(open(res_file)) except Exception: content = [] - with open(res_file, 'r') as f: + with open(res_file, "r") as f: for line in f: content.append(line) - content[-1] = ']' - with open(res_file, 'w') as f: + content[-1] = "]" + with open(res_file, "w") as f: for c in content: f.write(c) def _coco_keypoint_results_one_category_kernel(self, data_pack): - cat_id = data_pack['cat_id'] - keypoints = data_pack['keypoints'] + cat_id = data_pack["cat_id"] + keypoints = data_pack["keypoints"] cat_results = [] for img_kpts in keypoints: if len(img_kpts) == 0: continue - _key_points = np.array([img_kpts[k]['keypoints'] - for k in range(len(img_kpts))]) + _key_points = np.array( + [img_kpts[k]["keypoints"] for k in range(len(img_kpts))] + ) key_points = np.zeros( (_key_points.shape[0], self.num_joints * 3), dtype=np.float ) @@ -415,12 +434,12 @@ def _coco_keypoint_results_one_category_kernel(self, data_pack): result = [ { - 
'image_id': img_kpts[k]['image'], - 'category_id': cat_id, - 'keypoints': list(key_points[k]), - 'score': img_kpts[k]['score'], - 'center': list(img_kpts[k]['center']), - 'scale': list(img_kpts[k]['scale']) + "image_id": img_kpts[k]["image"], + "category_id": cat_id, + "keypoints": list(key_points[k]), + "score": img_kpts[k]["score"], + "center": list(img_kpts[k]["center"]), + "scale": list(img_kpts[k]["scale"]), } for k in range(len(img_kpts)) ] @@ -430,13 +449,24 @@ def _coco_keypoint_results_one_category_kernel(self, data_pack): def _do_python_keypoint_eval(self, res_file, res_folder): coco_dt = self.coco.loadRes(res_file) - coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') + coco_eval = COCOeval(self.coco, coco_dt, "keypoints") coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() - stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] + stats_names = [ + "AP", + "Ap .5", + "AP .75", + "AP (M)", + "AP (L)", + "AR", + "AR .5", + "AR .75", + "AR (M)", + "AR (L)", + ] info_str = [] for ind, name in enumerate(stats_names): diff --git a/lib/dataset/infinity_coco.py b/lib/dataset/infinity_coco.py index a9937f4c..89ca49d1 100644 --- a/lib/dataset/infinity_coco.py +++ b/lib/dataset/infinity_coco.py @@ -12,6 +12,7 @@ import json_tricks as json import numpy as np +from dataset.coco import COCODataset from dataset.JointsDataset import JointsDataset from nms.nms import oks_nms, soft_oks_nms from pycocotools.coco import COCO @@ -61,6 +62,9 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): self.pixel_std = 200 self.coco = COCO(self._get_ann_file_keypoint()) + self.coco_dataset = COCODataset( + cfg, root, image_set, is_train, transform, infinity=True + ) # deal with class names cats = [cat["id"] for cat in self.coco.loadCats(self.coco.getCatIds())] @@ -238,11 +242,14 @@ def _get_db(self): def _load_coco_keypoint_annotations(self): """ground truth bbox and 
keypoints""" gt_db = [] - for index in self.image_set_index: - gt_db.extend(self._load_coco_keypoint_annotation_kernal(index)) + for index, index_coco in zip( + self.image_set_index, + self.coco_dataset.image_set_index[: len(self.image_set_index)], + ): + gt_db.extend(self._load_coco_keypoint_annotation_kernal(index, index_coco)) return gt_db - def _load_coco_keypoint_annotation_kernal(self, index): + def _load_coco_keypoint_annotation_kernal(self, index, index_coco): """ coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] iscrowd: @@ -321,6 +328,26 @@ def _load_coco_keypoint_annotation_kernal(self, index): } ) + rec_coco = self.coco_dataset._load_coco_keypoint_annotation_kernal(index_coco) + for r in rec_coco: + joints_3d_coco = r["joints_3d"] + joints_3d_vis_coco = r["joints_3d_vis"] + joints_3d_coco = np.vstack( + ( + joints_3d_coco, + np.zeros((self.num_joints_infinity, 3), dtype=np.float32), + ) + ) + joints_3d_vis_coco = np.vstack( + ( + joints_3d_vis_coco, + np.zeros((self.num_joints_infinity, 3), dtype=np.float32), + ) + ) + r["joints_3d"] = joints_3d_coco + r["joints_3d_vis"] = joints_3d_vis_coco + rec.append(r) + return rec def _box2cs(self, box): diff --git a/lib/nms/nms.py b/lib/nms/nms.py index 7f83e05e..a9a6bea9 100644 --- a/lib/nms/nms.py +++ b/lib/nms/nms.py @@ -4,9 +4,7 @@ # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import numpy as np @@ -17,18 +15,21 @@ def py_nms_wrapper(thresh): def _nms(dets): return nms(dets, thresh) + return _nms def cpu_nms_wrapper(thresh): def _nms(dets): return cpu_nms(dets, thresh) + return _nms def gpu_nms_wrapper(thresh, device_id): def _nms(dets): return gpu_nms(dets, thresh, device_id) + 
return _nms @@ -74,7 +75,30 @@ def nms(dets, thresh): def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): if not isinstance(sigmas, np.ndarray): - sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 + sigmas = ( + np.array( + [ + 0.26, + 0.25, + 0.25, + 0.35, + 0.35, + 0.79, + 0.79, + 0.72, + 0.72, + 0.62, + 0.62, + 1.07, + 1.07, + 0.87, + 0.87, + 0.89, + 0.89, + ] + ) + / 10.0 + ) vars = (sigmas * 2) ** 2 xg = g[0::3] yg = g[1::3] @@ -86,7 +110,7 @@ def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): vd = d[n_d, 2::3] dx = xd - xg dy = yd - yg - e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 + e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 if in_vis_thre is not None: ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) e = e[ind] @@ -105,9 +129,11 @@ def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): if len(kpts_db) == 0: return [] - scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) - kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) - areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) + scores = np.array([kpts_db[i]["score"] for i in range(len(kpts_db))]) + kpts = np.array( + [kpts_db[i]["keypoints"][:17].flatten() for i in range(len(kpts_db))] + ) + areas = np.array([kpts_db[i]["area"] for i in range(len(kpts_db))]) order = scores.argsort()[::-1] @@ -116,7 +142,9 @@ def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): i = order[0] keep.append(i) - oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) + oks_ovr = oks_iou( + kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre + ) inds = np.where(oks_ovr <= thresh)[0] order = order[inds + 1] @@ -124,13 +152,13 @@ def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): return keep -def rescore(overlap, scores, thresh, 
type='gaussian'): +def rescore(overlap, scores, thresh, type="gaussian"): assert overlap.shape[0] == scores.shape[0] - if type == 'linear': + if type == "linear": inds = np.where(overlap >= thresh)[0] scores[inds] = scores[inds] * (1 - overlap[inds]) else: - scores = scores * np.exp(- overlap**2 / thresh) + scores = scores * np.exp(-(overlap**2) / thresh) return scores @@ -146,9 +174,9 @@ def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): if len(kpts_db) == 0: return [] - scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) - kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) - areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) + scores = np.array([kpts_db[i]["score"] for i in range(len(kpts_db))]) + kpts = np.array([kpts_db[i]["keypoints"].flatten() for i in range(len(kpts_db))]) + areas = np.array([kpts_db[i]["area"] for i in range(len(kpts_db))]) order = scores.argsort()[::-1] scores = scores[order] @@ -160,7 +188,9 @@ def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): while order.size > 0 and keep_cnt < max_dets: i = order[0] - oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) + oks_ovr = oks_iou( + kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre + ) order = order[1:] scores = rescore(oks_ovr, scores[1:], thresh) diff --git a/lib/utils/vis.py b/lib/utils/vis.py index 39f68101..c5f54473 100755 --- a/lib/utils/vis.py +++ b/lib/utils/vis.py @@ -39,7 +39,6 @@ def save_batch_image_with_joints( break joints = batch_joints[k] joints_vis = batch_joints_vis[k] - i_joint = 0 for joint, joint_vis in zip(joints, joints_vis): joint[0] = x * width + padding + joint[0] From 24836b886e305b8db7662820632c290b7bcf8fe0 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Fri, 21 Apr 2023 16:23:02 -0700 Subject: [PATCH 06/30] changed experiments config --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 
1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 25027e16..fdf5f160 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -17,7 +17,7 @@ DATASET: FLIP: true NUM_JOINTS_HALF_BODY: 27 PROB_HALF_BODY: 0.3 - ROOT: "../infinity_dataset_medium_512" + ROOT: "../infinity_dataset_combined" ROT_FACTOR: 45 SCALE_FACTOR: 0.35 TEST_SET: "test" @@ -91,7 +91,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 2 + BATCH_SIZE_PER_GPU: 20 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 From 96f2e72d0688c72ee179231631f6ffe8f12d5528 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Fri, 21 Apr 2023 20:27:33 -0700 Subject: [PATCH 07/30] change batch size --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index fdf5f160..45cc8dc3 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -91,7 +91,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 20 + BATCH_SIZE_PER_GPU: 40 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -107,7 +107,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 5 + BATCH_SIZE_PER_GPU: 10 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 From 8caf3579509cd4c39cf6b063182a5eb3d8eeb40e Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Sat, 22 Apr 2023 11:25:29 -0700 Subject: [PATCH 08/30] change num workers and visualization --- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 6 +++--- lib/utils/vis.py | 14 +++++--------- 2 files 
changed, 8 insertions(+), 12 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 45cc8dc3..5f4d37a2 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -4,10 +4,10 @@ CUDNN: DETERMINISTIC: false ENABLED: true DATA_DIR: "" -GPUS: (0,) +GPUS: (0, 1) OUTPUT_DIR: "output_infinity_coco" LOG_DIR: "log_infinity_coco" -WORKERS: 1 +WORKERS: 12 PRINT_FREQ: 10 DATASET: @@ -91,7 +91,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 40 + BATCH_SIZE_PER_GPU: 20 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 diff --git a/lib/utils/vis.py b/lib/utils/vis.py index c5f54473..d1557512 100755 --- a/lib/utils/vis.py +++ b/lib/utils/vis.py @@ -43,15 +43,11 @@ def save_batch_image_with_joints( for joint, joint_vis in zip(joints, joints_vis): joint[0] = x * width + padding + joint[0] joint[1] = y * height + padding + joint[1] - if joint_vis[0]: - if i_joint < 17: - cv2.circle( - ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2 - ) - else: - cv2.circle( - ndarr, (int(joint[0]), int(joint[1])), 2, [0, 0, 255], 2 - ) + # if joint_vis[0]: + if i_joint < 17: + cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) + else: + cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [0, 0, 255], 2) i_joint += 1 k = k + 1 img = cv2.cvtColor(ndarr, cv2.COLOR_BGR2RGB) From fdbba172451dbeaabe0de081bee979cfecbdee8c Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Sat, 22 Apr 2023 11:32:11 -0700 Subject: [PATCH 09/30] change workers nb --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 5f4d37a2..bbe5ed5b 100644 --- 
a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -7,7 +7,7 @@ DATA_DIR: "" GPUS: (0, 1) OUTPUT_DIR: "output_infinity_coco" LOG_DIR: "log_infinity_coco" -WORKERS: 12 +WORKERS: 2 PRINT_FREQ: 10 DATASET: From fb10fcde074ca0677ed0b2a34693456b1f5cc6ea Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 1 May 2023 11:27:02 -0700 Subject: [PATCH 10/30] changed test visulization --- demo/demo.py | 21 +++++++++++++++++-- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 6 +++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index 5eae5b4b..6f186370 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -182,6 +182,7 @@ NUM_KPTS = 17 NUM_KPTS_INFINITY = 41 +NUM_KPTS_INFINITY_COCO = NUM_KPTS + NUM_KPTS_INFINITY CTX = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") @@ -209,7 +210,21 @@ def draw_pose_infinity(keypoints, img): assert keypoints.shape == (NUM_KPTS_INFINITY, 2) for i in range(len(keypoints)): x, y = keypoints[i][0], keypoints[i][1] - cv2.circle(img, (int(x), int(y)), 2, CocoColors[i % len(CocoColors)], -1) + cv2.circle(img, (int(x), int(y)), 2, [255, 0, 0], -1) + + +def draw_pose_infinity_coco(keypoints, img): + """draw the keypoints and the skeletons. 
+ :params keypoints: the shape should be equal to [17,2] + :params img: + """ + assert keypoints.shape == (NUM_KPTS_INFINITY_COCO, 2) + for i in range(len(keypoints)): + x, y = keypoints[i][0], keypoints[i][1] + if i < NUM_KPTS: + cv2.circle(img, (int(x), int(y)), 2, [0, 0, 255], -1) + else: + cv2.circle(img, (int(x), int(y)), 2, [255, 0, 0], -1) def draw_bbox(box, img): @@ -505,7 +520,9 @@ def main(): ) if len(pose_preds) >= 1: for kpt in pose_preds: - if len(kpt) == 41: + if len(kpt) == 58: + draw_pose_infinity_coco(kpt, image_bgr) + elif len(kpt) == 41: draw_pose_infinity(kpt, image_bgr) else: draw_pose(kpt, image_bgr) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index bbe5ed5b..b0046bb7 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -4,7 +4,7 @@ CUDNN: DETERMINISTIC: false ENABLED: true DATA_DIR: "" -GPUS: (0, 1) +GPUS: (0,) OUTPUT_DIR: "output_infinity_coco" LOG_DIR: "log_infinity_coco" WORKERS: 2 @@ -91,7 +91,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 20 + BATCH_SIZE_PER_GPU: 40 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -112,7 +112,7 @@ TEST: BBOX_THRE: 1.0 IMAGE_THRE: 0.0 IN_VIS_THRE: 0.2 - MODEL_FILE: "models/pytorch/infinity_coco/final_state.pth" + MODEL_FILE: "output_infinity_coco/infinity_coco/pose_hrnet/w48_384x288_adam_lr1e-3/model_best.pth" NMS_THRE: 1.0 OKS_THRE: 0.9 USE_GT_BBOX: true From 9ffcaf2e11b790b070872da47268afee4faa1f28 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 1 May 2023 15:51:04 -0700 Subject: [PATCH 11/30] added frozen layers --- demo/demo.py | 4 +- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 18 +- lib/core/function.py | 206 +++++++++--------- tools/test.py | 109 ++++----- 4 files changed, 169 insertions(+), 168 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index 6f186370..c9f0e09b 100644 --- 
a/demo/demo.py +++ b/demo/demo.py @@ -460,7 +460,9 @@ def main(): ) if len(pose_preds) >= 1: for kpt in pose_preds: - if len(kpt) == 41: + if len(kpt) == 58: + draw_pose_infinity_coco(kpt, image_bgr) + elif len(kpt) == 41: draw_pose_infinity(kpt, image_bgr) else: draw_pose(kpt, image_bgr) # draw the poses diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index b0046bb7..caf1be40 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -5,8 +5,8 @@ CUDNN: ENABLED: true DATA_DIR: "" GPUS: (0,) -OUTPUT_DIR: "output_infinity_coco" -LOG_DIR: "log_infinity_coco" +OUTPUT_DIR: "output_infinity_coco_frozen" +LOG_DIR: "log_infinity_coco_frozen" WORKERS: 2 PRINT_FREQ: 10 @@ -36,6 +36,18 @@ MODEL: - 96 SIGMA: 3 EXTRA: + FREEZE_LAYERS: true + FROZEN_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" PRETRAINED_LAYERS: - "conv1" - "bn1" @@ -91,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 40 + BATCH_SIZE_PER_GPU: 2 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 diff --git a/lib/core/function.py b/lib/core/function.py index 1bc19daa..fce6fa1b 100755 --- a/lib/core/function.py +++ b/lib/core/function.py @@ -4,28 +4,33 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time +from __future__ import absolute_import, division, print_function + import logging import os +import time import numpy as np import torch - from core.evaluate import accuracy from core.inference import get_final_preds from utils.transforms import flip_back from utils.vis import save_debug_images - logger = 
logging.getLogger(__name__) -def train(config, train_loader, model, criterion, optimizer, epoch, - output_dir, tb_log_dir, writer_dict): +def train( + config, + train_loader, + model, + criterion, + optimizer, + epoch, + output_dir, + tb_log_dir, + writer_dict, +): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() @@ -33,6 +38,12 @@ def train(config, train_loader, model, criterion, optimizer, epoch, # switch to train mode model.train() + # freeze specified layers + extra = config.MODEL.EXTRA + if "FREEZE_LAYERS" in extra and extra["FREEZE_LAYERS"]: + frozen_layers = extra.FROZEN_LAYERS + for layer in frozen_layers: + eval("model.module." + layer + ".requires_grad_(False)") end = time.time() for i, (input, target, target_weight, meta) in enumerate(train_loader): @@ -63,8 +74,9 @@ def train(config, train_loader, model, criterion, optimizer, epoch, # measure accuracy and record loss losses.update(loss.item(), input.size(0)) - _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), - target.detach().cpu().numpy()) + _, avg_acc, cnt, pred = accuracy( + output.detach().cpu().numpy(), target.detach().cpu().numpy() + ) acc.update(avg_acc, cnt) # measure elapsed time @@ -72,30 +84,45 @@ def train(config, train_loader, model, criterion, optimizer, epoch, end = time.time() if i % config.PRINT_FREQ == 0: - msg = 'Epoch: [{0}][{1}/{2}]\t' \ - 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ - 'Speed {speed:.1f} samples/s\t' \ - 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ - 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ - 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( - epoch, i, len(train_loader), batch_time=batch_time, - speed=input.size(0)/batch_time.val, - data_time=data_time, loss=losses, acc=acc) + msg = ( + "Epoch: [{0}][{1}/{2}]\t" + "Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t" + "Speed {speed:.1f} samples/s\t" + "Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t" + "Loss {loss.val:.5f} ({loss.avg:.5f})\t" 
+ "Accuracy {acc.val:.3f} ({acc.avg:.3f})".format( + epoch, + i, + len(train_loader), + batch_time=batch_time, + speed=input.size(0) / batch_time.val, + data_time=data_time, + loss=losses, + acc=acc, + ) + ) logger.info(msg) - writer = writer_dict['writer'] - global_steps = writer_dict['train_global_steps'] - writer.add_scalar('train_loss', losses.val, global_steps) - writer.add_scalar('train_acc', acc.val, global_steps) - writer_dict['train_global_steps'] = global_steps + 1 - - prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) - save_debug_images(config, input, meta, target, pred*4, output, - prefix) - - -def validate(config, val_loader, val_dataset, model, criterion, output_dir, - tb_log_dir, writer_dict=None): + writer = writer_dict["writer"] + global_steps = writer_dict["train_global_steps"] + writer.add_scalar("train_loss", losses.val, global_steps) + writer.add_scalar("train_acc", acc.val, global_steps) + writer_dict["train_global_steps"] = global_steps + 1 + + prefix = "{}_{}".format(os.path.join(output_dir, "train"), i) + save_debug_images(config, input, meta, target, pred * 4, output, prefix) + + +def validate( + config, + val_loader, + val_dataset, + model, + criterion, + output_dir, + tb_log_dir, + writer_dict=None, +): batch_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() @@ -104,10 +131,7 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir, model.eval() num_samples = len(val_dataset) - all_preds = np.zeros( - (num_samples, config.MODEL.NUM_JOINTS, 3), - dtype=np.float32 - ) + all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 6)) image_path = [] filenames = [] @@ -132,15 +156,14 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir, else: output_flipped = outputs_flipped - output_flipped = flip_back(output_flipped.cpu().numpy(), - val_dataset.flip_pairs) + output_flipped = flip_back( + 
output_flipped.cpu().numpy(), val_dataset.flip_pairs + ) output_flipped = torch.from_numpy(output_flipped.copy()).cuda() - # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: - output_flipped[:, :, :, 1:] = \ - output_flipped.clone()[:, :, :, 0:-1] + output_flipped[:, :, :, 1:] = output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 @@ -152,8 +175,7 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir, num_images = input.size(0) # measure accuracy and record loss losses.update(loss.item(), num_images) - _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), - target.cpu().numpy()) + _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), target.cpu().numpy()) acc.update(avg_acc, cnt) @@ -161,42 +183,39 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir, batch_time.update(time.time() - end) end = time.time() - c = meta['center'].numpy() - s = meta['scale'].numpy() - score = meta['score'].numpy() + c = meta["center"].numpy() + s = meta["scale"].numpy() + score = meta["score"].numpy() - preds, maxvals = get_final_preds( - config, output.clone().cpu().numpy(), c, s) + preds, maxvals = get_final_preds(config, output.clone().cpu().numpy(), c, s) - all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] - all_preds[idx:idx + num_images, :, 2:3] = maxvals + all_preds[idx : idx + num_images, :, 0:2] = preds[:, :, 0:2] + all_preds[idx : idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts - all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] - all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] - all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) - all_boxes[idx:idx + num_images, 5] = score - image_path.extend(meta['image']) + all_boxes[idx : idx + num_images, 0:2] = c[:, 0:2] + all_boxes[idx : idx + num_images, 2:4] = s[:, 0:2] + all_boxes[idx : idx + num_images, 4] = np.prod(s * 200, 1) + all_boxes[idx : idx + num_images, 5] = score 
+ image_path.extend(meta["image"]) idx += num_images if i % config.PRINT_FREQ == 0: - msg = 'Test: [{0}/{1}]\t' \ - 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ - 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ - 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( - i, len(val_loader), batch_time=batch_time, - loss=losses, acc=acc) + msg = ( + "Test: [{0}/{1}]\t" + "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" + "Loss {loss.val:.4f} ({loss.avg:.4f})\t" + "Accuracy {acc.val:.3f} ({acc.avg:.3f})".format( + i, len(val_loader), batch_time=batch_time, loss=losses, acc=acc + ) + ) logger.info(msg) - prefix = '{}_{}'.format( - os.path.join(output_dir, 'val'), i - ) - save_debug_images(config, input, meta, target, pred*4, output, - prefix) + prefix = "{}_{}".format(os.path.join(output_dir, "val"), i) + save_debug_images(config, input, meta, target, pred * 4, output, prefix) name_values, perf_indicator = val_dataset.evaluate( - config, all_preds, output_dir, all_boxes, image_path, - filenames, imgnums + config, all_preds, output_dir, all_boxes, image_path, filenames, imgnums ) model_name = config.MODEL.NAME @@ -207,32 +226,16 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir, _print_name_value(name_values, model_name) if writer_dict: - writer = writer_dict['writer'] - global_steps = writer_dict['valid_global_steps'] - writer.add_scalar( - 'valid_loss', - losses.avg, - global_steps - ) - writer.add_scalar( - 'valid_acc', - acc.avg, - global_steps - ) + writer = writer_dict["writer"] + global_steps = writer_dict["valid_global_steps"] + writer.add_scalar("valid_loss", losses.avg, global_steps) + writer.add_scalar("valid_acc", acc.avg, global_steps) if isinstance(name_values, list): for name_value in name_values: - writer.add_scalars( - 'valid', - dict(name_value), - global_steps - ) + writer.add_scalars("valid", dict(name_value), global_steps) else: - writer.add_scalars( - 'valid', - dict(name_values), - global_steps - ) - 
writer_dict['valid_global_steps'] = global_steps + 1 + writer.add_scalars("valid", dict(name_values), global_steps) + writer_dict["valid_global_steps"] = global_steps + 1 return perf_indicator @@ -242,24 +245,23 @@ def _print_name_value(name_value, full_arch_name): names = name_value.keys() values = name_value.values() num_values = len(name_value) - logger.info( - '| Arch ' + - ' '.join(['| {}'.format(name) for name in names]) + - ' |' - ) - logger.info('|---' * (num_values+1) + '|') + logger.info("| Arch " + " ".join(["| {}".format(name) for name in names]) + " |") + logger.info("|---" * (num_values + 1) + "|") if len(full_arch_name) > 15: - full_arch_name = full_arch_name[:8] + '...' + full_arch_name = full_arch_name[:8] + "..." logger.info( - '| ' + full_arch_name + ' ' + - ' '.join(['| {:.3f}'.format(value) for value in values]) + - ' |' + "| " + + full_arch_name + + " " + + " ".join(["| {:.3f}".format(value) for value in values]) + + " |" ) class AverageMeter(object): """Computes and stores the average and current value""" + def __init__(self): self.reset() diff --git a/tools/test.py b/tools/test.py index cfa333f5..8ed87595 100755 --- a/tools/test.py +++ b/tools/test.py @@ -5,62 +5,49 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import argparse import os import pprint +import _init_paths +import dataset import torch -import torch.nn.parallel import torch.backends.cudnn as cudnn +import torch.nn.parallel import torch.optim import torch.utils.data import torch.utils.data.distributed import torchvision.transforms as transforms - -import _init_paths -from config import cfg -from config import update_config -from core.loss import JointsMSELoss +from config import cfg, update_config from core.function 
import validate +from core.loss import JointsMSELoss from utils.utils import create_logger -import dataset import models def parse_args(): - parser = argparse.ArgumentParser(description='Train keypoints network') + parser = argparse.ArgumentParser(description="Train keypoints network") # general - parser.add_argument('--cfg', - help='experiment configure file name', - required=True, - type=str) - - parser.add_argument('opts', - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER) - - parser.add_argument('--modelDir', - help='model directory', - type=str, - default='') - parser.add_argument('--logDir', - help='log directory', - type=str, - default='') - parser.add_argument('--dataDir', - help='data directory', - type=str, - default='') - parser.add_argument('--prevModelDir', - help='prev Model directory', - type=str, - default='') + parser.add_argument( + "--cfg", help="experiment configure file name", required=True, type=str + ) + + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + + parser.add_argument("--modelDir", help="model directory", type=str, default="") + parser.add_argument("--logDir", help="log directory", type=str, default="") + parser.add_argument("--dataDir", help="data directory", type=str, default="") + parser.add_argument( + "--prevModelDir", help="prev Model directory", type=str, default="" + ) args = parser.parse_args() return args @@ -70,8 +57,7 @@ def main(): args = parse_args() update_config(cfg, args) - logger, final_output_dir, tb_log_dir = create_logger( - cfg, args.cfg, 'valid') + logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, "valid") logger.info(pprint.pformat(args)) logger.info(cfg) @@ -81,50 +67,49 @@ def main(): torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED - model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( - cfg, 
is_train=False - ) + model = eval("models." + cfg.MODEL.NAME + ".get_pose_net")(cfg, is_train=False) if cfg.TEST.MODEL_FILE: - logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + logger.info("=> loading model from {}".format(cfg.TEST.MODEL_FILE)) model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) else: - model_state_file = os.path.join( - final_output_dir, 'final_state.pth' - ) - logger.info('=> loading model from {}'.format(model_state_file)) + model_state_file = os.path.join(final_output_dir, "final_state.pth") + logger.info("=> loading model from {}".format(model_state_file)) model.load_state_dict(torch.load(model_state_file)) model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() - # define loss function (criterion) and optimizer - criterion = JointsMSELoss( - use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT - ).cuda() + criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda() # Data loading code normalize = transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) - valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( - cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, - transforms.Compose([ - transforms.ToTensor(), - normalize, - ]) + valid_dataset = eval("dataset." 
+ cfg.DATASET.DATASET)( + cfg, + cfg.DATASET.ROOT, + cfg.DATASET.TEST_SET, + False, + transforms.Compose( + [ + transforms.ToTensor(), + normalize, + ] + ), ) valid_loader = torch.utils.data.DataLoader( valid_dataset, - batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), + batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS), shuffle=False, num_workers=cfg.WORKERS, - pin_memory=True + pin_memory=True, ) # evaluate on validation set - validate(cfg, valid_loader, valid_dataset, model, criterion, - final_output_dir, tb_log_dir) + validate( + cfg, valid_loader, valid_dataset, model, criterion, final_output_dir, tb_log_dir + ) -if __name__ == '__main__': +if __name__ == "__main__": main() From a7cca3b9bb3200d97d0d5cb85a574b56de43db75 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 1 May 2023 16:01:15 -0700 Subject: [PATCH 12/30] changed parameters for sherlock --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index caf1be40..44bdd9cb 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -103,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 2 + BATCH_SIZE_PER_GPU: 40 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 From 24614636ab9264b3e2b53bd61571014b10fba2e4 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 17 May 2023 08:22:03 -0700 Subject: [PATCH 13/30] changed coco dataset to train --- .gitignore | 6 +- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 6 +- lib/dataset/JointsDataset.py | 137 +++++++++--------- lib/dataset/coco.py | 4 +- lib/dataset/infinity_coco.py | 1 - 5 files changed, 75 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index 42b5d470..695e68e6 100644 --- a/.gitignore +++ b/.gitignore @@ -84,11 +84,9 @@ target/ 
lib/pycocotools/_mask.c lib/nms/cpu_nms.c -output/* -output_infinity_coco/* +outputs/* models/* -log/* -log_infinity_coco/* +logs/* data/* external/ diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 44bdd9cb..f3776f0f 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -5,8 +5,8 @@ CUDNN: ENABLED: true DATA_DIR: "" GPUS: (0,) -OUTPUT_DIR: "output_infinity_coco_frozen" -LOG_DIR: "log_infinity_coco_frozen" +OUTPUT_DIR: "outputs/output_infinity_coco_frozen" +LOG_DIR: "logs/log_infinity_coco_frozen" WORKERS: 2 PRINT_FREQ: 10 @@ -124,7 +124,7 @@ TEST: BBOX_THRE: 1.0 IMAGE_THRE: 0.0 IN_VIS_THRE: 0.2 - MODEL_FILE: "output_infinity_coco/infinity_coco/pose_hrnet/w48_384x288_adam_lr1e-3/model_best.pth" + MODEL_FILE: "outputs/output_infinity_coco/infinity_coco/pose_hrnet/w48_384x288_adam_lr1e-3/model_best.pth" NMS_THRE: 1.0 OKS_THRE: 0.9 USE_GT_BBOX: true diff --git a/lib/dataset/JointsDataset.py b/lib/dataset/JointsDataset.py index 5a8cc3a7..643c266c 100755 --- a/lib/dataset/JointsDataset.py +++ b/lib/dataset/JointsDataset.py @@ -4,9 +4,7 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import copy import logging @@ -16,11 +14,7 @@ import numpy as np import torch from torch.utils.data import Dataset - -from utils.transforms import get_affine_transform -from utils.transforms import affine_transform -from utils.transforms import fliplr_joints - +from utils.transforms import affine_transform, fliplr_joints, get_affine_transform logger = logging.getLogger(__name__) @@ -75,8 +69,7 @@ def half_body_transform(self, joints, joints_vis): if 
np.random.randn() < 0.5 and len(upper_joints) > 2: selected_joints = upper_joints else: - selected_joints = lower_joints \ - if len(lower_joints) > 2 else upper_joints + selected_joints = lower_joints if len(lower_joints) > 2 else upper_joints if len(selected_joints) < 2: return None, None @@ -96,29 +89,28 @@ def half_body_transform(self, joints, joints_vis): w = h * self.aspect_ratio scale = np.array( - [ - w * 1.0 / self.pixel_std, - h * 1.0 / self.pixel_std - ], - dtype=np.float32 + [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], dtype=np.float32 ) scale = scale * 1.5 return center, scale - def __len__(self,): + def __len__( + self, + ): return len(self.db) def __getitem__(self, idx): db_rec = copy.deepcopy(self.db[idx]) - image_file = db_rec['image'] - filename = db_rec['filename'] if 'filename' in db_rec else '' - imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' + image_file = db_rec["image"] + filename = db_rec["filename"] if "filename" in db_rec else "" + imgnum = db_rec["imgnum"] if "imgnum" in db_rec else "" - if self.data_format == 'zip': + if self.data_format == "zip": from utils import zipreader + data_numpy = zipreader.imread( image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION ) @@ -131,37 +123,41 @@ def __getitem__(self, idx): data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) if data_numpy is None: - logger.error('=> fail to read {}'.format(image_file)) - raise ValueError('Fail to read {}'.format(image_file)) + logger.error("=> fail to read {}".format(image_file)) + raise ValueError("Fail to read {}".format(image_file)) - joints = db_rec['joints_3d'] - joints_vis = db_rec['joints_3d_vis'] + joints = db_rec["joints_3d"] + joints_vis = db_rec["joints_3d_vis"] - c = db_rec['center'] - s = db_rec['scale'] - score = db_rec['score'] if 'score' in db_rec else 1 + c = db_rec["center"] + s = db_rec["scale"] + score = db_rec["score"] if "score" in db_rec else 1 r = 0 if self.is_train: - if (np.sum(joints_vis[:, 0]) > 
self.num_joints_half_body - and np.random.rand() < self.prob_half_body): - c_half_body, s_half_body = self.half_body_transform( - joints, joints_vis - ) + if ( + np.sum(joints_vis[:, 0]) > self.num_joints_half_body + and np.random.rand() < self.prob_half_body + ): + c_half_body, s_half_body = self.half_body_transform(joints, joints_vis) if c_half_body is not None and s_half_body is not None: c, s = c_half_body, s_half_body sf = self.scale_factor rf = self.rotation_factor - s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) - r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ - if random.random() <= 0.6 else 0 + s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) + r = ( + np.clip(np.random.randn() * rf, -rf * 2, rf * 2) + if random.random() <= 0.6 + else 0 + ) if self.flip and random.random() <= 0.5: data_numpy = data_numpy[:, ::-1, :] joints, joints_vis = fliplr_joints( - joints, joints_vis, data_numpy.shape[1], self.flip_pairs) + joints, joints_vis, data_numpy.shape[1], self.flip_pairs + ) c[0] = data_numpy.shape[1] - c[0] - 1 trans = get_affine_transform(c, s, r, self.image_size) @@ -169,7 +165,8 @@ def __getitem__(self, idx): data_numpy, trans, (int(self.image_size[0]), int(self.image_size[1])), - flags=cv2.INTER_LINEAR) + flags=cv2.INTER_LINEAR, + ) if self.transform: input = self.transform(input) @@ -184,15 +181,15 @@ def __getitem__(self, idx): target_weight = torch.from_numpy(target_weight) meta = { - 'image': image_file, - 'filename': filename, - 'imgnum': imgnum, - 'joints': joints, - 'joints_vis': joints_vis, - 'center': c, - 'scale': s, - 'rotation': r, - 'score': score + "image": image_file, + "filename": filename, + "imgnum": imgnum, + "joints": joints, + "joints_vis": joints_vis, + "center": c, + "scale": s, + "rotation": r, + "score": score, } return input, target, target_weight, meta @@ -203,8 +200,7 @@ def select_data(self, db): num_vis = 0 joints_x = 0.0 joints_y = 0.0 - for joint, joint_vis in zip( - rec['joints_3d'], 
rec['joints_3d_vis']): + for joint, joint_vis in zip(rec["joints_3d"], rec["joints_3d_vis"]): if joint_vis[0] <= 0: continue num_vis += 1 @@ -216,40 +212,38 @@ def select_data(self, db): joints_x, joints_y = joints_x / num_vis, joints_y / num_vis - area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) + area = rec["scale"][0] * rec["scale"][1] * (self.pixel_std**2) joints_center = np.array([joints_x, joints_y]) - bbox_center = np.array(rec['center']) - diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) - ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) + bbox_center = np.array(rec["center"]) + diff_norm2 = np.linalg.norm((joints_center - bbox_center), 2) + ks = np.exp(-1.0 * (diff_norm2**2) / ((0.2) ** 2 * 2.0 * area)) metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 if ks > metric: db_selected.append(rec) - logger.info('=> num db: {}'.format(len(db))) - logger.info('=> num selected db: {}'.format(len(db_selected))) + logger.info("=> num db: {}".format(len(db))) + logger.info("=> num selected db: {}".format(len(db_selected))) return db_selected def generate_target(self, joints, joints_vis): - ''' + """ :param joints: [num_joints, 3] :param joints_vis: [num_joints, 3] :return: target, target_weight(1: visible, 0: invisible) - ''' + """ target_weight = np.ones((self.num_joints, 1), dtype=np.float32) target_weight[:, 0] = joints_vis[:, 0] - assert self.target_type == 'gaussian', \ - 'Only support gaussian map now!' + assert self.target_type == "gaussian", "Only support gaussian map now!" 
- if self.target_type == 'gaussian': - target = np.zeros((self.num_joints, - self.heatmap_size[1], - self.heatmap_size[0]), - dtype=np.float32) + if self.target_type == "gaussian": + target = np.zeros( + (self.num_joints, self.heatmap_size[1], self.heatmap_size[0]), + dtype=np.float32, + ) tmp_size = self.sigma * 3 - for joint_id in range(self.num_joints): feat_stride = self.image_size / self.heatmap_size mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) @@ -257,8 +251,12 @@ def generate_target(self, joints, joints_vis): # Check that any part of the gaussian is in-bounds ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] - if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ - or br[0] < 0 or br[1] < 0: + if ( + ul[0] >= self.heatmap_size[0] + or ul[1] >= self.heatmap_size[1] + or br[0] < 0 + or br[1] < 0 + ): # If not, just return the image as is target_weight[joint_id] = 0 continue @@ -269,7 +267,7 @@ def generate_target(self, joints, joints_vis): y = x[:, np.newaxis] x0 = y0 = size // 2 # The gaussian is not normalized, we want the center value to equal 1 - g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) + g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma**2)) # Usable gaussian range g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] @@ -280,8 +278,9 @@ def generate_target(self, joints, joints_vis): v = target_weight[joint_id] if v > 0.5: - target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ - g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + target[joint_id][img_y[0] : img_y[1], img_x[0] : img_x[1]] = g[ + g_y[0] : g_y[1], g_x[0] : g_x[1] + ] if self.use_different_joints_weight: target_weight = np.multiply(target_weight, self.joints_weight) diff --git a/lib/dataset/coco.py b/lib/dataset/coco.py index 23a3e9d1..85ee7aae 100755 --- a/lib/dataset/coco.py +++ b/lib/dataset/coco.py @@ -49,8 +49,8 @@ class COCODataset(JointsDataset): def __init__(self, 
cfg, root, image_set, is_train, transform=None, infinity=False): super().__init__(cfg, root, image_set, is_train, transform) if infinity: - self.root = "data/coco/" - self.image_set = "val2017" + self.root = "$SCRATCH/coco_dataset/" + self.image_set = "train2017" self.nms_thre = cfg.TEST.NMS_THRE self.image_thre = cfg.TEST.IMAGE_THRE diff --git a/lib/dataset/infinity_coco.py b/lib/dataset/infinity_coco.py index 89ca49d1..884fa547 100644 --- a/lib/dataset/infinity_coco.py +++ b/lib/dataset/infinity_coco.py @@ -314,7 +314,6 @@ def _load_coco_keypoint_annotation_kernal(self, index, index_coco): joints_3d_vis[self.num_joints_coco + ipt, 0] = t_vis joints_3d_vis[self.num_joints_coco + ipt, 1] = t_vis joints_3d_vis[self.num_joints_coco + ipt, 2] = 0 - center, scale = self._box2cs(obj["clean_bbox"][:4]) rec.append( { From e93c4549c4b52206db263370869931383d1637de Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 17 May 2023 16:14:15 -0700 Subject: [PATCH 14/30] add coco infinity ratio --- .../coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 90 +++++++++---------- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 10 +-- lib/dataset/infinity_coco.py | 55 +++++++----- 3 files changed, 83 insertions(+), 72 deletions(-) diff --git a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 0ffcd6d2..589a8dfd 100644 --- a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -3,90 +3,90 @@ CUDNN: BENCHMARK: true DETERMINISTIC: false ENABLED: true -DATA_DIR: '' +DATA_DIR: "" GPUS: (0,) -OUTPUT_DIR: 'output' -LOG_DIR: 'log' +OUTPUT_DIR: "output" +LOG_DIR: "log" WORKERS: 12 PRINT_FREQ: 10 DATASET: COLOR_RGB: true - DATASET: 'coco' + DATASET: "coco" DATA_FORMAT: jpg FLIP: true NUM_JOINTS_HALF_BODY: 8 PROB_HALF_BODY: 0.3 - ROOT: 'data/coco/' + ROOT: "data/coco/" ROT_FACTOR: 45 SCALE_FACTOR: 0.35 - TEST_SET: 'val2017' - TRAIN_SET: 'train2017' + TEST_SET: "val2017" + TRAIN_SET: 
"train2017" MODEL: INIT_WEIGHTS: true NAME: pose_hrnet NUM_JOINTS: 17 - PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' + PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" TARGET_TYPE: gaussian IMAGE_SIZE: - - 288 - - 384 + - 288 + - 384 HEATMAP_SIZE: - - 72 - - 96 + - 72 + - 96 SIGMA: 3 EXTRA: PRETRAINED_LAYERS: - - 'conv1' - - 'bn1' - - 'conv2' - - 'bn2' - - 'layer1' - - 'transition1' - - 'stage2' - - 'transition2' - - 'stage3' - - 'transition3' - - 'stage4' + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + - "stage4" FINAL_CONV_KERNEL: 1 STAGE2: NUM_MODULES: 1 NUM_BRANCHES: 2 BLOCK: BASIC NUM_BLOCKS: - - 4 - - 4 + - 4 + - 4 NUM_CHANNELS: - - 48 - - 96 + - 48 + - 96 FUSE_METHOD: SUM STAGE3: NUM_MODULES: 4 NUM_BRANCHES: 3 BLOCK: BASIC NUM_BLOCKS: - - 4 - - 4 - - 4 + - 4 + - 4 + - 4 NUM_CHANNELS: - - 48 - - 96 - - 192 + - 48 + - 96 + - 192 FUSE_METHOD: SUM STAGE4: NUM_MODULES: 3 NUM_BRANCHES: 4 BLOCK: BASIC NUM_BLOCKS: - - 4 - - 4 - - 4 - - 4 + - 4 + - 4 + - 4 + - 4 NUM_CHANNELS: - - 48 - - 96 - - 192 - - 384 + - 48 + - 96 + - 192 + - 384 FUSE_METHOD: SUM LOSS: USE_TARGET_WEIGHT: true @@ -99,8 +99,8 @@ TRAIN: LR: 0.001 LR_FACTOR: 0.1 LR_STEP: - - 170 - - 200 + - 170 + - 200 WD: 0.0001 GAMMA1: 0.99 GAMMA2: 0.0 @@ -108,11 +108,11 @@ TRAIN: NESTEROV: false TEST: BATCH_SIZE_PER_GPU: 24 - COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 IN_VIS_THRE: 0.2 - MODEL_FILE: 'models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth' + MODEL_FILE: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" NMS_THRE: 1.0 OKS_THRE: 0.9 USE_GT_BBOX: true diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 
f3776f0f..c3aa5592 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -5,8 +5,8 @@ CUDNN: ENABLED: true DATA_DIR: "" GPUS: (0,) -OUTPUT_DIR: "outputs/output_infinity_coco_frozen" -LOG_DIR: "logs/log_infinity_coco_frozen" +OUTPUT_DIR: "outputs/output_infinity_coco_unfrozen" +LOG_DIR: "logs/log_infinity_coco_unfrozen" WORKERS: 2 PRINT_FREQ: 10 @@ -36,7 +36,7 @@ MODEL: - 96 SIGMA: 3 EXTRA: - FREEZE_LAYERS: true + FREEZE_LAYERS: false FROZEN_LAYERS: - "conv1" - "bn1" @@ -103,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 40 + BATCH_SIZE_PER_GPU: 128 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -119,7 +119,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 10 + BATCH_SIZE_PER_GPU: 128 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 diff --git a/lib/dataset/infinity_coco.py b/lib/dataset/infinity_coco.py index 884fa547..7fe357e1 100644 --- a/lib/dataset/infinity_coco.py +++ b/lib/dataset/infinity_coco.py @@ -58,6 +58,7 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): self.use_gt_bbox = cfg.TEST.USE_GT_BBOX self.image_width = cfg.MODEL.IMAGE_SIZE[0] self.image_height = cfg.MODEL.IMAGE_SIZE[1] + self.coco_infinity_ratio = 10 self.aspect_ratio = self.image_width * 1.0 / self.image_height self.pixel_std = 200 @@ -242,14 +243,24 @@ def _get_db(self): def _load_coco_keypoint_annotations(self): """ground truth bbox and keypoints""" gt_db = [] - for index, index_coco in zip( + indices_coco = [] + iter_coco = iter(self.coco_dataset.image_set_index) + batch_indices_coco = [ + [next(iter_coco) for _ in range(self.coco_infinity_ratio)] + for _ in range(len(self.image_set_index)) + ] + for index, indices_coco in zip( self.image_set_index, - self.coco_dataset.image_set_index[: len(self.image_set_index)], + batch_indices_coco, ): - 
gt_db.extend(self._load_coco_keypoint_annotation_kernal(index, index_coco)) + gt_db.extend( + self._load_coco_keypoint_annotation_kernal(index, indices_coco) + ) return gt_db - def _load_coco_keypoint_annotation_kernal(self, index, index_coco): + def _load_coco_keypoint_annotation_kernal( + self, index: int, indices_coco: list[int] + ): """ coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] iscrowd: @@ -326,26 +337,26 @@ def _load_coco_keypoint_annotation_kernal(self, index, index_coco): "imgnum": 0, } ) - - rec_coco = self.coco_dataset._load_coco_keypoint_annotation_kernal(index_coco) - for r in rec_coco: - joints_3d_coco = r["joints_3d"] - joints_3d_vis_coco = r["joints_3d_vis"] - joints_3d_coco = np.vstack( - ( - joints_3d_coco, - np.zeros((self.num_joints_infinity, 3), dtype=np.float32), + for index_coco in indices_coco: + rec_coco = self.coco_dataset._load_coco_keypoint_annotation_kernal(index_coco) + for r in rec_coco: + joints_3d_coco = r["joints_3d"] + joints_3d_vis_coco = r["joints_3d_vis"] + joints_3d_coco = np.vstack( + ( + joints_3d_coco, + np.zeros((self.num_joints_infinity, 3), dtype=np.float32), + ) ) - ) - joints_3d_vis_coco = np.vstack( - ( - joints_3d_vis_coco, - np.zeros((self.num_joints_infinity, 3), dtype=np.float32), + joints_3d_vis_coco = np.vstack( + ( + joints_3d_vis_coco, + np.zeros((self.num_joints_infinity, 3), dtype=np.float32), + ) ) - ) - r["joints_3d"] = joints_3d_coco - r["joints_3d_vis"] = joints_3d_vis_coco - rec.append(r) + r["joints_3d"] = joints_3d_coco + r["joints_3d_vis"] = joints_3d_vis_coco + rec.append(r) return rec From c684ff5818f2c468375cdf525ed8f76f13db5940 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 17 May 2023 16:18:33 -0700 Subject: [PATCH 15/30] reduce batch size --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml 
b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index c3aa5592..b263a3e5 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -103,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 128 + BATCH_SIZE_PER_GPU: 64 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -119,7 +119,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 128 + BATCH_SIZE_PER_GPU: 64 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 From 63e11c3a0b9f1004713af4d4f8e6d99a00b6790d Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 17 May 2023 16:22:33 -0700 Subject: [PATCH 16/30] reduce batch size --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index b263a3e5..720a34dc 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -103,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 64 + BATCH_SIZE_PER_GPU: 48 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -119,7 +119,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 64 + BATCH_SIZE_PER_GPU: 48 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 From 23f77d5250f8f71cf998d7f71836b603ca91e313 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 17 May 2023 16:26:31 -0700 Subject: [PATCH 17/30] reduce batch size --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml 
b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 720a34dc..f0f34d27 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -103,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 48 + BATCH_SIZE_PER_GPU: 40 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -119,7 +119,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 48 + BATCH_SIZE_PER_GPU: 40 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 From 9920c5a435133705ead2b1eefdfa3e5d212cb02c Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 17 May 2023 19:28:49 -0700 Subject: [PATCH 18/30] reduced batch size --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index f0f34d27..b56522db 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -103,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 40 + BATCH_SIZE_PER_GPU: 8 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -119,7 +119,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 40 + BATCH_SIZE_PER_GPU: 8 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 From c206be059c3dc910e61ef7dc4bf3f004c199e51b Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 17 May 2023 19:49:19 -0700 Subject: [PATCH 19/30] increase batch size --- experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml 
b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index b56522db..89125512 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -103,7 +103,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 8 + BATCH_SIZE_PER_GPU: 32 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -119,7 +119,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 8 + BATCH_SIZE_PER_GPU: 32 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 From bb7ac7b3d94d31570a179f0d7299b9c05b39f768 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 22 May 2023 16:11:11 -0700 Subject: [PATCH 20/30] add fina layer pretraining --- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 4 + .../hrnet/w48_384x288_adam_lr1e-3_local.yaml | 142 ++++++++++ lib/config/default.py | 59 ++-- lib/dataset/coco.py | 4 +- lib/dataset/infinity_coco.py | 6 +- lib/models/pose_hrnet.py | 261 ++++++++++-------- 6 files changed, 324 insertions(+), 152 deletions(-) create mode 100644 experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 89125512..d6c4e525 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -18,10 +18,13 @@ DATASET: NUM_JOINTS_HALF_BODY: 27 PROB_HALF_BODY: 0.3 ROOT: "../infinity_dataset_combined" + ROOT_COCO: "/scratch/users/yonigoz/coco_dataset/" ROT_FACTOR: 45 SCALE_FACTOR: 0.35 TEST_SET: "test" TRAIN_SET: "train" + TRAIN_SET_COCO: "train2017" + COCO_INFINITY_RATIO: 10 MODEL: INIT_WEIGHTS: true NAME: pose_hrnet @@ -60,6 +63,7 @@ MODEL: - "stage3" - "transition3" - "stage4" + PRETRAIN_FINAL_LAYER: true FINAL_CONV_KERNEL: 1 STAGE2: NUM_MODULES: 1 diff --git 
a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml new file mode 100644 index 00000000..295baef1 --- /dev/null +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: "" +GPUS: (0,) +OUTPUT_DIR: "outputs/output_infinity_coco_local" +LOG_DIR: "logs/log_infinity_coco_local" +WORKERS: 2 +PRINT_FREQ: 10 + +DATASET: + COLOR_RGB: true + DATASET: "infinity_coco" + DATA_FORMAT: png + FLIP: true + NUM_JOINTS_HALF_BODY: 27 + PROB_HALF_BODY: 0.3 + ROOT: "../infinity_dataset_combined" + ROOT_COCO: "data/coco/" + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + TEST_SET: "test" + TRAIN_SET: "train" + TRAIN_SET_COCO: "val2017" + COCO_INFINITY_RATIO: 1 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 58 + PRETRAINED: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + EXTRA: + FREEZE_LAYERS: false + FROZEN_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + PRETRAINED_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + - "stage4" + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + - 384 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 2 + SHUFFLE: true + 
BEGIN_EPOCH: 0 + END_EPOCH: 200 + OPTIMIZER: adam + LR: 0.00001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 10 + COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: "outputs/output_infinity_coco/infinity_coco/pose_hrnet/w48_384x288_adam_lr1e-3/model_best.pth" + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/lib/config/default.py b/lib/config/default.py index 72d3faf3..ee2cf061 100644 --- a/lib/config/default.py +++ b/lib/config/default.py @@ -1,24 +1,20 @@ - # ------------------------------------------------------------------------------ # Copyright (c) Microsoft # Licensed under the MIT License. 
# Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import os from yacs.config import CfgNode as CN - _C = CN() -_C.OUTPUT_DIR = '' -_C.LOG_DIR = '' -_C.DATA_DIR = '' +_C.OUTPUT_DIR = "" +_C.LOG_DIR = "" +_C.DATA_DIR = "" _C.GPUS = (0,) _C.WORKERS = 4 _C.PRINT_FREQ = 20 @@ -34,12 +30,12 @@ # common params for NETWORK _C.MODEL = CN() -_C.MODEL.NAME = 'pose_hrnet' +_C.MODEL.NAME = "pose_hrnet" _C.MODEL.INIT_WEIGHTS = True -_C.MODEL.PRETRAINED = '' +_C.MODEL.PRETRAINED = "" _C.MODEL.NUM_JOINTS = 17 _C.MODEL.TAG_PER_JOINT = True -_C.MODEL.TARGET_TYPE = 'gaussian' +_C.MODEL.TARGET_TYPE = "gaussian" _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 _C.MODEL.SIGMA = 2 @@ -53,13 +49,16 @@ # DATASET related params _C.DATASET = CN() -_C.DATASET.ROOT = '' -_C.DATASET.DATASET = 'mpii' -_C.DATASET.TRAIN_SET = 'train' -_C.DATASET.TEST_SET = 'valid' -_C.DATASET.DATA_FORMAT = 'jpg' -_C.DATASET.HYBRID_JOINTS_TYPE = '' +_C.DATASET.ROOT = "" +_C.DATASET.ROOT_COCO = "" +_C.DATASET.DATASET = "mpii" +_C.DATASET.TRAIN_SET = "train" +_C.DATASET.TRAIN_SET_COCO = "" +_C.DATASET.TEST_SET = "valid" +_C.DATASET.DATA_FORMAT = "jpg" +_C.DATASET.HYBRID_JOINTS_TYPE = "" _C.DATASET.SELECT_DATA = False +_C.DATASET.COCO_INFINITY_RATIO = 1 # training data augmentation _C.DATASET.FLIP = True @@ -76,7 +75,7 @@ _C.TRAIN.LR_STEP = [90, 110] _C.TRAIN.LR = 0.001 -_C.TRAIN.OPTIMIZER = 'adam' +_C.TRAIN.OPTIMIZER = "adam" _C.TRAIN.MOMENTUM = 0.9 _C.TRAIN.WD = 0.0001 _C.TRAIN.NESTEROV = False @@ -87,7 +86,7 @@ _C.TRAIN.END_EPOCH = 140 _C.TRAIN.RESUME = False -_C.TRAIN.CHECKPOINT = '' +_C.TRAIN.CHECKPOINT = "" _C.TRAIN.BATCH_SIZE_PER_GPU = 32 _C.TRAIN.SHUFFLE = True @@ -110,9 +109,9 @@ 
_C.TEST.SOFT_NMS = False _C.TEST.OKS_THRE = 0.5 _C.TEST.IN_VIS_THRE = 0.0 -_C.TEST.COCO_BBOX_FILE = '' +_C.TEST.COCO_BBOX_FILE = "" _C.TEST.BBOX_THRE = 1.0 -_C.TEST.MODEL_FILE = '' +_C.TEST.MODEL_FILE = "" # debug _C.DEBUG = CN() @@ -137,24 +136,18 @@ def update_config(cfg, args): if args.dataDir: cfg.DATA_DIR = args.dataDir - cfg.DATASET.ROOT = os.path.join( - cfg.DATA_DIR, cfg.DATASET.ROOT - ) + cfg.DATASET.ROOT = os.path.join(cfg.DATA_DIR, cfg.DATASET.ROOT) - cfg.MODEL.PRETRAINED = os.path.join( - cfg.DATA_DIR, cfg.MODEL.PRETRAINED - ) + cfg.MODEL.PRETRAINED = os.path.join(cfg.DATA_DIR, cfg.MODEL.PRETRAINED) if cfg.TEST.MODEL_FILE: - cfg.TEST.MODEL_FILE = os.path.join( - cfg.DATA_DIR, cfg.TEST.MODEL_FILE - ) + cfg.TEST.MODEL_FILE = os.path.join(cfg.DATA_DIR, cfg.TEST.MODEL_FILE) cfg.freeze() -if __name__ == '__main__': +if __name__ == "__main__": import sys - with open(sys.argv[1], 'w') as f: - print(_C, file=f) + with open(sys.argv[1], "w") as f: + print(_C, file=f) diff --git a/lib/dataset/coco.py b/lib/dataset/coco.py index 85ee7aae..dc185a68 100755 --- a/lib/dataset/coco.py +++ b/lib/dataset/coco.py @@ -49,8 +49,8 @@ class COCODataset(JointsDataset): def __init__(self, cfg, root, image_set, is_train, transform=None, infinity=False): super().__init__(cfg, root, image_set, is_train, transform) if infinity: - self.root = "$SCRATCH/coco_dataset/" - self.image_set = "train2017" + self.root = cfg.DATASET.ROOT_COCO + self.image_set = cfg.DATASET.TRAIN_SET_COCO self.nms_thre = cfg.TEST.NMS_THRE self.image_thre = cfg.TEST.IMAGE_THRE diff --git a/lib/dataset/infinity_coco.py b/lib/dataset/infinity_coco.py index 7fe357e1..3e236eb3 100644 --- a/lib/dataset/infinity_coco.py +++ b/lib/dataset/infinity_coco.py @@ -58,7 +58,7 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): self.use_gt_bbox = cfg.TEST.USE_GT_BBOX self.image_width = cfg.MODEL.IMAGE_SIZE[0] self.image_height = cfg.MODEL.IMAGE_SIZE[1] - self.coco_infinity_ratio = 10 + 
self.coco_infinity_ratio = cfg.DATASET.COCO_INFINITY_RATIO self.aspect_ratio = self.image_width * 1.0 / self.image_height self.pixel_std = 200 @@ -338,7 +338,9 @@ def _load_coco_keypoint_annotation_kernal( } ) for index_coco in indices_coco: - rec_coco = self.coco_dataset._load_coco_keypoint_annotation_kernal(index_coco) + rec_coco = self.coco_dataset._load_coco_keypoint_annotation_kernal( + index_coco + ) for r in rec_coco: joints_3d_coco = r["joints_3d"] joints_3d_vis_coco = r["joints_3d_vis"] diff --git a/lib/models/pose_hrnet.py b/lib/models/pose_hrnet.py index 09ff346a..fe6b0bce 100644 --- a/lib/models/pose_hrnet.py +++ b/lib/models/pose_hrnet.py @@ -4,25 +4,23 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import os import logging +import os import torch import torch.nn as nn - BN_MOMENTUM = 0.1 logger = logging.getLogger(__name__) def conv3x3(in_planes, out_planes, stride=1): """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False + ) class BasicBlock(nn.Module): @@ -64,13 +62,14 @@ def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, - 
bias=False) - self.bn3 = nn.BatchNorm2d(planes * self.expansion, - momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stride = stride @@ -99,11 +98,20 @@ def forward(self, x): class HighResolutionModule(nn.Module): - def __init__(self, num_branches, blocks, num_blocks, num_inchannels, - num_channels, fuse_method, multi_scale_output=True): + def __init__( + self, + num_branches, + blocks, + num_blocks, + num_inchannels, + num_channels, + fuse_method, + multi_scale_output=True, + ): super(HighResolutionModule, self).__init__() self._check_branches( - num_branches, blocks, num_blocks, num_inchannels, num_channels) + num_branches, blocks, num_blocks, num_inchannels, num_channels + ) self.num_inchannels = num_inchannels self.fuse_method = fuse_method @@ -112,44 +120,52 @@ def __init__(self, num_branches, blocks, num_blocks, num_inchannels, self.multi_scale_output = multi_scale_output self.branches = self._make_branches( - num_branches, blocks, num_blocks, num_channels) + num_branches, blocks, num_blocks, num_channels + ) self.fuse_layers = self._make_fuse_layers() self.relu = nn.ReLU(True) - def _check_branches(self, num_branches, blocks, num_blocks, - num_inchannels, num_channels): + def _check_branches( + self, num_branches, blocks, num_blocks, num_inchannels, num_channels + ): if num_branches != len(num_blocks): - error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( - num_branches, len(num_blocks)) + error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format( + num_branches, len(num_blocks) + ) logger.error(error_msg) raise ValueError(error_msg) if num_branches != len(num_channels): - error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( - num_branches, len(num_channels)) + error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format( + num_branches, 
len(num_channels) + ) logger.error(error_msg) raise ValueError(error_msg) if num_branches != len(num_inchannels): - error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( - num_branches, len(num_inchannels)) + error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format( + num_branches, len(num_inchannels) + ) logger.error(error_msg) raise ValueError(error_msg) - def _make_one_branch(self, branch_index, block, num_blocks, num_channels, - stride=1): + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): downsample = None - if stride != 1 or \ - self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + if ( + stride != 1 + or self.num_inchannels[branch_index] + != num_channels[branch_index] * block.expansion + ): downsample = nn.Sequential( nn.Conv2d( self.num_inchannels[branch_index], num_channels[branch_index] * block.expansion, - kernel_size=1, stride=stride, bias=False + kernel_size=1, + stride=stride, + bias=False, ), nn.BatchNorm2d( - num_channels[branch_index] * block.expansion, - momentum=BN_MOMENTUM + num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM ), ) @@ -159,17 +175,13 @@ def _make_one_branch(self, branch_index, block, num_blocks, num_channels, self.num_inchannels[branch_index], num_channels[branch_index], stride, - downsample + downsample, ) ) - self.num_inchannels[branch_index] = \ - num_channels[branch_index] * block.expansion + self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion for i in range(1, num_blocks[branch_index]): layers.append( - block( - self.num_inchannels[branch_index], - num_channels[branch_index] - ) + block(self.num_inchannels[branch_index], num_channels[branch_index]) ) return nn.Sequential(*layers) @@ -178,9 +190,7 @@ def _make_branches(self, num_branches, block, num_blocks, num_channels): branches = [] for i in range(num_branches): - branches.append( - self._make_one_branch(i, block, num_blocks, num_channels) - ) + 
branches.append(self._make_one_branch(i, block, num_blocks, num_channels)) return nn.ModuleList(branches) @@ -200,17 +210,20 @@ def _make_fuse_layers(self): nn.Conv2d( num_inchannels[j], num_inchannels[i], - 1, 1, 0, bias=False + 1, + 1, + 0, + bias=False, ), nn.BatchNorm2d(num_inchannels[i]), - nn.Upsample(scale_factor=2**(j-i), mode='nearest') + nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"), ) ) elif j == i: fuse_layer.append(None) else: conv3x3s = [] - for k in range(i-j): + for k in range(i - j): if k == i - j - 1: num_outchannels_conv3x3 = num_inchannels[i] conv3x3s.append( @@ -218,9 +231,12 @@ def _make_fuse_layers(self): nn.Conv2d( num_inchannels[j], num_outchannels_conv3x3, - 3, 2, 1, bias=False + 3, + 2, + 1, + bias=False, ), - nn.BatchNorm2d(num_outchannels_conv3x3) + nn.BatchNorm2d(num_outchannels_conv3x3), ) ) else: @@ -230,10 +246,13 @@ def _make_fuse_layers(self): nn.Conv2d( num_inchannels[j], num_outchannels_conv3x3, - 3, 2, 1, bias=False + 3, + 2, + 1, + bias=False, ), nn.BatchNorm2d(num_outchannels_conv3x3), - nn.ReLU(True) + nn.ReLU(True), ) ) fuse_layer.append(nn.Sequential(*conv3x3s)) @@ -265,73 +284,68 @@ def forward(self, x): return x_fuse -blocks_dict = { - 'BASIC': BasicBlock, - 'BOTTLENECK': Bottleneck -} +blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck} class PoseHighResolutionNet(nn.Module): - def __init__(self, cfg, **kwargs): self.inplanes = 64 - extra = cfg['MODEL']['EXTRA'] + extra = cfg["MODEL"]["EXTRA"] super(PoseHighResolutionNet, self).__init__() # stem net - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, - bias=False) + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) - self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, - bias=False) + self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) self.relu = 
nn.ReLU(inplace=True) self.layer1 = self._make_layer(Bottleneck, 64, 4) - self.stage2_cfg = extra['STAGE2'] - num_channels = self.stage2_cfg['NUM_CHANNELS'] - block = blocks_dict[self.stage2_cfg['BLOCK']] + self.stage2_cfg = extra["STAGE2"] + num_channels = self.stage2_cfg["NUM_CHANNELS"] + block = blocks_dict[self.stage2_cfg["BLOCK"]] num_channels = [ num_channels[i] * block.expansion for i in range(len(num_channels)) ] self.transition1 = self._make_transition_layer([256], num_channels) self.stage2, pre_stage_channels = self._make_stage( - self.stage2_cfg, num_channels) + self.stage2_cfg, num_channels + ) - self.stage3_cfg = extra['STAGE3'] - num_channels = self.stage3_cfg['NUM_CHANNELS'] - block = blocks_dict[self.stage3_cfg['BLOCK']] + self.stage3_cfg = extra["STAGE3"] + num_channels = self.stage3_cfg["NUM_CHANNELS"] + block = blocks_dict[self.stage3_cfg["BLOCK"]] num_channels = [ num_channels[i] * block.expansion for i in range(len(num_channels)) ] - self.transition2 = self._make_transition_layer( - pre_stage_channels, num_channels) + self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage3, pre_stage_channels = self._make_stage( - self.stage3_cfg, num_channels) + self.stage3_cfg, num_channels + ) - self.stage4_cfg = extra['STAGE4'] - num_channels = self.stage4_cfg['NUM_CHANNELS'] - block = blocks_dict[self.stage4_cfg['BLOCK']] + self.stage4_cfg = extra["STAGE4"] + num_channels = self.stage4_cfg["NUM_CHANNELS"] + block = blocks_dict[self.stage4_cfg["BLOCK"]] num_channels = [ num_channels[i] * block.expansion for i in range(len(num_channels)) ] - self.transition3 = self._make_transition_layer( - pre_stage_channels, num_channels) + self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage4, pre_stage_channels = self._make_stage( - self.stage4_cfg, num_channels, multi_scale_output=False) + self.stage4_cfg, num_channels, multi_scale_output=False + ) self.final_layer = nn.Conv2d( 
in_channels=pre_stage_channels[0], - out_channels=cfg['MODEL']['NUM_JOINTS'], - kernel_size=extra['FINAL_CONV_KERNEL'], + out_channels=cfg["MODEL"]["NUM_JOINTS"], + kernel_size=extra["FINAL_CONV_KERNEL"], stride=1, - padding=1 if extra['FINAL_CONV_KERNEL'] == 3 else 0 + padding=1 if extra["FINAL_CONV_KERNEL"] == 3 else 0, ) - self.pretrained_layers = extra['PRETRAINED_LAYERS'] + self.pretrained_layers = extra["PRETRAINED_LAYERS"] + self.pretrain_final_layer = extra["PRETRAIN_FINAL_LAYER"] - def _make_transition_layer( - self, num_channels_pre_layer, num_channels_cur_layer): + def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): num_branches_cur = len(num_channels_cur_layer) num_branches_pre = len(num_channels_pre_layer) @@ -344,27 +358,31 @@ def _make_transition_layer( nn.Conv2d( num_channels_pre_layer[i], num_channels_cur_layer[i], - 3, 1, 1, bias=False + 3, + 1, + 1, + bias=False, ), nn.BatchNorm2d(num_channels_cur_layer[i]), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) ) else: transition_layers.append(None) else: conv3x3s = [] - for j in range(i+1-num_branches_pre): + for j in range(i + 1 - num_branches_pre): inchannels = num_channels_pre_layer[-1] - outchannels = num_channels_cur_layer[i] \ - if j == i-num_branches_pre else inchannels + outchannels = ( + num_channels_cur_layer[i] + if j == i - num_branches_pre + else inchannels + ) conv3x3s.append( nn.Sequential( - nn.Conv2d( - inchannels, outchannels, 3, 2, 1, bias=False - ), + nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False), nn.BatchNorm2d(outchannels), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) ) transition_layers.append(nn.Sequential(*conv3x3s)) @@ -376,8 +394,11 @@ def _make_layer(self, block, planes, blocks, stride=1): if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d( - self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False + self.inplanes, + planes * block.expansion, + 
kernel_size=1, + stride=stride, + bias=False, ), nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), ) @@ -390,14 +411,13 @@ def _make_layer(self, block, planes, blocks, stride=1): return nn.Sequential(*layers) - def _make_stage(self, layer_config, num_inchannels, - multi_scale_output=True): - num_modules = layer_config['NUM_MODULES'] - num_branches = layer_config['NUM_BRANCHES'] - num_blocks = layer_config['NUM_BLOCKS'] - num_channels = layer_config['NUM_CHANNELS'] - block = blocks_dict[layer_config['BLOCK']] - fuse_method = layer_config['FUSE_METHOD'] + def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True): + num_modules = layer_config["NUM_MODULES"] + num_branches = layer_config["NUM_BRANCHES"] + num_blocks = layer_config["NUM_BLOCKS"] + num_channels = layer_config["NUM_CHANNELS"] + block = blocks_dict[layer_config["BLOCK"]] + fuse_method = layer_config["FUSE_METHOD"] modules = [] for i in range(num_modules): @@ -415,7 +435,7 @@ def _make_stage(self, layer_config, num_inchannels, num_inchannels, num_channels, fuse_method, - reset_multi_scale_output + reset_multi_scale_output, ) ) num_inchannels = modules[-1].get_num_inchannels() @@ -432,7 +452,7 @@ def forward(self, x): x = self.layer1(x) x_list = [] - for i in range(self.stage2_cfg['NUM_BRANCHES']): + for i in range(self.stage2_cfg["NUM_BRANCHES"]): if self.transition1[i] is not None: x_list.append(self.transition1[i](x)) else: @@ -440,7 +460,7 @@ def forward(self, x): y_list = self.stage2(x_list) x_list = [] - for i in range(self.stage3_cfg['NUM_BRANCHES']): + for i in range(self.stage3_cfg["NUM_BRANCHES"]): if self.transition2[i] is not None: x_list.append(self.transition2[i](y_list[-1])) else: @@ -448,7 +468,7 @@ def forward(self, x): y_list = self.stage3(x_list) x_list = [] - for i in range(self.stage4_cfg['NUM_BRANCHES']): + for i in range(self.stage4_cfg["NUM_BRANCHES"]): if self.transition3[i] is not None: x_list.append(self.transition3[i](y_list[-1])) else: @@ -459,14 
+479,14 @@ def forward(self, x): return x - def init_weights(self, pretrained=''): - logger.info('=> init weights from normal distribution') + def init_weights(self, pretrained=""): + logger.info("=> init weights from normal distribution") for m in self.modules(): if isinstance(m, nn.Conv2d): # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') nn.init.normal_(m.weight, std=0.001) for name, _ in m.named_parameters(): - if name in ['bias']: + if name in ["bias"]: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) @@ -474,28 +494,39 @@ def init_weights(self, pretrained=''): elif isinstance(m, nn.ConvTranspose2d): nn.init.normal_(m.weight, std=0.001) for name, _ in m.named_parameters(): - if name in ['bias']: + if name in ["bias"]: nn.init.constant_(m.bias, 0) if os.path.isfile(pretrained): pretrained_state_dict = torch.load(pretrained) - logger.info('=> loading pretrained model {}'.format(pretrained)) + logger.info("=> loading pretrained model {}".format(pretrained)) need_init_state_dict = {} for name, m in pretrained_state_dict.items(): - if name.split('.')[0] in self.pretrained_layers \ - or self.pretrained_layers[0] is '*': + if ( + name.split(".")[0] in self.pretrained_layers + or self.pretrained_layers[0] is "*" + ): need_init_state_dict[name] = m self.load_state_dict(need_init_state_dict, strict=False) + + if self.pretrain_final_layer: + self.final_layer.weight.data[:17, :, :, :] = pretrained_state_dict[ + "final_layer.weight" + ] + self.final_layer.bias.data[:17] = pretrained_state_dict[ + "final_layer.bias" + ] + elif pretrained: - logger.error('=> please download pre-trained models first!') - raise ValueError('{} is not exist!'.format(pretrained)) + logger.error("=> please download pre-trained models first!") + raise ValueError("{} is not exist!".format(pretrained)) def get_pose_net(cfg, is_train, **kwargs): model = PoseHighResolutionNet(cfg, **kwargs) - if is_train and 
cfg['MODEL']['INIT_WEIGHTS']: - model.init_weights(cfg['MODEL']['PRETRAINED']) + if is_train and cfg["MODEL"]["INIT_WEIGHTS"]: + model.init_weights(cfg["MODEL"]["PRETRAINED"]) return model From 52d1fc17284e7c46004e84d2e3382e86932e14c0 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Tue, 30 May 2023 13:28:33 -0700 Subject: [PATCH 21/30] add new combined dataset --- .gitignore | 1 + experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 6 ++++-- .../infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 10 +++++----- .../hrnet/w48_384x288_adam_lr1e-3_local.yaml | 9 +++++---- lib/core/function.py | 3 +++ lib/models/pose_hrnet.py | 6 +++++- 6 files changed, 23 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 695e68e6..323d0ba2 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,7 @@ target/ lib/pycocotools/_mask.c lib/nms/cpu_nms.c +output/* outputs/* models/* logs/* diff --git a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 589a8dfd..d2e1518a 100644 --- a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -25,7 +25,8 @@ DATASET: MODEL: INIT_WEIGHTS: true NAME: pose_hrnet - NUM_JOINTS: 17 + NUM_JOINTS: 58 + # NUM_JOINTS: 17 PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" TARGET_TYPE: gaussian IMAGE_SIZE: @@ -112,7 +113,8 @@ TEST: BBOX_THRE: 1.0 IMAGE_THRE: 0.0 IN_VIS_THRE: 0.2 - MODEL_FILE: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" + MODEL_FILE: "outputs/sherlock/model_best1e-3_62_epochs.pth" + # MODEL_FILE: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" NMS_THRE: 1.0 OKS_THRE: 0.9 USE_GT_BBOX: true diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index d6c4e525..64b8d1fc 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ 
b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -5,9 +5,9 @@ CUDNN: ENABLED: true DATA_DIR: "" GPUS: (0,) -OUTPUT_DIR: "outputs/output_infinity_coco_unfrozen" -LOG_DIR: "logs/log_infinity_coco_unfrozen" -WORKERS: 2 +OUTPUT_DIR: "outputs/output_new_infinity" +LOG_DIR: "logs/output_new_infinity" +WORKERS: 12 PRINT_FREQ: 10 DATASET: @@ -17,14 +17,14 @@ DATASET: FLIP: true NUM_JOINTS_HALF_BODY: 27 PROB_HALF_BODY: 0.3 - ROOT: "../infinity_dataset_combined" + ROOT: "../combined_infinity_dataset" ROOT_COCO: "/scratch/users/yonigoz/coco_dataset/" ROT_FACTOR: 45 SCALE_FACTOR: 0.35 TEST_SET: "test" TRAIN_SET: "train" TRAIN_SET_COCO: "train2017" - COCO_INFINITY_RATIO: 10 + COCO_INFINITY_RATIO: 3 MODEL: INIT_WEIGHTS: true NAME: pose_hrnet diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml index 295baef1..584c08b7 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml @@ -5,9 +5,9 @@ CUDNN: ENABLED: true DATA_DIR: "" GPUS: (0,) -OUTPUT_DIR: "outputs/output_infinity_coco_local" -LOG_DIR: "logs/log_infinity_coco_local" -WORKERS: 2 +OUTPUT_DIR: "outputs/output_new_infinity" +LOG_DIR: "logs/output_new_infinity" +WORKERS: 12 PRINT_FREQ: 10 DATASET: @@ -17,7 +17,7 @@ DATASET: FLIP: true NUM_JOINTS_HALF_BODY: 27 PROB_HALF_BODY: 0.3 - ROOT: "../infinity_dataset_combined" + ROOT: "../combined_infinity_dataset" ROOT_COCO: "data/coco/" ROT_FACTOR: 45 SCALE_FACTOR: 0.35 @@ -63,6 +63,7 @@ MODEL: - "stage3" - "transition3" - "stage4" + PRETRAIN_FINAL_LAYER: true FINAL_CONV_KERNEL: 1 STAGE2: NUM_MODULES: 1 diff --git a/lib/core/function.py b/lib/core/function.py index fce6fa1b..0076de2e 100755 --- a/lib/core/function.py +++ b/lib/core/function.py @@ -131,6 +131,7 @@ def validate( model.eval() num_samples = len(val_dataset) + # all_preds = np.zeros((num_samples, 17, 3), 
dtype=np.float32) all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 6)) image_path = [] @@ -142,6 +143,7 @@ def validate( for i, (input, target, target_weight, meta) in enumerate(val_loader): # compute output outputs = model(input) + # outputs = model(input)[:, :17, :, :] if isinstance(outputs, list): output = outputs[-1] else: @@ -149,6 +151,7 @@ def validate( if config.TEST.FLIP_TEST: input_flipped = input.flip(3) + # outputs_flipped = model(input_flipped)[:, :17, :, :] outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): diff --git a/lib/models/pose_hrnet.py b/lib/models/pose_hrnet.py index fe6b0bce..7c4e768e 100644 --- a/lib/models/pose_hrnet.py +++ b/lib/models/pose_hrnet.py @@ -343,7 +343,10 @@ def __init__(self, cfg, **kwargs): ) self.pretrained_layers = extra["PRETRAINED_LAYERS"] - self.pretrain_final_layer = extra["PRETRAIN_FINAL_LAYER"] + if "PRETRAIN_FINAL_LAYER" in extra: + self.pretrain_final_layer = extra["PRETRAIN_FINAL_LAYER"] + else: + self.pretrain_final_layer = False def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): num_branches_cur = len(num_channels_cur_layer) @@ -511,6 +514,7 @@ def init_weights(self, pretrained=""): self.load_state_dict(need_init_state_dict, strict=False) if self.pretrain_final_layer: + print("load final layer") self.final_layer.weight.data[:17, :, :, :] = pretrained_state_dict[ "final_layer.weight" ] From f57f46e8d52f2b49bb9d147bec217188b28223d2 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Wed, 31 May 2023 10:27:26 -0700 Subject: [PATCH 22/30] add 64 model config --- .../hrnet/w48_384x288_adam_lr1e-3_local.yaml | 38 ++--- .../hrnet/w64_384x288_adam_lr1e-3.yaml | 143 ++++++++++++++++++ lib/models/pose_hrnet.py | 3 +- 3 files changed, 164 insertions(+), 20 deletions(-) create mode 100644 experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml diff --git 
a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml index 584c08b7..96c9b8cb 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml @@ -17,7 +17,7 @@ DATASET: FLIP: true NUM_JOINTS_HALF_BODY: 27 PROB_HALF_BODY: 0.3 - ROOT: "../combined_infinity_dataset" + ROOT: "../combined_dataset" ROOT_COCO: "data/coco/" ROT_FACTOR: 45 SCALE_FACTOR: 0.35 @@ -57,13 +57,13 @@ MODEL: - "conv2" - "bn2" - "layer1" - - "transition1" - - "stage2" - - "transition2" - - "stage3" - - "transition3" - - "stage4" - PRETRAIN_FINAL_LAYER: true + # - "transition1" + # - "stage2" + # - "transition2" + # - "stage3" + # - "transition3" + # - "stage4" + PRETRAIN_FINAL_LAYER: false FINAL_CONV_KERNEL: 1 STAGE2: NUM_MODULES: 1 @@ -73,8 +73,8 @@ MODEL: - 4 - 4 NUM_CHANNELS: - - 48 - - 96 + - 64 + - 128 FUSE_METHOD: SUM STAGE3: NUM_MODULES: 4 @@ -85,9 +85,9 @@ MODEL: - 4 - 4 NUM_CHANNELS: - - 48 - - 96 - - 192 + - 64 + - 128 + - 256 FUSE_METHOD: SUM STAGE4: NUM_MODULES: 3 @@ -99,15 +99,15 @@ MODEL: - 4 - 4 NUM_CHANNELS: - - 48 - - 96 - - 192 - - 384 + - 64 + - 128 + - 256 + - 512 FUSE_METHOD: SUM LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 2 + BATCH_SIZE_PER_GPU: 1 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -123,7 +123,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 10 + BATCH_SIZE_PER_GPU: 1 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 diff --git a/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml new file mode 100644 index 00000000..fb91c7c8 --- /dev/null +++ b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml @@ -0,0 +1,143 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true 
+DATA_DIR: "" +GPUS: (0,) +OUTPUT_DIR: "outputs/output_new_infinity_64" +LOG_DIR: "logs/output_new_infinity_64" +WORKERS: 12 +PRINT_FREQ: 10 + +DATASET: + COLOR_RGB: true + DATASET: "infinity_coco" + DATA_FORMAT: png + FLIP: true + NUM_JOINTS_HALF_BODY: 27 + PROB_HALF_BODY: 0.3 + ROOT: "../combined_infinity_dataset" + ROOT_COCO: "/scratch/users/yonigoz/coco_dataset/" + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + TEST_SET: "test" + TRAIN_SET: "train" + TRAIN_SET_COCO: "train2017" + COCO_INFINITY_RATIO: 3 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 58 + PRETRAINED: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + EXTRA: + FREEZE_LAYERS: false + FROZEN_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + PRETRAINED_LAYERS: + - "conv1" + - "bn1" + - "conv2" + - "bn2" + - "layer1" + # - "transition1" + # - "stage2" + # - "transition2" + # - "stage3" + # - "transition3" + # - "stage4" + PRETRAIN_FINAL_LAYER: false + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 64 + - 128 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 64 + - 128 + - 256 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 64 + - 128 + - 256 + - 512 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 4 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 200 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 4 + COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" + 
BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: "outputs/output_infinity_coco/infinity_coco/pose_hrnet/w48_384x288_adam_lr1e-3/model_best.pth" + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/lib/models/pose_hrnet.py b/lib/models/pose_hrnet.py index 7c4e768e..d5f112e4 100644 --- a/lib/models/pose_hrnet.py +++ b/lib/models/pose_hrnet.py @@ -333,7 +333,7 @@ def __init__(self, cfg, **kwargs): self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multi_scale_output=False ) - + print("in channels final layer: ", pre_stage_channels[0]) self.final_layer = nn.Conv2d( in_channels=pre_stage_channels[0], out_channels=cfg["MODEL"]["NUM_JOINTS"], @@ -532,5 +532,6 @@ def get_pose_net(cfg, is_train, **kwargs): if is_train and cfg["MODEL"]["INIT_WEIGHTS"]: model.init_weights(cfg["MODEL"]["PRETRAINED"]) + print("finishing loading pretrained weights") return model From cb2eb4669165a007586410b941b94b54236c4dd9 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 12 Jun 2023 12:10:22 -0400 Subject: [PATCH 23/30] add wandb logging --- .gitignore | 2 +- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 1 + .../hrnet/w48_384x288_adam_lr1e-3_local.yaml | 39 ++++++++++--------- .../hrnet/w64_384x288_adam_lr1e-3.yaml | 1 + lib/config/default.py | 1 + lib/core/function.py | 10 +++++ tools/train.py | 3 ++ 7 files changed, 37 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 323d0ba2..301ec066 100644 --- a/.gitignore +++ b/.gitignore @@ -93,4 +93,4 @@ external/ draws/ plot/ - +wandb/ diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index 64b8d1fc..b439b9be 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ 
b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -9,6 +9,7 @@ OUTPUT_DIR: "outputs/output_new_infinity" LOG_DIR: "logs/output_new_infinity" WORKERS: 12 PRINT_FREQ: 10 +LOG_WANDB: True DATASET: COLOR_RGB: true diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml index 96c9b8cb..2a3fa124 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml @@ -9,6 +9,7 @@ OUTPUT_DIR: "outputs/output_new_infinity" LOG_DIR: "logs/output_new_infinity" WORKERS: 12 PRINT_FREQ: 10 +LOG_WANDB: True DATASET: COLOR_RGB: true @@ -57,13 +58,13 @@ MODEL: - "conv2" - "bn2" - "layer1" - # - "transition1" - # - "stage2" - # - "transition2" - # - "stage3" - # - "transition3" - # - "stage4" - PRETRAIN_FINAL_LAYER: false + - "transition1" + - "stage2" + - "transition2" + - "stage3" + - "transition3" + - "stage4" + PRETRAIN_FINAL_LAYER: true FINAL_CONV_KERNEL: 1 STAGE2: NUM_MODULES: 1 @@ -73,8 +74,8 @@ MODEL: - 4 - 4 NUM_CHANNELS: - - 64 - - 128 + - 48 + - 96 FUSE_METHOD: SUM STAGE3: NUM_MODULES: 4 @@ -85,9 +86,9 @@ MODEL: - 4 - 4 NUM_CHANNELS: - - 64 - - 128 - - 256 + - 48 + - 96 + - 192 FUSE_METHOD: SUM STAGE4: NUM_MODULES: 3 @@ -99,20 +100,20 @@ MODEL: - 4 - 4 NUM_CHANNELS: - - 64 - - 128 - - 256 - - 512 + - 48 + - 96 + - 192 + - 384 FUSE_METHOD: SUM LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 1 + BATCH_SIZE_PER_GPU: 2 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 OPTIMIZER: adam - LR: 0.00001 + LR: 0.001 LR_FACTOR: 0.1 LR_STEP: - 170 @@ -123,7 +124,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 1 + BATCH_SIZE_PER_GPU: 2 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 diff --git a/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml 
b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml index fb91c7c8..67a86c31 100644 --- a/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml @@ -9,6 +9,7 @@ OUTPUT_DIR: "outputs/output_new_infinity_64" LOG_DIR: "logs/output_new_infinity_64" WORKERS: 12 PRINT_FREQ: 10 +LOG_WANDB: True DATASET: COLOR_RGB: true diff --git a/lib/config/default.py b/lib/config/default.py index ee2cf061..648ad53f 100644 --- a/lib/config/default.py +++ b/lib/config/default.py @@ -21,6 +21,7 @@ _C.AUTO_RESUME = False _C.PIN_MEMORY = True _C.RANK = 0 +_C.LOG_WANDB = False # Cudnn related params _C.CUDNN = CN() diff --git a/lib/core/function.py b/lib/core/function.py index 0076de2e..86024010 100755 --- a/lib/core/function.py +++ b/lib/core/function.py @@ -12,6 +12,7 @@ import numpy as np import torch +import wandb from core.evaluate import accuracy from core.inference import get_final_preds from utils.transforms import flip_back @@ -102,6 +103,15 @@ def train( ) ) logger.info(msg) + if config.LOG_WANDB: + wandb.log( + { + "epoch": epoch, + "loss_avg": losses.avg, + "accuracy_avg": acc.avg, + "speed": input.size(0) / batch_time.val, + } + ) writer = writer_dict["writer"] global_steps = writer_dict["train_global_steps"] diff --git a/tools/train.py b/tools/train.py index e89594ea..cacf0d58 100755 --- a/tools/train.py +++ b/tools/train.py @@ -20,6 +20,7 @@ import torch.utils.data import torch.utils.data.distributed import torchvision.transforms as transforms +import wandb from config import cfg, update_config from core.function import train, validate from core.loss import JointsMSELoss @@ -66,6 +67,8 @@ def main(): logger.info(pprint.pformat(args)) logger.info(cfg) + if cfg.LOG_WANDB: + wandb.init(project="synthetic_finetuning", entity="yonigoz", config=cfg) # cudnn related setting cudnn.benchmark = cfg.CUDNN.BENCHMARK From 59d3f03456c4e331e8bede12e84c2ae7af70f09e Mon Sep 17 00:00:00 2001 From: yonigozlan 
Date: Mon, 12 Jun 2023 12:19:31 -0400 Subject: [PATCH 24/30] update cfg --- .../infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 6 +++--- .../infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index b439b9be..acf6d4dc 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -7,7 +7,7 @@ DATA_DIR: "" GPUS: (0,) OUTPUT_DIR: "outputs/output_new_infinity" LOG_DIR: "logs/output_new_infinity" -WORKERS: 12 +WORKERS: 0 PRINT_FREQ: 10 LOG_WANDB: True @@ -108,7 +108,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 32 + BATCH_SIZE_PER_GPU: 16 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -124,7 +124,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 32 + BATCH_SIZE_PER_GPU: 16 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 IMAGE_THRE: 0.0 diff --git a/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml index 67a86c31..c88476e4 100644 --- a/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml @@ -7,7 +7,7 @@ DATA_DIR: "" GPUS: (0,) OUTPUT_DIR: "outputs/output_new_infinity_64" LOG_DIR: "logs/output_new_infinity_64" -WORKERS: 12 +WORKERS: 0 PRINT_FREQ: 10 LOG_WANDB: True @@ -108,7 +108,7 @@ MODEL: LOSS: USE_TARGET_WEIGHT: true TRAIN: - BATCH_SIZE_PER_GPU: 4 + BATCH_SIZE_PER_GPU: 8 SHUFFLE: true BEGIN_EPOCH: 0 END_EPOCH: 200 @@ -124,7 +124,7 @@ TRAIN: MOMENTUM: 0.9 NESTEROV: false TEST: - BATCH_SIZE_PER_GPU: 4 + BATCH_SIZE_PER_GPU: 8 COCO_BBOX_FILE: "data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json" BBOX_THRE: 1.0 
IMAGE_THRE: 0.0 From 6f1319433df2cc6d3cc5493a1bad4c9f5044c73d Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 12 Jun 2023 20:28:21 -0400 Subject: [PATCH 25/30] add split accuracy --- .../hrnet/w48_384x288_adam_lr1e-3_local.yaml | 2 +- lib/core/evaluate.py | 71 ++++++++-- lib/core/function.py | 125 +++++++++++++----- 3 files changed, 157 insertions(+), 41 deletions(-) diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml index 2a3fa124..1ab15f21 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml @@ -9,7 +9,7 @@ OUTPUT_DIR: "outputs/output_new_infinity" LOG_DIR: "logs/output_new_infinity" WORKERS: 12 PRINT_FREQ: 10 -LOG_WANDB: True +LOG_WANDB: true DATASET: COLOR_RGB: true diff --git a/lib/core/evaluate.py b/lib/core/evaluate.py index cf722857..269ad159 100644 --- a/lib/core/evaluate.py +++ b/lib/core/evaluate.py @@ -4,12 +4,9 @@ # Written by Bin Xiao (Bin.Xiao@microsoft.com) # ------------------------------------------------------------------------------ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import numpy as np - from core.inference import get_max_preds @@ -29,7 +26,7 @@ def calc_dists(preds, target, normalize): def dist_acc(dists, thr=0.5): - ''' Return percentage below threshold while ignoring values with a -1 ''' + """Return percentage below threshold while ignoring values with a -1""" dist_cal = np.not_equal(dists, -1) num_dist_cal = dist_cal.sum() if num_dist_cal > 0: @@ -38,16 +35,16 @@ def dist_acc(dists, thr=0.5): return -1 -def accuracy(output, target, hm_type='gaussian', thr=0.5): - ''' +def accuracy(output, target, hm_type="gaussian", thr=0.5): + """ Calculate accuracy according to PCK, but uses ground truth heatmap 
rather than x,y locations First value to be returned is average accuracy across 'idxs', followed by individual accuracies - ''' + """ idx = list(range(output.shape[1])) norm = 1.0 - if hm_type == 'gaussian': + if hm_type == "gaussian": pred, _ = get_max_preds(output) target, _ = get_max_preds(target) h = output.shape[2] @@ -71,3 +68,59 @@ def accuracy(output, target, hm_type='gaussian', thr=0.5): return acc, avg_acc, cnt, pred +def get_acc(idx, dists): + acc = np.zeros((len(idx) + 1)) + avg_acc = 0 + cnt = 0 + + for i in range(len(idx)): + acc[i + 1] = dist_acc(dists[idx[i]]) + if acc[i + 1] >= 0: + avg_acc = avg_acc + acc[i + 1] + cnt += 1 + + avg_acc = avg_acc / cnt if cnt != 0 else 0 + if cnt != 0: + acc[0] = avg_acc + return acc, avg_acc, cnt + + +def accuracy_infinity_coco(output, target, hm_type="gaussian", thr=0.5): + """ + Calculate accuracy according to PCK, + but uses ground truth heatmap rather than x,y locations + First value to be returned is average accuracy across 'idxs', + followed by individual accuracies + """ + idx = list(range(output.shape[1])) + infinity_idxs = np.any(np.sum(target, axis=(2, 3))[:, 17:] > 1, axis=1) + norm = 1.0 + if hm_type == "gaussian": + pred, _ = get_max_preds(output) + target, _ = get_max_preds(target) + h = output.shape[2] + w = output.shape[3] + norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 + pred_infinity = pred.copy()[infinity_idxs] + pred_coco = pred.copy()[~infinity_idxs][:, :17, :] + norm_infinity = norm.copy()[infinity_idxs] + norm_coco = norm.copy()[~infinity_idxs] + target_infinity = target.copy()[infinity_idxs] + target_coco = target.copy()[~infinity_idxs][:, :17, :] + # print("target_coco", target_coco) + # print("target_infinity", target_infinity) + dists_infinity = calc_dists(pred_infinity, target_infinity, norm_infinity) + dists_coco = calc_dists(pred_coco, target_coco, norm_coco) + + acc_infinity, avg_acc_infinity, cnt_infinity = get_acc(idx, dists_infinity) + acc_coco, avg_acc_coco, cnt_coco 
= get_acc(list(range(17)), dists_coco) + + return ( + (acc_infinity, avg_acc_infinity, cnt_infinity), + ( + acc_coco, + avg_acc_coco, + cnt_coco, + ), + pred, + ) diff --git a/lib/core/function.py b/lib/core/function.py index 86024010..ba8c7d38 100755 --- a/lib/core/function.py +++ b/lib/core/function.py @@ -13,7 +13,7 @@ import numpy as np import torch import wandb -from core.evaluate import accuracy +from core.evaluate import accuracy, accuracy_infinity_coco from core.inference import get_final_preds from utils.transforms import flip_back from utils.vis import save_debug_images @@ -35,7 +35,11 @@ def train( batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() - acc = AverageMeter() + if config.MODEL.NUM_JOINTS == 58: + acc_infinity = AverageMeter() + acc_coco = AverageMeter() + else: + acc = AverageMeter() # switch to train mode model.train() @@ -75,48 +79,99 @@ def train( # measure accuracy and record loss losses.update(loss.item(), input.size(0)) - _, avg_acc, cnt, pred = accuracy( - output.detach().cpu().numpy(), target.detach().cpu().numpy() - ) - acc.update(avg_acc, cnt) + if config.MODEL.NUM_JOINTS == 58: + ( + (_, avg_acc_infinity, cnt_infinity), + ( + _, + avg_acc_coco, + cnt_coco, + ), + pred, + ) = accuracy_infinity_coco( + output.detach().cpu().numpy(), target.detach().cpu().numpy() + ) + acc_infinity.update(avg_acc_infinity, cnt_infinity) + acc_coco.update(avg_acc_coco, cnt_coco) + else: + _, avg_acc, cnt, pred = accuracy( + output.detach().cpu().numpy(), target.detach().cpu().numpy() + ) + acc.update(avg_acc, cnt) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % config.PRINT_FREQ == 0: - msg = ( - "Epoch: [{0}][{1}/{2}]\t" - "Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t" - "Speed {speed:.1f} samples/s\t" - "Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t" - "Loss {loss.val:.5f} ({loss.avg:.5f})\t" - "Accuracy {acc.val:.3f} ({acc.avg:.3f})".format( - epoch, - i, - 
len(train_loader), - batch_time=batch_time, - speed=input.size(0) / batch_time.val, - data_time=data_time, - loss=losses, - acc=acc, + if config.MODEL.NUM_JOINTS == 58: + msg = ( + "Epoch: [{0}][{1}/{2}]\t" + "Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t" + "Speed {speed:.1f} samples/s\t" + "Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t" + "Loss {loss.val:.5f} ({loss.avg:.5f})\t" + "Accuracy Infinity {acc_infinity.val:.3f} ({acc_infinity.avg:.3f})\t" + "Accuracy COCO {acc_coco.val:.3f} ({acc_coco.avg:.3f})".format( + epoch, + i, + len(train_loader), + batch_time=batch_time, + speed=input.size(0) / batch_time.val, + data_time=data_time, + loss=losses, + acc_infinity=acc_infinity, + acc_coco=acc_coco, + ) + ) + else: + msg = ( + "Epoch: [{0}][{1}/{2}]\t" + "Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t" + "Speed {speed:.1f} samples/s\t" + "Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t" + "Loss {loss.val:.5f} ({loss.avg:.5f})\t" + "Accuracy {acc.val:.3f} ({acc.avg:.3f})".format( + epoch, + i, + len(train_loader), + batch_time=batch_time, + speed=input.size(0) / batch_time.val, + data_time=data_time, + loss=losses, + acc=acc, + ) ) - ) logger.info(msg) if config.LOG_WANDB: - wandb.log( - { - "epoch": epoch, - "loss_avg": losses.avg, - "accuracy_avg": acc.avg, - "speed": input.size(0) / batch_time.val, - } - ) + if config.MODEL.NUM_JOINTS == 58: + wandb.log( + { + "epoch": epoch, + "train/loss_avg": losses.avg, + "train/accuracy_infinity_avg": acc_infinity.avg, + "train/accuracy_coco_avg": acc_coco.avg, + "speed": input.size(0) / batch_time.val, + } + ) + else: + wandb.log( + { + "epoch": epoch, + "train/loss_avg": losses.avg, + "train/accuracy_avg": acc.avg, + "speed": input.size(0) / batch_time.val, + } + ) writer = writer_dict["writer"] global_steps = writer_dict["train_global_steps"] writer.add_scalar("train_loss", losses.val, global_steps) - writer.add_scalar("train_acc", acc.val, global_steps) + if config.MODEL.NUM_JOINTS == 58: + 
writer.add_scalar("train_acc_infinity", acc_infinity.val, global_steps) + writer.add_scalar("train_acc_coco", acc_coco.val, global_steps) + else: + writer.add_scalar("train_acc", acc.val, global_steps) writer_dict["train_global_steps"] = global_steps + 1 prefix = "{}_{}".format(os.path.join(output_dir, "train"), i) @@ -250,6 +305,14 @@ def validate( writer.add_scalars("valid", dict(name_values), global_steps) writer_dict["valid_global_steps"] = global_steps + 1 + if config.LOG_WANDB: + wandb.log( + { + "val/loss_avg": losses.avg, + "val/accuracy_avg": acc.avg, + } + ) + return perf_indicator From 28fa82ebcdc98bf68a44bdca5b1cf4edff193912 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 12 Jun 2023 21:11:01 -0400 Subject: [PATCH 26/30] add split accuracy val --- lib/core/function.py | 87 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/lib/core/function.py b/lib/core/function.py index ba8c7d38..c16ab22d 100755 --- a/lib/core/function.py +++ b/lib/core/function.py @@ -190,7 +190,11 @@ def validate( ): batch_time = AverageMeter() losses = AverageMeter() - acc = AverageMeter() + if config.MODEL.NUM_JOINTS == 58: + acc_infinity = AverageMeter() + acc_coco = AverageMeter() + else: + acc = AverageMeter() # switch to evaluate mode model.eval() @@ -243,9 +247,23 @@ def validate( num_images = input.size(0) # measure accuracy and record loss losses.update(loss.item(), num_images) - _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), target.cpu().numpy()) - - acc.update(avg_acc, cnt) + if config.MODEL.NUM_JOINTS == 58: + ( + (_, avg_acc_infinity, cnt_infinity), + ( + _, + avg_acc_coco, + cnt_coco, + ), + pred, + ) = accuracy_infinity_coco(output.cpu().numpy(), target.cpu().numpy()) + acc_infinity.update(avg_acc_infinity, cnt_infinity) + acc_coco.update(avg_acc_coco, cnt_coco) + else: + _, avg_acc, cnt, pred = accuracy( + output.cpu().numpy(), target.cpu().numpy() + ) + acc.update(avg_acc, cnt) # measure elapsed 
time batch_time.update(time.time() - end) @@ -269,14 +287,34 @@ def validate( idx += num_images if i % config.PRINT_FREQ == 0: - msg = ( - "Test: [{0}/{1}]\t" - "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" - "Loss {loss.val:.4f} ({loss.avg:.4f})\t" - "Accuracy {acc.val:.3f} ({acc.avg:.3f})".format( - i, len(val_loader), batch_time=batch_time, loss=losses, acc=acc + if config.MODEL.NUM_JOINTS == 58: + msg = ( + "Test: [{0}/{1}]\t" + "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" + "Loss {loss.val:.4f} ({loss.avg:.4f})\t" + "Accuracy Infinity {acc_infinity.val:.3f} ({acc_infinity.avg:.3f})\t" + "Accuracy COCO {acc_coco.val:.3f} ({acc_coco.avg:.3f})".format( + i, + len(val_loader), + batch_time=batch_time, + loss=losses, + acc_infinity=acc_infinity, + acc_coco=acc_coco, + ) + ) + else: + msg = ( + "Test: [{0}/{1}]\t" + "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" + "Loss {loss.val:.4f} ({loss.avg:.4f})\t" + "Accuracy {acc.val:.3f} ({acc.avg:.3f})".format( + i, + len(val_loader), + batch_time=batch_time, + loss=losses, + acc=acc, + ) ) - ) logger.info(msg) prefix = "{}_{}".format(os.path.join(output_dir, "val"), i) @@ -297,7 +335,11 @@ def validate( writer = writer_dict["writer"] global_steps = writer_dict["valid_global_steps"] writer.add_scalar("valid_loss", losses.avg, global_steps) - writer.add_scalar("valid_acc", acc.avg, global_steps) + if config.MODEL.NUM_JOINTS == 58: + writer.add_scalar("valid_acc_infinity", acc_infinity.avg, global_steps) + writer.add_scalar("valid_acc_coco", acc_coco.avg, global_steps) + else: + writer.add_scalar("valid_acc", acc.avg, global_steps) if isinstance(name_values, list): for name_value in name_values: writer.add_scalars("valid", dict(name_value), global_steps) @@ -306,12 +348,21 @@ def validate( writer_dict["valid_global_steps"] = global_steps + 1 if config.LOG_WANDB: - wandb.log( - { - "val/loss_avg": losses.avg, - "val/accuracy_avg": acc.avg, - } - ) + if config.MODEL.NUM_JOINTS == 58: + wandb.log( + { + 
"val/loss_avg": losses.avg, + "val/accuracy_infinity_avg": acc_infinity.avg, + "val/accuracy_coco_avg": acc_coco.avg, + } + ) + else: + wandb.log( + { + "val/loss_avg": losses.avg, + "val/accuracy_avg": acc.avg, + } + ) return perf_indicator From e0b956b07b273471c4d4376eb5ff7b9ae3b63e00 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 19 Jun 2023 23:26:14 -0400 Subject: [PATCH 27/30] add anatomical logging --- lib/core/evaluate.py | 21 +++++++++++++-------- lib/core/function.py | 30 ++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/lib/core/evaluate.py b/lib/core/evaluate.py index 269ad159..39a0033a 100644 --- a/lib/core/evaluate.py +++ b/lib/core/evaluate.py @@ -102,25 +102,30 @@ def accuracy_infinity_coco(output, target, hm_type="gaussian", thr=0.5): w = output.shape[3] norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 pred_infinity = pred.copy()[infinity_idxs] + pred_anatomical = pred.copy()[infinity_idxs][:, 17:, :] pred_coco = pred.copy()[~infinity_idxs][:, :17, :] + norm_infinity = norm.copy()[infinity_idxs] - norm_coco = norm.copy()[~infinity_idxs] + norm_anatomical = norm.copy()[infinity_idxs][:, 17:, :] + norm_coco = norm.copy()[~infinity_idxs][:, :17, :] + target_infinity = target.copy()[infinity_idxs] target_coco = target.copy()[~infinity_idxs][:, :17, :] - # print("target_coco", target_coco) - # print("target_infinity", target_infinity) + target_anatomical = target.copy()[infinity_idxs][:, 17:, :] + dists_infinity = calc_dists(pred_infinity, target_infinity, norm_infinity) dists_coco = calc_dists(pred_coco, target_coco, norm_coco) + dists_anatomical = calc_dists(pred_anatomical, target_anatomical, norm_anatomical) acc_infinity, avg_acc_infinity, cnt_infinity = get_acc(idx, dists_infinity) acc_coco, avg_acc_coco, cnt_coco = get_acc(list(range(17)), dists_coco) + acc_anatomical, avg_acc_anatomical, cnt_anatomical = get_acc( + list(range(output.shape[1] - 17)), dists_anatomical + ) return ( 
(acc_infinity, avg_acc_infinity, cnt_infinity), - ( - acc_coco, - avg_acc_coco, - cnt_coco, - ), + (acc_anatomical, avg_acc_anatomical, cnt_anatomical), + (acc_coco, avg_acc_coco, cnt_coco), pred, ) diff --git a/lib/core/function.py b/lib/core/function.py index c16ab22d..b004a547 100755 --- a/lib/core/function.py +++ b/lib/core/function.py @@ -37,6 +37,7 @@ def train( losses = AverageMeter() if config.MODEL.NUM_JOINTS == 58: acc_infinity = AverageMeter() + acc_anatomical = AverageMeter() acc_coco = AverageMeter() else: acc = AverageMeter() @@ -82,16 +83,14 @@ def train( if config.MODEL.NUM_JOINTS == 58: ( (_, avg_acc_infinity, cnt_infinity), - ( - _, - avg_acc_coco, - cnt_coco, - ), + (_, avg_acc_anatomical, cnt_anatomical), + (_, avg_acc_coco, cnt_coco), pred, ) = accuracy_infinity_coco( output.detach().cpu().numpy(), target.detach().cpu().numpy() ) acc_infinity.update(avg_acc_infinity, cnt_infinity) + acc_anatomical.update(avg_acc_anatomical, cnt_anatomical) acc_coco.update(avg_acc_coco, cnt_coco) else: _, avg_acc, cnt, pred = accuracy( @@ -112,6 +111,7 @@ def train( "Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t" "Loss {loss.val:.5f} ({loss.avg:.5f})\t" "Accuracy Infinity {acc_infinity.val:.3f} ({acc_infinity.avg:.3f})\t" + "Accuracy Anatomical {acc_anatomical.val:.3f} ({acc_anatomical.avg:.3f})\t" "Accuracy COCO {acc_coco.val:.3f} ({acc_coco.avg:.3f})".format( epoch, i, @@ -121,6 +121,7 @@ def train( data_time=data_time, loss=losses, acc_infinity=acc_infinity, + acc_anatomical=acc_anatomical, acc_coco=acc_coco, ) ) @@ -150,6 +151,7 @@ def train( "epoch": epoch, "train/loss_avg": losses.avg, "train/accuracy_infinity_avg": acc_infinity.avg, + "train/accuracy_anatomical_avg": acc_anatomical.avg, "train/accuracy_coco_avg": acc_coco.avg, "speed": input.size(0) / batch_time.val, } @@ -169,6 +171,9 @@ def train( writer.add_scalar("train_loss", losses.val, global_steps) if config.MODEL.NUM_JOINTS == 58: writer.add_scalar("train_acc_infinity", acc_infinity.val, 
global_steps) + writer.add_scalar( + "train_acc_anatomical", acc_anatomical.val, global_steps + ) writer.add_scalar("train_acc_coco", acc_coco.val, global_steps) else: writer.add_scalar("train_acc", acc.val, global_steps) @@ -192,6 +197,7 @@ def validate( losses = AverageMeter() if config.MODEL.NUM_JOINTS == 58: acc_infinity = AverageMeter() + acc_anatomical = AverageMeter() acc_coco = AverageMeter() else: acc = AverageMeter() @@ -250,14 +256,12 @@ def validate( if config.MODEL.NUM_JOINTS == 58: ( (_, avg_acc_infinity, cnt_infinity), - ( - _, - avg_acc_coco, - cnt_coco, - ), + (_, avg_acc_anatomical, cnt_anatomical), + (_, avg_acc_coco, cnt_coco), pred, ) = accuracy_infinity_coco(output.cpu().numpy(), target.cpu().numpy()) acc_infinity.update(avg_acc_infinity, cnt_infinity) + acc_anatomical.update(avg_acc_anatomical, cnt_anatomical) acc_coco.update(avg_acc_coco, cnt_coco) else: _, avg_acc, cnt, pred = accuracy( @@ -293,12 +297,14 @@ def validate( "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" "Loss {loss.val:.4f} ({loss.avg:.4f})\t" "Accuracy Infinity {acc_infinity.val:.3f} ({acc_infinity.avg:.3f})\t" + "Accuracy Anatomical {acc_anatomical.val:.3f} ({acc_anatomical.avg:.3f})\t" "Accuracy COCO {acc_coco.val:.3f} ({acc_coco.avg:.3f})".format( i, len(val_loader), batch_time=batch_time, loss=losses, acc_infinity=acc_infinity, + acc_anatomical=acc_anatomical, acc_coco=acc_coco, ) ) @@ -337,6 +343,9 @@ def validate( writer.add_scalar("valid_loss", losses.avg, global_steps) if config.MODEL.NUM_JOINTS == 58: writer.add_scalar("valid_acc_infinity", acc_infinity.avg, global_steps) + writer.add_scalar( + "valid_acc_anatomical", acc_anatomical.avg, global_steps + ) writer.add_scalar("valid_acc_coco", acc_coco.avg, global_steps) else: writer.add_scalar("valid_acc", acc.avg, global_steps) @@ -353,6 +362,7 @@ def validate( { "val/loss_avg": losses.avg, "val/accuracy_infinity_avg": acc_infinity.avg, + "val/accuracy_anatomical_avg": acc_anatomical.avg, 
"val/accuracy_coco_avg": acc_coco.avg, } ) From 6be536bc0218e34a8229c13e07f5df3c59c57999 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Mon, 19 Jun 2023 23:53:30 -0400 Subject: [PATCH 28/30] fix norm error --- lib/core/evaluate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/core/evaluate.py b/lib/core/evaluate.py index 39a0033a..d5f126d5 100644 --- a/lib/core/evaluate.py +++ b/lib/core/evaluate.py @@ -106,8 +106,8 @@ def accuracy_infinity_coco(output, target, hm_type="gaussian", thr=0.5): pred_coco = pred.copy()[~infinity_idxs][:, :17, :] norm_infinity = norm.copy()[infinity_idxs] - norm_anatomical = norm.copy()[infinity_idxs][:, 17:, :] - norm_coco = norm.copy()[~infinity_idxs][:, :17, :] + norm_anatomical = norm.copy()[infinity_idxs][17:, :] + norm_coco = norm.copy()[~infinity_idxs][:17, :] target_infinity = target.copy()[infinity_idxs] target_coco = target.copy()[~infinity_idxs][:, :17, :] From 5243621ce25976e721558e73e951518e56467e95 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Tue, 20 Jun 2023 00:17:43 -0400 Subject: [PATCH 29/30] fix norm --- lib/core/evaluate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/core/evaluate.py b/lib/core/evaluate.py index d5f126d5..a3226473 100644 --- a/lib/core/evaluate.py +++ b/lib/core/evaluate.py @@ -106,12 +106,12 @@ def accuracy_infinity_coco(output, target, hm_type="gaussian", thr=0.5): pred_coco = pred.copy()[~infinity_idxs][:, :17, :] norm_infinity = norm.copy()[infinity_idxs] - norm_anatomical = norm.copy()[infinity_idxs][17:, :] - norm_coco = norm.copy()[~infinity_idxs][:17, :] + norm_anatomical = norm.copy()[infinity_idxs] + norm_coco = norm.copy()[~infinity_idxs] target_infinity = target.copy()[infinity_idxs] - target_coco = target.copy()[~infinity_idxs][:, :17, :] target_anatomical = target.copy()[infinity_idxs][:, 17:, :] + target_coco = target.copy()[~infinity_idxs][:, :17, :] dists_infinity = calc_dists(pred_infinity, target_infinity, 
norm_infinity) dists_coco = calc_dists(pred_coco, target_coco, norm_coco) From 80f93f7c0612764cba38985da664aa2ac3491ccd Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Tue, 4 Jul 2023 16:17:12 -0400 Subject: [PATCH 30/30] updated with new anatomical marker set --- demo/demo.py | 10 +++--- .../coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 4 +-- .../hrnet/w48_256x192_adam_lr1e-3.yaml | 2 +- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 2 +- .../hrnet/w48_384x288_adam_lr1e-3.yaml | 2 +- .../hrnet/w48_384x288_adam_lr1e-3_local.yaml | 4 +-- .../hrnet/w64_384x288_adam_lr1e-3.yaml | 2 +- lib/core/function.py | 32 +++++++++---------- lib/dataset/infinity.py | 2 +- lib/dataset/infinity_coco.py | 17 +++------- 10 files changed, 35 insertions(+), 42 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index c9f0e09b..f930f019 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -181,7 +181,7 @@ ] NUM_KPTS = 17 -NUM_KPTS_INFINITY = 41 +NUM_KPTS_INFINITY = 36 NUM_KPTS_INFINITY_COCO = NUM_KPTS + NUM_KPTS_INFINITY CTX = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") @@ -460,9 +460,9 @@ def main(): ) if len(pose_preds) >= 1: for kpt in pose_preds: - if len(kpt) == 58: + if len(kpt) == 53: draw_pose_infinity_coco(kpt, image_bgr) - elif len(kpt) == 41: + elif len(kpt) == 36: draw_pose_infinity(kpt, image_bgr) else: draw_pose(kpt, image_bgr) # draw the poses @@ -522,9 +522,9 @@ def main(): ) if len(pose_preds) >= 1: for kpt in pose_preds: - if len(kpt) == 58: + if len(kpt) == 53: draw_pose_infinity_coco(kpt, image_bgr) - elif len(kpt) == 41: + elif len(kpt) == 36: draw_pose_infinity(kpt, image_bgr) else: draw_pose(kpt, image_bgr) diff --git a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml index d2e1518a..d3c27454 100644 --- a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -25,7 +25,7 @@ DATASET: MODEL: INIT_WEIGHTS: true NAME: pose_hrnet - 
NUM_JOINTS: 58 + NUM_JOINTS: 53 # NUM_JOINTS: 17 PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" TARGET_TYPE: gaussian @@ -113,7 +113,7 @@ TEST: BBOX_THRE: 1.0 IMAGE_THRE: 0.0 IN_VIS_THRE: 0.2 - MODEL_FILE: "outputs/sherlock/model_best1e-3_62_epochs.pth" + MODEL_FILE: "outputs/sherlock/model_best-20h-48.pth" # MODEL_FILE: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" NMS_THRE: 1.0 OKS_THRE: 0.9 diff --git a/experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml b/experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml index 5a301a61..4133991b 100644 --- a/experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml +++ b/experiments/infinity/hrnet/w48_256x192_adam_lr1e-3.yaml @@ -25,7 +25,7 @@ DATASET: MODEL: INIT_WEIGHTS: true NAME: pose_hrnet - NUM_JOINTS: 41 + NUM_JOINTS: 36 PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" TARGET_TYPE: gaussian IMAGE_SIZE: diff --git a/experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml index c7cd54d5..38364bed 100644 --- a/experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -25,7 +25,7 @@ DATASET: MODEL: INIT_WEIGHTS: true NAME: pose_hrnet - NUM_JOINTS: 41 + NUM_JOINTS: 36 PRETRAINED: "models/pytorch/imagenet/hrnet_w48-8ef0771d.pth" TARGET_TYPE: gaussian IMAGE_SIZE: diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml index acf6d4dc..64761e41 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -29,7 +29,7 @@ DATASET: MODEL: INIT_WEIGHTS: true NAME: pose_hrnet - NUM_JOINTS: 58 + NUM_JOINTS: 53 PRETRAINED: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" TARGET_TYPE: gaussian IMAGE_SIZE: diff --git a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml 
b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml index 1ab15f21..b75689fc 100644 --- a/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml +++ b/experiments/infinity_coco/hrnet/w48_384x288_adam_lr1e-3_local.yaml @@ -18,7 +18,7 @@ DATASET: FLIP: true NUM_JOINTS_HALF_BODY: 27 PROB_HALF_BODY: 0.3 - ROOT: "../combined_dataset" + ROOT: "../new_infinity_dataset_2" ROOT_COCO: "data/coco/" ROT_FACTOR: 45 SCALE_FACTOR: 0.35 @@ -29,7 +29,7 @@ DATASET: MODEL: INIT_WEIGHTS: true NAME: pose_hrnet - NUM_JOINTS: 58 + NUM_JOINTS: 53 PRETRAINED: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" TARGET_TYPE: gaussian IMAGE_SIZE: diff --git a/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml index c88476e4..1f537169 100644 --- a/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml +++ b/experiments/infinity_coco/hrnet/w64_384x288_adam_lr1e-3.yaml @@ -29,7 +29,7 @@ DATASET: MODEL: INIT_WEIGHTS: true NAME: pose_hrnet - NUM_JOINTS: 58 + NUM_JOINTS: 53 PRETRAINED: "models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth" TARGET_TYPE: gaussian IMAGE_SIZE: diff --git a/lib/core/function.py b/lib/core/function.py index b004a547..6a0dcd33 100755 --- a/lib/core/function.py +++ b/lib/core/function.py @@ -35,7 +35,7 @@ def train( batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: acc_infinity = AverageMeter() acc_anatomical = AverageMeter() acc_coco = AverageMeter() @@ -80,7 +80,7 @@ def train( # measure accuracy and record loss losses.update(loss.item(), input.size(0)) - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: ( (_, avg_acc_infinity, cnt_infinity), (_, avg_acc_anatomical, cnt_anatomical), @@ -103,7 +103,7 @@ def train( end = time.time() if i % config.PRINT_FREQ == 0: - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: msg = ( 
"Epoch: [{0}][{1}/{2}]\t" "Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t" @@ -145,7 +145,7 @@ def train( ) logger.info(msg) if config.LOG_WANDB: - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: wandb.log( { "epoch": epoch, @@ -169,7 +169,7 @@ def train( writer = writer_dict["writer"] global_steps = writer_dict["train_global_steps"] writer.add_scalar("train_loss", losses.val, global_steps) - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: writer.add_scalar("train_acc_infinity", acc_infinity.val, global_steps) writer.add_scalar( "train_acc_anatomical", acc_anatomical.val, global_steps @@ -195,7 +195,7 @@ def validate( ): batch_time = AverageMeter() losses = AverageMeter() - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: acc_infinity = AverageMeter() acc_anatomical = AverageMeter() acc_coco = AverageMeter() @@ -206,8 +206,8 @@ def validate( model.eval() num_samples = len(val_dataset) - # all_preds = np.zeros((num_samples, 17, 3), dtype=np.float32) - all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) + all_preds = np.zeros((num_samples, 17, 3), dtype=np.float32) + # all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 6)) image_path = [] filenames = [] @@ -217,8 +217,8 @@ def validate( end = time.time() for i, (input, target, target_weight, meta) in enumerate(val_loader): # compute output - outputs = model(input) - # outputs = model(input)[:, :17, :, :] + # outputs = model(input) + outputs = model(input)[:, :17, :, :] if isinstance(outputs, list): output = outputs[-1] else: @@ -226,8 +226,8 @@ def validate( if config.TEST.FLIP_TEST: input_flipped = input.flip(3) - # outputs_flipped = model(input_flipped)[:, :17, :, :] - outputs_flipped = model(input_flipped) + outputs_flipped = model(input_flipped)[:, :17, :, :] + # outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): 
output_flipped = outputs_flipped[-1] @@ -253,7 +253,7 @@ def validate( num_images = input.size(0) # measure accuracy and record loss losses.update(loss.item(), num_images) - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: ( (_, avg_acc_infinity, cnt_infinity), (_, avg_acc_anatomical, cnt_anatomical), @@ -291,7 +291,7 @@ def validate( idx += num_images if i % config.PRINT_FREQ == 0: - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: msg = ( "Test: [{0}/{1}]\t" "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" @@ -341,7 +341,7 @@ def validate( writer = writer_dict["writer"] global_steps = writer_dict["valid_global_steps"] writer.add_scalar("valid_loss", losses.avg, global_steps) - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: writer.add_scalar("valid_acc_infinity", acc_infinity.avg, global_steps) writer.add_scalar( "valid_acc_anatomical", acc_anatomical.avg, global_steps @@ -357,7 +357,7 @@ def validate( writer_dict["valid_global_steps"] = global_steps + 1 if config.LOG_WANDB: - if config.MODEL.NUM_JOINTS == 58: + if config.MODEL.NUM_JOINTS == 53: wandb.log( { "val/loss_avg": losses.avg, diff --git a/lib/dataset/infinity.py b/lib/dataset/infinity.py index 021976de..a9bc1029 100644 --- a/lib/dataset/infinity.py +++ b/lib/dataset/infinity.py @@ -82,7 +82,7 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): self.num_images = len(self.image_set_index) logger.info("=> num_images: {}".format(self.num_images)) - self.num_joints = 41 + self.num_joints = 36 self.flip_pairs = [ [1, 2], [3, 4], diff --git a/lib/dataset/infinity_coco.py b/lib/dataset/infinity_coco.py index 3e236eb3..dc9b371d 100644 --- a/lib/dataset/infinity_coco.py +++ b/lib/dataset/infinity_coco.py @@ -87,7 +87,7 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): self.num_images = len(self.image_set_index) logger.info("=> num_images: {}".format(self.num_images)) - self.num_joints_infinity = 41 + 
self.num_joints_infinity = 36 self.num_joints_coco = 17 self.num_joints = self.num_joints_infinity + self.num_joints_coco @@ -107,9 +107,7 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): [26, 27], [28, 29], [30, 31], - [32, 33], [34, 35], - [39, 40], ] self.flip_pairs_coco = [ [1, 2], @@ -121,7 +119,7 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): [13, 14], [15, 16], ] - self.flip_pairs__infinity = [ + self.flip_pairs_infinity = [ [x + self.num_joints_coco, y + self.num_joints_coco] for x, y in self.flip_pairs_infinity ] @@ -145,12 +143,7 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): 29, 30, 31, - 32, 33, - 34, - 35, - 36, - 38, ) self.lower_body_ids_infinity = ( 12, @@ -169,9 +162,9 @@ def __init__(self, cfg, root, image_set, is_train, transform=None): 25, 26, 27, - 37, - 39, - 40, + 32, + 34, + 35, ) self.upper_body_ids_coco = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)