From 2355340fec022273a2dfa9ce1099aec9e31f2720 Mon Sep 17 00:00:00 2001 From: Russell Montalbano Date: Fri, 17 Dec 2021 15:12:34 -0500 Subject: [PATCH 1/3] Choose correct person kpts --- demo/demo.py | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index ab0e449a..52e098ef 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -22,6 +22,7 @@ from config import update_config from core.function import get_final_preds from utils.transforms import get_affine_transform +import pose_estimation.sort as Sort import os cur_dir = os.path.dirname(os.path.realpath(__file__)) @@ -94,7 +95,19 @@ def draw_bbox(box,img): cv2.rectangle(img, box[0], box[1], color=(0, 255, 0),thickness=3) -def get_person_detection_boxes(model, img, threshold=0.5): +def get_id_num(tracked_boxes): + max_area = 0 + id_num = 0 + for box in tracked_boxes: + box_area = (box[2] - box[0]) * (box[3] - box[1]) + if box_area > max_area: + max_area = box_area + id_num = box[4] + + return id_num + + +def get_person_detection_boxes(model, img, tracker, id_num, threshold=0.5): pred = model(img) pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].cpu().numpy())] # Get the Prediction Score @@ -105,15 +118,30 @@ def get_person_detection_boxes(model, img, threshold=0.5): return [] # Get list of index with score greater than threshold pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1] - pred_boxes = pred_boxes[:1] + pred_boxes = pred_boxes[:pred_t+1] pred_classes = pred_classes[:pred_t+1] person_boxes = [] for idx, box in enumerate(pred_boxes): if pred_classes[idx] == 'person': + # Create array of structure [bb_x1, bb_y1, bb_x2, bb_y2, score] for use with SORT + box = [coord for pos in box for coord in pos] + box.append(pred_score[idx]) person_boxes.append(box) + + # Get ID's for each person + person_boxes = np.array(person_boxes) + boxes_tracked = tracker.update(person_boxes) + + # If this is 
the first frame, get the ID of the bigger bounding box (person more in focus) + if id_num is None: + id_num = get_id_num(boxes_tracked) + + # Turn into [[(x1, y2), (x2, y2)]] + person_box = [box for box in boxes_tracked if box[4] == id_num][0] + person_box = [[(person_box[0], person_box[1]), (person_box[2], person_box[3])]] - return person_boxes + return person_box, id_num def get_pose_estimation_prediction(pose_model, image, center, scale): @@ -254,6 +282,9 @@ def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpt vid_fps = vidcap.get(cv2.CAP_PROP_FPS) out = cv2.VideoWriter(save_path,fourcc, vid_fps, (int(vidcap.get(3)),int(vidcap.get(4)))) + tracker = Sort.Sort(max_age=3) + id_num = None + frame_num = 0 while True: ret, image_bgr = vidcap.read() @@ -269,14 +300,14 @@ def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpt input.append(img_tensor) # object detection box - pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.95) + pred_boxes, id_num = get_person_detection_boxes(box_model, input, tracker, id_num, threshold=0.95) # pose estimation if len(pred_boxes) >= 1: for box in pred_boxes: center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() - pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) + pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) if len(pose_preds)>=1: for i, kpt in enumerate(pose_preds): name = COCO_KEYPOINT_INDEXES[i] From 676df87cb28140c0edc08eaad38b09e0b82ecfa1 Mon Sep 17 00:00:00 2001 From: Russell Montalbano Date: Fri, 31 Dec 2021 10:45:25 -0500 Subject: [PATCH 2/3] add comments --- demo/demo.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index 52e098ef..0974fb28 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -115,7 +115,8 @@ def 
get_person_detection_boxes(model, img, tracker, id_num, threshold=0.5): for i in list(pred[0]['boxes'].detach().cpu().numpy())] # Bounding boxes pred_score = list(pred[0]['scores'].detach().cpu().numpy()) if not pred_score or max(pred_score) < threshold: return [] - # Get list of index with score greater than threshold + # Get list of index with score greater than threshold + # Use the last such index to keep every detection above the threshold pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1] pred_boxes = pred_boxes[:pred_t+1] @@ -124,7 +125,7 @@ def get_person_detection_boxes(model, img, tracker, id_num, threshold=0.5): person_boxes = [] for idx, box in enumerate(pred_boxes): if pred_classes[idx] == 'person': - # Create array of structure [bb_x1, bb_y1, bb_x2, bb_y2, score] for use with SORT + # Create array of structure [bb_x1, bb_y1, bb_x2, bb_y2, score] for use with SORT tracker box = [coord for pos in box for coord in pos] box.append(pred_score[idx]) person_boxes.append(box) @@ -133,15 +134,20 @@ def get_person_detection_boxes(model, img, tracker, id_num, threshold=0.5): person_boxes = np.array(person_boxes) boxes_tracked = tracker.update(person_boxes) - # If this is the first frame, get the ID of the bigger bounding box (person more in focus) + # If this is the first frame, get the ID of the bigger bounding box (person more in focus, most likely the thrower) if id_num is None: id_num = get_id_num(boxes_tracked) # Turn into [[(x1, y2), (x2, y2)]] - person_box = [box for box in boxes_tracked if box[4] == id_num][0] - person_box = [[(person_box[0], person_box[1]), (person_box[2], person_box[3])]] + try: + person_box = [box for box in boxes_tracked if box[4] == id_num][0] + person_box = [[(person_box[0], person_box[1]), (person_box[2], person_box[3])]] + return person_box, id_num - return person_box, id_num + # If detections weren't made for our thrower in a frame for some reason, return nothing to be smoothed later + # As long as the thrower is detected within the next 3 frames, it will be assigned the same ID as before + except IndexError: + return [], id_num def get_pose_estimation_prediction(pose_model, image, center, scale): @@ -282,6 +288,7 @@ def get_deepHRnet_keypoints(video, 
output_dir=None, output_video=False, save_kpt vid_fps = vidcap.get(cv2.CAP_PROP_FPS) out = cv2.VideoWriter(save_path,fourcc, vid_fps, (int(vidcap.get(3)),int(vidcap.get(4)))) + # Initialize SORT Tracker tracker = Sort.Sort(max_age=3) id_num = None From 1fe10ca135248a915afc1ee5e6e46d2722d8ce08 Mon Sep 17 00:00:00 2001 From: Russell Montalbano Date: Sun, 9 Jan 2022 14:29:43 -0500 Subject: [PATCH 3/3] add comments --- demo/demo.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index 0974fb28..b2210ad8 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -96,6 +96,9 @@ def draw_bbox(box,img): def get_id_num(tracked_boxes): + """ + Get the SORT tracker ID number of the bounding box with the biggest area + """ max_area = 0 id_num = 0 for box in tracked_boxes: @@ -145,7 +148,7 @@ def get_person_detection_boxes(model, img, tracker, id_num, threshold=0.5): return person_box, id_num # If detections weren't made for our thrower in a frame for some reason, return nothing to be smoothed later - # As long as the thrower is detected within the next 3 frames, it will be assigned the same ID as before + # As long as the thrower is detected within the next "max_age" frames, it will be assigned the same ID as before except IndexError: return [], id_num @@ -245,7 +248,7 @@ class Bunch: def __init__(self, **kwds): self.__dict__.update(kwds) -def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpts=False, custom_model=None): +def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpts=False, custom_model=None, max_age=3): keypoints = None # cudnn related setting @@ -289,7 +292,7 @@ def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpt out = cv2.VideoWriter(save_path,fourcc, vid_fps, (int(vidcap.get(3)),int(vidcap.get(4)))) # Initialize SORT Tracker - tracker = Sort.Sort(max_age=3) + tracker = Sort.Sort(max_age=max_age) id_num = None frame_num = 0