
Multi-person Detection #6


Merged 3 commits on Jan 13, 2022
55 changes: 48 additions & 7 deletions demo/demo.py
@@ -22,6 +22,7 @@
 from config import update_config
 from core.function import get_final_preds
 from utils.transforms import get_affine_transform
+import pose_estimation.sort as Sort

 import os
 cur_dir = os.path.dirname(os.path.realpath(__file__))
@@ -94,26 +95,62 @@ def draw_bbox(box,img):
     cv2.rectangle(img, box[0], box[1], color=(0, 255, 0),thickness=3)


-def get_person_detection_boxes(model, img, threshold=0.5):
+def get_id_num(tracked_boxes):
+    """
+    Get the SORT tracker ID number of the bounding box with the biggest area.
+    """
+    max_area = 0
+    id_num = 0
+    for box in tracked_boxes:
+        box_area = (box[2] - box[0]) * (box[3] - box[1])
+        if box_area > max_area:
+            max_area = box_area
+            id_num = box[4]
+
+    return id_num
+
+
+def get_person_detection_boxes(model, img, tracker, id_num, threshold=0.5):
     pred = model(img)
     pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i]
                     for i in list(pred[0]['labels'].cpu().numpy())]  # Get the prediction classes
     pred_boxes = [[(i[0], i[1]), (i[2], i[3])]
                   for i in list(pred[0]['boxes'].detach().cpu().numpy())]  # Bounding boxes
     pred_score = list(pred[0]['scores'].detach().cpu().numpy())
     if not pred_score or max(pred_score) < threshold:
-        return []
+        return [], id_num

     # Get list of indices with score greater than threshold
     pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1]
-    pred_boxes = pred_boxes[:1]
+    pred_boxes = pred_boxes[:pred_t+1]
     pred_classes = pred_classes[:pred_t+1]

     person_boxes = []
     for idx, box in enumerate(pred_boxes):
         if pred_classes[idx] == 'person':
+            # Flatten into [bb_x1, bb_y1, bb_x2, bb_y2, score] for use with the SORT tracker
+            box = [coord for pos in box for coord in pos]
+            box.append(pred_score[idx])
             person_boxes.append(box)

-    return person_boxes
+    # Get IDs for each person
+    person_boxes = np.array(person_boxes)
+    boxes_tracked = tracker.update(person_boxes)
+
+    # If this is the first frame, take the ID of the biggest bounding box (the person most in focus, most likely the thrower)
+    if id_num is None:
+        id_num = get_id_num(boxes_tracked)
+
+    # Turn into [[(x1, y1), (x2, y2)]]
+    try:
+        person_box = [box for box in boxes_tracked if box[4] == id_num][0]
+        person_box = [[(person_box[0], person_box[1]), (person_box[2], person_box[3])]]
+        return person_box, id_num
+
+    # If detections weren't made for our thrower in a frame for some reason, return nothing to be smoothed later.
+    # As long as the thrower is detected within the next "max_age" frames, it will be assigned the same ID as before.
+    except IndexError:
+        return [], id_num


 def get_pose_estimation_prediction(pose_model, image, center, scale):
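
The vendored pose_estimation/sort.py is used through two calls here: Sort(max_age=...) and tracker.update(). Assuming it follows the reference SORT interface (abewley/sort), update() takes an N x 5 array of [x1, y1, x2, y2, score] detections and returns rows of [x1, y1, x2, y2, track_id], which is the contract get_id_num() and the box[4] == id_num filter above rely on. A minimal sketch of that contract, with made-up box coordinates:

import numpy as np
import pose_estimation.sort as Sort

# Two hypothetical detections for one frame: [x1, y1, x2, y2, score]
detections = np.array([
    [100., 80., 300., 400., 0.98],  # big box: person in focus, likely the thrower
    [420., 90., 500., 250., 0.95],  # smaller person in the background
])

tracker = Sort.Sort(max_age=3)
tracked = tracker.update(detections)  # rows of [x1, y1, x2, y2, track_id]

# The same area comparison get_id_num() performs above
for x1, y1, x2, y2, track_id in tracked:
    print(f"ID {int(track_id)}: area = {(x2 - x1) * (y2 - y1):.0f}")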
@@ -211,7 +248,7 @@ class Bunch:
     def __init__(self, **kwds):
         self.__dict__.update(kwds)

-def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpts=False, custom_model=None):
+def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpts=False, custom_model=None, max_age=3):

     keypoints = None
     # cudnn related setting
@@ -254,6 +291,10 @@ def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpt
         vid_fps = vidcap.get(cv2.CAP_PROP_FPS)
         out = cv2.VideoWriter(save_path,fourcc, vid_fps, (int(vidcap.get(3)),int(vidcap.get(4))))

+    # Initialize the SORT tracker
+    tracker = Sort.Sort(max_age=max_age)
+    id_num = None
+
     frame_num = 0
     while True:
         ret, image_bgr = vidcap.read()
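
The max_age argument controls how many consecutive frames SORT keeps a track alive without a matching detection; in the reference implementation it defaults to 1, so this PR's default of 3 gives the thrower's ID a few frames of slack. A caller working with high-frame-rate clips might scale it to the video instead; a hypothetical, frame-rate-based choice (not part of this PR):

import cv2
import pose_estimation.sort as Sort

vidcap = cv2.VideoCapture("throw.mp4")          # hypothetical input clip
vid_fps = vidcap.get(cv2.CAP_PROP_FPS) or 30.0  # some containers report 0
max_age = max(3, int(round(vid_fps / 10)))      # ~0.1 s of slack; 6 frames at 60 fps
tracker = Sort.Sort(max_age=max_age)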
@@ -269,14 +310,14 @@ def get_deepHRnet_keypoints(video, output_dir=None, output_video=False, save_kpt
             input.append(img_tensor)

             # object detection box
-            pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.95)
+            pred_boxes, id_num = get_person_detection_boxes(box_model, input, tracker, id_num, threshold=0.95)

             # pose estimation
             if len(pred_boxes) >= 1:
                 for box in pred_boxes:
                     center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1])
                     image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy()
-                    pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale)
+                    pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale)
                     if len(pose_preds)>=1:
                         for i, kpt in enumerate(pose_preds):
                             name = COCO_KEYPOINT_INDEXES[i]
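
With the new parameter threaded through, callers can tune the tracker's dropout tolerance per clip. A hypothetical invocation (paths are made up, and this assumes demo/ is importable as a package):

from demo.demo import get_deepHRnet_keypoints

# max_age=5: tolerate up to 5 consecutive frames of missed detections
# before the thrower's track ID is dropped
get_deepHRnet_keypoints(
    "videos/drive_01.mp4",  # hypothetical input video
    output_dir="output/",
    output_video=True,
    save_kpts=True,
    max_age=5,
)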