Skip to content

Commit a946f69

Browse files
committed
my projects
1 parent ce654af commit a946f69

File tree

1 file changed

+97
-0
lines changed

1 file changed

+97
-0
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import cv2
2+
import numpy as np
3+
4+
# print(output.keys(), output.shape)
5+
def handle_pose(output, input_shape):
    '''
    Handle the output of the Pose Estimation model.

    Extracts only the keypoint heatmaps (blob "Mconv7_stage2_L2",
    shape (1, 19, H', W')) and ignores the Part Affinity Fields blob.
    Each of the 19 heatmaps is resized back to the spatial size of the
    original input image.

    Returns a numpy array of shape (19, input_H, input_W).
    '''
    # Keypoint heatmaps only; the L1 blob holds Part Affinity Fields.
    heatmaps = output['Mconv7_stage2_L2']
    n_maps = heatmaps.shape[1]
    in_h, in_w = input_shape[0], input_shape[1]

    # Allocate one full-resolution plane per keypoint heatmap.
    out_heatmap = np.zeros([n_maps, in_h, in_w])

    # cv2.resize takes its target size as (width, height), hence the swap.
    for idx in range(n_maps):
        out_heatmap[idx] = cv2.resize(heatmaps[0][idx], (in_w, in_h))

    return out_heatmap
26+
27+
28+
def handle_text(output, input_shape):
    '''
    Handle the output of the Text Detection model.

    Extracts only the per-pixel text/no-text classification logits
    (blob "model/segm_logits/add", shape (1, 2, 192, 320)) and ignores
    the pixel-linkage blob. Each of the two class planes is resized
    back to the spatial size of the original input image.

    Returns a numpy array of shape (2, input_H, input_W).
    '''
    # Per-pixel text/no-text logits; the other blob holds pixel linkage.
    text_classes = output['model/segm_logits/add']
    n_classes = text_classes.shape[1]
    in_h, in_w = input_shape[0], input_shape[1]

    # np.empty is safe here: every plane is overwritten in the loop below.
    out_text = np.empty([n_classes, in_h, in_w])

    # cv2.resize takes its target size as (width, height), hence the swap.
    for idx in range(n_classes):
        out_text[idx] = cv2.resize(text_classes[0][idx], (in_w, in_h))

    return out_text
44+
45+
46+
def handle_car(output, input_shape):
    '''
    Handle the output of the Car Metadata model.

    The model emits two softmax blobs:
      - "color": shape (1, 7, 1, 1) over
        [white, gray, yellow, red, green, blue, black]
      - "type":  shape (1, 4, 1, 1) over [car, bus, truck, van]

    Returns a (color_pred, type_pred) pair: the argmax index of each
    softmax output.
    '''
    # np.argmax over the whole blob returns the flat index, which for a
    # (1, C, 1, 1) softmax is exactly the winning class index.
    color_pred = np.argmax(output['color'])
    type_pred = np.argmax(output['type'])
    return color_pred, type_pred
63+
64+
65+
def handle_output(model_type):
    '''
    Return the output-handling function for the given model_type.

    Known types are "POSE", "TEXT" and "CAR_META"; any other value
    yields None.
    '''
    # Guard-clause dispatch; handler names are resolved lazily at call
    # time, so this function works regardless of definition order.
    if model_type == "CAR_META":
        return handle_car
    if model_type == "TEXT":
        return handle_text
    if model_type == "POSE":
        return handle_pose
    return None
78+
79+
80+
'''
The function below is carried over from the previous exercise.
Call it appropriately in `app.py` to preprocess the input image.
'''
85+
def preprocessing(input_image, height, width):
    '''
    Prepare an HxWxC image for network input.

    Given an input image and a target height and width:
      - Resize to (width, height)
      - Transpose the channel dimension to be first (C, H, W)
      - Prepend a batch dimension of 1 -> (1, C, H, W)

    Returns the preprocessed image as a numpy array.
    '''
    image = np.copy(input_image)
    # cv2.resize takes its target size as (width, height).
    image = cv2.resize(image, (width, height))
    image = image.transpose((2, 0, 1))
    # Use the actual channel count rather than a hard-coded 3, so inputs
    # with other channel counts (e.g. RGBA) are handled too; identical
    # result for the usual 3-channel BGR case.
    image = image.reshape(1, *image.shape)

    return image

0 commit comments

Comments
 (0)