Commit 7ea55c2

committed: lesson3 experiments with RPi + NCS2
1 parent 3af8673 commit 7ea55c2

File tree: 3 files changed, +444 -0 lines changed
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
# Assuming you have the SSD MobileNet model and classroom.mp4 downloaded,
# you can run the code using the following command.

python3 app.py -m public/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.xml -o out-modified.h264 -i classroom.mp4

# Also run the following to get usage information:

python3 app.py -h

---------------
usage: Run inference on an input video [-h] -m M [-i I] [-d D] [-ct CT] [-o O]
                                       [-t T]

required arguments:
  -m M    The location of the model XML file

optional arguments:
  -i I    The location of the input file
  -d D    The device name, if not 'CPU'
  -ct CT  The confidence threshold to use with the bounding boxes
  -o O    The output file path
  -t T    The input type VIDEO/IMAGE
---------------
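
# The script can also annotate a single image via "-t IMAGE". A hypothetical
# example (input.jpg and boxes.jpg are placeholder names, not files from this
# commit):

python3 app.py -m public/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.xml -t IMAGE -i input.jpg -o boxes.jpg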
Lines changed: 317 additions & 0 deletions
@@ -0,0 +1,317 @@
# This code is an enhancement of the Lesson 3 LAB code from the Udacity
# Intel Edge AI Foundation Course.
# The code has been customized a bit to run on RPi + NCS2.
# TODO: more enhancements are needed, as some of this code is experimental.

import argparse
import threading
import time
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
from imutils.video import FPS

from inference import Network


INPUT_STREAM = "/home/pi/mydemo/test_video.mp4"
#CPU_EXTENSION = "/opt/intel/openvino/deployment_tools/inference_engine/lib/intel64/libcpu_extension_sse4.so"


def get_args():
    '''
    Gets the arguments from the command line.
    '''
    parser = argparse.ArgumentParser("Run inference on an input video")
    # -- Create the descriptions for the commands
    m_desc = "The location of the model XML file"
    i_desc = "The location of the input file"
    d_desc = "The device name, if not 'CPU'"
    ### Add additional arguments and descriptions for:
    ### 1) Different confidence thresholds used to draw bounding boxes
    ct_desc = "The confidence threshold to use with the bounding boxes"
    o_desc = "The output file path"
    t_desc = "The input type VIDEO/IMAGE"

    # -- Add required and optional groups
    parser._action_groups.pop()
    required = parser.add_argument_group('required arguments')
    optional = parser.add_argument_group('optional arguments')

    # -- Create the arguments
    required.add_argument("-m", help=m_desc, required=True)
    optional.add_argument("-i", help=i_desc, default=INPUT_STREAM)
    optional.add_argument("-d", help=d_desc, default='MYRIAD')
    optional.add_argument("-ct", help=ct_desc, default=0.5)
    optional.add_argument("-o", help=o_desc, default='out.h264')
    optional.add_argument("-t", help=t_desc, default='VIDEO')
    args = parser.parse_args()

    return args


# Initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class.
# For this model I had to swap car and bird, and swap aeroplane and person.
CLASSES = ["background", "person", "bicycle", "car", "boat",
           "aeroplane", "bus", "bird", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "bottle", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]

# use this for the person-vehicle-bike-detection-crossroad model
#CLASSES = ["person", "bicycle", "car"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

def draw_boxes(frame, detections, args, w, h):
    '''
    Draw bounding boxes onto the frame.
    '''
    # Loop over the detections, e.g. shape 1x1x100x7, so detections.shape[2]
    # will be 100. Each detection holds the image id, the class of the
    # object, the confidence, and two corners (xmin, ymin, xmax, ymax)
    # that make up the bounding box, in that order.
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the `confidence` is
        # greater than the minimum confidence
        if confidence >= args.ct:
            # extract the index of the class label from the detections,
            # then compute the (x, y)-coordinates of the bounding box
            # for the object; index 1 has the detected class
            idx = int(detections[0, 0, i, 1])
            # indices 3 to 7 have the bounding box corners
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # skip detections whose label index falls outside CLASSES
            if idx >= len(CLASSES):
                continue
            # draw the prediction on top of the frame
            #print('class ={}'.format(CLASSES[idx]))
            label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
            #cv2.rectangle(frame, (startX, startY), (endX, endY), COLORS[idx], 4)
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 4)
            # calculate the y-coordinate used to write the label on the
            # frame depending on the bounding box coordinate
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(frame, label, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 255), 2)
                        #cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
    return frame


def preprocessing(input_image, height, width):
    '''
    Given an input image and the network input size (height and width):
    - Resize to width and height
    - Transpose the final "channel" dimension to be first
    - Reshape the image to add a "batch" of 1 at the start
    '''
    image = np.copy(input_image)
    image = cv2.resize(image, (width, height))
    # change data layout from HxWxC to CxHxW
    image = image.transpose((2, 0, 1))
    image = image.reshape(1, 3, height, width)

    return image

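# A sketch of the layout change performed above, assuming a 300x300 network
# input (typical for SSD MobileNet V2; the actual shape comes from
# plugin.get_input_shape()):
#   frame (720, 1280, 3) HxWxC -> resize -> (300, 300, 3)
#   -> transpose((2, 0, 1)) -> (3, 300, 300) CxHxW
#   -> reshape -> (1, 3, 300, 300), the NCHW batch layout the IE expects
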
def infer_on_image(args):
    print('INFER ON IMAGE')
    # Convert the confidence arg to a float
    args.ct = float(args.ct)

    ### Initialize the Inference Engine
    plugin = Network()
    ### Load the network model into the IE
    plugin.load_model(args.m, args.d)
    net_input_shape = plugin.get_input_shape()
    # Read the input image
    image = cv2.imread(args.i)
    h, w = net_input_shape[2], net_input_shape[3]

    ### Preprocess the input image
    preprocessed_image = preprocessing(image, h, w)

    ### Perform inference on the image
    plugin.async_inference(preprocessed_image)
    ### Get the output of inference
    if plugin.wait() == 0:
        output = plugin.extract_output()

        # draw_boxes needs the original image size so the normalized box
        # coordinates scale back onto the full image, not the network input
        image_h, image_w = image.shape[:2]
        image = draw_boxes(image, output, args, image_w, image_h)
        cv2.imwrite(args.o, image)


def infer_on_video(args):
    print('INFER ON VIDEO')
    # Convert the confidence arg to a float
    args.ct = float(args.ct)

    ### Initialize the Inference Engine
    plugin = Network()
    ### Load the network model into the IE
    plugin.load_model(args.m, args.d)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    #out = cv2.VideoWriter('/home/pi/mydemo/out.mp4', 0x00000021, 30, (width,height))
    out_file = args.o
    #out = cv2.VideoWriter('out_file', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, (width, height))
    #out = cv2.VideoWriter('out_file', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30, (width, height))
    out = cv2.VideoWriter(out_file, cv2.VideoWriter_fourcc('H', '2', '6', '4'), 30, (width, height))

    # Start the FPS (frames per second) recorder
    fps = FPS().start()

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        #key_pressed = cv2.waitKey(60)

        ### Pre-process the frame
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        ### Perform inference on the frame
        plugin.async_inference(p_frame)
        ### Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            ### Update the frame to include detected bounding boxes
            frame = draw_boxes(frame, result, args, width, height)
            # Write out the frame
            out.write(frame)

        # Update the FPS counter
        fps.update()
        # Break if escape key pressed
        #if key_pressed == 27:
        #    break

    # Release the out writer, capture, and destroy any OpenCV windows
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx FPS: {:.2f}".format(fps.fps()))

    out.release()
    cap.release()
    cv2.destroyAllWindows()


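# A note on the threaded path below: two frames per loop iteration are
# handed to process_frame with request ids 0 and 1, so two inferences can
# be in flight on the NCS2 at once (this assumes inference.py creates the
# ExecutableNetwork with num_requests >= 2). The shared condition lock
# serializes the wait/extract step on the single plugin, and results are
# collected in submission order so frames are written in sequence.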
def process_frame(condition, plugin, frame, net_input_shape, req_id, args, width, height):
    ### Pre-process the frame
    p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
    p_frame = p_frame.transpose((2, 0, 1))
    p_frame = p_frame.reshape(1, *p_frame.shape)
    ### Submit the frame to the inference engine
    plugin.async_inference(p_frame)
    ### Get the output of inference; the condition lock serializes access
    ### to the shared plugin across worker threads
    condition.acquire()
    try:
        if plugin.wait(req_id) == 0:
            result = plugin.extract_output()
        else:
            return None
    except Exception:
        return None
    finally:
        condition.release()
    ### Update the frame to include detected bounding boxes
    frame = draw_boxes(frame, result, args, width, height)
    return frame


def infer_on_video_parallel(args):
    print('INFER ON VIDEO PARALLEL')
    # Convert the confidence arg to a float
    args.ct = float(args.ct)

    executor = ThreadPoolExecutor(max_workers=2)
    condition = threading.Condition()
    ### Initialize the Inference Engine
    plugin = Network()
    ### Load the network model into the IE
    plugin.load_model(args.m, args.d)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    out_file = args.o
    out = cv2.VideoWriter(out_file, cv2.VideoWriter_fourcc('H', '2', '6', '4'), 30, (width, height))

    # Start the FPS (frames per second) recorder
    fps = FPS().start()
    # Process frames until the video ends, or process is exited
    while cap.isOpened():

        #key_pressed = cv2.waitKey(60)

        # Read the next frame and hand it to worker 0
        flag0, frame0 = cap.read()
        if not flag0:
            break
        future0 = executor.submit(process_frame, condition, plugin, frame0,
                                  net_input_shape, 0, args, width, height)

        # Read the following frame and hand it to worker 1
        flag1, frame1 = cap.read()
        if not flag1:
            break
        future1 = executor.submit(process_frame, condition, plugin, frame1,
                                  net_input_shape, 1, args, width, height)

        # Collect the results in order so frames are written in sequence
        result = future0.result()
        if result is None:
            print('none result frame0')
            break
        else:
            # Write out the frame
            fps.update()
            out.write(result)

        result = future1.result()
        if result is None:
            print('none result frame1')
            break
        else:
            # Write out the frame
            fps.update()
            out.write(result)

        # Break if escape key pressed
        #if key_pressed == 27:
        #    break

    # Release the out writer, capture, and destroy any OpenCV windows
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx FPS: {:.2f}".format(fps.fps()))
    out.release()
    cap.release()
    cv2.destroyAllWindows()


def main():
    args = get_args()
    input_type = args.t
    if input_type == 'VIDEO':
        #infer_on_video(args)
        infer_on_video_parallel(args)
    else:
        infer_on_image(args)


if __name__ == "__main__":
    main()
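
Note: the third changed file (presumably inference.py, which provides the Network class imported above) is not shown in this diff. For orientation, here is a minimal sketch of such a wrapper, assuming the OpenVINO 2020-era IECore Python API used in the Udacity course. The method names mirror the calls in app.py, but the bodies are illustrative, not the committed implementation.

# Sketch of inference.py; illustrative only, not the committed file.
import os

from openvino.inference_engine import IECore


class Network:
    def __init__(self):
        self.net = None
        self.exec_net = None
        self.input_blob = None
        self.output_blob = None

    def load_model(self, model_xml, device):
        ie = IECore()
        # The .bin weights file is assumed to sit next to the .xml
        model_bin = os.path.splitext(model_xml)[0] + '.bin'
        self.net = ie.read_network(model=model_xml, weights=model_bin)
        self.input_blob = next(iter(self.net.inputs))
        self.output_blob = next(iter(self.net.outputs))
        # Two requests so infer_on_video_parallel can keep the NCS2 busy
        self.exec_net = ie.load_network(network=self.net,
                                        device_name=device,
                                        num_requests=2)

    def get_input_shape(self):
        # NCHW shape of the network's input layer, e.g. [1, 3, 300, 300]
        return self.net.inputs[self.input_blob].shape

    def async_inference(self, image, req_id=0):
        self.exec_net.start_async(request_id=req_id,
                                  inputs={self.input_blob: image})

    def wait(self, req_id=0):
        # Returns 0 (OK) once the request has completed
        return self.exec_net.requests[req_id].wait(-1)

    def extract_output(self, req_id=0):
        return self.exec_net.requests[req_id].outputs[self.output_blob]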
