Skip to content

Commit 0d047f4

Browse files
authored
small yolov1 by tf
1 parent 0355edc commit 0d047f4

File tree

1 file changed

+237
-0
lines changed

1 file changed

+237
-0
lines changed

ObjectDetections/yolo/yolo_tf.py

+237
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
"""
2+
YOLO v1 object detector implemented with TensorFlow.
3+
"""
4+
5+
import numpy as np
6+
import tensorflow as tf
7+
import cv2
8+
9+
10+
def leak_relu(x, alpha=0.1):
    """Leaky ReLU: element-wise maximum of ``alpha * x`` and ``x``.

    Args:
        x: input tensor.
        alpha: multiplier applied to ``x`` for the "leaky" branch.

    Returns:
        The result of ``tf.maximum(alpha * x, x)``.
    """
    leaked = alpha * x
    return tf.maximum(leaked, x)
12+
13+
class Yolo(object):
    """YOLO v1 ("small" variant) detector built as a TensorFlow 1.x graph.

    Constructing an instance opens a ``tf.Session``, builds the network and
    the box-decoding / non-max-suppression ops, and restores the weights
    from a checkpoint.  Detection on an image file is then available through
    ``detect_from_file``.
    """

    def __init__(self, weights_file, verbose=True):
        """Build the graph and restore the weights.

        Args:
            weights_file: checkpoint path handed to ``tf.train.Saver.restore``.
            verbose: when true, print layer shapes while building the graph
                and the decoded boxes while showing results.
        """
        self.verbose = verbose
        # detection params
        self.S = 7  # cell size (the output grid is S x S)
        self.B = 2  # boxes_per_cell
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow", "diningtable",
                        "dog", "horse", "motorbike", "person", "pottedplant",
                        "sheep", "sofa", "train", "tvmonitor"]
        self.C = len(self.classes)  # number of classes
        # Offsets for the box centers (top-left point of each cell), both of
        # shape [S, S, B]: x_offset[i, j, b] == j and y_offset[i, j, b] == i.
        self.x_offset = np.tile(
            np.arange(self.S)[np.newaxis, :, np.newaxis],
            (self.S, 1, self.B))
        self.y_offset = np.transpose(self.x_offset, [1, 0, 2])

        # NOTE: the three values below are baked into the graph by
        # _build_detector(); changing them afterwards has no effect.
        self.threshold = 0.2  # confidence score threshold
        self.iou_threshold = 0.4
        # the maximum number of boxes to be selected by non max suppression
        self.max_output_size = 10

        self.sess = tf.Session()
        self._build_net()
        self._build_detector()
        self._load_weights(weights_file)

    def _build_net(self):
        """Build the network and store its output in ``self.predicts``.

        The input placeholder ``self.images`` has shape [None, 448, 448, 3].
        The layers must be created in exactly this order: variables are
        unnamed, so the checkpoint restore depends on creation order.
        """
        if self.verbose:
            print("Start to build the network ...")
        self.images = tf.placeholder(tf.float32, [None, 448, 448, 3])
        net = self._conv_layer(self.images, 1, 64, 7, 2)
        net = self._maxpool_layer(net, 1, 2, 2)
        net = self._conv_layer(net, 2, 192, 3, 1)
        net = self._maxpool_layer(net, 2, 2, 2)
        net = self._conv_layer(net, 3, 128, 1, 1)
        net = self._conv_layer(net, 4, 256, 3, 1)
        net = self._conv_layer(net, 5, 256, 1, 1)
        net = self._conv_layer(net, 6, 512, 3, 1)
        net = self._maxpool_layer(net, 6, 2, 2)
        net = self._conv_layer(net, 7, 256, 1, 1)
        net = self._conv_layer(net, 8, 512, 3, 1)
        net = self._conv_layer(net, 9, 256, 1, 1)
        net = self._conv_layer(net, 10, 512, 3, 1)
        net = self._conv_layer(net, 11, 256, 1, 1)
        net = self._conv_layer(net, 12, 512, 3, 1)
        net = self._conv_layer(net, 13, 256, 1, 1)
        net = self._conv_layer(net, 14, 512, 3, 1)
        net = self._conv_layer(net, 15, 512, 1, 1)
        net = self._conv_layer(net, 16, 1024, 3, 1)
        net = self._maxpool_layer(net, 16, 2, 2)
        net = self._conv_layer(net, 17, 512, 1, 1)
        net = self._conv_layer(net, 18, 1024, 3, 1)
        net = self._conv_layer(net, 19, 512, 1, 1)
        net = self._conv_layer(net, 20, 1024, 3, 1)
        net = self._conv_layer(net, 21, 1024, 3, 1)
        net = self._conv_layer(net, 22, 1024, 3, 2)
        net = self._conv_layer(net, 23, 1024, 3, 1)
        net = self._conv_layer(net, 24, 1024, 3, 1)
        net = self._flatten(net)
        net = self._fc_layer(net, 25, 512, activation=leak_relu)
        net = self._fc_layer(net, 26, 4096, activation=leak_relu)
        # Final layer is linear: S*S*C class probs + S*S*B confidences
        # + S*S*B*4 box coordinates.
        net = self._fc_layer(net, 27, self.S * self.S * (self.C + 5 * self.B))
        self.predicts = net

    def _build_detector(self):
        """Interpret the net output and get the predicted boxes.

        Only the first image of the batch (``self.predicts[0]``) is decoded.
        Creates the placeholders ``self.width`` / ``self.height`` (size of
        the original image) and the output tensors ``self.scores``,
        ``self.boxes`` (center x, center y, w, h in original-image pixels)
        and ``self.box_classes`` (indices into ``self.classes``).
        """
        # the width and height of the original image
        self.width = tf.placeholder(tf.float32, name="img_w")
        self.height = tf.placeholder(tf.float32, name="img_h")
        # get class prob, confidence, boxes from net output
        idx1 = self.S * self.S * self.C
        idx2 = idx1 + self.S * self.S * self.B
        # class prediction
        class_probs = tf.reshape(self.predicts[0, :idx1],
                                 [self.S, self.S, self.C])
        # confidence
        confs = tf.reshape(self.predicts[0, idx1:idx2],
                           [self.S, self.S, self.B])
        # boxes -> (x, y, w, h)
        boxes = tf.reshape(self.predicts[0, idx2:],
                           [self.S, self.S, self.B, 4])

        # convert the x, y to the coordinates relative to the top left point
        # of the image; the predictions of w, h are the square root, so they
        # are squared; everything is then scaled by the image width/height
        x_off = tf.constant(self.x_offset, dtype=tf.float32)
        y_off = tf.constant(self.y_offset, dtype=tf.float32)
        boxes = tf.stack([
            (boxes[:, :, :, 0] + x_off) / self.S * self.width,
            (boxes[:, :, :, 1] + y_off) / self.S * self.height,
            tf.square(boxes[:, :, :, 2]) * self.width,
            tf.square(boxes[:, :, :, 3]) * self.height], axis=3)

        # class-specific confidence scores [S, S, B, C]
        scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)

        scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C]
        boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4]

        # find each box class, only select the max score
        box_classes = tf.argmax(scores, axis=1)
        box_class_scores = tf.reduce_max(scores, axis=1)

        # filter the boxes by the score threshold
        filter_mask = box_class_scores >= self.threshold
        scores = tf.boolean_mask(box_class_scores, filter_mask)
        boxes = tf.boolean_mask(boxes, filter_mask)
        box_classes = tf.boolean_mask(box_classes, filter_mask)

        # non max suppression (do not distinguish different classes)
        # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
        # box (x, y, w, h) -> box (x1, y1, x2, y2)
        half_w = 0.5 * boxes[:, 2]
        half_h = 0.5 * boxes[:, 3]
        _boxes = tf.stack([boxes[:, 0] - half_w, boxes[:, 1] - half_h,
                           boxes[:, 0] + half_w, boxes[:, 1] + half_h],
                          axis=1)
        nms_indices = tf.image.non_max_suppression(
            _boxes, scores, self.max_output_size, self.iou_threshold)
        self.scores = tf.gather(scores, nms_indices)
        self.boxes = tf.gather(boxes, nms_indices)
        self.box_classes = tf.gather(box_classes, nms_indices)

    def _conv_layer(self, x, id, num_filters, filter_size, stride):
        """Conv layer followed by leaky ReLU.

        Args:
            x: input tensor; its last dimension is used as the channel count.
            id: layer number, used only in the verbose message.
            num_filters: number of output channels.
            filter_size: side length of the square kernel.
            stride: stride applied to both spatial dimensions.

        Returns:
            The activated output tensor.
        """
        in_channels = x.get_shape().as_list()[-1]
        weight = tf.Variable(tf.truncated_normal(
            [filter_size, filter_size, in_channels, num_filters], stddev=0.1))
        bias = tf.Variable(tf.zeros([num_filters, ]))
        # Pad explicitly by filter_size // 2 on each spatial side and then
        # convolve with padding="VALID" (instead of relying on "SAME").
        pad_size = filter_size // 2
        pad_mat = np.array([[0, 0], [pad_size, pad_size],
                            [pad_size, pad_size], [0, 0]])
        x_pad = tf.pad(x, pad_mat)
        conv = tf.nn.conv2d(x_pad, weight, strides=[1, stride, stride, 1],
                            padding="VALID")
        output = leak_relu(tf.nn.bias_add(conv, bias))
        if self.verbose:
            print(" Layer %d: type=Conv, num_filter=%d, filter_size=%d, stride=%d, output_shape=%s"
                  % (id, num_filters, filter_size, stride,
                     str(output.get_shape())))
        return output

    def _fc_layer(self, x, id, num_out, activation=None):
        """Fully connected layer.

        Args:
            x: 2-D input tensor; its last dimension is the input width.
            id: layer number, used only in the verbose message.
            num_out: number of output units.
            activation: optional callable applied to the affine output.

        Returns:
            The (optionally activated) output tensor.
        """
        num_in = x.get_shape().as_list()[-1]
        weight = tf.Variable(tf.truncated_normal([num_in, num_out],
                                                 stddev=0.1))
        bias = tf.Variable(tf.zeros([num_out, ]))
        output = tf.nn.xw_plus_b(x, weight, bias)
        if activation:
            output = activation(output)
        if self.verbose:
            print(" Layer %d: type=Fc, num_out=%d, output_shape=%s"
                  % (id, num_out, str(output.get_shape())))
        return output

    def _maxpool_layer(self, x, id, pool_size, stride):
        """Max-pooling layer with "SAME" padding.

        Args:
            x: input tensor.
            id: layer number, used only in the verbose message.
            pool_size: side length of the square pooling window.
            stride: stride applied to both spatial dimensions.

        Returns:
            The pooled tensor.
        """
        output = tf.nn.max_pool(x, [1, pool_size, pool_size, 1],
                                strides=[1, stride, stride, 1],
                                padding="SAME")
        if self.verbose:
            print(" Layer %d: type=MaxPool, pool_size=%d, stride=%d, output_shape=%s"
                  % (id, pool_size, stride, str(output.get_shape())))
        return output

    def _flatten(self, x):
        """Flatten ``x`` to 2-D, [batch, features], in channel-first order."""
        # Transpose to channel-first before flattening; presumably this is
        # the element order the checkpoint's first FC weights expect
        # (TODO confirm against the weight conversion script).
        tran_x = tf.transpose(x, [0, 3, 1, 2])
        # np.prod rather than np.product: the latter was removed in NumPy 2.0.
        nums = int(np.prod(x.get_shape().as_list()[1:]))
        return tf.reshape(tran_x, [-1, nums])

    def _load_weights(self, weights_file):
        """Restore all graph variables from the checkpoint ``weights_file``."""
        if self.verbose:
            print("Start to load weights from file:%s" % (weights_file))
        saver = tf.train.Saver()
        saver.restore(self.sess, weights_file)

    def detect_from_file(self, image_file, deteted_boxes_file="boxes.txt",
                         detected_image_file="detected_image.jpg"):
        """Do detection given an image file and show/save the results.

        Args:
            image_file: path of the image to read with ``cv2.imread``.
            deteted_boxes_file: path of the text file receiving one line per
                detected box (falsy to skip).
            detected_image_file: path where the annotated image is written
                (falsy to skip).

        Raises:
            IOError: if ``image_file`` cannot be read as an image.
        """
        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_file)
        if image is None:
            raise IOError("Could not read image file: %s" % image_file)
        scores, boxes, box_classes = self._detect_from_image(image)
        predict_boxes = [
            (self.classes[cls], box[0], box[1], box[2], box[3], score)
            for cls, box, score in zip(box_classes, boxes, scores)
        ]
        # Pass the output paths by keyword: show_results() takes `imshow`
        # as its third positional parameter, so positional passing would
        # shift both paths into the wrong parameters.
        self.show_results(image, predict_boxes,
                          deteted_boxes_file=deteted_boxes_file,
                          detected_image_file=detected_image_file)

    def _detect_from_image(self, image):
        """Do detection given a cv image (BGR, height x width x 3).

        Returns:
            ``(scores, boxes, box_classes)`` as evaluated by the session;
            boxes are (center x, center y, w, h) in pixels of ``image``.
        """
        img_h, img_w, _ = image.shape
        img_resized = cv2.resize(image, (448, 448))
        img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        _images = np.zeros((1, 448, 448, 3), dtype=np.float32)
        # scale pixel values from [0, 255] to [-1, 1]
        _images[0] = (img_RGB / 255.0) * 2.0 - 1.0
        scores, boxes, box_classes = self.sess.run(
            [self.scores, self.boxes, self.box_classes],
            feed_dict={self.images: _images,
                       self.width: img_w, self.height: img_h})
        return scores, boxes, box_classes

    def show_results(self, image, results, imshow=True, deteted_boxes_file=None,
                     detected_image_file=None):
        """Show the detection boxes.

        Draws every box and its label on a copy of ``image`` (the input is
        not modified), then optionally writes the boxes file, displays the
        image and saves it.

        Args:
            image: cv image the boxes refer to.
            results: sequence of ``(class_name, x, y, w, h, score)`` tuples
                with (x, y) the box center.
            imshow: when true, display the annotated image in a window.
            deteted_boxes_file: if truthy, path of a text file that receives
                one comma-separated line per box.
            detected_image_file: if truthy, path where the annotated image
                is written.
        """
        img_cp = image.copy()
        box_lines = []
        # draw boxes
        for box in results:
            label = box[0]
            score = box[5]
            x = int(box[1])
            y = int(box[2])
            # NOTE(review): w and h are HALF the box width/height (handy for
            # center +/- extent drawing).  The log line and the boxes file
            # record these halved values under the w/h labels; kept as-is
            # so existing consumers of the output are not broken.
            w = int(box[3]) // 2
            h = int(box[4]) // 2
            if self.verbose:
                print(" class: %s, [x, y, w, h]=[%d, %d, %d, %d], confidence=%f"
                      % (label, x, y, w, h, box[-1]))

            cv2.rectangle(img_cp, (x - w, y - h), (x + w, y + h),
                          (0, 255, 0), 2)
            # filled grey strip above the box as background for the label
            cv2.rectangle(img_cp, (x - w, y - h - 20), (x + w, y - h),
                          (125, 125, 125), -1)
            cv2.putText(img_cp, label + ' : %.2f' % score,
                        (x - w + 5, y - h - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
            box_lines.append(
                ','.join([label, str(x), str(y), str(w), str(h), str(score)])
                + '\n')
        if deteted_boxes_file:
            # `with` guarantees the handle is closed even if a write fails;
            # the file is created (empty) even when there are no boxes.
            with open(deteted_boxes_file, "w") as f:
                f.writelines(box_lines)
        if imshow:
            cv2.imshow('YOLO_small detection', img_cp)
            cv2.waitKey(1)
        if detected_image_file:
            cv2.imwrite(detected_image_file, img_cp)
234+
235+
if __name__ == "__main__":
    # Demo entry point: restore the checkpoint, then run detection on a
    # sample image using the default output paths of detect_from_file().
    weights_path = "./weights/YOLO_small.ckpt"
    sample_image = "./test/car.jpg"
    yolo_net = Yolo(weights_path)
    yolo_net.detect_from_file(sample_image)

0 commit comments

Comments
 (0)