Commit ad26338

add mobilenet v2

1 parent 6c46b7c commit ad26338

1 file changed: +349 -0 lines changed

CNNs/mobilenet_v2.py

"""
MobileNet v2, implemented with tf.contrib.slim (TensorFlow 1.x).
2018-11-24
"""

from collections import namedtuple
import copy

import tensorflow as tf

slim = tf.contrib.slim

def _make_divisible(v, divisor, min_value=None):
    """Round `v` to the nearest multiple of `divisor`, keeping at least `min_value`."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
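
# A quick sanity check of the rounding rule (values worked out by hand):
#   _make_divisible(32 * 0.75, 8)  -> 24  (24 is already a multiple of 8)
#   _make_divisible(16 * 0.35, 8)  -> 8   (clamped up to min_value = divisor)
#   _make_divisible(10, 8)         -> 16  (rounding 10 down to 8 would shrink it
#                                          by more than 10%, so round up instead)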

@slim.add_arg_scope
def _depth_multiplier_func(params,
                           multiplier,
                           divisible_by=8,
                           min_depth=8):
    """Scale the op's `num_outputs` by `multiplier`, in place."""
    if 'num_outputs' not in params:
        return
    d = params['num_outputs']
    params['num_outputs'] = _make_divisible(d * multiplier, divisible_by,
                                            min_depth)
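
# For example (hypothetical params dict, worked out by hand):
#   params = {'num_outputs': 32, 'stride': 2}
#   _depth_multiplier_func(params, 0.75)
#   params -> {'num_outputs': 24, 'stride': 2}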

def _fixed_padding(inputs, kernel_size, rate=1):
    """Pads the input along the spatial dimensions independently of input size.

    Pads the input such that if it was used in a convolution with 'VALID' padding,
    the output would have the same dimensions as if the unpadded input was used
    in a convolution with 'SAME' padding.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
      rate: An integer, rate for atrous convolution.

    Returns:
      output: A tensor of size [batch, height_out, width_out, channels] with the
        input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
    """
    kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
                             kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
    pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
    pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
    pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
    padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
                                    [pad_beg[1], pad_end[1]], [0, 0]])
    return padded_inputs
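
# For a 3x3 kernel with atrous rate 2 (worked out by hand):
#   effective kernel = 3 + (3 - 1) * (2 - 1) = 5
#   pad_total = 4, so pad_beg = 2 and pad_end = 2 on each spatial dimension,
# matching what 'SAME' padding would produce for the unpadded input.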

@slim.add_arg_scope
def expanded_conv(x,
                  num_outputs,
                  expansion=6,
                  stride=1,
                  rate=1,
                  normalizer_fn=slim.batch_norm,
                  project_activation_fn=tf.identity,
                  padding="SAME",
                  scope=None):
    """The expanded conv op (inverted residual bottleneck) in MobileNet v2:
    1x1 expansion conv -> 3x3 depthwise conv -> 1x1 linear projection conv
    """
    with tf.variable_scope(scope, default_name="expanded_conv") as s, \
            tf.name_scope(s.original_name_scope):
        prev_depth = x.get_shape().as_list()[3]
        # the number of filters of the expansion conv
        inner_size = prev_depth * expansion
        net = x
        # only use the 1x1 expansion conv when inner_size > prev_depth
        if inner_size > prev_depth:
            net = slim.conv2d(net, inner_size, 1, normalizer_fn=normalizer_fn,
                              scope="expand")
        # depthwise conv
        net = slim.separable_conv2d(net, num_outputs=None, kernel_size=3,
                                    depth_multiplier=1, stride=stride,
                                    rate=rate, normalizer_fn=normalizer_fn,
                                    padding=padding, scope="depthwise")
        # linear projection (no non-linearity, hence tf.identity)
        net = slim.conv2d(net, num_outputs, 1, normalizer_fn=normalizer_fn,
                          activation_fn=project_activation_fn, scope="project")

        # residual connection, only when the shapes match
        if stride == 1 and net.get_shape().as_list()[-1] == prev_depth:
            net += x

        return net
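
# Shape walkthrough for one block (hypothetical input, expansion=6, stride=2):
#   x: [N, 56, 56, 24]
#   expand    -> [N, 56, 56, 144]  (1x1 conv, 24 * 6 channels, ReLU6)
#   depthwise -> [N, 28, 28, 144]  (3x3 depthwise, stride 2, ReLU6)
#   project   -> [N, 28, 28, 32]   (1x1 conv, linear activation)
# No residual connection here, since stride != 1 and the depth changes.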

def global_pool(x, pool_op=tf.nn.avg_pool):
    """Applies avg pool to produce a 1x1 output.

    NOTE: This function is functionally equivalent to reduce_mean, but it has
    a baked-in average pool, which has better support across hardware.

    Args:
      x: input tensor
      pool_op: pooling op (avg pool is default)
    Returns:
      a tensor batch_size x 1 x 1 x depth.
    """
    shape = x.get_shape().as_list()
    if shape[1] is None or shape[2] is None:
        kernel_size = tf.convert_to_tensor(
            [1, tf.shape(x)[1], tf.shape(x)[2], 1])
    else:
        kernel_size = [1, shape[1], shape[2], 1]
    output = pool_op(x, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID')
    # Recover the static output shape, in case the input shape was unknown.
    output.set_shape([None, 1, 1, None])
    return output
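
# e.g. for a 224x224 input at depth_multiplier=1.0, the feature map reaching
# global_pool is [N, 7, 7, 1280] (input downscaled by the overall stride of
# 32), and the pooled output is [N, 1, 1, 1280].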

# An op spec: the layer function, its keyword params, and the function that
# rescales `num_outputs` when a depth multiplier is applied.
_Op = namedtuple("Op", ['op', 'params', 'multiplier_func'])


def op(op_func, **params):
    return _Op(op=op_func, params=params,
               multiplier_func=_depth_multiplier_func)
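
# e.g. op(expanded_conv, num_outputs=24, stride=2) builds
#   _Op(op=expanded_conv, params={'num_outputs': 24, 'stride': 2},
#       multiplier_func=_depth_multiplier_func)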

CONV_DEF = [op(slim.conv2d, num_outputs=32, stride=2, kernel_size=3),
            op(expanded_conv, num_outputs=16, expansion=1),
            op(expanded_conv, num_outputs=24, stride=2),
            op(expanded_conv, num_outputs=24, stride=1),
            op(expanded_conv, num_outputs=32, stride=2),
            op(expanded_conv, num_outputs=32, stride=1),
            op(expanded_conv, num_outputs=32, stride=1),
            op(expanded_conv, num_outputs=64, stride=2),
            op(expanded_conv, num_outputs=64, stride=1),
            op(expanded_conv, num_outputs=64, stride=1),
            op(expanded_conv, num_outputs=64, stride=1),
            op(expanded_conv, num_outputs=96, stride=1),
            op(expanded_conv, num_outputs=96, stride=1),
            op(expanded_conv, num_outputs=96, stride=1),
            op(expanded_conv, num_outputs=160, stride=2),
            op(expanded_conv, num_outputs=160, stride=1),
            op(expanded_conv, num_outputs=160, stride=1),
            op(expanded_conv, num_outputs=320, stride=1),
            op(slim.conv2d, num_outputs=1280, stride=1, kernel_size=1),
            ]
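
# This table matches the MobileNet v2 body from the paper (Sandler et al.,
# 2018, Table 2): a stride-2 stem conv, 17 bottleneck blocks (expansion 6
# everywhere except the first block, which uses expansion 1), and a final
# 1x1 conv to 1280 channels. The five stride-2 ops give output stride 32.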

def mobilenet_arg_scope(is_training=True,
                        weight_decay=0.00004,
                        stddev=0.09,
                        dropout_keep_prob=0.8,
                        bn_decay=0.997):
    """Defines the Mobilenet default arg scope.

    Usage:
      with slim.arg_scope(mobilenet_arg_scope()):
          logits = mobilenetv2(input_tensor)
      # the network created will be trainable, with dropout/batch norm
      # initialized appropriately.

    Args:
      is_training: if set to False this will ensure that all customizations are
        set to non-training mode. This might be helpful for code that is reused
        across both training/evaluation, but most of the time setting this to
        False is not needed. If this is set to None, the parameter is not
        added to the batch_norm arg scope.
      weight_decay: The weight decay to use for regularizing the model.
      stddev: Standard deviation for initialization; if negative, uses xavier.
      dropout_keep_prob: dropout keep probability (not set if equal to None).
      bn_decay: decay for the batch norm moving averages (not set if equal to
        None).

    Returns:
      An argument scope to use via arg_scope.
    """
    # Note: do not introduce parameters that would change the inference
    # model here (for example whether to use bias), modify CONV_DEF instead.
    batch_norm_params = {
        'center': True,
        'scale': True,
        'decay': bn_decay,
        'is_training': is_training
    }
    if stddev < 0:
        weight_initializer = slim.initializers.xavier_initializer()
    else:
        weight_initializer = tf.truncated_normal_initializer(stddev=stddev)

    # Set weight_decay for weights in Conv and FC layers.
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.separable_conv2d],
            weights_initializer=weight_initializer,
            normalizer_fn=slim.batch_norm,
            activation_fn=tf.nn.relu6), \
            slim.arg_scope([slim.batch_norm], **batch_norm_params), \
            slim.arg_scope([slim.dropout], is_training=is_training,
                           keep_prob=dropout_keep_prob), \
            slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                           biases_initializer=None,
                           padding="SAME"), \
            slim.arg_scope([slim.conv2d],
                           weights_regularizer=slim.l2_regularizer(weight_decay)), \
            slim.arg_scope([slim.separable_conv2d],
                           weights_regularizer=None) as s:
        return s

def mobilenetv2(x,
                num_classes=1001,
                depth_multiplier=1.0,
                scope='MobilenetV2',
                finegrain_classification_mode=False,
                min_depth=8,
                divisible_by=8,
                output_stride=None):
    """Mobilenet v2.

    Args:
      x: The input tensor.
      num_classes: number of classes.
      depth_multiplier: The multiplier applied to scale the number of
        channels in each layer. Note: this is called depth multiplier in the
        paper, but the name is kept for consistency with slim's model builder.
      scope: Scope of the operator.
      finegrain_classification_mode: When set to True, the model
        will keep the last layer large even for small multipliers.
        The paper suggests that it improves performance for ImageNet-type
        problems.
      min_depth: If provided, ensures that all layers have at least that
        many channels after application of the depth multiplier.
      divisible_by: If provided, ensures that the number of channels in all
        layers is divisible by this number.
      output_stride: If provided, the target ratio of input to output spatial
        resolution; once it is reached, the remaining layers switch to atrous
        convolution instead of striding.

    Returns:
      logits: a [batch, num_classes] tensor. If num_classes is falsy, returns
        the [batch, 1, 1, depth] global-pool output and the end_points dict
        instead.
    """
    conv_defs = CONV_DEF

    # keep the channel count of the last conv layer large even for small
    # multipliers
    if finegrain_classification_mode:
        conv_defs = copy.deepcopy(conv_defs)
        if depth_multiplier < 1:
            conv_defs[-1].params['num_outputs'] /= depth_multiplier

    depth_args = {}
    # NB: do not set depth_args unless they are provided to avoid overriding
    # whatever default depth_multiplier might have thanks to arg_scope.
    if min_depth is not None:
        depth_args['min_depth'] = min_depth
    if divisible_by is not None:
        depth_args['divisible_by'] = divisible_by

    with slim.arg_scope([_depth_multiplier_func], **depth_args):
        with tf.variable_scope(scope, default_name='Mobilenet'):
            # The current_stride variable keeps track of the output stride of the
            # activations, i.e., the running product of convolution strides up to the
            # current network layer. This allows us to invoke atrous convolution
            # whenever applying the next convolution would result in the activations
            # having output stride larger than the target output_stride.
            current_stride = 1

            # The atrous convolution rate parameter.
            rate = 1

            net = x
            end_points = {}
            for i, opdef in enumerate(conv_defs):
                params = dict(opdef.params)
                opdef.multiplier_func(params, depth_multiplier)
                stride = params.get('stride', 1)
                if output_stride is not None and current_stride == output_stride:
                    # If we have reached the target output_stride, then we need to employ
                    # atrous convolution with stride=1 and multiply the atrous rate by the
                    # current unit's stride for use in subsequent layers.
                    layer_stride = 1
                    layer_rate = rate
                    rate *= stride
                else:
                    layer_stride = stride
                    layer_rate = 1
                    current_stride *= stride
                # Update params.
                params['stride'] = layer_stride
                # Only insert rate into params if rate > 1.
                if layer_rate > 1:
                    params['rate'] = layer_rate

                try:
                    net = opdef.op(net, **params)
                except Exception:
                    raise ValueError('Failed to create op %i: %r params: %r'
                                     % (i, opdef, params))
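
            # e.g. with output_stride=16: current_stride reaches 16 at the
            # fourth stride-2 op, so the later stride-2 bottleneck (160
            # channels) runs with stride 1 and the subsequent depthwise convs
            # use atrous rate 2, keeping the feature map at 1/16 resolution.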

            with tf.variable_scope('Logits'):
                net = global_pool(net)
                end_points['global_pool'] = net
                if not num_classes:
                    return net, end_points
                net = slim.dropout(net, scope='Dropout')
                # 1 x 1 x num_classes
                # Note: legacy scope name.
                logits = slim.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=tf.zeros_initializer(),
                    scope='Conv2d_1c_1x1')

                logits = tf.squeeze(logits, [1, 2])

                return logits

if __name__ == "__main__":
    import cv2
    import numpy as np

    inputs = tf.placeholder(tf.uint8, [None, None, 3])
    images = tf.expand_dims(inputs, 0)
    images = tf.cast(images, tf.float32) / 128. - 1
    images.set_shape((None, None, None, 3))
    images = tf.image.resize_images(images, (224, 224))

    with slim.arg_scope(mobilenet_arg_scope(is_training=False)):
        logits = mobilenetv2(images)

    # Restore using the exponential moving average, since it produces
    # (1.5-2%) higher accuracy.
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars_to_restore = ema.variables_to_restore()

    saver = tf.train.Saver(vars_to_restore)

    print(len(tf.global_variables()))
    for var in tf.global_variables():
        print(var)

    checkpoint_path = r"C:\Users\xiaoh\Desktop\temp\mobilenet_v2_1.0_224\mobilenet_v2_1.0_224.ckpt"
    image_file = "C:/Users/xiaoh/Desktop/temp/pandas.jpg"
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)

        img = cv2.imread(image_file)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        print(np.argmax(sess.run(logits, feed_dict={inputs: img})[0]))
