"""
MobileNetV2 in TF-slim.

2018-11-24
"""

from collections import namedtuple
import copy

import tensorflow as tf

slim = tf.contrib.slim

def _make_divisible(v, divisor, min_value=None):
    """Round `v` to the nearest multiple of `divisor`, but never below `min_value`."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not shrink `v` by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
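
# Worked examples (kept as comments so importing this module stays side-effect
# free): _make_divisible(32 * 0.75, 8) rounds 24.0 to 24, already a multiple of
# 8, while _make_divisible(16 * 0.35, 8) clamps 5.6 up to min_value = 8; this is
# how small depth multipliers are kept from collapsing a layer to zero channels.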


@slim.add_arg_scope
def _depth_multiplier_func(params,
                           multiplier,
                           divisible_by=8,
                           min_depth=8):
    """Scale the `num_outputs` entry of `params` in place to get the new channels."""
    if 'num_outputs' not in params:
        return
    d = params['num_outputs']
    params['num_outputs'] = _make_divisible(d * multiplier, divisible_by,
                                            min_depth)
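
# For example, under a 0.5 multiplier a layer declared with num_outputs=32 is
# rewritten in place to num_outputs=16 (already divisible by 8); a params dict
# without a 'num_outputs' key is left untouched:
#
#   params = {'num_outputs': 32, 'stride': 2}
#   _depth_multiplier_func(params, 0.5)  # params['num_outputs'] is now 16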


def _fixed_padding(inputs, kernel_size, rate=1):
    """Pads the input along the spatial dimensions independently of input size.

    Pads the input such that if it was used in a convolution with 'VALID'
    padding, the output would have the same dimensions as if the unpadded input
    was used in a convolution with 'SAME' padding.

    Args:
        inputs: A tensor of size [batch, height_in, width_in, channels].
        kernel_size: A list/tuple of two integers, the kernel of the conv2d or
            max_pool2d operation.
        rate: An integer, rate for atrous convolution.

    Returns:
        output: A tensor of size [batch, height_out, width_out, channels] with
            the input, either intact (if kernel_size == 1) or padded (if
            kernel_size > 1).
    """
    kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
                             kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
    pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
    pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
    pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
    padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
                                    [pad_beg[1], pad_end[1]], [0, 0]])
    return padded_inputs
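
# Worked example: a 3x3 kernel at atrous rate 2 has an effective extent of
# 3 + (3 - 1) * (2 - 1) = 5, so pad_total is 4 per spatial dimension, split as
# pad_beg = 2 and pad_end = 2. At rate 1 the same kernel yields the familiar
# (1, 1) padding of a 'SAME' convolution.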


@slim.add_arg_scope
def expanded_conv(x,
                  num_outputs,
                  expansion=6,
                  stride=1,
                  rate=1,
                  normalizer_fn=slim.batch_norm,
                  project_activation_fn=tf.identity,
                  padding="SAME",
                  scope=None):
    """The expanded (inverted residual) conv block of MobileNetV2:
    1x1 expansion conv -> 3x3 depthwise conv -> 1x1 linear projection conv.
    """
    with tf.variable_scope(scope, default_name="expanded_conv") as s, \
            tf.name_scope(s.original_name_scope):
        prev_depth = x.get_shape().as_list()[3]
        # Number of filters of the expansion conv.
        inner_size = prev_depth * expansion
        net = x
        # Only apply the 1x1 expansion conv when it actually increases depth.
        if inner_size > prev_depth:
            net = slim.conv2d(net, inner_size, 1, normalizer_fn=normalizer_fn,
                              scope="expand")
        # Depthwise conv.
        net = slim.separable_conv2d(net, num_outputs=None, kernel_size=3,
                                    depth_multiplier=1, stride=stride,
                                    rate=rate, normalizer_fn=normalizer_fn,
                                    padding=padding, scope="depthwise")
        # Linear projection (identity activation by default).
        net = slim.conv2d(net, num_outputs, 1, normalizer_fn=normalizer_fn,
                          activation_fn=project_activation_fn, scope="project")

        # Residual connection, only when the block preserves resolution and depth.
        if stride == 1 and net.get_shape().as_list()[-1] == prev_depth:
            net += x

        return net
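
# A minimal usage sketch (shapes assumed purely for illustration): a stride-2
# bottleneck that expands 16 input channels to 16 * 6 = 96 internally, then
# projects down to 24 channels. No residual is added because the stride is 2
# and the input/output depths differ.
#
#   x = tf.placeholder(tf.float32, [1, 56, 56, 16])
#   y = expanded_conv(x, num_outputs=24, stride=2)  # -> [1, 28, 28, 24]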


def global_pool(x, pool_op=tf.nn.avg_pool):
    """Applies avg pool to produce a 1x1 output.

    NOTE: This function is functionally equivalent to reduce_mean, but it has
    a baked-in average pool, which has better support across hardware.

    Args:
        x: input tensor.
        pool_op: pooling op (avg pool is default).

    Returns:
        A tensor of shape batch_size x 1 x 1 x depth.
    """
    shape = x.get_shape().as_list()
    if shape[1] is None or shape[2] is None:
        kernel_size = tf.convert_to_tensor(
            [1, tf.shape(x)[1], tf.shape(x)[2], 1])
    else:
        kernel_size = [1, shape[1], shape[2], 1]
    output = pool_op(x, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID')
    # Recover the static output shape, for inputs of unknown shape.
    output.set_shape([None, 1, 1, None])
    return output
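
# A minimal equivalence check (assumed shapes, illustration only):
#
#   x = tf.random_normal([2, 7, 7, 32])
#   a = global_pool(x)                            # shape [2, 1, 1, 32]
#   b = tf.reduce_mean(x, [1, 2], keepdims=True)  # same values as `a`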


# An op spec: the layer function, its keyword params, and the function used to
# rescale its `num_outputs` under a depth multiplier.
_Op = namedtuple("Op", ['op', 'params', 'multiplier_func'])

def op(op_func, **params):
    return _Op(op=op_func, params=params,
               multiplier_func=_depth_multiplier_func)


# The MobileNetV2 body: a stem conv, 17 inverted-residual bottlenecks, and a
# final 1x1 conv.
CONV_DEF = [op(slim.conv2d, num_outputs=32, stride=2, kernel_size=3),
            op(expanded_conv, num_outputs=16, expansion=1),
            op(expanded_conv, num_outputs=24, stride=2),
            op(expanded_conv, num_outputs=24, stride=1),
            op(expanded_conv, num_outputs=32, stride=2),
            op(expanded_conv, num_outputs=32, stride=1),
            op(expanded_conv, num_outputs=32, stride=1),
            op(expanded_conv, num_outputs=64, stride=2),
            op(expanded_conv, num_outputs=64, stride=1),
            op(expanded_conv, num_outputs=64, stride=1),
            op(expanded_conv, num_outputs=64, stride=1),
            op(expanded_conv, num_outputs=96, stride=1),
            op(expanded_conv, num_outputs=96, stride=1),
            op(expanded_conv, num_outputs=96, stride=1),
            op(expanded_conv, num_outputs=160, stride=2),
            op(expanded_conv, num_outputs=160, stride=1),
            op(expanded_conv, num_outputs=160, stride=1),
            op(expanded_conv, num_outputs=320, stride=1),
            op(slim.conv2d, num_outputs=1280, stride=1, kernel_size=1),
            ]
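
# Note: the five stride-2 ops above (the stem plus the first 24-, 32-, 64- and
# 160-channel bottlenecks) give a total output stride of 2 ** 5 = 32, which is
# what the `output_stride` argument of mobilenetv2() below trades for atrous
# rates.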


def mobilenet_arg_scope(is_training=True,
                        weight_decay=0.00004,
                        stddev=0.09,
                        dropout_keep_prob=0.8,
                        bn_decay=0.997):
    """Defines the Mobilenet default arg scope.

    Usage:
        with slim.arg_scope(mobilenet_arg_scope()):
            logits = mobilenetv2(input_tensor)
        # The network created will be trainable, with dropout/batch norm
        # initialized appropriately.

    Args:
        is_training: if set to False this will ensure that all customizations
            are set to non-training mode. This might be helpful for code that
            is reused across both training/evaluation, but most of the time a
            training scope with value False is not needed. If this is set to
            None, the parameter is not added to the batch_norm arg_scope.
        weight_decay: The weight decay to use for regularizing the model.
        stddev: Standard deviation for initialization; if negative, uses xavier.
        dropout_keep_prob: dropout keep probability (not set if equal to None).
        bn_decay: decay for the batch norm moving averages (not set if equal to
            None).

    Returns:
        An argument scope to use via arg_scope.
    """
    # Note: do not introduce parameters that would change the inference
    # model here (for example whether to use bias); modify CONV_DEF instead.
    batch_norm_params = {
        'center': True,
        'scale': True,
        'decay': bn_decay,
        'is_training': is_training
    }
    if stddev < 0:
        weight_initializer = slim.initializers.xavier_initializer()
    else:
        weight_initializer = tf.truncated_normal_initializer(stddev=stddev)

    # Set weight_decay for weights in Conv and FC layers.
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.separable_conv2d],
            weights_initializer=weight_initializer,
            normalizer_fn=slim.batch_norm,
            activation_fn=tf.nn.relu6), \
            slim.arg_scope([slim.batch_norm], **batch_norm_params), \
            slim.arg_scope([slim.dropout], is_training=is_training,
                           keep_prob=dropout_keep_prob), \
            slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                           biases_initializer=None,
                           padding="SAME"), \
            slim.arg_scope([slim.conv2d],
                           weights_regularizer=slim.l2_regularizer(weight_decay)), \
            slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s:
        return s
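
# A minimal sketch of tying the scope to the model (this mirrors the __main__
# demo at the bottom; `images` is assumed to be a [N, 224, 224, 3] float
# tensor):
#
#   with slim.arg_scope(mobilenet_arg_scope(is_training=False)):
#       logits = mobilenetv2(images, num_classes=1001)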


def mobilenetv2(x,
                num_classes=1001,
                depth_multiplier=1.0,
                scope='MobilenetV2',
                finegrain_classification_mode=False,
                min_depth=8,
                divisible_by=8,
                output_stride=None,
                ):
    """Mobilenet v2.

    Args:
        x: The input tensor.
        num_classes: number of classes. If falsy, the pooled features and the
            end_points dict are returned instead of logits.
        depth_multiplier: The multiplier applied to scale the number of
            channels in each layer. Note: this is called depth multiplier in
            the paper, but the name is kept for consistency with slim's model
            builder.
        scope: Scope of the operator.
        finegrain_classification_mode: When set to True, the model will keep
            the last layer large even for small multipliers. The paper suggests
            that it improves performance for ImageNet-type problems.
        min_depth: If provided, ensures that all layers have at least that many
            channels after application of the depth multiplier.
        divisible_by: If provided, ensures that the number of channels of every
            layer is divisible by this number.
        output_stride: If provided, the target ratio of input to output spatial
            resolution; atrous convolution keeps it once it is reached.

    Returns:
        The logits tensor of shape [batch, num_classes], or the tuple
        (pooled features, end_points) when num_classes is falsy.
    """
    conv_defs = CONV_DEF

    # Keep the last conv layer wide even when the multiplier shrinks the rest.
    if finegrain_classification_mode:
        conv_defs = copy.deepcopy(conv_defs)
        if depth_multiplier < 1:
            conv_defs[-1].params['num_outputs'] /= depth_multiplier

    depth_args = {}
    # NB: do not set depth_args unless they are provided, to avoid overriding
    # whatever default depth_multiplier might have thanks to arg_scope.
    if min_depth is not None:
        depth_args['min_depth'] = min_depth
    if divisible_by is not None:
        depth_args['divisible_by'] = divisible_by

    with slim.arg_scope([_depth_multiplier_func], **depth_args):
        with tf.variable_scope(scope, default_name='Mobilenet'):
            # The current_stride variable keeps track of the output stride of
            # the activations, i.e., the running product of convolution strides
            # up to the current network layer. This allows us to invoke atrous
            # convolution whenever applying the next convolution would result
            # in the activations having output stride larger than the target
            # output_stride.
            current_stride = 1

            # The atrous convolution rate parameter.
            rate = 1

            net = x
            end_points = {}
            for i, opdef in enumerate(conv_defs):
                params = dict(opdef.params)
                opdef.multiplier_func(params, depth_multiplier)
                stride = params.get('stride', 1)
                if output_stride is not None and current_stride == output_stride:
                    # If we have reached the target output_stride, then we need
                    # to employ atrous convolution with stride=1 and multiply
                    # the atrous rate by the current unit's stride for use in
                    # subsequent layers.
                    layer_stride = 1
                    layer_rate = rate
                    rate *= stride
                else:
                    layer_stride = stride
                    layer_rate = 1
                    current_stride *= stride
                # Update params.
                params['stride'] = layer_stride
                # Only insert rate into params if rate > 1.
                if layer_rate > 1:
                    params['rate'] = layer_rate

                try:
                    net = opdef.op(net, **params)
                except Exception as e:
                    raise ValueError('Failed to create op %i: %r params: %r'
                                     % (i, opdef, params)) from e

            with tf.variable_scope('Logits'):
                net = global_pool(net)
                end_points['global_pool'] = net
                if not num_classes:
                    return net, end_points
                net = slim.dropout(net, scope='Dropout')
                # 1 x 1 x num_classes
                # Note: legacy scope name.
                logits = slim.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=tf.zeros_initializer(),
                    scope='Conv2d_1c_1x1')

                logits = tf.squeeze(logits, [1, 2])

                return logits
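
# A sketch of the atrous path (assumed input size, for illustration only): with
# output_stride=16 the stride-2 op of the first 160-channel bottleneck runs at
# stride 1, the layers after it use atrous rate 2, and the feature map before
# global pooling is 14x14 for a 224x224 input rather than 7x7.
#
#   images = tf.placeholder(tf.float32, [1, 224, 224, 3])
#   with slim.arg_scope(mobilenet_arg_scope(is_training=False)):
#       logits = mobilenetv2(images, output_stride=16)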


if __name__ == "__main__":
    import cv2
    import numpy as np

    inputs = tf.placeholder(tf.uint8, [None, None, 3])
    images = tf.expand_dims(inputs, 0)
    # MobileNet preprocessing: scale pixel values to [-1, 1].
    images = tf.cast(images, tf.float32) / 128. - 1
    images.set_shape((None, None, None, 3))
    images = tf.image.resize_images(images, (224, 224))

    with slim.arg_scope(mobilenet_arg_scope(is_training=False)):
        logits = mobilenetv2(images)

    # Restore the exponential moving averages of the variables, since they
    # produce (1.5-2%) higher accuracy.
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars_to_restore = ema.variables_to_restore()

    saver = tf.train.Saver(vars_to_restore)

    print(len(tf.global_variables()))
    for var in tf.global_variables():
        print(var)
    checkpoint_path = r"C:\Users\xiaoh\Desktop\temp\mobilenet_v2_1.0_224\mobilenet_v2_1.0_224.ckpt"
    image_file = "C:/Users/xiaoh/Desktop/temp/pandas.jpg"
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)

        img = cv2.imread(image_file)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Index of the highest logit for the single image in the batch.
        print(np.argmax(sess.run(logits, feed_dict={inputs: img})[0]))