Commit 7a2f1a3

aquariusjay authored and YknZhu committed
PiperOrigin-RevId: 190154671
1 parent 67d65c6 commit 7a2f1a3

16 files changed (+660 −403 lines)

research/deeplab/README.md (+16 −4)

````diff
@@ -28,7 +28,9 @@ features:
 convolution to trade-off precision and runtime.
 
 If you find the code useful for your research, please consider citing our latest
-work:
+works:
+
+* DeepLabv3+:
 
 ```
 @article{deeplabv3plus2018,
@@ -39,11 +41,21 @@ work:
 }
 ```
 
+* MobileNetv2:
+
+```
+@inproceedings{mobilenetv22018,
+  title={Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation},
+  author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen},
+  booktitle={CVPR},
+  year={2018}
+}
+```
+
 In the current implementation, we support adopting the following network
 backbones:
 
-1. MobileNetv2 [8]: A fast network structure designed for mobile devices. **We
-   will provide MobileNetv2 support in the next update. Please stay tuned.**
+1. MobileNetv2 [8]: A fast network structure designed for mobile devices.
 
 2. Xception [9, 10]: A powerful network structure intended for server-side
    deployment.
@@ -71,7 +83,7 @@ Some segmentation results on Flickr images:
 
 Demo:
 
-* <a href='deeplab_demo.ipynb'>Jupyter notebook for off-the-shelf inference.</a><br>
+* <a href='https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/deeplab/deeplab_demo.ipynb'>Colab notebook for off-the-shelf inference.</a><br>
 
 Running:
````

research/deeplab/common.py (+11 −6)

```diff
@@ -39,11 +39,11 @@
                      'The kernel size for the convolutional kernel that '
                      'generates logits.')
 
-# We will support `mobilenet_v2' in the coming update. When using
-# 'xception_65', we set atrous_rates = [6, 12, 18] (output stride 16) and
-# decoder_output_stride = 4.
-flags.DEFINE_enum('model_variant', 'xception_65', ['xception_65'],
-                  'DeepLab model variants.')
+# When using 'mobilenet_v2', we set atrous_rates = decoder_output_stride = None.
+# When using 'xception_65', we set atrous_rates = [6, 12, 18] (output stride 16)
+# and decoder_output_stride = 4.
+flags.DEFINE_enum('model_variant', 'mobilenet_v2',
+                  ['xception_65', 'mobilenet_v2'], 'DeepLab model variant.')
 
 flags.DEFINE_multi_float('image_pyramid', None,
                          'Input scales for multi-scale feature extraction.')
@@ -60,7 +60,12 @@
 flags.DEFINE_multi_integer('multi_grid', None,
                            'Employ a hierarchy of atrous rates for ResNet.')
 
-# For `xception_65`, use decoder_output_stride = 4.
+flags.DEFINE_float('depth_multiplier', 1.0,
+                   'Multiplier for the depth (number of channels) for all '
+                   'convolution ops used in MobileNet.')
+
+# For `xception_65`, use decoder_output_stride = 4. For `mobilenet_v2`, use
+# decoder_output_stride = None.
 flags.DEFINE_integer('decoder_output_stride', None,
                      'The ratio of input to output spatial resolution when '
                      'employing decoder to refine segmentation results.')
```
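As a quick reference, the variant-dependent settings spelled out in the comments above can be summarized in a small dict. This is an illustrative sketch restating those comments, not code from the commit:

```python
# Illustrative restatement of the variant-dependent defaults documented in
# the common.py comments above (not part of this commit).
VARIANT_OPTIONS = {
    # MobileNetv2: no ASPP atrous rates and no decoder module.
    'mobilenet_v2': {'atrous_rates': None, 'decoder_output_stride': None},
    # Xception-65 at output stride 16: atrous rates [6, 12, 18] and a decoder
    # refining results at output stride 4.
    'xception_65': {'atrous_rates': [6, 12, 18], 'decoder_output_stride': 4},
}

print(VARIANT_OPTIONS['mobilenet_v2'])
```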

research/deeplab/core/feature_extractor.py (+66 −1)

```diff
@@ -18,18 +18,62 @@
 import tensorflow as tf
 
 from deeplab.core import xception
+from nets.mobilenet import mobilenet as mobilenet_lib
+from nets.mobilenet import mobilenet_v2
 
 
 slim = tf.contrib.slim
 
+# Default end point for MobileNetv2.
+_MOBILENET_V2_FINAL_ENDPOINT = 'layer_18'
+
+
+def _mobilenet_v2(net,
+                  depth_multiplier,
+                  output_stride,
+                  reuse=None,
+                  scope=None,
+                  final_endpoint=None):
+  """Auxiliary function to add support for 'reuse' to mobilenet_v2.
+
+  Args:
+    net: Input tensor of shape [batch_size, height, width, channels].
+    depth_multiplier: Float multiplier for the depth (number of channels)
+      for all convolution ops. The value must be greater than zero. Typical
+      usage will be to set this value in (0, 1) to reduce the number of
+      parameters or computation cost of the model.
+    output_stride: An integer that specifies the requested ratio of input to
+      output spatial resolution. If not None, then we invoke atrous convolution
+      if necessary to prevent the network from reducing the spatial resolution
+      of the activation maps. Allowed values are 8 (accurate fully convolutional
+      mode), 16 (fast fully convolutional mode), 32 (classification mode).
+    reuse: Reuse model variables.
+    scope: Optional variable scope.
+    final_endpoint: The endpoint to construct the network up to.
+
+  Returns:
+    Features extracted by MobileNetv2.
+  """
+  with tf.variable_scope(
+      scope, 'MobilenetV2', [net], reuse=reuse) as scope:
+    return mobilenet_lib.mobilenet_base(
+        net,
+        conv_defs=mobilenet_v2.V2_DEF,
+        multiplier=depth_multiplier,
+        final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
+        output_stride=output_stride,
+        scope=scope)
+
 
 # A map from network name to network function.
 networks_map = {
+    'mobilenet_v2': _mobilenet_v2,
     'xception_65': xception.xception_65,
 }
 
 # A map from network name to network arg scope.
 arg_scopes_map = {
+    'mobilenet_v2': mobilenet_v2.training_scope,
     'xception_65': xception.xception_arg_scope,
 }
 
```
```diff
@@ -38,6 +82,10 @@
 
 # A dictionary from network name to a map of end point features.
 networks_to_feature_maps = {
+    'mobilenet_v2': {
+        # The provided checkpoint does not include decoder module.
+        DECODER_END_POINTS: None,
+    },
     'xception_65': {
         DECODER_END_POINTS: [
             'entry_flow/block2/unit_1/xception_module/'
@@ -49,6 +97,7 @@
 # A map from feature extractor name to the network name scope used in the
 # ImageNet pretrained versions of these models.
 name_scope = {
+    'mobilenet_v2': 'MobilenetV2',
     'xception_65': 'xception_65',
 }
 
@@ -68,6 +117,7 @@ def _preprocess_zero_mean_unit_range(inputs):
 
 
 _PREPROCESS_FN = {
+    'mobilenet_v2': _preprocess_zero_mean_unit_range,
     'xception_65': _preprocess_zero_mean_unit_range,
 }
 
```
```diff
@@ -99,6 +149,8 @@ def mean_pixel(model_variant=None):
 def extract_features(images,
                      output_stride=8,
                      multi_grid=None,
+                     depth_multiplier=1.0,
+                     final_endpoint=None,
                      model_variant=None,
                      weight_decay=0.0001,
                      reuse=None,
@@ -114,6 +166,9 @@ def extract_features(images,
     images: A tensor of size [batch, height, width, channels].
     output_stride: The ratio of input to output spatial resolution.
     multi_grid: Employ a hierarchy of different atrous rates within network.
+    depth_multiplier: Float multiplier for the depth (number of channels)
+      for all convolution ops used in MobileNet.
+    final_endpoint: The MobileNet endpoint to construct the network up to.
     model_variant: Model variant for feature extraction.
     weight_decay: The weight decay for model variables.
     reuse: Reuse the model variables or not.
@@ -159,7 +214,17 @@ def extract_features(images,
         reuse=reuse,
         scope=name_scope[model_variant])
   elif 'mobilenet' in model_variant:
-    raise ValueError('MobileNetv2 support is coming soon.')
+    arg_scope = arg_scopes_map[model_variant](
+        is_training=(is_training and fine_tune_batch_norm),
+        weight_decay=weight_decay)
+    features, end_points = get_network(
+        model_variant, preprocess_images, arg_scope)(
+            inputs=images,
+            depth_multiplier=depth_multiplier,
+            output_stride=output_stride,
+            reuse=reuse,
+            scope=name_scope[model_variant],
+            final_endpoint=final_endpoint)
   else:
     raise ValueError('Unknown model variant %s.' % model_variant)
```
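With this branch in place, MobileNetv2 features can be requested through the public entry point. A hedged end-to-end sketch (argument values mirror the mobilenet_v2 defaults documented in common.py; the two-value return is assumed to match the backbone call above):

```python
import tensorflow as tf

from deeplab.core import feature_extractor

images = tf.placeholder(tf.float32, [1, 513, 513, 3])
# extract_features dispatches to the MobileNetv2 branch above because
# 'mobilenet' appears in the variant name.
features, end_points = feature_extractor.extract_features(
    images,
    output_stride=16,
    depth_multiplier=1.0,
    model_variant='mobilenet_v2',
    is_training=False,
    fine_tune_batch_norm=False)
```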

research/deeplab/datasets/convert_cityscapes.sh (+13 −10)

```diff
@@ -14,19 +14,21 @@
 # limitations under the License.
 # ==============================================================================
 #
-# Script to preprocess the Cityscapes dataset. Note (1) the users should register
-# the Cityscapes dataset website: https://www.cityscapes-dataset.com/downloads/ to
-# download the dataset, and (2) the users should run the script provided by Cityscapes
-# `preparation/createTrainIdLabelImgs.py` to generate the training groundtruth.
+# Script to preprocess the Cityscapes dataset. Note (1) the users should
+# register at the Cityscapes dataset website,
+# https://www.cityscapes-dataset.com/downloads/, to download the dataset,
+# and (2) the users should download the utility scripts provided by
+# Cityscapes at https://github.com/mcordts/cityscapesScripts.
 #
 # Usage:
 #   bash ./preprocess_cityscapes.sh
 #
 # The folder structure is assumed to be:
-#  + data
+#  + datasets
 #    - build_cityscapes_data.py
+#    - convert_cityscapes.sh
 #    + cityscapes
-#      + cityscapesscripts
+#      + cityscapesscripts (downloaded scripts)
 #      + gtFine
 #      + leftImg8bit
 #
@@ -37,17 +39,18 @@ set -e
 CURRENT_DIR=$(pwd)
 WORK_DIR="."
 
-cd "${CURRENT_DIR}"
-
-# Root path for PASCAL VOC 2012 dataset.
+# Root path for Cityscapes dataset.
 CITYSCAPES_ROOT="${WORK_DIR}/cityscapes"
 
+# Create training labels.
+python "${CITYSCAPES_ROOT}/cityscapesscripts/preparation/createTrainIdLabelImgs.py"
+
 # Build TFRecords of the dataset.
 # First, create output directory for storing TFRecords.
 OUTPUT_DIR="${CITYSCAPES_ROOT}/tfrecord"
 mkdir -p "${OUTPUT_DIR}"
 
-BUILD_SCRIPT="${WORK_DIR}/build_cityscapes_data.py"
+BUILD_SCRIPT="${CURRENT_DIR}/build_cityscapes_data.py"
 
 echo "Converting Cityscapes dataset..."
 python "${BUILD_SCRIPT}" \
```

research/deeplab/datasets/download_and_convert_voc2012.sh (+6 −5)

```diff
@@ -20,15 +20,16 @@
 #   bash ./download_and_preprocess_voc2012.sh
 #
 # The folder structure is assumed to be:
-#  + data
+#  + datasets
 #    - build_data.py
 #    - build_voc2012_data.py
 #    - download_and_preprocess_voc2012.sh
 #    - remove_gt_colormap.py
-#    + VOCdevkit
-#      + VOC2012
-#        + JPEGImages
-#        + SegmentationClass
+#    + pascal_voc_seg
+#      + VOCdevkit
+#        + VOC2012
+#          + JPEGImages
+#          + SegmentationClass
 #
 
 # Exit immediately if a command exits with a non-zero status.
```
