
Commit a17394c

aquariusjay authored and huihui-personal committed
PiperOrigin-RevId: 205684720
1 parent 7922c9e commit a17394c

8 files changed (+102, -38 lines)

research/deeplab/README.md (+8, -7)

@@ -33,10 +33,10 @@ works:
 * DeepLabv3+:

 ```
-@article{deeplabv3plus2018,
+@inproceedings{deeplabv3plus2018,
   title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
   author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam},
-  journal={arXiv:1802.02611},
+  booktitle={ECCV},
   year={2018}
 }
 ```

@@ -45,7 +45,7 @@ works:

 ```
 @inproceedings{mobilenetv22018,
-  title={Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation},
+  title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
   author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen},
   booktitle={CVPR},
   year={2018}

@@ -78,6 +78,7 @@ Some segmentation results on Flickr images:
 * Liang-Chieh Chen, github: [aquariusjay](https://github.com/aquariusjay)
 * YuKun Zhu, github: [yknzhu](https://github.com/YknZhu)
 * George Papandreou, github: [gpapan](https://github.com/gpapan)
+* Hui Hui, github: [huihui-personal](https://github.com/huihui-personal)

 ## Tables of Contents

@@ -154,8 +155,8 @@ and Cityscapes.
    [[link]](http://arxiv.org/abs/1706.05587). arXiv: 1706.05587, 2017.

 4. **Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation**<br />
-   Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam. arXiv: 1802.02611.<br />
-   [[link]](https://arxiv.org/abs/1802.02611). arXiv: 1802.02611, 2018.
+   Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.<br />
+   [[link]](https://arxiv.org/abs/1802.02611). In ECCV, 2018.

 5. **ParseNet: Looking Wider to See Better**<br />
    Wei Liu, Andrew Rabinovich, Alexander C Berg<br />

@@ -169,9 +170,9 @@ and Cityscapes.
    Sergey Ioffe, Christian Szegedy <br />
    [[link]](https://arxiv.org/abs/1502.03167). In ICML, 2015.

-8. **Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation**<br />
+8. **MobileNetV2: Inverted Residuals and Linear Bottlenecks**<br />
    Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen<br />
-   [[link]](https://arxiv.org/abs/1801.04381). arXiv:1801.04381, 2018.
+   [[link]](https://arxiv.org/abs/1801.04381). In CVPR, 2018.

 9. **Xception: Deep Learning with Depthwise Separable Convolutions**<br />
    François Chollet<br />

research/deeplab/common.py (+18, -4)

@@ -17,6 +17,7 @@
 Common flags from train/eval/vis/export_model.py are collected in this script.
 """
 import collections
+import copy

 import tensorflow as tf

@@ -51,6 +52,12 @@
 flags.DEFINE_boolean('add_image_level_feature', True,
                      'Add image level feature.')

+flags.DEFINE_multi_integer(
+    'image_pooling_crop_size', None,
+    'Image pooling crop size [height, width] used in the ASPP module. When '
+    'value is None, the model performs image pooling with "crop_size". This '
+    'flag is useful when one wants to use different image pooling sizes.')
+
 flags.DEFINE_boolean('aspp_with_batch_norm', True,
                      'Use batch norm parameters for ASPP or not.')

@@ -106,6 +113,7 @@ class ModelOptions(
         'output_stride',
         'merge_method',
         'add_image_level_feature',
+        'image_pooling_crop_size',
         'aspp_with_batch_norm',
         'aspp_with_separable_conv',
         'multi_grid',

@@ -140,7 +148,13 @@ def __new__(cls,
     return super(ModelOptions, cls).__new__(
         cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride,
         FLAGS.merge_method, FLAGS.add_image_level_feature,
-        FLAGS.aspp_with_batch_norm, FLAGS.aspp_with_separable_conv,
-        FLAGS.multi_grid, FLAGS.decoder_output_stride,
-        FLAGS.decoder_use_separable_conv, FLAGS.logits_kernel_size,
-        FLAGS.model_variant, FLAGS.depth_multiplier)
+        FLAGS.image_pooling_crop_size, FLAGS.aspp_with_batch_norm,
+        FLAGS.aspp_with_separable_conv, FLAGS.multi_grid,
+        FLAGS.decoder_output_stride, FLAGS.decoder_use_separable_conv,
+        FLAGS.logits_kernel_size, FLAGS.model_variant, FLAGS.depth_multiplier)
+
+  def __deepcopy__(self, memo):
+    return ModelOptions(copy.deepcopy(self.outputs_to_num_classes),
+                        self.crop_size,
+                        self.atrous_rates,
+                        self.output_stride)
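Why the `__deepcopy__` override is needed: `ModelOptions` overrides `__new__` with a shorter signature than its field list, so the default namedtuple deepcopy path, which re-invokes `__new__` with every field value via `__getnewargs__`, raises a TypeError; it would also leave the mutable `outputs_to_num_classes` dict shared between copies. A minimal standalone sketch with hypothetical names:

```python
import collections
import copy

# Hypothetical stand-in for ModelOptions: three fields, but __new__ only
# accepts two; the third is filled from a config default (FLAGS in the repo).
_Base = collections.namedtuple(
    '_Base', ['outputs_to_num_classes', 'crop_size', 'merge_method'])


class Options(_Base):

  def __new__(cls, outputs_to_num_classes, crop_size=None):
    return super(Options, cls).__new__(
        cls, outputs_to_num_classes, crop_size, 'max')

  def __deepcopy__(self, memo):
    # Rebuild through the real __new__ and deep-copy the mutable dict.
    return Options(copy.deepcopy(self.outputs_to_num_classes, memo),
                   self.crop_size)


opts = Options({'semantic': 21})
opts_copy = copy.deepcopy(opts)  # without __deepcopy__: TypeError in __new__
opts_copy.outputs_to_num_classes['semantic'] = 22
assert opts.outputs_to_num_classes['semantic'] == 21  # original untouched
```

The testDeepcopy case added below exercises exactly this independence of the copied dict.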

research/deeplab/common_test.py (+18)

@@ -14,6 +14,7 @@
 # ==============================================================================

 """Tests for common.py."""
+import copy

 import tensorflow as tf

@@ -29,6 +30,23 @@ def testOutputsToNumClasses(self):
     self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
                      num_classes)

+  def testDeepcopy(self):
+    num_classes = 21
+    model_options = common.ModelOptions(
+        outputs_to_num_classes={common.OUTPUT_TYPE: num_classes})
+    model_options_new = copy.deepcopy(model_options)
+    self.assertEqual((model_options_new.
+                      outputs_to_num_classes[common.OUTPUT_TYPE]),
+                     num_classes)
+
+    num_classes_new = 22
+    model_options_new.outputs_to_num_classes[common.OUTPUT_TYPE] = (
+        num_classes_new)
+    self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
+                     num_classes)
+    self.assertEqual((model_options_new.
+                      outputs_to_num_classes[common.OUTPUT_TYPE]),
+                     num_classes_new)

 if __name__ == '__main__':
   tf.test.main()

research/deeplab/deeplab_demo.ipynb (+2, -2)

@@ -294,13 +294,13 @@
     "  try:\n",
     "    f = urllib.request.urlopen(url)\n",
     "    jpeg_str = f.read()\n",
-    "    original_im = Image.open(BytesIO(jpeg_str))\n",
+    "    orignal_im = Image.open(BytesIO(jpeg_str))\n",
     "  except IOError:\n",
     "    print('Cannot retrieve image. Please check url: ' + url)\n",
     "    return\n",
     "\n",
     "  print('running deeplab on image %s...' % url)\n",
-    "  resized_im, seg_map = MODEL.run(original_im)\n",
+    "  resized_im, seg_map = MODEL.run(orignal_im)\n",
     "\n",
     "  vis_segmentation(resized_im, seg_map)\n",
     "\n",

research/deeplab/input_preprocess.py (+6, -5)

@@ -95,11 +95,12 @@ def preprocess_image_and_label(image,
   original_image = tf.identity(processed_image)

   # Data augmentation by randomly scaling the inputs.
-  scale = preprocess_utils.get_random_scale(
-      min_scale_factor, max_scale_factor, scale_factor_step_size)
-  processed_image, label = preprocess_utils.randomly_scale_image_and_label(
-      processed_image, label, scale)
-  processed_image.set_shape([None, None, 3])
+  if is_training:
+    scale = preprocess_utils.get_random_scale(
+        min_scale_factor, max_scale_factor, scale_factor_step_size)
+    processed_image, label = preprocess_utils.randomly_scale_image_and_label(
+        processed_image, label, scale)
+    processed_image.set_shape([None, None, 3])

   # Pad image and label to have dimensions >= [crop_height, crop_width]
   image_shape = tf.shape(processed_image)
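The new `if is_training:` guard follows the usual rule that random augmentation ops belong only in the training graph; eval and vis pipelines now see the image at its original scale. A minimal standalone sketch of the pattern (illustrative helper, not the repo's preprocess_utils functions):

```python
import tensorflow as tf  # TF 1.x, matching the repo


def maybe_random_scale(image, is_training, min_scale=0.5, max_scale=2.0):
  """Randomly rescales a [H, W, C] image during training; identity otherwise."""
  if not is_training:
    return image  # deterministic path for eval/vis
  scale = tf.random_uniform([], minval=min_scale, maxval=max_scale)
  new_size = tf.cast(
      tf.cast(tf.shape(image)[0:2], tf.float32) * scale, tf.int32)
  resized = tf.image.resize_bilinear(
      tf.expand_dims(image, 0), new_size, align_corners=True)
  return tf.squeeze(resized, [0])
```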

research/deeplab/model.py (+42, -16)

@@ -237,9 +237,6 @@ def multi_scale_logits(images,
   # Setup default values.
   if not image_pyramid:
     image_pyramid = [1.0]
-  if model_options.crop_size is None and model_options.add_image_level_feature:
-    raise ValueError(
-        'Crop size must be specified for using image-level feature.')
   crop_height = (
       model_options.crop_size[0]
       if model_options.crop_size else tf.shape(images)[1])

@@ -378,18 +375,39 @@ def extract_features(images,
       branch_logits = []

       if model_options.add_image_level_feature:
-        pool_height = scale_dimension(model_options.crop_size[0],
-                                      1. / model_options.output_stride)
-        pool_width = scale_dimension(model_options.crop_size[1],
-                                     1. / model_options.output_stride)
-        image_feature = slim.avg_pool2d(
-            features, [pool_height, pool_width], [pool_height, pool_width],
-            padding='VALID')
+        if model_options.crop_size is not None:
+          image_pooling_crop_size = model_options.image_pooling_crop_size
+          # If image_pooling_crop_size is not specified, use crop_size.
+          if image_pooling_crop_size is None:
+            image_pooling_crop_size = model_options.crop_size
+          pool_height = scale_dimension(image_pooling_crop_size[0],
+                                        1. / model_options.output_stride)
+          pool_width = scale_dimension(image_pooling_crop_size[1],
+                                       1. / model_options.output_stride)
+          image_feature = slim.avg_pool2d(
+              features, [pool_height, pool_width], [1, 1], padding='VALID')
+          resize_height = scale_dimension(model_options.crop_size[0],
+                                          1. / model_options.output_stride)
+          resize_width = scale_dimension(model_options.crop_size[1],
+                                         1. / model_options.output_stride)
+        else:
+          # If crop_size is None, we simply do global pooling.
+          pool_height = tf.shape(features)[1]
+          pool_width = tf.shape(features)[2]
+          image_feature = tf.reduce_mean(features, axis=[1, 2])[:, tf.newaxis,
+                                                                tf.newaxis]
+          resize_height = pool_height
+          resize_width = pool_width
         image_feature = slim.conv2d(
             image_feature, depth, 1, scope=IMAGE_POOLING_SCOPE)
         image_feature = tf.image.resize_bilinear(
-            image_feature, [pool_height, pool_width], align_corners=True)
-        image_feature.set_shape([None, pool_height, pool_width, depth])
+            image_feature, [resize_height, resize_width], align_corners=True)
+        # Set shape for resize_height/resize_width if they are not Tensor.
+        if isinstance(resize_height, tf.Tensor):
+          resize_height = None
+        if isinstance(resize_width, tf.Tensor):
+          resize_width = None
+        image_feature.set_shape([None, resize_height, resize_width, depth])
         branch_logits.append(image_feature)

       # Employ a 1x1 convolution.

@@ -453,9 +471,14 @@ def _get_logits(images,
       fine_tune_batch_norm=fine_tune_batch_norm)

   if model_options.decoder_output_stride is not None:
-    decoder_height = scale_dimension(model_options.crop_size[0],
+    if model_options.crop_size is None:
+      height = tf.shape(images)[1]
+      width = tf.shape(images)[2]
+    else:
+      height, width = model_options.crop_size
+    decoder_height = scale_dimension(height,
                                      1.0 / model_options.decoder_output_stride)
-    decoder_width = scale_dimension(model_options.crop_size[1],
+    decoder_width = scale_dimension(width,
                                     1.0 / model_options.decoder_output_stride)
     features = refine_by_decoder(
         features,

@@ -557,8 +580,11 @@ def refine_by_decoder(features,
         for j, feature in enumerate(decoder_features_list):
           decoder_features_list[j] = tf.image.resize_bilinear(
               feature, [decoder_height, decoder_width], align_corners=True)
-          decoder_features_list[j].set_shape(
-              [None, decoder_height, decoder_width, None])
+          h = (None if isinstance(decoder_height, tf.Tensor)
+               else decoder_height)
+          w = (None if isinstance(decoder_width, tf.Tensor)
+               else decoder_width)
+          decoder_features_list[j].set_shape([None, h, w, None])
         decoder_depth = 256
         if decoder_use_separable_conv:
           decoder_features = split_separable_conv2d(
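The `crop_size is None` branch is the substantive change: image pooling now works with dynamic spatial dimensions, which is also why the `ValueError` in `multi_scale_logits` could be removed. Since `set_shape` accepts only static ints or `None`, Tensor-valued sizes must be masked out first. A standalone sketch of that path (hypothetical placeholder shapes, TF 1.x):

```python
import tensorflow as tf  # TF 1.x, matching the repo

# Features with unknown batch and spatial dims, e.g. when crop_size is None.
features = tf.placeholder(tf.float32, [None, None, None, 256])

# Global image pooling: [N, H, W, C] -> [N, 1, 1, C]. The commit writes this
# as tf.reduce_mean(features, axis=[1, 2])[:, tf.newaxis, tf.newaxis], which
# is equivalent to keepdims=True.
image_feature = tf.reduce_mean(features, axis=[1, 2], keepdims=True)

# Resize back up to the (dynamic) feature-map size.
resize_height = tf.shape(features)[1]
resize_width = tf.shape(features)[2]
image_feature = tf.image.resize_bilinear(
    image_feature, [resize_height, resize_width], align_corners=True)

# set_shape only accepts static ints or None, hence the isinstance checks
# mirrored from the diff above.
h = None if isinstance(resize_height, tf.Tensor) else resize_height
w = None if isinstance(resize_width, tf.Tensor) else resize_width
image_feature.set_shape([None, h, w, 256])
```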

research/deeplab/utils/get_dataset_colormap.py (+2)

@@ -21,6 +21,8 @@

 * Cityscapes dataset (https://www.cityscapes-dataset.com).

+* Mapillary Vistas (https://research.mapillary.com).
+
 * PASCAL VOC 2012 (http://host.robots.ox.ac.uk/pascal/VOC/).
 """

research/deeplab/utils/train_utils.py (+6, -4)

@@ -112,10 +112,12 @@ def get_model_init_fn(train_logdir,

   variables_to_restore = slim.get_variables_to_restore(exclude=exclude_list)

-  return slim.assign_from_checkpoint_fn(
-      tf_initial_checkpoint,
-      variables_to_restore,
-      ignore_missing_vars=ignore_missing_vars)
+  if variables_to_restore:
+    return slim.assign_from_checkpoint_fn(
+        tf_initial_checkpoint,
+        variables_to_restore,
+        ignore_missing_vars=ignore_missing_vars)
+  return None


 def get_model_gradient_multipliers(last_layers, last_layer_gradient_multiplier):
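Why the guard: when the exclude list matches every model variable, `slim.get_variables_to_restore` returns an empty list, and the function now returns `None` instead of building a restore op with nothing to restore. A minimal sketch of the empty case (hypothetical scope name):

```python
import tensorflow as tf  # TF 1.x, matching the repo

slim = tf.contrib.slim

with tf.Graph().as_default():
  with tf.variable_scope('logits'):
    tf.get_variable('weights', shape=[1])
  # Every variable lives under the excluded scope, so nothing remains.
  variables_to_restore = slim.get_variables_to_restore(exclude=['logits'])
  assert not variables_to_restore
```

Callers should treat a `None` return as "no checkpoint restore needed"; `slim.learning.train` already accepts `init_fn=None`, which is presumably why no call-site change appears in this commit.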
