# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides flags that are common to scripts.

Common flags from train/eval/vis/export_model.py are collected in this script.
"""
import collections
import copy
import json

import tensorflow as tf

flags = tf.app.flags

# Flags for input preprocessing.

flags.DEFINE_integer('min_resize_value', None,
                     'Desired size of the smaller image side.')

flags.DEFINE_integer('max_resize_value', None,
                     'Maximum allowed size of the larger image side.')

flags.DEFINE_integer('resize_factor', None,
                     'Resized dimensions are multiple of factor plus one.')

flags.DEFINE_boolean('keep_aspect_ratio', True,
                     'Keep aspect ratio after resizing or not.')

# Model dependent flags.

flags.DEFINE_integer('logits_kernel_size', 1,
                     'The kernel size for the convolutional kernel that '
                     'generates logits.')

# When using 'mobilenet_v2', we set atrous_rates = decoder_output_stride = None.
# When using 'xception_65' or 'resnet_v1' model variants, we set
# atrous_rates = [6, 12, 18] (output stride 16) and decoder_output_stride = 4.
# See core/feature_extractor.py for supported model variants.
flags.DEFINE_string('model_variant', 'mobilenet_v2', 'DeepLab model variant.')

flags.DEFINE_multi_float('image_pyramid', None,
                         'Input scales for multi-scale feature extraction.')

flags.DEFINE_boolean('add_image_level_feature', True,
                     'Add image level feature.')

flags.DEFINE_list(
    'image_pooling_crop_size', None,
    'Image pooling crop size [height, width] used in the ASPP module. When '
    # NOTE: added the missing space so the rendered help text no longer reads
    # "Thisflag".
    'value is None, the model performs image pooling with "crop_size". This '
    'flag is useful when one likes to use different image pooling sizes.')

flags.DEFINE_list(
    'image_pooling_stride', '1,1',
    'Image pooling stride [height, width] used in the ASPP image pooling. ')

flags.DEFINE_boolean('aspp_with_batch_norm', True,
                     'Use batch norm parameters for ASPP or not.')

flags.DEFINE_boolean('aspp_with_separable_conv', True,
                     'Use separable convolution for ASPP or not.')

# Defaults to None. Set multi_grid = [1, 2, 4] when using provided
# 'resnet_v1_{50,101}_beta' checkpoints.
flags.DEFINE_multi_integer('multi_grid', None,
                           'Employ a hierarchy of atrous rates for ResNet.')

flags.DEFINE_float('depth_multiplier', 1.0,
                   'Multiplier for the depth (number of channels) for all '
                   'convolution ops used in MobileNet.')

flags.DEFINE_integer('divisible_by', None,
                     'An integer that ensures the layer # channels are '
                     'divisible by this value. Used in MobileNet.')

# For `xception_65`, use decoder_output_stride = 4. For `mobilenet_v2`, use
# decoder_output_stride = None.
flags.DEFINE_list('decoder_output_stride', None,
                  'Comma-separated list of strings with the number specifying '
                  # NOTE: added the missing space ("level.Current") and the
                  # missing closing parenthesis of "(i.e., ...)".
                  'output stride of low-level features at each network level. '
                  'Current semantic segmentation implementation assumes at '
                  'most one output stride (i.e., either None or a list with '
                  'only one element).')

flags.DEFINE_boolean('decoder_use_separable_conv', True,
                     'Employ separable convolution for decoder or not.')

flags.DEFINE_enum('merge_method', 'max', ['max', 'avg'],
                  'Scheme to merge multi scale features.')

flags.DEFINE_boolean(
    'prediction_with_upsampled_logits', True,
    'When performing prediction, there are two options: (1) bilinear '
    'upsampling the logits followed by softmax, or (2) softmax followed by '
    'bilinear upsampling.')

flags.DEFINE_string(
    'dense_prediction_cell_json', '',
    'A JSON file that specifies the dense prediction cell.')

flags.DEFINE_integer(
    'nas_stem_output_num_conv_filters', 20,
    'Number of filters of the stem output tensor in NAS models.')

flags.DEFINE_bool('nas_use_classification_head', False,
                  'Use image classification head for NAS model variants.')

flags.DEFINE_bool('nas_remove_os32_stride', False,
                  'Remove the stride in the output stride 32 branch.')

flags.DEFINE_bool('use_bounded_activation', False,
                  'Whether or not to use bounded activations. Bounded '
                  'activations better lend themselves to quantized inference.')

flags.DEFINE_boolean('aspp_with_concat_projection', True,
                     'ASPP with concat projection.')

flags.DEFINE_boolean('aspp_with_squeeze_and_excitation', False,
                     'ASPP with squeeze and excitation.')

flags.DEFINE_integer('aspp_convs_filters', 256, 'ASPP convolution filters.')

flags.DEFINE_boolean('decoder_use_sum_merge', False,
                     'Decoder uses simply sum merge.')

flags.DEFINE_integer('decoder_filters', 256, 'Decoder filters.')

flags.DEFINE_boolean('decoder_output_is_logits', False,
                     'Use decoder output as logits or not.')

flags.DEFINE_boolean('image_se_uses_qsigmoid', False, 'Use q-sigmoid.')

flags.DEFINE_multi_float(
    'label_weights', None,
    'A list of label weights, each element represents the weight for the label '
    'of its index, for example, label_weights = [0.1, 0.5] means the weight '
    'for label 0 is 0.1 and the weight for label 1 is 0.5. If set as None, all '
    'the labels have the same weight 1.0.')

flags.DEFINE_float('batch_norm_decay', 0.9997, 'Batchnorm decay.')

FLAGS = flags.FLAGS

# Constants

# Perform semantic segmentation predictions.
OUTPUT_TYPE = 'semantic'

# Semantic segmentation item names.
LABELS_CLASS = 'labels_class'
IMAGE = 'image'
HEIGHT = 'height'
WIDTH = 'width'
IMAGE_NAME = 'image_name'
LABEL = 'label'
ORIGINAL_IMAGE = 'original_image'

# Test set name.
TEST_SET = 'test'


class ModelOptions(
    collections.namedtuple('ModelOptions', [
        'outputs_to_num_classes',
        'crop_size',
        'atrous_rates',
        'output_stride',
        'preprocessed_images_dtype',
        'merge_method',
        'add_image_level_feature',
        'image_pooling_crop_size',
        'image_pooling_stride',
        'aspp_with_batch_norm',
        'aspp_with_separable_conv',
        'multi_grid',
        'decoder_output_stride',
        'decoder_use_separable_conv',
        'logits_kernel_size',
        'model_variant',
        'depth_multiplier',
        'divisible_by',
        'prediction_with_upsampled_logits',
        'dense_prediction_cell_config',
        'nas_architecture_options',
        'use_bounded_activation',
        'aspp_with_concat_projection',
        'aspp_with_squeeze_and_excitation',
        'aspp_convs_filters',
        'decoder_use_sum_merge',
        'decoder_filters',
        'decoder_output_is_logits',
        'image_se_uses_qsigmoid',
        'label_weights',
        'sync_batch_norm_method',
        'batch_norm_decay',
    ])):
  """Immutable class to hold model options."""

  __slots__ = ()

  def __new__(cls,
              outputs_to_num_classes,
              crop_size=None,
              atrous_rates=None,
              output_stride=8,
              preprocessed_images_dtype=tf.float32):
    """Constructor to set default values.

    All fields beyond the explicit parameters are filled in from FLAGS.

    Args:
      outputs_to_num_classes: A dictionary from output type to the number of
        classes. For example, for the task of semantic segmentation with 21
        semantic classes, we would have outputs_to_num_classes['semantic'] = 21.
      crop_size: A tuple [crop_height, crop_width].
      atrous_rates: A list of atrous convolution rates for ASPP.
      output_stride: The ratio of input to output spatial resolution.
      preprocessed_images_dtype: The type after the preprocessing function.

    Returns:
      A new ModelOptions instance.

    Raises:
      ValueError: If FLAGS.decoder_output_stride is not sorted in descending
        order.
    """
    # Load the dense prediction cell spec from JSON, if one was supplied.
    dense_prediction_cell_config = None
    if FLAGS.dense_prediction_cell_json:
      with tf.gfile.Open(FLAGS.dense_prediction_cell_json, 'r') as config_file:
        dense_prediction_cell_config = json.load(config_file)

    # Decoder output strides must run from the largest down to the smallest.
    decoder_output_stride = None
    if FLAGS.decoder_output_stride:
      decoder_output_stride = [int(x) for x in FLAGS.decoder_output_stride]
      if sorted(decoder_output_stride, reverse=True) != decoder_output_stride:
        raise ValueError('Decoder output stride need to be sorted in the '
                         'descending order.')

    image_pooling_crop_size = None
    if FLAGS.image_pooling_crop_size:
      image_pooling_crop_size = [int(x) for x in FLAGS.image_pooling_crop_size]

    image_pooling_stride = [1, 1]
    if FLAGS.image_pooling_stride:
      image_pooling_stride = [int(x) for x in FLAGS.image_pooling_stride]

    # A scalar weight of 1.0 means all labels are weighted equally.
    label_weights = 1.0 if FLAGS.label_weights is None else FLAGS.label_weights

    nas_architecture_options = {
        'nas_stem_output_num_conv_filters':
            FLAGS.nas_stem_output_num_conv_filters,
        'nas_use_classification_head': FLAGS.nas_use_classification_head,
        'nas_remove_os32_stride': FLAGS.nas_remove_os32_stride,
    }

    # Positional order below must match the namedtuple field list above.
    return super(ModelOptions, cls).__new__(
        cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride,
        preprocessed_images_dtype, FLAGS.merge_method,
        FLAGS.add_image_level_feature, image_pooling_crop_size,
        image_pooling_stride, FLAGS.aspp_with_batch_norm,
        FLAGS.aspp_with_separable_conv, FLAGS.multi_grid, decoder_output_stride,
        FLAGS.decoder_use_separable_conv, FLAGS.logits_kernel_size,
        FLAGS.model_variant, FLAGS.depth_multiplier, FLAGS.divisible_by,
        FLAGS.prediction_with_upsampled_logits, dense_prediction_cell_config,
        nas_architecture_options, FLAGS.use_bounded_activation,
        FLAGS.aspp_with_concat_projection,
        FLAGS.aspp_with_squeeze_and_excitation, FLAGS.aspp_convs_filters,
        FLAGS.decoder_use_sum_merge, FLAGS.decoder_filters,
        FLAGS.decoder_output_is_logits, FLAGS.image_se_uses_qsigmoid,
        label_weights, 'None', FLAGS.batch_norm_decay)

  def __deepcopy__(self, memo):
    # Deep-copy only the mutable outputs_to_num_classes mapping; every other
    # field is rebuilt from FLAGS (or copied by reference) via __new__.
    return ModelOptions(copy.deepcopy(self.outputs_to_num_classes),
                        self.crop_size,
                        self.atrous_rates,
                        self.output_stride,
                        self.preprocessed_images_dtype)