
Commit b5fb2b7

[feature]: support mmdet models config (#25)
* support mmdet models
* add mmlab_models_usage_guide.md
* remove tools/test.py
1 parent 10266f5 commit b5fb2b7

14 files changed: +668 −197 lines

Lines changed: 138 additions & 0 deletions

@@ -0,0 +1,138 @@
# model settings
model = dict(
    type='MaskRCNN',
    # EasyCV backbone
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3, 4),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True),
    # mmdet backbone
    # backbone=dict(
    #     type='ResNet',
    #     depth=50,
    #     num_stages=4,
    #     out_indices=(0, 1, 2, 3),
    #     frozen_stages=1,
    #     norm_cfg=dict(type='BN', requires_grad=True),
    #     norm_eval=True,
    #     style='pytorch',
    #     init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))

mmlab_modules = [
    dict(type='mmdet', name='MaskRCNN', module='model'),
    # dict(type=MMDET, name='ResNet', module='backbone'), # comment out, use EasyCV ResNet
    dict(type='mmdet', name='FPN', module='neck'),
    dict(type='mmdet', name='RPNHead', module='head'),
    dict(type='mmdet', name='StandardRoIHead', module='head'),
]
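The usage guide added below refers to this config as `configs/detection/mask_rcnn/mask_rcnn_r50_fpn.py`. As a quick orientation, here is a minimal sketch (not part of the commit) that loads such a config with mmcv and prints which modules are delegated to mmdet; the path is taken from the guide and everything else is illustrative.

```python
# Minimal sketch (not part of this commit): list which modules a config
# delegates to mmdet. Config path taken from the usage guide; purely illustrative.
from mmcv import Config

cfg = Config.fromfile('configs/detection/mask_rcnn/mask_rcnn_r50_fpn.py')
for entry in cfg.get('mmlab_modules', []):
    print('{:>8}: {} -> {}'.format(entry['module'], entry['name'], entry['type']))
# Anything not listed here (e.g. the backbone ResNet) falls back to EasyCV's own implementation.
```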
Lines changed: 69 additions & 0 deletions

@@ -0,0 +1,69 @@
# Use mmdetection's models in EasyCV

For details of mmdetection, please refer to: https://github.com/open-mmlab/mmdetection

**We only support mmdet's models. Other repositories in mmlab are not supported, and other modules such as transforms, dataset APIs, etc. are not supported either.**

The models module of EasyCV is divided into four parts: `backbone`, `head`, `neck`, and `model`.

So we support combining EasyCV and mmdet models at these four levels.

**We do not adapt the other APIs used inside these four module levels; we wrap each API in its entirety.**

> **Note:**
>
> **If you want to combine the model parts of mmdet and EasyCV, please pay attention to the compatibility between the APIs; we do not guarantee that the APIs of EasyCV and mmdet are compatible.**
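To make "wrap the entire API" concrete, here is a conceptual sketch only, not EasyCV's actual adapter code: the mmdet class is registered as-is into a build registry, so everything it constructs internally stays on the mmdet side. The registry name is illustrative.

```python
# Conceptual sketch only -- not EasyCV's actual adapter code.
# Registering the mmdet class as-is means everything it builds internally
# (anchor generators, box coders, ...) also comes from mmdet.
from mmcv.utils import Registry, build_from_cfg
from mmdet.models import FPN  # reused unchanged from mmdet

NECKS = Registry('neck')                       # illustrative registry name
NECKS.register_module(module=FPN, name='FPN')

neck_cfg = dict(type='FPN', in_channels=[256, 512, 1024, 2048],
                out_channels=256, num_outs=5)
neck = build_from_cfg(neck_cfg, NECKS)         # builds mmdet's FPN from the config dict
```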
Take the `MaskRCNN` model as an example, see [mask_rcnn_r50_fpn.py](https://github.com/alibaba/EasyCV/tree/master/configs/detection/mask_rcnn/mask_rcnn_r50_fpn.py). Except for the backbone, the other parts of this model all use mmdet APIs.

The framework of `MaskRCNN` can be divided into the following parts at the `backbone`, `head`, `neck`, and `model` levels:

- backbone: `ResNet`

- head: `RPNHead`, `StandardRoIHead`

- neck: `FPN`

- model: `MaskRCNN`

The configuration adapting these parts to mmdet is as follows:
```python
mmlab_modules = [
    dict(type='mmdet', name='MaskRCNN', module='model'),
    # dict(type='mmdet', name='ResNet', module='backbone'), # comment out, use EasyCV ResNet
    dict(type='mmdet', name='FPN', module='neck'),
    dict(type='mmdet', name='RPNHead', module='head'),
    dict(type='mmdet', name='StandardRoIHead', module='head'),
]
```

> Parameters:
>
> - type: the name of the source repository; only `mmdet` is supported
> - name: the name of the API
> - module: the name of the module the API belongs to; only `backbone`, `head`, `neck`, and `model` are supported
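These constraints can be summarized in a small check; the helper below is hypothetical, not an EasyCV API, and only mirrors the parameter rules stated above.

```python
# Illustrative, hypothetical check that mirrors the constraints above:
# type must be 'mmdet' and module must be one of the four supported levels.
ALLOWED_TYPES = {'mmdet'}
ALLOWED_MODULES = {'backbone', 'head', 'neck', 'model'}

def check_mmlab_modules(entries):
    for entry in entries:
        assert entry['type'] in ALLOWED_TYPES, 'unsupported type: %s' % entry['type']
        assert entry['module'] in ALLOWED_MODULES, 'unsupported module: %s' % entry['module']

check_mmlab_modules(mmlab_modules)  # passes for the list shown above
```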

In this configuration, the `head`, `neck`, and `model` parts specify the type as `mmdet`; only the `backbone` does not.

**Any part not configured here uses the EasyCV API by default, such as the backbone (`ResNet`).**

**For the parts explicitly configured with type `mmdet`, the mmdet API is used.**

That is:

- `MaskRCNN` (model): uses mmdet's `MaskRCNN` API.

- `ResNet` (backbone): uses EasyCV's `ResNet` API.

> Note that the parameters of mmdet's and EasyCV's `ResNet` are different, so please pay attention to them (see the config excerpt after this list).

- `RPNHead` (head): uses mmdet's `RPNHead` API.

> Note that all the other APIs configured inside `RPNHead`, such as `AnchorGenerator`, `DeltaXYWHBBoxCoder`, etc., are also mmdet APIs, because we wrap the entire API.

- `StandardRoIHead` (head): uses mmdet's `StandardRoIHead` API.

> Note that all the other APIs configured inside `StandardRoIHead`, such as `SingleRoIExtractor`, `FCNMaskHead`, etc., are also mmdet APIs, because we wrap the entire API.

- `FPN` (neck): uses mmdet's `FPN` API.
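To make the `ResNet` parameter difference concrete, here is the backbone contrast taken from the `mask_rcnn_r50_fpn.py` config added in this commit; note the different `out_indices`, which suggests the two implementations index the stages differently.

```python
# Excerpt adapted from the mask_rcnn_r50_fpn.py config in this commit:
# the same four ResNet stages are selected, but the two implementations
# appear to index them differently.
easycv_backbone = dict(
    type='ResNet',
    depth=50,
    num_stages=4,
    out_indices=(1, 2, 3, 4),   # EasyCV ResNet
    frozen_stages=1,
    norm_cfg=dict(type='BN', requires_grad=True),
    norm_eval=True)

mmdet_backbone = dict(
    type='ResNet',
    depth=50,
    num_stages=4,
    out_indices=(0, 1, 2, 3),   # mmdet ResNet
    frozen_stages=1,
    norm_cfg=dict(type='BN', requires_grad=True),
    norm_eval=True,
    style='pytorch',
    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'))
```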

easycv/apis/test.py

Lines changed: 9 additions & 2 deletions

@@ -119,9 +119,16 @@ def single_gpu_test(model, data_loader, mode='test', use_fp16=False, **kwargs):
                 results[k].append(v)
 
         if 'img_metas' in data:
-            batch_size = len(data['img_metas'].data[0])
+            if isinstance(data['img_metas'], list):
+                batch_size = len(data['img_metas'][0].data[0])
+            else:
+                batch_size = len(data['img_metas'].data[0])
+
         else:
-            batch_size = data['img'].size(0)
+            if isinstance(data['img'], list):
+                batch_size = data['img'][0].size(0)
+            else:
+                batch_size = data['img'].size(0)
 
         for _ in range(batch_size):
             prog_bar.update()
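The change above covers the case where the test pipeline wraps `img_metas` and `img` in lists (as mmdet-style multi-scale/flip test pipelines do after collation) instead of passing a bare `DataContainer`/tensor. A minimal sketch of the two layouts, assuming mmcv's `DataContainer`; the meta values are dummies.

```python
# Sketch of the two data layouts the patched batch-size logic distinguishes.
import torch
from mmcv.parallel import DataContainer

metas = DataContainer([[dict(ori_shape=(800, 1333, 3))]])  # dummy meta, batch of 1

plain = {'img_metas': metas, 'img': torch.zeros(1, 3, 800, 1333)}
wrapped = {'img_metas': [metas], 'img': [torch.zeros(1, 3, 800, 1333)]}  # e.g. multi-scale/flip test

for data in (plain, wrapped):
    if isinstance(data['img_metas'], list):
        batch_size = len(data['img_metas'][0].data[0])
    else:
        batch_size = len(data['img_metas'].data[0])
    print(batch_size)  # -> 1 in both cases
```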

easycv/apis/train.py

Lines changed: 1 addition & 1 deletion

@@ -151,7 +151,7 @@ def train_model(model,
     if validate:
         interval = cfg.eval_config.pop('interval', 1)
         for idx, eval_pipe in enumerate(cfg.eval_pipelines):
-            data = eval_pipe.data
+            data = eval_pipe.get('data', None) or cfg.data.val
             dist_eval = eval_pipe.get('dist_eval', False)
 
             evaluator_cfg = eval_pipe.evaluators[0]
easycv/core/evaluation/coco_evaluation.py

Lines changed: 31 additions & 1 deletion

@@ -473,7 +473,8 @@ def add_single_detected_image_info(self, image_id, detections_dict):
         groundtruth_masks_shape = self._image_id_to_mask_shape_map[image_id]
         detection_masks = detections_dict[
             standard_fields.DetectionResultFields.detection_masks]
-        if groundtruth_masks_shape[1:] != detection_masks.shape[1:]:
+        if len(detection_masks
+               ) and groundtruth_masks_shape[1:] != detection_masks.shape[1:]:
             raise ValueError(
                 'Spatial shape of groundtruth masks and detection masks '
                 'are incompatible: {} vs {}'.format(groundtruth_masks_shape,

@@ -601,6 +602,9 @@ def _evaluate_impl(self, prediction_dict, groundtruth_dict):
             else:
                 groundtruth_is_crowd = groundtruth_is_crowd_list[idx]
 
+            gt_masks = np.array(
+                [self._ann_to_mask(mask, height, width) for mask in gt_masks],
+                dtype=np.uint8)
             groundtruth_dict = {
                 'groundtruth_boxes': gt_boxes_absolute,
                 'groundtruth_instance_masks': gt_masks,

@@ -609,6 +613,11 @@ def _evaluate_impl(self, prediction_dict, groundtruth_dict):
             }
             self.add_single_ground_truth_image_info(image_id, groundtruth_dict)
 
+            detection_masks = np.array([
+                self._ann_to_mask(mask, height, width)
+                for mask in detection_masks
+            ],
+                                       dtype=np.uint8)
             # add detection info
             detection_dict = {
                 'detection_masks': detection_masks,

@@ -621,6 +630,27 @@ def _evaluate_impl(self, prediction_dict, groundtruth_dict):
         self.clear()
         return eval_dict
 
+    def _ann_to_mask(self, segmentation, height, width):
+        from xtcocotools import mask as maskUtils
+        segm = segmentation
+        h = height
+        w = width
+
+        if type(segm) == list:
+            # polygon -- a single object might consist of multiple parts
+            # we merge all parts into one mask rle code
+            rles = maskUtils.frPyObjects(segm, h, w)
+            rle = maskUtils.merge(rles)
+        elif type(segm['counts']) == list:
+            # uncompressed RLE
+            rle = maskUtils.frPyObjects(segm, h, w)
+        else:
+            # rle
+            rle = segm
+
+        m = maskUtils.decode(rle)
+        return m
+
 
 @EVALUATORS.register_module
 class CoCoPoseTopDownEvaluator(Evaluator):
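The new `_ann_to_mask` helper decodes COCO-style segmentation annotations (polygon lists, uncompressed RLE, or compressed RLE) into binary masks before they reach mask evaluation. A standalone sketch of the same polygon path with `xtcocotools`; the polygon values are dummies.

```python
# Standalone sketch of the polygon -> binary mask path used by _ann_to_mask.
from xtcocotools import mask as maskUtils

height, width = 4, 4
polygon = [[0.5, 0.5, 3.5, 0.5, 3.5, 3.5, 0.5, 3.5]]  # one object made of one part

rles = maskUtils.frPyObjects(polygon, height, width)  # polygon(s) -> RLE(s)
rle = maskUtils.merge(rles)                           # merge parts into a single RLE
m = maskUtils.decode(rle)                             # RLE -> (height, width) uint8 mask
print(m.shape, m.dtype)                               # (4, 4) uint8
```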

easycv/datasets/detection/pipelines/mm_transforms.py

Lines changed: 29 additions & 28 deletions

@@ -1644,20 +1644,21 @@ def _poly2mask(self, mask_ann, img_h, img_w):
         Returns:
             numpy.ndarray: The decode bitmap mask of shape (img_h, img_w).
         """
-        raise NotImplementedError
-        # if isinstance(mask_ann, list):
-        #     # polygon -- a single object might consist of multiple parts
-        #     # we merge all parts into one mask rle code
-        #     rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
-        #     rle = maskUtils.merge(rles)
-        # elif isinstance(mask_ann['counts'], list):
-        #     # uncompressed RLE
-        #     rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
-        # else:
-        #     # rle
-        #     rle = mask_ann
-        # mask = maskUtils.decode(rle)
-        # return mask
+        import xtcocotools.mask as maskUtils
+
+        if isinstance(mask_ann, list):
+            # polygon -- a single object might consist of multiple parts
+            # we merge all parts into one mask rle code
+            rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
+            rle = maskUtils.merge(rles)
+        elif isinstance(mask_ann['counts'], list):
+            # uncompressed RLE
+            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
+        else:
+            # rle
+            rle = mask_ann
+        mask = maskUtils.decode(rle)
+        return mask
 
     def process_polygons(self, polygons):
         """Convert polygons to list of ndarray and filter invalid polygons.

@@ -1687,20 +1688,20 @@ def _load_masks(self, results):
             If ``self.poly2mask`` is set ``True``, `gt_mask` will contain
             :obj:`PolygonMasks`. Otherwise, :obj:`BitmapMasks` is used.
         """
-        raise NotImplementedError
-
-        # h, w = results['img_info']['height'], results['img_info']['width']
-        # gt_masks = results['ann_info']['masks']
-        # if self.poly2mask:
-        #     gt_masks = BitmapMasks(
-        #         [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
-        # else:
-        #     gt_masks = PolygonMasks(
-        #         [self.process_polygons(polygons) for polygons in gt_masks], h,
-        #         w)
-        # results['gt_masks'] = gt_masks
-        # results['mask_fields'].append('gt_masks')
-        # return results
+        from mmdet.core import BitmapMasks, PolygonMasks
+
+        h, w = results['img_info']['height'], results['img_info']['width']
+        gt_masks = results['ann_info']['masks']
+        if self.poly2mask:
+            gt_masks = BitmapMasks(
+                [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
+        else:
+            gt_masks = PolygonMasks(
+                [self.process_polygons(polygons) for polygons in gt_masks], h,
+                w)
+        results['gt_masks'] = gt_masks
+        results['mask_fields'].append('gt_masks')
+        return results
 
     def _load_semantic_seg(self, results):
         """Private function to load semantic segmentation annotations.

easycv/datasets/detection/raw.py

Lines changed: 4 additions & 0 deletions

@@ -70,6 +70,10 @@ def evaluate(self, results, evaluators=None, logger=None):
             self.data_source.get_ann_info(idx)['groundtruth_is_crowd']
             for idx in range(len(results['img_metas']))
         ]
+        groundtruth_dict['groundtruth_instance_masks'] = [
+            self.data_source.get_ann_info(idx).get('masks', None)
+            for idx in range(len(results['img_metas']))
+        ]
 
         for evaluator in evaluators:
             eval_result.update(evaluator.evaluate(results, groundtruth_dict))
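After this change, the ground truth handed to each evaluator also carries per-image instance masks. A sketch of the resulting structure; the keys come from the diff, the values are dummies.

```python
# Sketch of the evaluator input after this change (real keys, dummy values).
groundtruth_dict = {
    'groundtruth_is_crowd': [
        [0, 0],                              # image 0: two instances, none crowd
    ],
    'groundtruth_instance_masks': [
        [[[0.5, 0.5, 3.5, 0.5, 3.5, 3.5]]],  # image 0: COCO-style segmentations, or None if absent
    ],
}
```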
