# Multi-view 3D detection config for EmbodiedScanDataset.
#
# Summary of what is configured below:
#   * model: Embodied3DDetector with an image backbone (mmdet.ResNet, depth
#     50), a point backbone (MinkResNet, depth 34) and an FCAF3DHeadRotMat
#     head predicting 284 classes with 12 regression outputs per box.
#   * data: 10 images per sample for training and 50 (ordered) for testing;
#     every image is converted to points and sampled to n_points // 10.
#   * schedule: 12 epochs, AdamW, learning rate multiplied by 0.1 at
#     epochs 8 and 11.
_base_ = ['./default_runtime.py']

# Point budget; each view keeps n_points // 10 points (see the pipelines).
n_points = 100000

# Arguments forwarded to the image/depth loading transforms. None keeps the
# loaders' default file backend.
backend_args = None
# To read data from ceph or another file backend, replace the assignment
# above with a dict like the one below (this setting was called
# `file_client_args` in older OpenMMLab releases).
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/scannet/':
#         's3://openmmlab/datasets/detection3d/scannet_processed/',
#         'data/scannet/':
#         's3://openmmlab/datasets/detection3d/scannet_processed/'
#     }))

model = dict(
    type='Embodied3DDetector',
    data_preprocessor=dict(type='Det3DDataPreprocessor',
                           mean=[123.675, 116.28, 103.53],
                           std=[58.395, 57.12, 57.375],
                           bgr_to_rgb=True,
                           pad_size_divisor=32,
                           batchwise_inputs=True),
    # Image branch.
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        base_channels=16,  # to make it consistent with mink resnet
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        style='pytorch'),
    # Point branch.
    backbone_lidar=dict(type='MinkResNet', in_channels=3, depth=34),
    use_xyz_feat=True,
    bbox_head=dict(type='FCAF3DHeadRotMat',
                   in_channels=(128, 256, 512, 1024),
                   out_channels=128,
                   voxel_size=.01,
                   pts_prune_threshold=20000,
                   pts_assign_threshold=27,
                   pts_center_threshold=18,
                   num_classes=284,  # must equal len(class_names)
                   num_reg_outs=12,
                   center_loss=dict(type='mmdet.CrossEntropyLoss',
                                    use_sigmoid=True),
                   # The rotated-IoU loss is configured with zero weight;
                   # the weighted box term is bbox_loss2 below.
                   bbox_loss=dict(type='RotatedIoU3DLoss', loss_weight=0.0),
                   bbox_loss2=dict(type='BBoxCDLoss',
                                   mode='l1',
                                   loss_weight=1.0,
                                   group='g8'),
                   cls_loss=dict(type='mmdet.FocalLoss'),
                   decouple_bbox_loss=True,
                   # One weight per decouple group (4 groups, 4 weights).
                   decouple_groups=4,
                   decouple_weights=[0.2, 0.2, 0.2, 0.4]),
    coord_type='DEPTH',
    train_cfg=dict(),
    test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01))

dataset_type = 'EmbodiedScanDataset'
data_root = 'data'
# 284 category names. The integer labels in head_labels / common_labels /
# tail_labels below are in the range 0..283, i.e. valid indices here.
# NOTE(review): 'excercise equipment' is kept verbatim; it may have to match
# the spelling used in the annotation files - confirm before correcting.
class_names = (
    'adhesive tape', 'air conditioner', 'alarm', 'album', 'arch', 'backpack',
    'bag', 'balcony', 'ball', 'banister', 'bar', 'barricade', 'baseboard',
    'basin', 'basket', 'bathtub', 'beam', 'beanbag', 'bed', 'bench', 'bicycle',
    'bidet', 'bin', 'blackboard', 'blanket', 'blinds', 'board', 'body loofah',
    'book', 'boots', 'bottle', 'bowl', 'box', 'bread', 'broom', 'brush',
    'bucket', 'cabinet', 'calendar', 'camera', 'can', 'candle', 'candlestick',
    'cap', 'car', 'carpet', 'cart', 'case', 'chair', 'chandelier', 'cleanser',
    'clock', 'clothes', 'clothes dryer', 'coat hanger', 'coffee maker', 'coil',
    'column', 'commode', 'computer', 'conducting wire', 'container', 'control',
    'copier', 'cosmetics', 'couch', 'counter', 'countertop', 'crate', 'crib',
    'cube', 'cup', 'curtain', 'cushion', 'decoration', 'desk', 'detergent',
    'device', 'dish rack', 'dishwasher', 'dispenser', 'divider', 'door',
    'door knob', 'doorframe', 'doorway', 'drawer', 'dress', 'dresser', 'drum',
    'duct', 'dumbbell', 'dustpan', 'dvd', 'eraser', 'excercise equipment',
    'fan', 'faucet', 'fence', 'file', 'fire extinguisher', 'fireplace',
    'flowerpot', 'flush', 'folder', 'food', 'footstool', 'frame', 'fruit',
    'furniture', 'garage door', 'garbage', 'glass', 'globe', 'glove',
    'grab bar', 'grass', 'guitar', 'hair dryer', 'hamper', 'handle', 'hanger',
    'hat', 'headboard', 'headphones', 'heater', 'helmets', 'holder', 'hook',
    'humidifier', 'ironware', 'jacket', 'jalousie', 'jar', 'kettle',
    'keyboard', 'kitchen island', 'kitchenware', 'knife', 'label', 'ladder',
    'lamp', 'laptop', 'ledge', 'letter', 'light', 'luggage', 'machine',
    'magazine', 'mailbox', 'map', 'mask', 'mat', 'mattress', 'menu',
    'microwave', 'mirror', 'molding', 'monitor', 'mop', 'mouse', 'napkins',
    'notebook', 'ottoman', 'oven', 'pack', 'package', 'pad', 'pan', 'panel',
    'paper', 'paper cutter', 'partition', 'pedestal', 'pen', 'person', 'piano',
    'picture', 'pillar', 'pillow', 'pipe', 'pitcher', 'plant', 'plate',
    'player', 'plug', 'plunger', 'pool', 'pool table', 'poster', 'pot',
    'price tag', 'printer', 'projector', 'purse', 'rack', 'radiator', 'radio',
    'rail', 'range hood', 'refrigerator', 'remote control', 'ridge', 'rod',
    'roll', 'roof', 'rope', 'sack', 'salt', 'scale', 'scissors', 'screen',
    'seasoning', 'shampoo', 'sheet', 'shelf', 'shirt', 'shoe', 'shovel',
    'shower', 'sign', 'sink', 'soap', 'soap dish', 'soap dispenser', 'socket',
    'speaker', 'sponge', 'spoon', 'stairs', 'stall', 'stand', 'stapler',
    'statue', 'steps', 'stick', 'stool', 'stopcock', 'stove', 'structure',
    'sunglasses', 'support', 'switch', 'table', 'tablet', 'teapot',
    'telephone', 'thermostat', 'tissue', 'tissue box', 'toaster', 'toilet',
    'toilet paper', 'toiletry', 'tool', 'toothbrush', 'toothpaste', 'towel',
    'toy', 'tray', 'treadmill', 'trophy', 'tube', 'tv', 'umbrella', 'urn',
    'utensil', 'vacuum cleaner', 'vanity', 'vase', 'vent', 'ventilation',
    'wardrobe', 'washbasin', 'washing machine', 'water cooler', 'water heater',
    'window', 'window frame', 'windowsill', 'wine', 'wire', 'wood', 'wrap')
# Three disjoint label groups (90 + 94 + 100 = 284) that together cover every
# label 0..283 exactly once; passed to the dataset as `classes_split`.
head_labels = [
    48, 177, 82, 179, 37, 243, 28, 277, 32, 84, 215, 145, 182, 170, 22, 72, 30,
    141, 65, 257, 221, 225, 52, 75, 231, 158, 236, 156, 47, 74, 6, 18, 71, 242,
    217, 251, 66, 263, 5, 45, 14, 73, 278, 198, 24, 23, 196, 252, 19, 135, 26,
    229, 183, 200, 107, 272, 246, 269, 125, 59, 279, 15, 163, 258, 57, 195, 51,
    88, 97, 58, 102, 36, 137, 31, 80, 160, 155, 61, 238, 96, 190, 25, 219, 152,
    142, 201, 274, 249, 178, 192
]
common_labels = [
    189, 164, 101, 205, 273, 233, 131, 180, 86, 220, 67, 268, 224, 270, 53,
    203, 237, 226, 10, 133, 248, 41, 55, 16, 199, 134, 99, 185, 2, 20, 234,
    194, 253, 35, 174, 8, 223, 13, 91, 262, 230, 121, 49, 63, 119, 162, 79,
    168, 245, 267, 122, 104, 100, 1, 176, 280, 140, 209, 259, 143, 165, 147,
    117, 85, 105, 95, 109, 207, 68, 175, 106, 60, 4, 46, 171, 204, 111, 211,
    108, 120, 157, 222, 17, 264, 151, 98, 38, 261, 123, 78, 118, 127, 240, 124
]
tail_labels = [
    76, 149, 173, 250, 275, 255, 34, 77, 266, 283, 112, 115, 186, 136, 256, 40,
    254, 172, 9, 212, 213, 181, 154, 94, 191, 193, 3, 130, 146, 70, 128, 167,
    126, 81, 7, 11, 148, 228, 239, 247, 21, 42, 89, 153, 161, 244, 110, 0, 29,
    114, 132, 159, 218, 232, 260, 56, 92, 116, 282, 33, 113, 138, 12, 188, 44,
    150, 197, 271, 169, 206, 90, 235, 103, 281, 184, 208, 216, 202, 214, 241,
    129, 210, 276, 64, 27, 87, 139, 227, 187, 62, 43, 50, 69, 93, 144, 166,
    265, 54, 83, 39
]
# NOTE(review): box_type_3d is spelled 'euler-depth' here but 'Euler-Depth'
# in the dataset configs below; both are kept as-is - confirm the consumer
# is case-insensitive.
metainfo = dict(classes=class_names,
                classes_split=(head_labels, common_labels, tail_labels),
                box_type_3d='euler-depth')

train_pipeline = [
    dict(type='LoadAnnotations3D', with_visible_instance_masks=True),
    # Transforms listed under MultiViewPipeline handle the individual views:
    # load RGB + depth, convert to points, sample, resize the image.
    dict(type='MultiViewPipeline',
         n_images=10,
         transforms=[
             dict(type='LoadImageFromFile', backend_args=backend_args),
             dict(type='LoadDepthFromFile', backend_args=backend_args),
             dict(type='ConvertRGBDToPoints', coord_type='CAMERA'),
             dict(type='PointSample', num_points=n_points // 10),
             dict(type='Resize', scale=(480, 480), keep_ratio=False)
         ]),
    dict(type='AggregateMultiViewPoints', coord_type='DEPTH',
         save_slices=True),
    # dict(type='PointSample', num_points=n_points),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_2d=False,  # only flip points
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],  # about +/-5 degrees if radians
         scale_ratio_range=[.9, 1.1],
         translation_std=[.1, .1, .1],
         shift_height=False),
    dict(type='ConstructMultiSweeps'),
    dict(type='Pack3DDetInputs',
         keys=['img', 'points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
# Same loading steps as training, but with 50 ordered views and without the
# random flip / rotation / scale / translation augmentations.
test_pipeline = [
    dict(type='LoadAnnotations3D', with_visible_instance_masks=True),
    dict(type='MultiViewPipeline',
         n_images=50,
         ordered=True,
         transforms=[
             dict(type='LoadImageFromFile', backend_args=backend_args),
             dict(type='LoadDepthFromFile', backend_args=backend_args),
             dict(type='ConvertRGBDToPoints', coord_type='CAMERA'),
             dict(type='PointSample', num_points=n_points // 10),
             dict(type='Resize', scale=(480, 480), keep_ratio=False)
         ]),
    dict(type='AggregateMultiViewPoints', coord_type='DEPTH',
         save_slices=True),
    # dict(type='PointSample', num_points=n_points),
    dict(type='ConstructMultiSweeps'),
    dict(type='Pack3DDetInputs',
         keys=['img', 'points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

# TODO: to determine a reasonable batch size
train_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    # The training set is wrapped in RepeatDataset with times=8.
    dataset=dict(type='RepeatDataset',
                 times=8,
                 dataset=dict(type=dataset_type,
                              data_root=data_root,
                              ann_file='embodiedscan_infos_train_full.pkl',
                              pipeline=train_pipeline,
                              test_mode=False,
                              filter_empty_gt=True,
                              box_type_3d='Euler-Depth',
                              metainfo=metainfo,
                              remove_dontcare=True)))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='embodiedscan_infos_val_full.pkl',  # 'debug_test.pkl',
        pipeline=test_pipeline,
        test_mode=True,
        filter_empty_gt=True,
        box_type_3d='Euler-Depth',
        metainfo=metainfo,
        remove_dontcare=True))
# Testing reuses the validation dataloader (same dict object).
test_dataloader = val_dataloader

val_evaluator = dict(type='IndoorDetMetric', batchwise_anns=True)
test_evaluator = val_evaluator

# training schedule for 1x: 12 epochs, with val_interval equal to max_epochs
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

optim_wrapper = dict(type='OptimWrapper',
                     optimizer=dict(type='AdamW',
                                    lr=0.0004,
                                    weight_decay=0.0001),
                     clip_grad=dict(max_norm=10, norm_type=2))

# learning rate: multiplied by gamma=0.1 at epochs 8 and 11; `end` matches
# train_cfg.max_epochs
param_scheduler = dict(type='MultiStepLR',
                       begin=0,
                       end=12,
                       by_epoch=True,
                       milestones=[8, 11],
                       gamma=0.1)

custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]

# hooks: checkpoint every epoch, keeping at most 4 checkpoints
default_hooks = dict(
    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=4))