Commit ca5b24b

Reproduce model

1 parent 325dff0 · commit ca5b24b


62 files changed: +8894, -125 lines

.gitignore  (+5, -1)
@@ -105,7 +105,6 @@ venv.bak/
.mypy_cache/

# cython generated cpp
-data
.vscode
.idea

@@ -121,3 +120,8 @@ demo/data/*

# mac
.DS_Store
+
+# local data
+data/
+exps/
+todo.md

config/default_runtime.py  (+24)
@@ -0,0 +1,24 @@
default_scope = 'embodiedscan'

default_hooks = dict(timer=dict(type='IterTimerHook'),
                     logger=dict(type='LoggerHook', interval=50),
                     param_scheduler=dict(type='ParamSchedulerHook'),
                     checkpoint=dict(type='CheckpointHook',
                                     interval=1,
                                     max_keep_ckpts=1),
                     sampler_seed=dict(type='DistSamplerSeedHook'))
# visualization=dict(type='Det3DVisualizationHook'))

env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)

log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)

log_level = 'INFO'
load_from = None
resume = False

# TODO: support auto scaling lr
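This runtime file is only a base: the training config that follows pulls it in through `_base_` and overrides individual keys (for example the checkpoint hook). As a minimal sketch of how such a file is consumed — assuming mmengine is installed and the path `config/default_runtime.py` from this commit — the resolved values can be inspected directly:

```python
from mmengine.config import Config

# Load the runtime config on its own; a config listing it in `_base_`
# would get these keys merged into (and overridable from) its own namespace.
cfg = Config.fromfile('config/default_runtime.py')

print(cfg.default_scope)             # 'embodiedscan'
print(cfg.default_hooks.checkpoint)  # type='CheckpointHook', interval=1, max_keep_ckpts=1
print(cfg.log_level, cfg.resume)     # INFO False
```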
(new training config, file name not shown in this view)  (+247)
@@ -0,0 +1,247 @@
_base_ = ['./default_runtime.py']
n_points = 100000

backend_args = None
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/scannet/':
#         's3://openmmlab/datasets/detection3d/scannet_processed/',
#         'data/scannet/':
#         's3://openmmlab/datasets/detection3d/scannet_processed/'
#     }))

model = dict(
    type='Embodied3DDetector',
    data_preprocessor=dict(type='Det3DDataPreprocessor',
                           mean=[123.675, 116.28, 103.53],
                           std=[58.395, 57.12, 57.375],
                           bgr_to_rgb=True,
                           pad_size_divisor=32,
                           batchwise_inputs=True),
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        base_channels=16,  # to make it consistent with mink resnet
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        style='pytorch'),
    backbone_lidar=dict(type='MinkResNet', in_channels=3, depth=34),
    use_xyz_feat=True,
    bbox_head=dict(type='FCAF3DHeadRotMat',
                   in_channels=(128, 256, 512, 1024),
                   out_channels=128,
                   voxel_size=.01,
                   pts_prune_threshold=20000,
                   pts_assign_threshold=27,
                   pts_center_threshold=18,
                   num_classes=284,
                   num_reg_outs=12,
                   center_loss=dict(type='mmdet.CrossEntropyLoss',
                                    use_sigmoid=True),
                   bbox_loss=dict(type='RotatedIoU3DLoss', loss_weight=0.0),
                   bbox_loss2=dict(type='BBoxCDLoss',
                                   mode='l1',
                                   loss_weight=1.0,
                                   group='g8'),
                   cls_loss=dict(type='mmdet.FocalLoss'),
                   decouple_bbox_loss=True,
                   decouple_groups=4,
                   decouple_weights=[0.2, 0.2, 0.2, 0.4]),
    coord_type='DEPTH',
    train_cfg=dict(),
    test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01))

dataset_type = 'EmbodiedScanDataset'
data_root = 'data'
class_names = (
    'adhesive tape', 'air conditioner', 'alarm', 'album', 'arch', 'backpack',
    'bag', 'balcony', 'ball', 'banister', 'bar', 'barricade', 'baseboard',
    'basin', 'basket', 'bathtub', 'beam', 'beanbag', 'bed', 'bench', 'bicycle',
    'bidet', 'bin', 'blackboard', 'blanket', 'blinds', 'board', 'body loofah',
    'book', 'boots', 'bottle', 'bowl', 'box', 'bread', 'broom', 'brush',
    'bucket', 'cabinet', 'calendar', 'camera', 'can', 'candle', 'candlestick',
    'cap', 'car', 'carpet', 'cart', 'case', 'chair', 'chandelier', 'cleanser',
    'clock', 'clothes', 'clothes dryer', 'coat hanger', 'coffee maker', 'coil',
    'column', 'commode', 'computer', 'conducting wire', 'container', 'control',
    'copier', 'cosmetics', 'couch', 'counter', 'countertop', 'crate', 'crib',
    'cube', 'cup', 'curtain', 'cushion', 'decoration', 'desk', 'detergent',
    'device', 'dish rack', 'dishwasher', 'dispenser', 'divider', 'door',
    'door knob', 'doorframe', 'doorway', 'drawer', 'dress', 'dresser', 'drum',
    'duct', 'dumbbell', 'dustpan', 'dvd', 'eraser', 'excercise equipment',
    'fan', 'faucet', 'fence', 'file', 'fire extinguisher', 'fireplace',
    'flowerpot', 'flush', 'folder', 'food', 'footstool', 'frame', 'fruit',
    'furniture', 'garage door', 'garbage', 'glass', 'globe', 'glove',
    'grab bar', 'grass', 'guitar', 'hair dryer', 'hamper', 'handle', 'hanger',
    'hat', 'headboard', 'headphones', 'heater', 'helmets', 'holder', 'hook',
    'humidifier', 'ironware', 'jacket', 'jalousie', 'jar', 'kettle',
    'keyboard', 'kitchen island', 'kitchenware', 'knife', 'label', 'ladder',
    'lamp', 'laptop', 'ledge', 'letter', 'light', 'luggage', 'machine',
    'magazine', 'mailbox', 'map', 'mask', 'mat', 'mattress', 'menu',
    'microwave', 'mirror', 'molding', 'monitor', 'mop', 'mouse', 'napkins',
    'notebook', 'ottoman', 'oven', 'pack', 'package', 'pad', 'pan', 'panel',
    'paper', 'paper cutter', 'partition', 'pedestal', 'pen', 'person', 'piano',
    'picture', 'pillar', 'pillow', 'pipe', 'pitcher', 'plant', 'plate',
    'player', 'plug', 'plunger', 'pool', 'pool table', 'poster', 'pot',
    'price tag', 'printer', 'projector', 'purse', 'rack', 'radiator', 'radio',
    'rail', 'range hood', 'refrigerator', 'remote control', 'ridge', 'rod',
    'roll', 'roof', 'rope', 'sack', 'salt', 'scale', 'scissors', 'screen',
    'seasoning', 'shampoo', 'sheet', 'shelf', 'shirt', 'shoe', 'shovel',
    'shower', 'sign', 'sink', 'soap', 'soap dish', 'soap dispenser', 'socket',
    'speaker', 'sponge', 'spoon', 'stairs', 'stall', 'stand', 'stapler',
    'statue', 'steps', 'stick', 'stool', 'stopcock', 'stove', 'structure',
    'sunglasses', 'support', 'switch', 'table', 'tablet', 'teapot',
    'telephone', 'thermostat', 'tissue', 'tissue box', 'toaster', 'toilet',
    'toilet paper', 'toiletry', 'tool', 'toothbrush', 'toothpaste', 'towel',
    'toy', 'tray', 'treadmill', 'trophy', 'tube', 'tv', 'umbrella', 'urn',
    'utensil', 'vacuum cleaner', 'vanity', 'vase', 'vent', 'ventilation',
    'wardrobe', 'washbasin', 'washing machine', 'water cooler', 'water heater',
    'window', 'window frame', 'windowsill', 'wine', 'wire', 'wood', 'wrap')
head_labels = [
    48, 177, 82, 179, 37, 243, 28, 277, 32, 84, 215, 145, 182, 170, 22, 72, 30,
    141, 65, 257, 221, 225, 52, 75, 231, 158, 236, 156, 47, 74, 6, 18, 71, 242,
    217, 251, 66, 263, 5, 45, 14, 73, 278, 198, 24, 23, 196, 252, 19, 135, 26,
    229, 183, 200, 107, 272, 246, 269, 125, 59, 279, 15, 163, 258, 57, 195, 51,
    88, 97, 58, 102, 36, 137, 31, 80, 160, 155, 61, 238, 96, 190, 25, 219, 152,
    142, 201, 274, 249, 178, 192
]
common_labels = [
    189, 164, 101, 205, 273, 233, 131, 180, 86, 220, 67, 268, 224, 270, 53,
    203, 237, 226, 10, 133, 248, 41, 55, 16, 199, 134, 99, 185, 2, 20, 234,
    194, 253, 35, 174, 8, 223, 13, 91, 262, 230, 121, 49, 63, 119, 162, 79,
    168, 245, 267, 122, 104, 100, 1, 176, 280, 140, 209, 259, 143, 165, 147,
    117, 85, 105, 95, 109, 207, 68, 175, 106, 60, 4, 46, 171, 204, 111, 211,
    108, 120, 157, 222, 17, 264, 151, 98, 38, 261, 123, 78, 118, 127, 240, 124
]
tail_labels = [
    76, 149, 173, 250, 275, 255, 34, 77, 266, 283, 112, 115, 186, 136, 256, 40,
    254, 172, 9, 212, 213, 181, 154, 94, 191, 193, 3, 130, 146, 70, 128, 167,
    126, 81, 7, 11, 148, 228, 239, 247, 21, 42, 89, 153, 161, 244, 110, 0, 29,
    114, 132, 159, 218, 232, 260, 56, 92, 116, 282, 33, 113, 138, 12, 188, 44,
    150, 197, 271, 169, 206, 90, 235, 103, 281, 184, 208, 216, 202, 214, 241,
    129, 210, 276, 64, 27, 87, 139, 227, 187, 62, 43, 50, 69, 93, 144, 166,
    265, 54, 83, 39
]
metainfo = dict(classes=class_names,
                classes_split=(head_labels, common_labels, tail_labels),
                box_type_3d='euler-depth')

train_pipeline = [
    dict(type='LoadAnnotations3D', with_visible_instance_masks=True),
    dict(type='MultiViewPipeline',
         n_images=10,
         transforms=[
             dict(type='LoadImageFromFile', backend_args=backend_args),
             dict(type='LoadDepthFromFile', backend_args=backend_args),
             dict(type='ConvertRGBDToPoints', coord_type='CAMERA'),
             dict(type='PointSample', num_points=n_points // 10),
             dict(type='Resize', scale=(480, 480), keep_ratio=False)
         ]),
    dict(type='AggregateMultiViewPoints', coord_type='DEPTH',
         save_slices=True),
    # dict(type='PointSample', num_points=n_points),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_2d=False,  # only flip points
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[.9, 1.1],
         translation_std=[.1, .1, .1],
         shift_height=False),
    dict(type='ConstructMultiSweeps'),
    dict(type='Pack3DDetInputs',
         keys=['img', 'points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(type='LoadAnnotations3D', with_visible_instance_masks=True),
    dict(type='MultiViewPipeline',
         n_images=50,
         ordered=True,
         transforms=[
             dict(type='LoadImageFromFile', backend_args=backend_args),
             dict(type='LoadDepthFromFile', backend_args=backend_args),
             dict(type='ConvertRGBDToPoints', coord_type='CAMERA'),
             dict(type='PointSample', num_points=n_points // 10),
             dict(type='Resize', scale=(480, 480), keep_ratio=False)
         ]),
    dict(type='AggregateMultiViewPoints', coord_type='DEPTH',
         save_slices=True),
    # dict(type='PointSample', num_points=n_points),
    dict(type='ConstructMultiSweeps'),
    dict(type='Pack3DDetInputs',
         keys=['img', 'points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

# TODO: to determine a reasonable batch size
train_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(type='RepeatDataset',
                 times=8,
                 dataset=dict(type=dataset_type,
                              data_root=data_root,
                              ann_file='embodiedscan_infos_train_full.pkl',
                              pipeline=train_pipeline,
                              test_mode=False,
                              filter_empty_gt=True,
                              box_type_3d='Euler-Depth',
                              metainfo=metainfo,
                              remove_dontcare=True)))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='embodiedscan_infos_val_full.pkl',  # 'debug_test.pkl',
        pipeline=test_pipeline,
        test_mode=True,
        filter_empty_gt=True,
        box_type_3d='Euler-Depth',
        metainfo=metainfo,
        remove_dontcare=True))
test_dataloader = val_dataloader

val_evaluator = dict(type='IndoorDetMetric', batchwise_anns=True)
test_evaluator = val_evaluator

# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

optim_wrapper = dict(type='OptimWrapper',
                     optimizer=dict(type='AdamW',
                                    lr=0.0004,
                                    weight_decay=0.0001),
                     clip_grad=dict(max_norm=10, norm_type=2))

# learning rate
param_scheduler = dict(type='MultiStepLR',
                       begin=0,
                       end=12,
                       by_epoch=True,
                       milestones=[8, 11],
                       gamma=0.1)

custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]

# hooks
default_hooks = dict(
    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=4))
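The optimizer and scheduler blocks above fully determine the learning-rate trajectory for the 12-epoch schedule. A small standalone sketch (plain Python, only reusing the numbers from this config) of what `MultiStepLR` with `milestones=[8, 11]` and `gamma=0.1` yields per epoch:

```python
# Learning-rate trajectory implied by the config above:
# base lr 4e-4, multiplied by gamma=0.1 once each milestone epoch is reached.
base_lr, gamma, milestones, max_epochs = 0.0004, 0.1, [8, 11], 12

for epoch in range(max_epochs):
    lr = base_lr * gamma ** sum(epoch >= m for m in milestones)
    print(f'epoch {epoch:2d}: lr = {lr:.1e}')
# epochs 0-7 -> 4.0e-04, epochs 8-10 -> 4.0e-05, epoch 11 -> 4.0e-06
```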

embodiedscan/datasets/__init__.py  (+4)
@@ -0,0 +1,4 @@
from .embodied_dataset import EmbodiedScanDataset
from .transforms import *  # noqa: F401,F403

__all__ = ['EmbodiedScanDataset']
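The wildcard import of `.transforms` is there for its registration side effects: importing the package is what lets string types such as `'MultiViewPipeline'` or `'ConvertRGBDToPoints'` in the configs above resolve by name when the pipelines are built. A minimal usage sketch, assuming the package is installed in the current environment:

```python
# Importing the subpackage re-exports the dataset class and, via the
# wildcard import in __init__.py, pulls in the custom transforms as well.
from embodiedscan.datasets import EmbodiedScanDataset

print(EmbodiedScanDataset)  # the class referenced as `dataset_type` in the configs
```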
