Support continuous occupancy drawer and fix linting

Tai-Wang · Tai-Wang · commit 325dff08c08a · 2023-12-22T18:05:31.000+08:00
diff --git a/embodiedscan/explorer.py b/embodiedscan/explorer.py
@@ -6,7 +6,8 @@
 import open3d as o3d
 
 from embodiedscan.utils.color_selector import ColorMap
-from embodiedscan.utils.continuous_drawer import ContinuousDrawer
+from embodiedscan.utils.continuous_drawer import (ContinuousDrawer,
+                                                  ContinuousOccupancyDrawer)
 from embodiedscan.utils.img_drawer import ImageDrawer
 
 DATASETS = ['scannet', '3rscan', 'matterport3d']
@@ -299,6 +300,54 @@ def render_countinuous_scene(self,
                                   pcd_downsample)
         drawer.begin()
 
+    def render_countinuous_occupancy(self, scene_name, start_cam=None):
+        """Render occupancy with continuous ego-centric observations.
+
+        Prints a message and returns without rendering when the scene or
+        the start camera cannot be found.
+
+        Args:
+            scene_name (str): Scene name, '<dataset>/<region>' or
+                '<dataset>/<building>/<region>'.
+            start_cam (str, optional): Camera frame from which the rendering
+                starts. Defaults to None, corresponding to the first frame.
+        """
+        dataset = scene_name.split('/')[0]  # the drawer parses the rest
+
+        selected_scene = None
+        for scene in self.data:
+            if scene['sample_idx'] == scene_name:
+                selected_scene = scene
+                break
+        if selected_scene is None:
+            print('No such scene')
+            return
+
+        start_idx = 0
+        if start_cam is not None:
+            start_idx = -1
+            for i, img in enumerate(selected_scene['images']):
+                img_name = img['img_path'].split('/')[-1]
+                if dataset == 'scannet':
+                    cam_name = img_name[:-4]
+                elif dataset == '3rscan':
+                    cam_name = img_name[:-10]
+                elif dataset == 'matterport3d':
+                    cam_name = img_name[:-8] + img_name[-7:-4]
+                else:
+                    cam_name = None  # unknown dataset: never matches
+                if cam_name == start_cam:
+                    start_idx = i
+                    break
+            if start_idx == -1:
+                print('No such camera')
+                return
+
+        drawer = ContinuousOccupancyDrawer(dataset, self.dataroot[dataset],
+                                           selected_scene, self.classes,
+                                           self.color_selector, start_idx)
+        drawer.begin()
+
     def render_occupancy(self, scene_name):
         """Render the occupancy annotation of a given scene.
 
diff --git a/embodiedscan/structures/__init__.py b/embodiedscan/structures/__init__.py
@@ -1,16 +1,13 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .bbox_3d import (BaseInstance3DBoxes, Box3DMode, # CameraInstance3DBoxes,
-                      Coord3DMode, # DepthInstance3DBoxes, LiDARInstance3DBoxes,
-                      EulerInstance3DBoxes, # EulerCameraInstance3DBoxes, EulerDepthInstance3DBoxes,
-                      get_box_type, get_proj_mat_by_coord_type, limit_period,
+from .bbox_3d import (BaseInstance3DBoxes, Box3DMode, Coord3DMode,
+                      EulerInstance3DBoxes, get_box_type,
+                      get_proj_mat_by_coord_type, limit_period,
                       mono_cam_box2vis, points_cam2img, points_img2cam,
                       rotation_3d_in_axis, rotation_3d_in_euler, xywhr2xyxyr)
 
 __all__ = [
-    'BaseInstance3DBoxes', 'Box3DMode', # 'CameraInstance3DBoxes',
-    'Coord3DMode', # 'DepthInstance3DBoxes', 'LiDARInstance3DBoxes',
-    'EulerInstance3DBoxes', # 'EulerDepthInstance3DBoxes', 'EulerCameraInstance3DBoxes',
+    'BaseInstance3DBoxes', 'Box3DMode', 'Coord3DMode', 'EulerInstance3DBoxes',
     'get_box_type', 'get_proj_mat_by_coord_type', 'limit_period',
     'mono_cam_box2vis', 'points_cam2img', 'points_img2cam',
     'rotation_3d_in_axis', 'rotation_3d_in_euler', 'xywhr2xyxyr'
-]
+]
diff --git a/embodiedscan/utils/continuous_drawer.py b/embodiedscan/utils/continuous_drawer.py
@@ -1,6 +1,7 @@
 import os
 
 import cv2
+import mmengine
 import numpy as np
 import open3d as o3d
 
@@ -39,13 +40,16 @@ def __init__(self, dataset, dir, scene, classes, color_selector, start_idx,
         self.vis.register_key_callback(262, self.draw_next)  # Right Arrow
         self.vis.register_key_callback(ord('D'), self.draw_next)
         self.vis.register_key_callback(ord('N'), self.draw_next)
+        self.vis.register_key_callback(256, self.close)
 
     def begin(self):
         """Some preparations before starting the rendering."""
         print('Press N/D/Right Arrow to draw next frame.')
         print('Press Q to close the window and quit.')
-        print('Please wait for a few seconds after rendering some frames,',
-              'or the program may crash.')
+        print("When you've rendered a lot of frames, the exit can become",
+              'very slow because the program needs time to free up space.')
+        print('You can also press Esc to close window immediately,',
+              'which may result in a segmentation fault.')
         s = self.scene['sample_idx'].split('/')
         self.occupied = np.zeros((len(self.scene['instances']), ), dtype=bool)
         if len(s) == 2:
@@ -115,9 +119,12 @@ def draw_next(self, vis):
         pc.colors = o3d.utility.Vector3dVector(colors[::self.downsample])
         vis.add_geometry(pc)
         if self.camera is not None:
-            vis.remove_geometry(self.camera)
-        self.camera = draw_camera(extrinsic)
-        vis.add_geometry(self.camera)
+            cam_points = draw_camera(extrinsic, return_points=True)
+            self.camera.points = cam_points
+            vis.update_geometry(self.camera)
+        else:
+            self.camera = draw_camera(extrinsic)
+            vis.add_geometry(self.camera)
 
         for ins_idx in img['visible_instance_ids']:
             if self.occupied[ins_idx]:
@@ -135,3 +142,174 @@ def draw_next(self, vis):
         vis.update_renderer()
         vis.poll_events()
         vis.run()
+
+    def close(self, vis):
+        """Clear all geometries, destroy the window and close the visualizer.
+
+        Args:
+            vis (open3d.visualization.VisualizerWithKeyCallback): Visualizer.
+        """
+        vis.clear_geometries()
+        vis.destroy_window()
+        vis.close()
+
+
+class ContinuousOccupancyDrawer:
+    """Visualization tool for Continuous Occupancy Prediction task.
+
+    This class serves as the API for visualizing the Continuous Occupancy
+    Prediction task.
+
+    Args:
+        dataset (str): Name of composed raw dataset, one of
+            scannet/3rscan/matterport3d.
+        dir (str): Root path of the dataset.
+        scene (dict): Annotation of the selected scene.
+        classes (list): Class information.
+        color_selector (ColorMap): ColorMap for visualization.
+        start_idx (int): Index of the frame at which the task starts.
+    """
+
+    def __init__(self, dataset, dir, scene, classes, color_selector,
+                 start_idx):
+        self.dir = dir
+        self.dataset = dataset
+        self.scene = scene
+        self.classes = classes
+        self.color_selector = color_selector
+        self.idx = start_idx  # index of the next frame to render
+        self.camera = None  # camera geometry, created on the first frame
+
+        # matterport3d sample ids carry an extra building component.
+        if dataset == 'matterport3d':
+            _, building, region = scene['sample_idx'].split('/')
+        else:
+            _, region = scene['sample_idx'].split('/')
+
+        suffix = ''  # only matterport3d file names carry a region suffix
+        if dataset == 'scannet':
+            occ_dir = os.path.join(self.dir, 'scans', region, 'occupancy')
+        elif dataset == '3rscan':
+            occ_dir = os.path.join(self.dir, region, 'occupancy')
+        elif dataset == 'matterport3d':
+            occ_dir = os.path.join(self.dir, building, 'occupancy')
+            suffix = f'_{region}'
+        else:
+            raise NotImplementedError
+        self.occ_path = os.path.join(occ_dir, f'occupancy{suffix}.npy')
+        self.mask_path = os.path.join(occ_dir,
+                                      f'visible_occupancy{suffix}.pkl')
+
+        # NOTE(review): `occupied` is never read in this class - confirm.
+        self.occupied = np.zeros((len(self.scene['instances']), ), dtype=bool)
+        self.vis = o3d.visualization.VisualizerWithKeyCallback()
+        for key in (262, ord('D'), ord('N')):  # Right Arrow, D, N
+            self.vis.register_key_callback(key, self.draw_next)
+        self.vis.register_key_callback(256, self.close)  # Esc
+
+    def begin(self):
+        """Load the occupancy data and render the first frame.
+
+        The view parameters captured here are re-applied by ``draw_next``.
+        """
+        print('Press N/D/Right Arrow to draw next frame.')
+        print('Press Q to close the window and quit.')
+        print("When you've rendered a lot of frames, the exit can become",
+              'very slow because the program needs time to free up space.')
+        print('You can also press Esc to close window immediately,',
+              'which may result in a segmentation fault.')
+        self.gt = np.load(self.occ_path)
+        self.mask = mmengine.load(self.mask_path)
+
+        point_cloud_range = [-3.2, -3.2, -1.28 + 0.5, 3.2, 3.2, 1.28 + 0.5]
+        occ_size = [40, 40, 16]
+        self.grid_size = 0.16
+
+        self.points = np.zeros((self.gt.shape[0], 6), dtype=float)  # xyz + rgb
+        self.grid_id = np.ones(occ_size, dtype=int) * -1  # -1: no voxel here
+        self.gird_id = self.grid_id  # misspelled alias, kept for callers
+        self.visible_mask = np.zeros((self.gt.shape[0], ), dtype=bool)
+        for i in range(self.gt.shape[0]):
+            x, y, z, label_id = self.gt[i]
+            self.grid_id[x, y, z] = i
+            label_id = int(label_id)
+            if label_id == 0:
+                label = 'object'
+            else:
+                label = self.classes[label_id - 1]
+            color = self.color_selector.get_color(label)
+            color = [channel / 255.0 for channel in color]
+            self.points[i][:3] = [
+                x * self.grid_size + point_cloud_range[0] + self.grid_size / 2,
+                y * self.grid_size + point_cloud_range[1] + self.grid_size / 2,
+                z * self.grid_size + point_cloud_range[2] + self.grid_size / 2
+            ]
+            self.points[i][3:] = color
+
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(self.points[:, :3])
+        pcd.colors = o3d.utility.Vector3dVector(self.points[:, 3:])
+        voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(
+            pcd, voxel_size=self.grid_size)
+        frame = o3d.geometry.TriangleMesh.create_coordinate_frame()
+        self.vis.create_window()
+        self.vis.add_geometry(voxel_grid)
+        self.vis.add_geometry(frame)
+        ctr = self.vis.get_view_control()
+        self.view_param = ctr.convert_to_pinhole_camera_parameters()
+        self.voxel_grid = voxel_grid
+        self.draw_next(self.vis)
+
+    def draw_next(self, vis):
+        """Render the next frame with the occupancy visible so far.
+
+        Args:
+            vis (open3d.visualization.VisualizerWithKeyCallback): Visualizer.
+        """
+        if self.idx >= len(self.scene['images']):
+            print('No more images')
+            return
+
+        img = self.scene['images'][self.idx]
+        extrinsic = self.scene['axis_align_matrix'] @ img['cam2global']
+
+        mask = self.mask[self.idx]['visible_occupancy']
+        visible_ids = np.unique(self.grid_id[mask])
+        visible_ids = visible_ids[visible_ids >= 0]  # drop the -1 marker
+        self.visible_mask[visible_ids] = True  # accumulates over frames
+        visible_points = self.points[self.visible_mask]
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(visible_points[:, :3])
+        pcd.colors = o3d.utility.Vector3dVector(visible_points[:, 3:])
+        voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(
+            pcd, voxel_size=self.grid_size)
+
+        if self.camera is not None:
+            cam_points = draw_camera(extrinsic, return_points=True)
+            self.camera.points = cam_points
+            vis.update_geometry(self.camera)
+        else:
+            self.camera = draw_camera(extrinsic)
+            vis.add_geometry(self.camera)
+
+        self.voxel_grid.clear()
+        vis.update_geometry(self.voxel_grid)
+        vis.remove_geometry(self.voxel_grid)
+        vis.add_geometry(voxel_grid)
+        self.voxel_grid = voxel_grid
+        self.idx += 1
+        ctr = vis.get_view_control()
+        ctr.convert_from_pinhole_camera_parameters(self.view_param)
+        vis.update_renderer()
+        vis.poll_events()
+        vis.run()
+
+    def close(self, vis):
+        """Clear all geometries, destroy the window and close the visualizer.
+
+        Args:
+            vis (open3d.visualization.VisualizerWithKeyCallback): Visualizer.
+        """
+        vis.clear_geometries()
+        vis.destroy_window()
+        vis.close()