allowing maps to be updated using heterogeneous sets of camera images…

…, intrinsics, and extrinsics
bdaiinstitute · Aug 24, 2023 · 24888f6 · 24888f6
1 parent bee00b3
commit 24888f6
Show file tree

Hide file tree

Showing 7 changed files with 230 additions and 183 deletions.
diff --git a/zsos/mapping/base_map.py b/zsos/mapping/base_map.py
@@ -3,11 +3,6 @@
 import numpy as np
 
 from zsos.mapping.traj_visualizer import TrajectoryVisualizer
-from zsos.utils.geometry_utils import extract_yaw
-from zsos.utils.img_utils import (
-    place_img_in_img,
-    rotate_image,
-)
 
 
 class BaseMap:
@@ -16,29 +11,12 @@ class BaseMap:
     _last_camera_yaw: float = None
     _map_dtype: np.dtype = np.float32
 
-    def __init__(
-        self,
-        fov: float,
-        min_depth: float,
-        max_depth: float,
-        size: int = 1000,
-        *args,
-        **kwargs,
-    ):
+    def __init__(self, size: int = 1000, *args, **kwargs):
         """
         Args:
-            num_channels: The number of channels in the value map.
-            fov: The field of view of the camera in degrees.
-            max_depth: The desired maximum depth of the camera in meters.
-            use_max_confidence: Whether to use the maximum confidence value in the value
-                map or a weighted average confidence value.
+            size: The size of the map in pixels.
         """
         self.pixels_per_meter = 20
-
-        self._fov = np.deg2rad(fov)
-        self._min_depth = min_depth
-        self._max_depth = max_depth
-
         self._map = np.zeros((size, size), dtype=self._map_dtype)
         self._episode_pixel_origin = np.array([size // 2, size // 2])
         self._traj_vis = TrajectoryVisualizer(
@@ -52,36 +30,6 @@ def reset(self):
             self._episode_pixel_origin, self.pixels_per_meter
         )
 
-    def _localize_new_data(
-        self, depth: np.ndarray, tf_camera_to_episodic: np.ndarray
-    ) -> np.ndarray:
-        # Get new portion of the map
-        curr_data = self._process_local_data(depth, tf_camera_to_episodic)
-
-        # Rotate this new data to match the camera's orientation
-        self._last_camera_yaw = yaw = extract_yaw(tf_camera_to_episodic)
-        curr_data = rotate_image(curr_data, -yaw)
-
-        # Determine where this mask should be overlaid
-        cam_x, cam_y = tf_camera_to_episodic[:2, 3] / tf_camera_to_episodic[3, 3]
-        self._camera_positions.append(np.array([cam_x, cam_y]))
-
-        # Convert to pixel units
-        px = int(cam_x * self.pixels_per_meter) + self._episode_pixel_origin[0]
-        py = int(-cam_y * self.pixels_per_meter) + self._episode_pixel_origin[1]
-
-        # Overlay the new data onto the map
-        curr_map = np.zeros_like(self._map)
-        curr_map = place_img_in_img(curr_map, curr_data, px, py)
-
-        return curr_map
-
-    def _process_local_data(
-        self, depth: np.ndarray, tf_camera_to_episodic: np.ndarray
-    ) -> np.ndarray:
-        """Processes the local data (depth image) to be used for updating the map."""
-        raise NotImplementedError
-
     def _xy_to_px(self, points: np.ndarray) -> np.ndarray:
         """Converts an array of (x, y) coordinates to pixel coordinates.
 

diff --git a/zsos/mapping/object_point_cloud_map.py b/zsos/mapping/object_point_cloud_map.py
@@ -9,28 +9,10 @@
 
 class ObjectPointCloudMap:
     clouds: Dict[str, np.ndarray] = {}
-    _image_width: int = None  # set upon execution of update_map method
-    _image_height: int = None  # set upon execution of update_map method
-    __fx: float = None  # set upon execution of update_map method
 
-    def __init__(
-        self, min_depth: float, max_depth: float, hfov: float, erosion_size: float
-    ) -> None:
-        self._min_depth = min_depth
-        self._max_depth = max_depth
-        self._hfov = np.deg2rad(hfov)
+    def __init__(self, erosion_size: float) -> None:
         self._erosion_size = erosion_size
 
-    @property
-    def _fx(self) -> float:
-        if self.__fx is None:
-            self.__fx = self._image_width / (2 * np.tan(self._hfov / 2))
-        return self.__fx
-
-    @property
-    def _fy(self) -> float:
-        return self._fx
-
     def reset(self):
         self.clouds = {}
 
@@ -43,14 +25,19 @@ def update_map(
         depth_img: np.ndarray,
         object_mask: np.ndarray,
         tf_camera_to_episodic: np.ndarray,
+        min_depth: float,
+        max_depth: float,
+        fx: float,
+        fy: float,
     ) -> None:
         """Updates the object map with the latest information from the agent."""
-        self._image_height, self._image_width = depth_img.shape
-        local_cloud = self._extract_object_cloud(depth_img, object_mask)
+        local_cloud = self._extract_object_cloud(
+            depth_img, object_mask, min_depth, max_depth, fx, fy
+        )
 
         # Mark all points of local_cloud whose distance from the camera is too far
         # as being out of range
-        within_range = local_cloud[:, 0] <= self._max_depth * 0.95  # 5% margin
+        within_range = local_cloud[:, 0] <= max_depth * 0.95  # 5% margin
         global_cloud = transform_points(tf_camera_to_episodic, local_cloud)
         global_cloud = np.concatenate((global_cloud, within_range[:, None]), axis=1)
 
@@ -87,17 +74,21 @@ def get_target_cloud(self, target_class: str) -> np.ndarray:
         return target_cloud
 
     def _extract_object_cloud(
-        self, depth: np.ndarray, object_mask: np.ndarray
+        self,
+        depth: np.ndarray,
+        object_mask: np.ndarray,
+        min_depth: float,
+        max_depth: float,
+        fx: float,
+        fy: float,
     ) -> np.ndarray:
         final_mask = object_mask * 255
         final_mask = cv2.erode(final_mask, None, iterations=self._erosion_size)
 
         valid_depth = depth.copy()
         valid_depth[valid_depth == 0] = 1  # set all holes (0) to just be far (1)
-        valid_depth = (
-            valid_depth * (self._max_depth - self._min_depth) + self._min_depth
-        )
-        cloud = get_point_cloud(valid_depth, final_mask, self._fx, self._fy)
+        valid_depth = valid_depth * (max_depth - min_depth) + min_depth
+        cloud = get_point_cloud(valid_depth, final_mask, fx, fy)
         cloud = open3d_dbscan_filtering(cloud)
 
         return cloud

diff --git a/zsos/mapping/obstacle_map.py b/zsos/mapping/obstacle_map.py
@@ -22,24 +22,19 @@ class ObstacleMap(BaseMap):
 
     def __init__(
         self,
-        fov: float,
-        min_depth: float,
-        max_depth: float,
         min_height: float,
         max_height: float,
         agent_radius: float,
         area_thresh: float = 3.0,  # square meters
         size: int = 1000,
     ):
-        super().__init__(fov, min_depth, max_depth, size)
+        super().__init__(size)
         self._map = np.zeros((size, size), dtype=bool)
         self._navigable_map = np.zeros((size, size), dtype=bool)
         self._explored_area = np.zeros((size, size), dtype=bool)
-        self._hfov = np.deg2rad(fov)
         self._min_height = min_height
         self._max_height = max_height
         self._area_thresh_in_pixels = area_thresh * (self.pixels_per_meter**2)
-        self.__fx = None
         kernel_size = self.pixels_per_meter * agent_radius * 2
         # round kernel_size to nearest odd number
         kernel_size = int(kernel_size) + (int(kernel_size) % 2 == 0)
@@ -52,23 +47,39 @@ def reset(self):
         self._frontiers_px = np.array([])
         self.frontiers = np.array([])
 
-    @property
-    def _fx(self) -> float:
-        if self.__fx is None:
-            self.__fx = self._image_width / (2 * np.tan(self._hfov / 2))
-        return self.__fx
-
-    @property
-    def _fy(self) -> float:
-        return self._fx
-
-    def update_map(self, depth: np.ndarray, tf_camera_to_episodic: np.ndarray):
-        """Adds all obstacles from the current view to the map. Also updates the area
+    def update_map(
+        self,
+        depth: np.ndarray,
+        tf_camera_to_episodic: np.ndarray,
+        min_depth: float,
+        max_depth: float,
+        fx: float,
+        fy: float,
+        topdown_fov: float,
+    ):
+        """
+        Adds all obstacles from the current view to the map. Also updates the area
         that the robot has explored so far.
+
+        Args:
+            depth (np.ndarray): The depth image to use for updating the object map. It
+                is normalized to the range [0, 1] and has a shape of (height, width).
+
+            tf_camera_to_episodic (np.ndarray): The transformation matrix from the
+                camera to the episodic coordinate frame.
+            min_depth (float): The minimum depth value (in meters) of the depth image.
+            max_depth (float): The maximum depth value (in meters) of the depth image.
+            fx (float): The focal length of the camera in the x direction.
+            fy (float): The focal length of the camera in the y direction.
+            topdown_fov (float): The field of view of the depth camera projected onto
+                the topdown map.
         """
         if self._image_width is None:
             self._image_width = depth.shape[1]
-        point_cloud_camera_frame = self._get_local_point_cloud(depth)
+
+        scaled_depth = depth * (max_depth - min_depth) + min_depth
+        mask = scaled_depth < max_depth
+        point_cloud_camera_frame = get_point_cloud(scaled_depth, mask, fx, fy)
         point_cloud_episodic_frame = transform_points(
             tf_camera_to_episodic, point_cloud_camera_frame
         )
@@ -99,8 +110,8 @@ def update_map(self, depth: np.ndarray, tf_camera_to_episodic: np.ndarray):
             current_fog_of_war_mask=self._explored_area.astype(np.uint8),
             current_point=agent_pixel_location[::-1],
             current_angle=-self._last_camera_yaw,
-            fov=np.rad2deg(self._hfov),
-            max_line_len=self._max_depth * self.pixels_per_meter,
+            fov=np.rad2deg(topdown_fov),
+            max_line_len=max_depth * self.pixels_per_meter,
         )
 
         # Compute frontier locations
@@ -147,12 +158,6 @@ def visualize(self):
 
         return vis_img
 
-    def _get_local_point_cloud(self, depth: np.ndarray) -> np.ndarray:
-        scaled_depth = depth * (self._max_depth - self._min_depth) + self._min_depth
-        mask = scaled_depth < self._max_depth
-        point_cloud = get_point_cloud(scaled_depth, mask, self._fx, self._fy)
-        return point_cloud
-
 
 def filter_points_by_height(
     points: np.ndarray, min_height: float, max_height: float
@@ -167,11 +172,8 @@ def replay_from_dir():
         data = json.load(f)
 
     v = ObstacleMap(
-        fov=kwargs["fov"],
         min_height=float(kwargs.get("min_height", 0.15)),
         max_height=float(kwargs.get("max_height", 0.88)),
-        min_depth=kwargs["min_depth"],
-        max_depth=kwargs["max_depth"],
         agent_radius=float(kwargs.get("agent_radius", 0.18)),
         size=kwargs["size"],
     )