mypy fixes, bug fixes, deprecated code removal (#18)

Co-authored-by: Katrina Ashton <kashton@theaiinstitute.com>
bdaiinstitute · Dec 15, 2023 · 98be30e · 98be30e
1 parent 456ccef
commit 98be30e
Show file tree

Hide file tree

Showing 19 changed files with 74 additions and 233 deletions.
diff --git a/vlfm/mapping/base_map.py b/vlfm/mapping/base_map.py
@@ -8,10 +8,9 @@
 
 
 class BaseMap:
-    _confidence_mask: np.ndarray = None
     _camera_positions: List[np.ndarray] = []
     _last_camera_yaw: float = 0.0
-    _map_dtype: np.dtype = np.float32
+    _map_dtype: np.dtype = np.dtype(np.float32)
 
     def __init__(
         self, size: int = 1000, pixels_per_meter: int = 20, *args: Any, **kwargs: Any

diff --git a/vlfm/mapping/object_point_cloud_map.py b/vlfm/mapping/object_point_cloud_map.py
@@ -55,7 +55,7 @@ def update_map(
         else:
             # Mark all points of local_cloud whose distance from the camera is too far
             # as being out of range
-            within_range = local_cloud[:, 0] <= max_depth * 0.95  # 5% margin
+            within_range = (local_cloud[:, 0] <= max_depth * 0.95) * 1.0  # 5% margin
             # All values of 1 in within_range will be considered within range, and all
             # values of 0 will be considered out of range; these 0s need to be
             # assigned with a random number so that they can be identified later.
@@ -147,7 +147,7 @@ def update_explored(
     def get_target_cloud(self, target_class: str) -> np.ndarray:
         target_cloud = self.clouds[target_class].copy()
         # Determine whether any points are within range
-        within_range_exists: bool = np.any(target_cloud[:, -1] == 1)
+        within_range_exists = np.any(target_cloud[:, -1] == 1)
         if within_range_exists:
             # Filter out all points that are not within range
             target_cloud = target_cloud[target_cloud[:, -1] == 1]

diff --git a/vlfm/mapping/obstacle_map.py b/vlfm/mapping/obstacle_map.py
@@ -1,5 +1,7 @@
 # Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.
 
+from typing import Any, Union
+
 import cv2
 import numpy as np
 from frontier_exploration.frontier_detection import detect_frontier_waypoints
@@ -15,7 +17,7 @@ class ObstacleMap(BaseMap):
     and another representing the obstacles that the robot has seen so far.
     """
 
-    _map_dtype: np.dtype = bool
+    _map_dtype: np.dtype = np.dtype(bool)
     _frontiers_px: np.ndarray = np.array([])
     frontiers: np.ndarray = np.array([])
     radius_padding_color: tuple = (100, 100, 100)
@@ -52,7 +54,7 @@ def reset(self) -> None:
 
     def update_map(
         self,
-        depth: np.ndarray,
+        depth: Union[np.ndarray, Any],
         tf_camera_to_episodic: np.ndarray,
         min_depth: float,
         max_depth: float,

diff --git a/vlfm/mapping/traj_visualizer.py b/vlfm/mapping/traj_visualizer.py
@@ -36,7 +36,9 @@ def draw_trajectory(
         img = self._draw_agent(img, camera_positions[-1], camera_yaw)
         return img
 
-    def _draw_path(self, img: np.ndarray, camera_positions: np.ndarray) -> np.ndarray:
+    def _draw_path(
+        self, img: np.ndarray, camera_positions: Union[np.ndarray, List[np.ndarray]]
+    ) -> np.ndarray:
         """Draws the path on the image and returns it"""
         if len(camera_positions) < 2:
             return img

diff --git a/vlfm/policy/base_objectnav_policy.py b/vlfm/policy/base_objectnav_policy.py
@@ -34,8 +34,8 @@ class BasePolicy:  # type: ignore
 class BaseObjectNavPolicy(BasePolicy):
     _target_object: str = ""
     _policy_info: Dict[str, Any] = {}
-    _object_masks: np.ndarray = None  # set by ._update_object_map()
-    _stop_action: Tensor = None  # MUST BE SET BY SUBCLASS
+    _object_masks: Union[np.ndarray, Any] = None  # set by ._update_object_map()
+    _stop_action: Union[Tensor, Any] = None  # MUST BE SET BY SUBCLASS
     _observations_cache: Dict[str, Any] = {}
     _non_coco_caption = ""
     _load_yolo: bool = True

diff --git a/vlfm/policy/reality_policies.py b/vlfm/policy/reality_policies.py
@@ -56,7 +56,7 @@ def from_config(
     def act(
         self: Union["RealityMixin", ITMPolicyV2],
         observations: Dict[str, Any],
-        rnn_hidden_states: Tensor,
+        rnn_hidden_states: Union[Tensor, Any],
         prev_actions: Any,
         masks: Tensor,
         deterministic: bool = False,
@@ -77,10 +77,10 @@ def act(
         if self._done_initializing:
             angular = action[0][0].item()
             linear = action[0][1].item()
-            arm_yaw = -1
+            arm_yaw = -1.0
         else:
-            angular = 0
-            linear = 0
+            angular = 0.0
+            linear = 0.0
             arm_yaw = action[0][0].item()
 
         self._done_initializing = len(self._initial_yaws) == 0

diff --git a/vlfm/policy/utils/non_habitat_policy/nh_pointnav_policy.py b/vlfm/policy/utils/non_habitat_policy/nh_pointnav_policy.py
@@ -49,9 +49,9 @@ class PointNavResNetNet(nn.Module):
     def __init__(self, discrete_actions: bool = False, no_fwd_dict: bool = False):
         super().__init__()
         if discrete_actions:
-            self.prev_action_embedding = nn.Embedding(4 + 1, 32)
+            self.prev_action_embedding_discrete = nn.Embedding(4 + 1, 32)
         else:
-            self.prev_action_embedding = nn.Linear(
+            self.prev_action_embedding_cont = nn.Linear(
                 in_features=2, out_features=32, bias=True
             )
         self.tgt_embeding = nn.Linear(in_features=3, out_features=32, bias=True)
@@ -95,11 +95,11 @@ def forward(
             prev_actions = prev_actions.squeeze(-1)
             start_token = torch.zeros_like(prev_actions)
             # The mask means the previous action will be zero, an extra dummy action
-            prev_actions = self.prev_action_embedding(
+            prev_actions = self.prev_action_embedding_discrete(
                 torch.where(masks.view(-1), prev_actions + 1, start_token)
             )
         else:
-            prev_actions = self.prev_action_embedding(masks * prev_actions.float())
+            prev_actions = self.prev_action_embedding_cont(masks * prev_actions.float())
 
         x.append(prev_actions)
 

diff --git a/vlfm/policy/utils/non_habitat_policy/resnet.py b/vlfm/policy/utils/non_habitat_policy/resnet.py
@@ -6,7 +6,7 @@
 # https://github.com/facebookresearch/habitat-lab/blob/main/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet.py
 # This is a filtered down version that only support ResNet-18
 
-from typing import List, Optional, Type, cast
+from typing import List, Optional, Type
 
 from torch import Tensor
 from torch import nn as nn
@@ -149,7 +149,6 @@ def _make_layer(
     def forward(self, x: Tensor) -> Tensor:
         x = self.conv1(x)
         x = self.maxpool(x)
-        x = cast(Tensor, x)
         x = self.layer1(x)
         x = self.layer2(x)
         x = self.layer3(x)

diff --git a/vlfm/policy/utils/non_habitat_policy/rnn_state_encoder.py b/vlfm/policy/utils/non_habitat_policy/rnn_state_encoder.py
@@ -6,7 +6,7 @@
 # https://github.com/facebookresearch/habitat-lab/blob/main/habitat-baselines/habitat_baselines/rl/models/rnn_state_encoder.py
 # This is a filtered down version that only supports LSTM
 
-from typing import Dict, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 import torch
 import torch.nn as nn
@@ -21,6 +21,24 @@ class RNNStateEncoder(nn.Module):
     timesteps to handle episodes ending in the middle of a rollout.
     """
 
+    def __init__(
+        self,
+        input_size: int,
+        hidden_size: int,
+        num_layers: int = 1,
+    ):
+        super().__init__()
+
+        self.num_recurrent_layers = num_layers * 2
+
+        self.rnn = nn.LSTM(
+            input_size=input_size,
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+        )
+
+        self.layer_init()
+
     def layer_init(self) -> None:
         for name, param in self.rnn.named_parameters():
             if "weight" in name:
@@ -112,26 +130,18 @@ def __init__(
         hidden_size: int,
         num_layers: int = 1,
     ):
-        super().__init__()
-
-        self.num_recurrent_layers = num_layers * 2
-
-        self.rnn = nn.LSTM(
-            input_size=input_size,
-            hidden_size=hidden_size,
-            num_layers=num_layers,
-        )
-
-        self.layer_init()
+        super().__init__(input_size, hidden_size, num_layers)
 
+    # Note: Type handling mypy errors in pytorch libraries prevent
+    # directly setting hidden_states type
     def pack_hidden(
-        self, hidden_states: Tuple[torch.Tensor, torch.Tensor]
+        self, hidden_states: Any  # type is Tuple[torch.Tensor, torch.Tensor]
     ) -> torch.Tensor:
         return torch.cat(hidden_states, 0)
 
     def unpack_hidden(
         self, hidden_states: torch.Tensor
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> Any:  # type is Tuple[torch.Tensor, torch.Tensor]
         lstm_states = torch.chunk(hidden_states.contiguous(), 2, 0)
         return (lstm_states[0], lstm_states[1])
 

diff --git a/vlfm/policy/utils/pointnav_policy.py b/vlfm/policy/utils/pointnav_policy.py
@@ -203,7 +203,7 @@ def load_pointnav_policy(file_path: str) -> PointNavResNetTensorOutputPolicy:
 
 
 def move_obs_to_device(
-    observations: Dict[str, Union[Tensor, np.ndarray]],
+    observations: Dict[str, Any],
     device: torch.device,
     unsqueeze: bool = False,
 ) -> Dict[str, Tensor]:

diff --git a/vlfm/reality/bdsw_nav_env.py b/vlfm/reality/bdsw_nav_env.py
@@ -16,8 +16,9 @@ def run_env(
     done = False
     mask = torch.zeros(1, 1, device=policy.device, dtype=torch.bool)
     action = policy.act(observations, mask)
+    action_dict = {"rho_theta": action}
     while not done:
-        observations, _, done, info = env.step(action)
+        observations, _, done, info = env.step(action_dict)
         action = policy.act(observations, mask, deterministic=True)
         mask = torch.ones_like(mask)
 

diff --git a/vlfm/reality/objectnav_env.py b/vlfm/reality/objectnav_env.py
@@ -59,7 +59,7 @@ def __init__(self, max_gripper_cam_depth: float, *args: Any, **kwargs: Any) -> N
         self._vis_dir = f"{date_string}"
         os.makedirs(f"vis/{self._vis_dir}", exist_ok=True)
 
-    def reset(self, goal: Any, *args: Any, **kwargs: Any) -> Dict[str, np.ndarray]:
+    def reset(self, goal: Any, *args: Any, **kwargs: Any) -> Dict[str, Any]:
         assert isinstance(goal, str)
         self.target_object = goal
         # Transformation matrix from where the robot started to the global frame
@@ -69,7 +69,7 @@ def reset(self, goal: Any, *args: Any, **kwargs: Any) -> Dict[str, np.ndarray]:
         self.episodic_start_yaw = self.robot.xy_yaw[1]
         return self._get_obs()
 
-    def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
+    def step(self, action: Dict[str, Any]) -> Tuple[Dict, float, bool, Dict]:
         # Parent class only moves the base; if we want to move the gripper camera,
         # we need to do it here
         vis_imgs = []
@@ -117,7 +117,7 @@ def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
 
         return self._get_obs(), 0.0, done, {}  # not using info dict yet
 
-    def _get_obs(self) -> Dict[str, np.ndarray]:
+    def _get_obs(self) -> Dict[str, Any]:
         robot_xy, robot_heading = self._get_gps(), self._get_compass()
         nav_depth, obstacle_map_depths, value_map_rgbd, object_map_rgbd = (
             self._get_camera_obs()

diff --git a/vlfm/reality/pointnav_env.py b/vlfm/reality/pointnav_env.py
@@ -54,7 +54,7 @@ def reset(
         self.goal = goal
         return self._get_obs()
 
-    def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
+    def step(self, action: Dict[str, Any]) -> Tuple[Dict, float, bool, Dict]:
         if self._cmd_id is not None:
             cmd_status = 0
             while cmd_status != 1:
@@ -100,15 +100,13 @@ def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
         self._num_steps += 1
         return self._get_obs(), 0.0, done, {}  # not using info dict yet
 
-    def _compute_velocities(self, action: Dict[str, np.ndarray]) -> Tuple[float, float]:
+    def _compute_velocities(self, action: Dict[str, Any]) -> Tuple[float, float]:
         ang_dist, lin_dist = self._compute_displacements(action)
         ang_vel = ang_dist / self._time_step
         lin_vel = lin_dist / self._time_step
         return ang_vel, lin_vel
 
-    def _compute_displacements(
-        self, action: Dict[str, np.ndarray]
-    ) -> Tuple[float, float]:
+    def _compute_displacements(self, action: Dict[str, Any]) -> Tuple[float, float]:
         displacements = []
         for action_key, max_dist in (
             ("angular", self._max_ang_dist),
@@ -120,7 +118,7 @@ def _compute_displacements(
             act_val = action[action_key]
             dist = np.clip(act_val, -1.0, 1.0)  # clip to [-1, 1]
             dist *= max_dist  # scale to max distance
-            displacements.append(dist)  # convert to velocity
+            displacements.append(dist)  # convert to velocities
         ang_dist, lin_dist = displacements
         return ang_dist, lin_dist
 

diff --git a/vlfm/reality/robots/bdsw_robot.py b/vlfm/reality/robots/bdsw_robot.py
@@ -89,7 +89,7 @@ def open_gripper(self) -> None:
         """Opens the gripper"""
         self.spot.open_gripper()
 
-    def get_camera_data(self, srcs: List[str]) -> Dict[str, np.ndarray]:
+    def get_camera_data(self, srcs: List[str]) -> Dict[str, Dict[str, Any]]:
         """Returns a dict that maps each camera id to its image, focal lengths, and
         transform matrix (from camera to global frame).
 
@@ -106,7 +106,7 @@ def get_camera_data(self, srcs: List[str]) -> Dict[str, np.ndarray]:
         }
         return imgs
 
-    def _camera_response_to_data(self, response: Any) -> Dict[str, np.ndarray]:
+    def _camera_response_to_data(self, response: Any) -> Dict[str, Any]:
         image: np.ndarray = image_response_to_cv2(response, reorient=False)
         fx: float = response.source.pinhole.intrinsics.focal_length.x
         fy: float = response.source.pinhole.intrinsics.focal_length.y