making yolo loading configurable (disabled for real world), setting v…

…qa to be false by default, using position control for Spot base instead of explicit euler integrations of veclocities that don't really even work very well tbh
bdaiinstitute · Sep 13, 2023 · ea8c592 · ea8c592
1 parent 1231c78
commit ea8c592
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 15 deletions.
diff --git a/zsos/policy/base_objectnav_policy.py b/zsos/policy/base_objectnav_policy.py
@@ -36,6 +36,7 @@ class BaseObjectNavPolicy(BasePolicy):
     _stop_action: Tensor = None  # MUST BE SET BY SUBCLASS
     _observations_cache: Dict[str, Any] = {}
     _non_coco_caption = ""
+    _load_yolo: bool = True
 
     def __init__(
         self,
@@ -50,7 +51,7 @@ def __init__(
         agent_radius: float = 0.18,
         obstacle_map_area_threshold: float = 1.5,
         hole_area_thresh: int = 100000,
-        use_vqa: bool = True,
+        use_vqa: bool = False,
         vqa_prompt: str = "Is this ",
         coco_threshold: float = 0.6,
         non_coco_threshold: float = 0.4,
@@ -229,7 +230,7 @@ def _get_policy_info(self, detections: ObjectDetections) -> Dict[str, Any]:
 
     def _get_object_detections(self, img: np.ndarray) -> ObjectDetections:
         target_classes = self._target_object.split("|")
-        has_coco = any(c in COCO_CLASSES for c in target_classes)
+        has_coco = any(c in COCO_CLASSES for c in target_classes) and self._load_yolo
         has_non_coco = any(c not in COCO_CLASSES for c in target_classes)
 
         detections = (
@@ -406,7 +407,7 @@ class ZSOSConfig:
     min_obstacle_height: float = 0.61
     max_obstacle_height: float = 0.88
     hole_area_thresh: int = 100000
-    use_vqa: bool = True
+    use_vqa: bool = False
     vqa_prompt: str = "Is this "
     coco_threshold: float = 0.6
     non_coco_threshold: float = 0.4

diff --git a/zsos/policy/reality_policies.py b/zsos/policy/reality_policies.py
@@ -21,6 +21,11 @@ class RealityMixin:
     """
 
     _stop_action: Tensor = torch.tensor([[0.0, 0.0]], dtype=torch.float32)
+    _load_yolo: bool = False
+    _non_coco_caption: str = (
+        "chair . table . tv . laptop . microwave . toaster . sink . refrigerator . book"
+        " . clock . vase . scissors . teddy bear . hair drier . toothbrush ."
+    )
 
     def __init__(self: BaseObjectNavPolicy, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)

diff --git a/zsos/reality/pointnav_env.py b/zsos/reality/pointnav_env.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from zsos.reality.robots.base_robot import BaseRobot
+from zsos.reality.robots.bdsw_robot import BDSWRobot
 from zsos.reality.robots.camera_ids import SpotCamIds
 from zsos.utils.geometry_utils import convert_to_global_frame, rho_theta
 
@@ -18,8 +18,7 @@ class PointNavEnv:
 
     def __init__(
         self,
-        robot: BaseRobot,
-        # robot: BDSWRobot,
+        robot: BDSWRobot,
         max_body_cam_depth: float = 3.5,  # max depth (in meters) for body cameras
         max_lin_dist: float = 0.25,
         max_ang_dist: float = np.deg2rad(30),
@@ -33,6 +32,7 @@ def __init__(
         self._max_lin_dist = max_lin_dist
         self._max_ang_dist = max_ang_dist
         self._time_step = time_step
+        self._cmd_id = None
 
     def reset(self, goal: Any, relative=True, *args, **kwargs) -> Dict[str, np.ndarray]:
         assert isinstance(goal, np.ndarray)
@@ -46,25 +46,52 @@ def reset(self, goal: Any, relative=True, *args, **kwargs) -> Dict[str, np.ndarr
         return self._get_obs()
 
     def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
-        ang_vel, lin_vel = self._compute_velocities(action)
-        # self.robot.command_base_velocity(ang_vel, lin_vel)
-        time.sleep(self._time_step)
-        self.robot.command_base_velocity(0.0, 0.0)
-        done = ang_vel == 0.0 and lin_vel == 0.0
+        if self._cmd_id is not None:
+            cmd_status = 0
+            while cmd_status != 1:
+                feedback_resp = self.robot.spot.get_cmd_feedback(self._cmd_id)
+                cmd_status = (
+                    feedback_resp.feedback.synchronized_feedback
+                ).mobility_command_feedback.se2_trajectory_feedback.status
+                if cmd_status != 1:
+                    time.sleep(0.1)
+
+        ang_dist, lin_dist = self._compute_displacements(action)
+        self._cmd_id = self.robot.spot.set_base_position(
+            x_pos=lin_dist,
+            y_pos=0,
+            yaw=ang_dist,
+            end_time=100,
+            relative=True,
+            max_fwd_vel=1.0,
+            max_hor_vel=0.5,
+            max_ang_vel=np.deg2rad(60),
+            disable_obstacle_avoidance=False,
+            blocking=False,
+        )
+        done = ang_dist == 0.0 and lin_dist == 0.0
         return self._get_obs(), 0.0, done, {}  # not using info dict yet
 
     def _compute_velocities(self, action: Dict[str, np.ndarray]) -> Tuple[float, float]:
-        velocities = []
+        ang_dist, lin_dist = self._compute_displacements(action)
+        ang_vel = ang_dist / self._time_step
+        lin_vel = lin_dist / self._time_step
+        return ang_vel, lin_vel
+
+    def _compute_displacements(
+        self, action: Dict[str, np.ndarray]
+    ) -> Tuple[float, float]:
+        displacements = []
         for action_key, max_dist in (
             ["angular", self._max_ang_dist],
             ["linear", self._max_lin_dist],
         ):
             act_val = action.get(action_key, 0.0)  # default to 0.0 if key not present
             dist = np.clip(act_val, -1.0, 1.0)  # clip to [-1, 1]
             dist *= max_dist  # scale to max distance
-            velocities.append(dist / self._time_step)  # convert to velocity
-        ang_vel, lin_vel = velocities
-        return ang_vel, lin_vel
+            displacements.append(dist)  # convert to velocity
+        ang_dist, lin_dist = displacements
+        return ang_dist, lin_dist
 
     def _get_obs(self) -> Dict[str, np.ndarray]:
         return {