From ea8c5927d9b92419e46e7954a8a975da59816e83 Mon Sep 17 00:00:00 2001
From: Naoki Yokoyama
Date: Tue, 12 Sep 2023 21:35:30 -0400
Subject: [PATCH] making yolo loading configurable (disabled for real world),
 setting vqa to be false by default, using position control for the Spot base
 instead of explicit Euler integration of velocities, which tracked poorly in
 practice

---
 zsos/policy/base_objectnav_policy.py |  7 ++--
 zsos/policy/reality_policies.py      |  5 +++
 zsos/reality/pointnav_env.py         | 51 +++++++++++++++++++++-------
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/zsos/policy/base_objectnav_policy.py b/zsos/policy/base_objectnav_policy.py
index 279aaef..bd42e89 100644
--- a/zsos/policy/base_objectnav_policy.py
+++ b/zsos/policy/base_objectnav_policy.py
@@ -36,6 +36,7 @@ class BaseObjectNavPolicy(BasePolicy):
     _stop_action: Tensor = None  # MUST BE SET BY SUBCLASS
     _observations_cache: Dict[str, Any] = {}
     _non_coco_caption = ""
+    _load_yolo: bool = True
 
     def __init__(
         self,
@@ -50,7 +51,7 @@ def __init__(
         agent_radius: float = 0.18,
         obstacle_map_area_threshold: float = 1.5,
         hole_area_thresh: int = 100000,
-        use_vqa: bool = True,
+        use_vqa: bool = False,
         vqa_prompt: str = "Is this ",
         coco_threshold: float = 0.6,
         non_coco_threshold: float = 0.4,
@@ -229,7 +230,7 @@ def _get_policy_info(self, detections: ObjectDetections) -> Dict[str, Any]:
 
     def _get_object_detections(self, img: np.ndarray) -> ObjectDetections:
         target_classes = self._target_object.split("|")
-        has_coco = any(c in COCO_CLASSES for c in target_classes)
+        has_coco = any(c in COCO_CLASSES for c in target_classes) and self._load_yolo
         has_non_coco = any(c not in COCO_CLASSES for c in target_classes)
 
         detections = (
@@ -406,7 +407,7 @@ class ZSOSConfig:
     min_obstacle_height: float = 0.61
     max_obstacle_height: float = 0.88
     hole_area_thresh: int = 100000
-    use_vqa: bool = True
+    use_vqa: bool = False
     vqa_prompt: str = "Is this "
     coco_threshold: float = 0.6
     non_coco_threshold: float = 0.4
diff --git a/zsos/policy/reality_policies.py b/zsos/policy/reality_policies.py
index 92ea854..4038104 100644
--- a/zsos/policy/reality_policies.py
+++ b/zsos/policy/reality_policies.py
@@ -21,6 +21,11 @@ class RealityMixin:
     """
 
     _stop_action: Tensor = torch.tensor([[0.0, 0.0]], dtype=torch.float32)
+    _load_yolo: bool = False
+    _non_coco_caption: str = (
+        "chair . table . tv . laptop . microwave . toaster . sink . refrigerator . book"
+        " . clock . vase . scissors . teddy bear . hair drier . toothbrush ."
+    )
 
     def __init__(self: BaseObjectNavPolicy, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
diff --git a/zsos/reality/pointnav_env.py b/zsos/reality/pointnav_env.py
index 1fc1e07..dc2023c 100644
--- a/zsos/reality/pointnav_env.py
+++ b/zsos/reality/pointnav_env.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from zsos.reality.robots.base_robot import BaseRobot
+from zsos.reality.robots.bdsw_robot import BDSWRobot
 from zsos.reality.robots.camera_ids import SpotCamIds
 from zsos.utils.geometry_utils import convert_to_global_frame, rho_theta
 
@@ -18,8 +18,7 @@ class PointNavEnv:
 
     def __init__(
         self,
-        robot: BaseRobot,
-        # robot: BDSWRobot,
+        robot: BDSWRobot,
         max_body_cam_depth: float = 3.5,  # max depth (in meters) for body cameras
         max_lin_dist: float = 0.25,
         max_ang_dist: float = np.deg2rad(30),
@@ -33,6 +32,7 @@ def __init__(
         self._max_lin_dist = max_lin_dist
         self._max_ang_dist = max_ang_dist
         self._time_step = time_step
+        self._cmd_id = None
 
     def reset(self, goal: Any, relative=True, *args, **kwargs) -> Dict[str, np.ndarray]:
         assert isinstance(goal, np.ndarray)
@@ -46,15 +46,42 @@ def reset(self, goal: Any, relative=True, *args, **kwargs) -> Dict[str, np.ndarr
         return self._get_obs()
 
     def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
-        ang_vel, lin_vel = self._compute_velocities(action)
-        # self.robot.command_base_velocity(ang_vel, lin_vel)
-        time.sleep(self._time_step)
-        self.robot.command_base_velocity(0.0, 0.0)
-        done = ang_vel == 0.0 and lin_vel == 0.0
+        if self._cmd_id is not None:
+            cmd_status = 0
+            while cmd_status != 1:
+                feedback_resp = self.robot.spot.get_cmd_feedback(self._cmd_id)
+                cmd_status = (
+                    feedback_resp.feedback.synchronized_feedback
+                ).mobility_command_feedback.se2_trajectory_feedback.status
+                if cmd_status != 1:
+                    time.sleep(0.1)
+
+        ang_dist, lin_dist = self._compute_displacements(action)
+        self._cmd_id = self.robot.spot.set_base_position(
+            x_pos=lin_dist,
+            y_pos=0,
+            yaw=ang_dist,
+            end_time=100,
+            relative=True,
+            max_fwd_vel=1.0,
+            max_hor_vel=0.5,
+            max_ang_vel=np.deg2rad(60),
+            disable_obstacle_avoidance=False,
+            blocking=False,
+        )
+        done = ang_dist == 0.0 and lin_dist == 0.0
         return self._get_obs(), 0.0, done, {}  # not using info dict yet
 
     def _compute_velocities(self, action: Dict[str, np.ndarray]) -> Tuple[float, float]:
-        velocities = []
+        ang_dist, lin_dist = self._compute_displacements(action)
+        ang_vel = ang_dist / self._time_step
+        lin_vel = lin_dist / self._time_step
+        return ang_vel, lin_vel
+
+    def _compute_displacements(
+        self, action: Dict[str, np.ndarray]
+    ) -> Tuple[float, float]:
+        displacements = []
         for action_key, max_dist in (
             ["angular", self._max_ang_dist],
             ["linear", self._max_lin_dist],
@@ -62,9 +89,9 @@ def _compute_velocities(self, action: Dict[str, np.ndarray]) -> Tuple[float, flo
             act_val = action.get(action_key, 0.0)  # default to 0.0 if key not present
             dist = np.clip(act_val, -1.0, 1.0)  # clip to [-1, 1]
             dist *= max_dist  # scale to max distance
-            velocities.append(dist / self._time_step)  # convert to velocity
-        ang_vel, lin_vel = velocities
-        return ang_vel, lin_vel
+            displacements.append(dist)  # keep as displacement; no velocity conversion
+        ang_dist, lin_dist = displacements
+        return ang_dist, lin_dist
 
     def _get_obs(self) -> Dict[str, np.ndarray]:
         return {
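
A quick standalone check of the new action-to-displacement mapping (a minimal
sketch, assuming only numpy; the maxima mirror the PointNavEnv defaults above):

    import numpy as np

    # Mirrors _compute_displacements(): normalized actions are clipped to
    # [-1, 1], then scaled by the per-step maxima from PointNavEnv.__init__.
    max_ang_dist = np.deg2rad(30)  # max rotation per step (rad)
    max_lin_dist = 0.25  # max forward travel per step (m)

    action = {"angular": -2.0, "linear": 0.5}  # -2.0 is out of range, gets clipped
    ang_dist = float(np.clip(action.get("angular", 0.0), -1.0, 1.0)) * max_ang_dist
    lin_dist = float(np.clip(action.get("linear", 0.0), -1.0, 1.0)) * max_lin_dist
    assert np.isclose(ang_dist, -np.deg2rad(30))  # clipped to the max turn
    assert np.isclose(lin_dist, 0.125)  # 0.5 * 0.25 m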
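
For context, a hypothetical driver loop for the reworked env; the BDSWRobot
construction and the spot_client object are assumptions, not part of this
patch. step() now blocks at the top until the previous set_base_position
trajectory reports status 1 before issuing the next relative command, and an
all-zero action marks the episode done:

    import numpy as np

    from zsos.reality.pointnav_env import PointNavEnv
    from zsos.reality.robots.bdsw_robot import BDSWRobot

    robot = BDSWRobot(spot_client)  # assumption: wraps an authenticated Spot client
    env = PointNavEnv(robot)
    obs = env.reset(np.array([2.0, 1.0]))  # relative (x, y) goal in meters
    for action in (
        {"angular": 0.2, "linear": 1.0},  # turn slightly while moving forward
        {"angular": 0.0, "linear": 0.0},  # all zeros -> done becomes True
    ):
        obs, _, done, _ = env.step(action)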