diff --git a/zsos/mapping/value_map.py b/zsos/mapping/value_map.py
index 7352c76..0af637e 100644
--- a/zsos/mapping/value_map.py
+++ b/zsos/mapping/value_map.py
@@ -12,8 +12,8 @@
 from zsos.mapping.traj_visualizer import TrajectoryVisualizer
 from zsos.utils.geometry_utils import extract_yaw, get_rotation_matrix
 from zsos.utils.img_utils import (
-    max_pixel_value_within_radius,
     monochannel_to_inferno_rgb,
+    pixel_value_within_radius,
     place_img_in_img,
     rotate_image,
 )
@@ -32,9 +32,9 @@ class ValueMap:
     _confidence_mask: np.ndarray = None
     _camera_positions: List[np.ndarray] = []
     _last_camera_yaw: float = None
-    use_max_confidence: bool = False
+    _use_max_confidence: bool = False

-    def __init__(self, fov: float, max_depth: float):
+    def __init__(self, fov: float, max_depth: float, use_max_confidence: bool = True):
         """
         Args:
             fov: The field of view of the camera in degrees.
@@ -45,6 +45,8 @@ def __init__(self, fov: float, max_depth: float):

         self.fov = np.deg2rad(fov)
         self.max_depth = max_depth
+        self._use_max_confidence = use_max_confidence
+
         self.value_map = np.zeros((size, size), np.float32)
         self.confidence_map = np.zeros((size, size), np.float32)
         self.episode_pixel_origin = np.array([size // 2, size // 2])
@@ -141,7 +143,7 @@ def get_value(point: np.ndarray) -> float:
             px = int(-x * self.pixels_per_meter) + self.episode_pixel_origin[0]
             py = int(-y * self.pixels_per_meter) + self.episode_pixel_origin[1]
             point_px = (self.value_map.shape[0] - px, py)
-            value = max_pixel_value_within_radius(self.value_map, point_px, radius_px)
+            value = pixel_value_within_radius(self.value_map, point_px, radius_px)
             return value

         values = [get_value(point) for point in waypoints]
@@ -279,12 +281,11 @@ def _fuse_new_data(self, confidence: np.ndarray, value: float):
         # self.decision_threshold AND less than the confidence in the existing map
         # will be re-assigned with a confidence of 0
         confidence_mask = np.logical_and(
-            confidence < self.decision_threshold,
-            confidence < self.confidence_map,
+            confidence < self.decision_threshold, confidence < self.confidence_map
         )
         confidence[confidence_mask] = 0

-        if self.use_max_confidence:
+        if self._use_max_confidence:
             # For every pixel that has a higher confidence in the new map than the
             # existing value map, replace the value in the existing value map with
             # the new value
diff --git a/zsos/policy/habitat_policies.py b/zsos/policy/habitat_policies.py
index 8525a5c..e6824d6 100644
--- a/zsos/policy/habitat_policies.py
+++ b/zsos/policy/habitat_policies.py
@@ -180,6 +180,7 @@ class ZSOSPolicyConfig(PolicyConfig):
     value_map_max_depth: float = 5.0
     value_map_hfov: float = 79.0
     object_map_proximity_threshold: float = 1.5
+    use_max_confidence: bool = True

     @classmethod
     def arg_names(cls) -> List[str]:
@@ -195,6 +196,7 @@ def arg_names(cls) -> List[str]:
             "object_map_proximity_threshold",
             "value_map_max_depth",
             "value_map_hfov",
+            "use_max_confidence",
         ]


diff --git a/zsos/policy/itm_policy.py b/zsos/policy/itm_policy.py
index b7ba345..4ebc318 100644
--- a/zsos/policy/itm_policy.py
+++ b/zsos/policy/itm_policy.py
@@ -26,12 +26,19 @@ class BaseITMPolicy(BaseObjectNavPolicy):
     _circle_marker_radius: int = 5

     def __init__(
-        self, value_map_max_depth: float, value_map_hfov: float, *args, **kwargs
+        self,
+        value_map_max_depth: float,
+        value_map_hfov: float,
+        use_max_confidence: bool = True,
+        *args,
+        **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self._itm = BLIP2ITMClient()
         self._value_map: ValueMap = ValueMap(
-            fov=value_map_hfov, max_depth=value_map_max_depth
+            fov=value_map_hfov,
+            max_depth=value_map_max_depth,
+            use_max_confidence=use_max_confidence,
         )

     def _reset(self):
diff --git a/zsos/policy/utils/acyclic_enforcer.py b/zsos/policy/utils/acyclic_enforcer.py
index ece83d8..3fe7315 100644
--- a/zsos/policy/utils/acyclic_enforcer.py
+++ b/zsos/policy/utils/acyclic_enforcer.py
@@ -9,7 +9,9 @@ def __init__(self, position: np.ndarray, action: Any):
         self.action = action

     def __eq__(self, other: "StateAction") -> bool:
-        return self.__hash__() == other.__hash__()
+        dist1 = np.linalg.norm(self.position - other.position)
+        dist2 = np.linalg.norm(self.action - other.action)
+        return dist1 < 0.5 and dist2 < 0.5

     def __hash__(self) -> int:
         string_repr = f"{self.position}_{self.action}"
diff --git a/zsos/utils/img_utils.py b/zsos/utils/img_utils.py
index 09be908..1b675ae 100644
--- a/zsos/utils/img_utils.py
+++ b/zsos/utils/img_utils.py
@@ -213,8 +213,11 @@ def pad_larger_dim(image: np.ndarray, target_dimension: int) -> np.ndarray:
     return padded_image


-def max_pixel_value_within_radius(
-    image: np.ndarray, pixel_location: Tuple[int, int], radius: int
+def pixel_value_within_radius(
+    image: np.ndarray,
+    pixel_location: Tuple[int, int],
+    radius: int,
+    reduction: str = "median",
 ) -> Union[float, int]:
     """Returns the maximum pixel value within a given radius of a specified pixel
     location in the given image.
@@ -224,6 +227,8 @@ def max_pixel_value_within_radius(
         pixel_location (Tuple[int, int]): The location of the pixel as a tuple
             (row, column).
         radius (int): The radius within which to find the maximum pixel value.
+        reduction (str, optional): The method to use to reduce the cropped image to a
+            single value. Defaults to "median".

     Returns:
         Union[float, int]: The maximum pixel value within the given radius of the pixel
@@ -250,5 +255,11 @@ def max_pixel_value_within_radius(
         color=255,
         thickness=-1,
     )
-
-    return np.max(cropped_image[circle_mask > 0])
+    if reduction == "mean":
+        return np.mean(cropped_image[circle_mask > 0])
+    elif reduction == "max":
+        return np.max(cropped_image[circle_mask > 0])
+    elif reduction == "median":
+        return np.median(cropped_image[circle_mask > 0])
+    else:
+        raise ValueError(f"Invalid reduction method: {reduction}")
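As a quick sanity check for reviewers, here is a minimal, self-contained sketch of the masked-reduction idea behind the new `reduction` argument of `pixel_value_within_radius`. The helper `reduce_within_radius` and the toy `value_map` below are illustrative stand-ins (a numpy mask replaces the `cv2.circle` mask used in the patch), not code from this repo:

```python
import numpy as np


def reduce_within_radius(
    image: np.ndarray, pixel_location: tuple, radius: int, reduction: str = "median"
) -> float:
    """Illustrative stand-in for the patched pixel_value_within_radius():
    reduce all pixels within `radius` of (row, col) with the chosen method."""
    row, col = pixel_location
    # Circular mask built with numpy broadcasting instead of cv2.circle.
    rows, cols = np.ogrid[: image.shape[0], : image.shape[1]]
    circle_mask = (rows - row) ** 2 + (cols - col) ** 2 <= radius**2
    reducers = {"mean": np.mean, "max": np.max, "median": np.median}
    if reduction not in reducers:
        raise ValueError(f"Invalid reduction method: {reduction}")
    return float(reducers[reduction](image[circle_mask]))


if __name__ == "__main__":
    value_map = np.zeros((10, 10), np.float32)
    value_map[5, 5] = 1.0  # one isolated high-value pixel
    for reduction in ("max", "mean", "median"):
        print(reduction, reduce_within_radius(value_map, (5, 5), 3, reduction))
    # "max" still returns 1.0, while the new default "median" returns 0.0,
    # i.e. an isolated spike no longer dominates the waypoint's score.
```

With an isolated spike in the map, `max` still reports 1.0 while the new default `median` reports 0.0, so a single bright pixel no longer dominates a waypoint's value the way it did with the old `max_pixel_value_within_radius`.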