Skip to content

Commit

Permalink
replace weighted averaging with non-maximum suppression for value upd…
Browse files Browse the repository at this point in the history
…ates, using median instead of max to evaluate waypoint value
  • Loading branch information
naokiyokoyamabd committed Aug 14, 2023
1 parent 97751ad commit 5f4d696
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 14 deletions.
15 changes: 8 additions & 7 deletions zsos/mapping/value_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from zsos.mapping.traj_visualizer import TrajectoryVisualizer
from zsos.utils.geometry_utils import extract_yaw, get_rotation_matrix
from zsos.utils.img_utils import (
max_pixel_value_within_radius,
monochannel_to_inferno_rgb,
pixel_value_within_radius,
place_img_in_img,
rotate_image,
)
Expand All @@ -32,9 +32,9 @@ class ValueMap:
_confidence_mask: np.ndarray = None
_camera_positions: List[np.ndarray] = []
_last_camera_yaw: float = None
use_max_confidence: bool = False
_use_max_confidence: bool = False

def __init__(self, fov: float, max_depth: float):
def __init__(self, fov: float, max_depth: float, use_max_confidence: bool = True):
"""
Args:
fov: The field of view of the camera in degrees.
Expand All @@ -45,6 +45,8 @@ def __init__(self, fov: float, max_depth: float):

self.fov = np.deg2rad(fov)
self.max_depth = max_depth
self.use_max_confidence = use_max_confidence

self.value_map = np.zeros((size, size), np.float32)
self.confidence_map = np.zeros((size, size), np.float32)
self.episode_pixel_origin = np.array([size // 2, size // 2])
Expand Down Expand Up @@ -141,7 +143,7 @@ def get_value(point: np.ndarray) -> float:
px = int(-x * self.pixels_per_meter) + self.episode_pixel_origin[0]
py = int(-y * self.pixels_per_meter) + self.episode_pixel_origin[1]
point_px = (self.value_map.shape[0] - px, py)
value = max_pixel_value_within_radius(self.value_map, point_px, radius_px)
value = pixel_value_within_radius(self.value_map, point_px, radius_px)
return value

values = [get_value(point) for point in waypoints]
Expand Down Expand Up @@ -279,12 +281,11 @@ def _fuse_new_data(self, confidence: np.ndarray, value: float):
# self.decision_threshold AND less than the confidence in the existing map
# will be re-assigned with a confidence of 0
confidence_mask = np.logical_and(
confidence < self.decision_threshold,
confidence < self.confidence_map,
confidence < self.decision_threshold, confidence < self.confidence_map
)
confidence[confidence_mask] = 0

if self.use_max_confidence:
if self._use_max_confidence:
# For every pixel that has a higher confidence in the new map than the
# existing value map, replace the value in the existing value map with
# the new value
Expand Down
2 changes: 2 additions & 0 deletions zsos/policy/habitat_policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ class ZSOSPolicyConfig(PolicyConfig):
value_map_max_depth: float = 5.0
value_map_hfov: float = 79.0
object_map_proximity_threshold: float = 1.5
use_max_confidence: bool = True

@classmethod
def arg_names(cls) -> List[str]:
Expand All @@ -195,6 +196,7 @@ def arg_names(cls) -> List[str]:
"object_map_proximity_threshold",
"value_map_max_depth",
"value_map_hfov",
"use_max_confidence",
]


Expand Down
11 changes: 9 additions & 2 deletions zsos/policy/itm_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,19 @@ class BaseITMPolicy(BaseObjectNavPolicy):
_circle_marker_radius: int = 5

def __init__(
self, value_map_max_depth: float, value_map_hfov: float, *args, **kwargs
self,
value_map_max_depth: float,
value_map_hfov: float,
use_max_confidence: bool = True,
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self._itm = BLIP2ITMClient()
self._value_map: ValueMap = ValueMap(
fov=value_map_hfov, max_depth=value_map_max_depth
fov=value_map_hfov,
max_depth=value_map_max_depth,
use_max_confidence=use_max_confidence,
)

def _reset(self):
Expand Down
4 changes: 3 additions & 1 deletion zsos/policy/utils/acyclic_enforcer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ def __init__(self, position: np.ndarray, action: Any):
self.action = action

def __eq__(self, other: "StateAction") -> bool:
return self.__hash__() == other.__hash__()
dist1 = np.linalg.norm(self.position - other.position)
dist2 = np.linalg.norm(self.action - other.action)
return dist1 < 0.5 and dist2 < 0.5

def __hash__(self) -> int:
string_repr = f"{self.position}_{self.action}"
Expand Down
19 changes: 15 additions & 4 deletions zsos/utils/img_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,11 @@ def pad_larger_dim(image: np.ndarray, target_dimension: int) -> np.ndarray:
return padded_image


def max_pixel_value_within_radius(
image: np.ndarray, pixel_location: Tuple[int, int], radius: int
def pixel_value_within_radius(
image: np.ndarray,
pixel_location: Tuple[int, int],
radius: int,
reduction: str = "median",
) -> Union[float, int]:
"""Returns a single pixel value, computed by applying the chosen reduction (mean,
max, or median) to the pixels within a given radius of a specified pixel
location in the given image.
Expand All @@ -224,6 +227,8 @@ def max_pixel_value_within_radius(
pixel_location (Tuple[int, int]): The location of the pixel as a tuple (row,
column).
radius (int): The radius within which pixel values are reduced.
reduction (str, optional): The method to use to reduce the cropped image to a
single value. One of "mean", "max", or "median"; any other value raises a
ValueError. Defaults to "median".
Returns:
Union[float, int]: The reduced pixel value within the given radius of the pixel
Expand All @@ -250,5 +255,11 @@ def max_pixel_value_within_radius(
color=255,
thickness=-1,
)

return np.max(cropped_image[circle_mask > 0])
if reduction == "mean":
return np.mean(cropped_image[circle_mask > 0])
elif reduction == "max":
return np.max(cropped_image[circle_mask > 0])
elif reduction == "median":
return np.median(cropped_image[circle_mask > 0])
else:
raise ValueError(f"Invalid reduction method: {reduction}")

0 comments on commit 5f4d696

Please sign in to comment.