Merge pull request #293 from bdaiinstitute/change-perceiver-to-have-history

Change perceiver to have history
ashay-bdai authored Sep 16, 2024
2 parents 5a461cc + 9ce43d3 commit 37b5245
Showing 12 changed files with 173 additions and 90 deletions.
3 changes: 2 additions & 1 deletion predicators/approaches/bilevel_planning_approach.py
@@ -59,7 +59,8 @@ def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]:
nsrts = self._get_current_nsrts()
preds = self._get_current_predicates()
utils.abstract(task.init, preds, self._vlm)
import pdb; pdb.set_trace()
import pdb
pdb.set_trace()
# utils.abstract(task.init, preds, self._vlm)
# Run task planning only and then greedily sample and execute in the
# policy.
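
For context, a minimal sketch (not part of this commit) of what the utils.abstract call above produces, assuming its usual predicators behaviour of returning the ground atoms whose classifiers hold in the given state:

# Minimal sketch, assuming utils.abstract evaluates every predicate
# classifier (including VLM-backed ones, queried via self._vlm) on the
# state and returns the set of GroundAtoms that hold.
init_atoms = utils.abstract(task.init, preds, self._vlm)
# e.g. {HandEmpty(robot), VLMOn(cup, table), ...}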
90 changes: 40 additions & 50 deletions predicators/envs/spot_env.py
@@ -48,7 +48,7 @@
update_pbrspot_robot_conf, verify_estop
from predicators.structs import Action, EnvironmentTask, GoalDescription, \
GroundAtom, LiftedAtom, Object, Observation, Predicate, \
SpotActionExtraInfo, State, STRIPSOperator, Type, Variable
SpotActionExtraInfo, State, STRIPSOperator, Type, Variable, _Option

###############################################################################
# Base Class #
@@ -106,6 +106,7 @@ class _TruncatedSpotObservation:
# # A placeholder until all predicates have classifiers
# nonpercept_atoms: Set[GroundAtom]
# nonpercept_predicates: Set[Predicate]
executed_skill: Optional[_Option] = None
# Object detections per camera in self.rgbd_images.
object_detections_per_camera: Dict[str, List[Tuple[ObjectDetectionID, SegmentedBoundingBox]]]

@@ -197,7 +198,9 @@ def get_robot_only() -> Tuple[Optional[Robot], Optional[LeaseClient]]:
verify_estop(robot)
lease_client = robot.ensure_client(LeaseClient.default_service_name)
lease_client.take()
lease_keepalive = LeaseKeepAlive(lease_client, must_acquire=True, return_at_exit=True)
lease_keepalive = LeaseKeepAlive(lease_client,
must_acquire=True,
return_at_exit=True)
return robot, lease_client


@@ -1484,45 +1487,37 @@ def _get_sweeping_surface_for_container(container: Object,
"IsSemanticallyGreaterThan", [_base_object_type, _base_object_type],
_is_semantically_greater_than_classifier)


def _get_vlm_query_str(pred_name: str, objects: Sequence[Object]) -> str:
return pred_name + "(" + ", ".join(str(obj.name) for obj in objects) + ")" # pragma: no cover
_VLMOn = utils.create_vlm_predicate(
"VLMOn",
[_movable_object_type, _base_object_type],
lambda o: _get_vlm_query_str("VLMOn", o)
)
return pred_name + "(" + ", ".join(
str(obj.name) for obj in objects) + ")" # pragma: no cover


_VLMOn = utils.create_vlm_predicate("VLMOn",
[_movable_object_type, _base_object_type],
lambda o: _get_vlm_query_str("VLMOn", o))
_Upright = utils.create_vlm_predicate(
"Upright",
[_movable_object_type],
lambda o: _get_vlm_query_str("Upright", o)
)
"Upright", [_movable_object_type],
lambda o: _get_vlm_query_str("Upright", o))
_Toasted = utils.create_vlm_predicate(
"Toasted",
[_movable_object_type],
lambda o: _get_vlm_query_str("Toasted", o)
)
"Toasted", [_movable_object_type],
lambda o: _get_vlm_query_str("Toasted", o))
_VLMIn = utils.create_vlm_predicate(
"VLMIn",
[_movable_object_type, _immovable_object_type],
lambda o: _get_vlm_query_str("In", o)
)
_Open = utils.create_vlm_predicate(
"Open",
[_movable_object_type],
lambda o: _get_vlm_query_str("Open", o)
)
"VLMIn", [_movable_object_type, _immovable_object_type],
lambda o: _get_vlm_query_str("In", o))
_Open = utils.create_vlm_predicate("Open", [_movable_object_type],
lambda o: _get_vlm_query_str("Open", o))
_Stained = utils.create_vlm_predicate(
"Stained",
[_movable_object_type],
lambda o: _get_vlm_query_str("Stained", o)
)
"Stained", [_movable_object_type],
lambda o: _get_vlm_query_str("Stained", o))

_ALL_PREDICATES = {
_NEq, _On, _TopAbove, _Inside, _NotInsideAnyContainer, _FitsInXY,
_HandEmpty, _Holding, _NotHolding, _InHandView, _InView, _Reachable,
_Blocking, _NotBlocked, _ContainerReadyForSweeping, _IsPlaceable,
_IsNotPlaceable, _IsSweeper, _HasFlatTopSurface, _RobotReadyForSweeping,
_IsSemanticallyGreaterThan, _VLMOn, _Upright, _Toasted, _VLMIn, _Open, _Stained
_IsSemanticallyGreaterThan, _VLMOn, _Upright, _Toasted, _VLMIn, _Open,
_Stained
}
_NONPERCEPT_PREDICATES: Set[Predicate] = set()
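
For reference, a hedged illustration (not part of the diff) of the query string a VLM predicate sends to the vision-language model, using the _get_vlm_query_str helper defined above; the example objects are hypothetical and the types are the module-level ones in spot_env.py:

from predicators.structs import Object

# Hypothetical objects for illustration only.
cup = Object("cup", _movable_object_type)
table = Object("table", _base_object_type)

# The helper simply renders "PredName(obj1, obj2, ...)".
assert _get_vlm_query_str("VLMOn", [cup, table]) == "VLMOn(cup, table)"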

@@ -2455,16 +2450,17 @@ class VLMTestEnv(SpotRearrangementEnv):
@property
def predicates(self) -> Set[Predicate]:
# return set(p for p in _ALL_PREDICATES if p.name in ["VLMOn", "Holding", "HandEmpty", "Pourable", "Toasted", "VLMIn", "Open"])
return set(p for p in _ALL_PREDICATES if p.name in ["VLMOn", "Holding", "HandEmpty", "Upright"])

return set(p for p in _ALL_PREDICATES
if p.name in ["VLMOn", "Holding", "HandEmpty", "Upright"])

@property
def goal_predicates(self) -> Set[Predicate]:
return self.predicates

@classmethod
def get_name(cls) -> str:
return "spot_vlm_test_env"

def _get_dry_task(self, train_or_test: str,
task_idx: int) -> EnvironmentTask:
raise NotImplementedError("No dry task for VLMTestEnv.")
@@ -2495,35 +2491,31 @@ def _create_operators(self) -> Iterator[STRIPSOperator]:
LiftedAtom(_NotHolding, [robot, obj]),
LiftedAtom(_VLMOn, [obj, table])
}
add_effs: Set[LiftedAtom] = {
LiftedAtom(_Holding, [robot, obj])
}
add_effs: Set[LiftedAtom] = {LiftedAtom(_Holding, [robot, obj])}
del_effs: Set[LiftedAtom] = {
LiftedAtom(_HandEmpty, [robot]),
LiftedAtom(_NotHolding, [robot, obj]),
LiftedAtom(_VLMOn, [obj, table])
}
ignore_effs: Set[LiftedAtom] = set()
yield STRIPSOperator("Pick", parameters, preconds, add_effs, del_effs, ignore_effs)
yield STRIPSOperator("Pick", parameters, preconds, add_effs, del_effs,
ignore_effs)

# Place object
robot = Variable("?robot", _robot_type)
obj = Variable("?object", _movable_object_type)
pan = Variable("?pan", _container_type)
parameters = [robot, obj, pan]
preconds: Set[LiftedAtom] = {
LiftedAtom(_Holding, [robot, obj])
}
preconds: Set[LiftedAtom] = {LiftedAtom(_Holding, [robot, obj])}
add_effs: Set[LiftedAtom] = {
LiftedAtom(_HandEmpty, [robot]),
LiftedAtom(_NotHolding, [robot, obj]),
LiftedAtom(_VLMOn, [obj, pan])
}
del_effs: Set[LiftedAtom] = {
LiftedAtom(_Holding, [robot, obj])
}
del_effs: Set[LiftedAtom] = {LiftedAtom(_Holding, [robot, obj])}
ignore_effs: Set[LiftedAtom] = set()
yield STRIPSOperator("Place", parameters, preconds, add_effs, del_effs, ignore_effs)
yield STRIPSOperator("Place", parameters, preconds, add_effs, del_effs,
ignore_effs)

# def _generate_train_tasks(self) -> List[EnvironmentTask]:
# goal = self._generate_goal_description() # currently just one goal
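
A hedged usage sketch of the lifted Pick operator defined above: how it grounds with concrete objects, assuming the standard STRIPSOperator.ground interface from predicators.structs and the parameter order ?robot, ?object, ?table (env, the concrete objects, and the ?table parameter type are assumptions):

from predicators.structs import Object

# Hypothetical concrete objects.
robot = Object("robot", _robot_type)
cup = Object("cup", _movable_object_type)
table = Object("table", _immovable_object_type)  # parameter type assumed

# env is assumed to be a VLMTestEnv instance; mirrors the op_to_name lookup
# used in __init__ further below.
ops = {o.name: o for o in env._create_operators()}
ground_pick = ops["Pick"].ground((robot, cup, table))
# ground_pick.preconditions  -> {HandEmpty(robot), NotHolding(robot, cup),
#                                VLMOn(cup, table)}
# ground_pick.add_effects    -> {Holding(robot, cup)}
# ground_pick.delete_effects -> the same three atoms as the preconditions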
@@ -2555,10 +2547,7 @@ def __init__(self, use_gui: bool = True) -> None:
# Create constant objects.
self._spot_object = Object("robot", _robot_type)
op_to_name = {o.name: o for o in self._create_operators()}
op_names_to_keep = {
"Pick",
"Place"
}
op_names_to_keep = {"Pick", "Place"}
self._strips_operators = {op_to_name[o] for o in op_names_to_keep}
self._train_tasks = []
self._test_tasks = []
@@ -2649,7 +2638,7 @@ def _actively_construct_env_task(self) -> EnvironmentTask:

def _generate_goal_description(self) -> GoalDescription:
return "put the cup in the pan"

def reset(self, train_or_test: str, task_idx: int) -> Observation:
prompt = f"Please set up {train_or_test} task {task_idx}!"
utils.prompt_user(prompt)
@@ -2667,7 +2656,7 @@ def reset(self, train_or_test: str, task_idx: int) -> Observation:
self._current_task_goal_reached = False
self._last_action = None
return self._current_task.init_obs

def step(self, action: Action) -> Observation:
assert self._robot is not None
action_name = action.extra_info.action_name
@@ -2716,6 +2705,7 @@ def step(self, action: Action) -> Observation:
)
return obs
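
The key change to step is that the returned observation now carries the skill that was just executed, which the perceiver uses to build its history. A hedged sketch of the construction: only executed_skill is taken verbatim from this diff, the other field names are inferred from references elsewhere in the file, and the helper itself plus how the just-run option is obtained are assumptions.

def _build_truncated_obs(rgbds, gripper_open_percentage, object_detections,
                         executed_option):
    # Sketch only -- not the literal env code.
    return _TruncatedSpotObservation(
        rgbd_images=rgbds,
        gripper_open_percentage=gripper_open_percentage,
        object_detections_per_camera=object_detections,
        executed_skill=executed_option,  # the _Option just run; None on reset
    )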


###############################################################################
# Cube Table Env #
###############################################################################
10 changes: 5 additions & 5 deletions predicators/ground_truth_models/spot_env/nsrts.py
@@ -285,11 +285,11 @@ class SpotEnvsGroundTruthNSRTFactory(GroundTruthNSRTFactory):
@classmethod
def get_env_names(cls) -> Set[str]:
return {
"spot_vlm_test_env",
"spot_cube_env", "spot_soda_floor_env", "spot_soda_table_env",
"spot_soda_bucket_env", "spot_soda_chair_env",
"spot_main_sweep_env", "spot_ball_and_cup_sticky_table_env",
"spot_brush_shelf_env", "lis_spot_block_floor_env"
"spot_vlm_test_env", "spot_cube_env", "spot_soda_floor_env",
"spot_soda_table_env", "spot_soda_bucket_env",
"spot_soda_chair_env", "spot_main_sweep_env",
"spot_ball_and_cup_sticky_table_env", "spot_brush_shelf_env",
"lis_spot_block_floor_env"
}

@staticmethod
15 changes: 8 additions & 7 deletions predicators/ground_truth_models/spot_env/options.py
@@ -899,9 +899,11 @@ def _move_to_ready_sweep_policy(state: State, memory: Dict,
robot_obj_idx, target_obj_idx, do_gaze,
state, memory, objects, params)

def _teleop_policy(state: State, memory: Dict, objects: Sequence[Object], params: Array) -> Action:

def _teleop_policy(state: State, memory: Dict, objects: Sequence[Object],
params: Array) -> Action:
del state, memory, params

robot, lease_client = get_robot_only()

def _teleop(robot: Robot, lease_client: LeaseClient):
@@ -914,15 +916,14 @@ def _teleop(robot: Robot, lease_client: LeaseClient):
# Take back control.
robot, lease_client = get_robot_only()
lease_client.take()

fn = _teleop
fn_args = (robot, lease_client)
sim_fn = lambda _: None
sim_fn_args = ()
name = "teleop"
action_extra_info = SpotActionExtraInfo(
name, objects, fn, fn_args, sim_fn, sim_fn_args
)
action_extra_info = SpotActionExtraInfo(name, objects, fn, fn_args, sim_fn,
sim_fn_args)
return utils.create_spot_env_action(action_extra_info)


@@ -958,7 +959,7 @@ def _teleop(robot: Robot, lease_client: LeaseClient):
"MoveToReadySweep": Box(0, 1, (0, )), # empty
"Pick": Box(0, 1, (0, )), # empty
"Place": Box(0, 1, (0, )) # empty
}
}
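
A small hedged sketch of what the zero-dimensional Boxes above imply: the new Pick and Place skills take no continuous parameters, so sampling their parameter space yields an empty array (assuming the gym Box semantics used elsewhere in predicators):

from gym.spaces import Box

empty_params = Box(0, 1, (0, )).sample()
assert empty_params.shape == (0, )  # nothing to sample for Pick / Place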

# NOTE: the policies MUST be unique because they output actions with extra info
# that includes the name of the operators.
22 changes: 14 additions & 8 deletions predicators/perception/spot_perceiver.py
@@ -26,7 +26,7 @@
get_allowed_map_regions, load_spot_metadata, object_to_top_down_geom
from predicators.structs import Action, DefaultState, EnvironmentTask, \
GoalDescription, GroundAtom, Object, Observation, Predicate, \
SpotActionExtraInfo, State, Task, Video
SpotActionExtraInfo, State, Task, Video, _Option


class SpotPerceiver(BasePerceiver):
@@ -581,7 +581,7 @@ def render_mental_images(self, observation: Observation,
logging.info(f"Wrote out to {outfile}")
plt.close()
return [img]


class SpotMinimalPerceiver(BasePerceiver):
"""A perceiver for spot envs with minimal functionality."""
@@ -622,21 +622,23 @@ def __init__(self) -> None:
self._curr_env: Optional[BaseEnv] = None
self._waiting_for_observation = True
self._ordered_objects: List[Object] = [] # list of all known objects
self._state_history: List[State] = []
self._executed_skill_history: List[_Option] = []
# # Keep track of objects that are contained (out of view) in another
# # object, like a bag or bucket. This is important not only for gremlins
# # but also for small changes in the container's perceived pose.
# self._container_to_contained_objects: Dict[Object, Set[Object]] = {}
# Load static, hard-coded features of objects, like their shapes.
# meta = load_spot_metadata()
# self._static_object_features = meta.get("static-object-features", {})

def update_perceiver_with_action(self, action: Action) -> None:
# NOTE: we need to keep track of the previous action
# because the step function (where we need knowledge
# of the previous action) occurs *after* the action
# has already been taken.
self._prev_action = action

def _create_goal(self, state: State,
goal_description: GoalDescription) -> Set[GroundAtom]:
del state # not used
@@ -720,14 +722,18 @@ def step(self, observation: Observation) -> State:
pil_img = PIL.Image.fromarray(img)
draw = ImageDraw.Draw(pil_img)
font = utils.get_scaled_default_font(draw, 4)
annotated_pil_img = utils.add_text_to_draw_img(draw, (0, 0), self.camera_name_to_annotation[img_name], font)
annotated_pil_img = utils.add_text_to_draw_img(
draw, (0, 0), self.camera_name_to_annotation[img_name], font)
annotated_pil_imgs.append(pil_img)
annotated_imgs = [np.array(img) for img in annotated_pil_imgs]
# import pdb; pdb.set_trace()
self._gripper_open_percentage = observation.gripper_open_percentage
self._curr_state = self._create_state()
self._curr_state.simulator_state["images"] = annotated_imgs
ret_state = self._curr_state.copy()
ret_state.simulator_state["state_history"] = list(self._state_history)
self._state_history.append(ret_state)
self._executed_skill_history.append(observation.executed_skill)
ret_state.simulator_state["skill_history"] = list(self._executed_skill_history)
return ret_state
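
This is the heart of the PR: each state the perceiver returns now carries the full state history, plus the skills executed along the way, in its simulator_state. A hedged sketch of how a downstream consumer (for example, test-time VLM atom labelling) might read them; the function name is hypothetical:

def get_state_skill_history(state):
    # Hedged sketch: read the histories the perceiver threads through
    # simulator_state; the exact alignment of states and skills follows
    # the append order in step() above.
    prev_states = state.simulator_state.get("state_history", [])
    prev_skills = state.simulator_state.get("skill_history", [])
    return prev_states, prev_skills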

def _create_state(self) -> State:
Expand Down Expand Up @@ -778,7 +784,7 @@ def _create_state(self) -> State:
"qz": 0,
"shape": 0,
"height": 0,
"width" : 0,
"width": 0,
"length": 0,
"object_id": 2,
"placeable": 1,
@@ -885,7 +891,7 @@ def _create_state(self) -> State:
# }
}
state_dict = {k: list(v.values()) for k, v in state_dict.items()}
ret_state = State(state_dict)
ret_state = State(state_dict)
ret_state.simulator_state = {}
ret_state.simulator_state["images"] = []
return ret_state
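
For clarity, a hedged sketch of the construction pattern _create_state uses above: each object's features are first written as a name-to-value dict, then flattened into the ordered list that State expects (the object and its feature subset here are illustrative):

from predicators.structs import Object, State

cup = Object("cup", _movable_object_type)
state_dict = {
    cup: {"x": 0.0, "y": 0.0, "z": 0.0, "object_id": 2, "placeable": 1},
}
ret_state = State({k: list(v.values()) for k, v in state_dict.items()})
ret_state.simulator_state = {"images": []}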
5 changes: 4 additions & 1 deletion predicators/settings.py
@@ -434,7 +434,7 @@ class GlobalSettings:

# parameters for vision language models
# gemini-1.5-pro-latest, gpt-4-turbo, gpt-4o
vlm_model_name = "gemini-1.5-flash" #"gemini-1.5-pro-latest"
vlm_model_name = "gemini-1.5-flash" #"gemini-1.5-pro-latest"
vlm_temperature = 0.0
vlm_num_completions = 1
vlm_include_cropped_images = False
@@ -719,6 +719,9 @@ class GlobalSettings:
# saved_vlm_img_demos_folder
vlm_trajs_folder_name = ""
vlm_predicate_vision_api_generate_ground_atoms = False
# At test-time, we will use the below number of states
# as part of labelling the current state's VLM atoms.
vlm_test_time_atom_label_prompt_type = "per_scene_naive"

@classmethod
def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]:
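
A hedged sketch of how the new flag would typically be read elsewhere in predicators, via the global CFG settings object; the branch body is an assumption about what the per_scene_naive prompt type controls:

from predicators.settings import CFG

if CFG.vlm_test_time_atom_label_prompt_type == "per_scene_naive":
    # Assumed behaviour: label the current scene's VLM atoms from the
    # current images alone, without conditioning on the state/skill history.
    pass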
3 changes: 2 additions & 1 deletion predicators/spot_utils/perception/perception_structs.py
@@ -29,9 +29,10 @@ def rotated_rgb(self) -> NDArray[np.uint8]:
"""The image rotated to be upright."""
return ndimage.rotate(self.rgb, self.image_rot, reshape=False)


@dataclass
class RGBDImage:
"""An RGBD image"""
"""An RGBD image."""
rgb: NDArray[np.uint8]
depth: NDArray[np.uint16]
image_rot: float
3 changes: 2 additions & 1 deletion predicators/spot_utils/perception/spot_cameras.py
@@ -193,7 +193,8 @@ def capture_images_without_context(
# world_tform_camera, depth_scale,
# transforms_snapshot,
# frame_name_image_sensor, camera_model)
rgbd = RGBDImage(rgb_img, depth_img, rot, camera_name, depth_scale, camera_model)
rgbd = RGBDImage(rgb_img, depth_img, rot, camera_name, depth_scale,
camera_model)
rgbds[camera_name] = rgbd

# _LAST_CAPTURED_IMAGES = rgbds