diff --git a/predicators/envs/spot_env.py b/predicators/envs/spot_env.py
index e84aa7ae7..800c44fd4 100644
--- a/predicators/envs/spot_env.py
+++ b/predicators/envs/spot_env.py
@@ -2451,7 +2451,7 @@ class VLMTestEnv(SpotRearrangementEnv):
     def predicates(self) -> Set[Predicate]:
         # return set(p for p in _ALL_PREDICATES if p.name in ["VLMOn", "Holding", "HandEmpty", "Pourable", "Toasted", "VLMIn", "Open"])
         return set(p for p in _ALL_PREDICATES
-                   if p.name in ["VLMOn", "Holding", "HandEmpty", "Upright"])
+                   if p.name in ["VLMOn", "Holding", "HandEmpty"])

     @property
     def goal_predicates(self) -> Set[Predicate]:
@@ -2474,6 +2474,7 @@ def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]:
             Object("cup", _movable_object_type),
             Object("chair", _movable_object_type),
             Object("bowl", _movable_object_type),
+            Object("table", _movable_object_type),
         }
         for o in objects:
             detection_id = LanguageObjectDetectionID(o.name)
diff --git a/predicators/main.py b/predicators/main.py
index 173b660d8..08748d681 100644
--- a/predicators/main.py
+++ b/predicators/main.py
@@ -362,6 +362,7 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics:
     metrics: Metrics = defaultdict(float)
     curr_num_nodes_created = 0.0
     curr_num_nodes_expanded = 0.0
+    import pdb; pdb.set_trace()
     for test_task_idx, env_task in enumerate(test_tasks):
         solve_start = time.perf_counter()
         try:
diff --git a/predicators/perception/spot_perceiver.py b/predicators/perception/spot_perceiver.py
index 54e0f513b..b9325aadf 100644
--- a/predicators/perception/spot_perceiver.py
+++ b/predicators/perception/spot_perceiver.py
@@ -635,13 +635,6 @@ def __init__(self) -> None:
         #     self._static_object_features = meta.get("static-object-features", {})

-    def update_perceiver_with_action(self, action: Action) -> None:
-        # NOTE: we need to keep track of the previous action
-        # because the step function (where we need knowledge
-        # of the previous action) occurs *after* the action
-        # has already been taken.
-        self._prev_action = action
-
     def _create_goal(self, state: State,
                      goal_description: GoalDescription) -> Set[GroundAtom]:
         del state  # not used
@@ -680,11 +673,20 @@ def reset(self, env_task: EnvironmentTask) -> Task:
         #     self._curr_state = state
         self._curr_env = get_or_create_env(CFG.env)
         state = self._create_state()
-        state.simulator_state = {}
-        state.simulator_state["images"] = []
-        # self._curr_state = state
-        self._curr_state = None  # this will get set by self.step()
+        # state.simulator_state = {}
+        # state.simulator_state["images"] = []
+        # state.simulator_state["state_history"] = []
+        # state.simulator_state["skill_history"] = []
+        # state.simulator_state["vlm_atoms_history"] = []
+        self._curr_state = state
        goal = self._create_goal(state, env_task.goal_description)
+
+        # Reset run-specific things.
+        self._state_history = []
+        self._executed_skill_history = []
+        self._vlm_label_history = []
+        self._prev_action = None
+
         return Task(state, goal)

     def step(self, observation: Observation) -> State:
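Note on the `reset()` changes above: the perceiver now owns per-run rolling histories (states, executed skills, VLM labels) that are cleared in `reset()` and snapshotted into each state's `simulator_state` in `step()` below. Here is a minimal sketch of that bookkeeping pattern, using hypothetical stand-in types rather than the repository's `State`/`Observation` classes:

```python
# Minimal sketch of the history-bookkeeping pattern, with hypothetical
# stand-in types (the real perceiver uses State/Action/Observation objects).
from typing import Any, Dict, List, Optional

class HistoryTrackingPerceiver:
    """Keeps per-run rolling histories and snapshots them into each state."""

    def reset(self) -> None:
        # Run-specific histories; cleared at the start of every task.
        self._state_history: List[Dict[str, Any]] = []
        self._executed_skill_history: List[Optional[str]] = []
        self._vlm_label_history: List[List[str]] = []

    def step(self, obs: Dict[str, Any]) -> Dict[str, Any]:
        # Snapshot *copies* of the histories so later mutation of the
        # internal lists cannot retroactively change earlier states.
        state = {
            "images": obs["images"],
            "state_history": list(self._state_history),
            "skill_history": list(self._executed_skill_history),
            "vlm_label_history": list(self._vlm_label_history),
        }
        self._state_history.append(dict(state))
        # The executed skill is None on the very first step of a run.
        self._executed_skill_history.append(obs.get("executed_skill"))
        return state

perceiver = HistoryTrackingPerceiver()
perceiver.reset()
s0 = perceiver.step({"images": ["img0"], "executed_skill": None})
s1 = perceiver.step({"images": ["img1"], "executed_skill": "Pick(cup)"})
assert s0["skill_history"] == []
assert s1["skill_history"] == [None]
```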
@@ -718,21 +720,66 @@ def step(self, observation: Observation) -> State:
                 draw.rectangle(text_bbox, fill='green')
                 draw.text((x0 + 1, y0 - 1.5*text_height), text, fill='white', font=font)

-        import PIL
-        from PIL import ImageDraw
-        annotated_pil_imgs = []
-        for img, img_name in zip(imgs, img_names):
-            pil_img = PIL.Image.fromarray(img)
-            draw = ImageDraw.Draw(pil_img)
-            font = utils.get_scaled_default_font(draw, 4)
-            annotated_pil_img = utils.add_text_to_draw_img(draw, (0, 0), self.camera_name_to_annotation[img_name], font)
-            annotated_pil_imgs.append(pil_img)
-        annotated_imgs = [np.array(img) for img in annotated_pil_imgs]
+        # import PIL
+        # from PIL import ImageDraw
+        # annotated_pil_imgs = []
+        # for img, img_name in zip(imgs, img_names):
+        #     pil_img = PIL.Image.fromarray(img)
+        #     draw = ImageDraw.Draw(pil_img)
+        #     font = utils.get_scaled_default_font(draw, 4)
+        #     annotated_pil_img = utils.add_text_to_draw_img(draw, (0, 0), self.camera_name_to_annotation[img_name], font)
+        #     annotated_pil_imgs.append(pil_img)
+        annotated_imgs = [np.array(img) for img in pil_imgs]

         self._gripper_open_percentage = observation.gripper_open_percentage

-        curr_state = self._create_state
+        # Check that self._curr_state is what we expect it to be.
+        import pdb; pdb.set_trace()
+        self._curr_state = self._create_state()
+        # This state is a default/empty one; we have to set the attributes
+        # of the objects and set the simulator state properly.
+        self._curr_state.simulator_state["images"] = annotated_imgs
+        # At the first timestep, these histories will be empty due to
+        # self.reset(), but at every subsequent timestep they will be
+        # non-empty.
+        self._curr_state.simulator_state["state_history"] = list(self._state_history)
+        self._curr_state.simulator_state["skill_history"] = list(self._executed_skill_history)
+        self._curr_state.simulator_state["vlm_label_history"] = list(self._vlm_label_history)
+
+        # Add to histories.
+        # A bit of extra work is required to build the VLM label history.
+        # We want to keep `utils.abstract()` as straightforward as possible,
+        # so we "rebuild" the VLM labels from the abstract state returned by
+        # `utils.abstract()`. And since we call that function anyway, we might
+        # as well store the abstract state as part of the simulator state so
+        # that we don't need to recompute it later in the approach or in
+        # planning.
+        assert self._curr_env is not None
+        preds = self._curr_env.predicates
+        state_copy = self._curr_state.copy()
+        abstract_state = utils.abstract(state_copy, preds)
+        self._curr_state.simulator_state["abstract_state"] = abstract_state
+        # Compute all the VLM atoms. `utils.abstract()` only returns the ones
+        # that are True; the remaining ones are False.
+        vlm_preds = set(pred for pred in preds if isinstance(pred, VLMPredicate))
+        vlm_atoms = set()
+        for pred in vlm_preds:
+            for choice in utils.get_object_combinations(list(state_copy), pred.types):
+                vlm_atoms.add(GroundAtom(pred, choice))
+        vlm_atoms = sorted(vlm_atoms)
+        import pdb; pdb.set_trace()
+
+        self._state_history.append(self._curr_state.copy())
+        # The executed skill will be `None` at the first timestep. This should
+        # be handled in the function that processes the history when passing
+        # it to the VLM.
+        self._executed_skill_history.append(observation.executed_skill)
+
+        #############################
+
+
+
+        curr_state = self._create_state
         self._curr_state = self._create_state()
         self._curr_state.simulator_state["images"] = annotated_imgs
         ret_state = self._curr_state.copy()
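The comment block in the hunk above describes the key design choice: `utils.abstract()` stays simple and returns only the True atoms, and the perceiver rebuilds per-atom True/False labels from that set. A hedged sketch of the rebuilding step, with illustrative stand-in types (not the repository's actual `GroundAtom` API):

```python
# Hypothetical sketch of "rebuilding" VLM labels from an abstract state:
# every candidate ground atom is labeled True iff it appears in the set
# returned by abstraction. Names here are illustrative stand-ins.
from typing import List, Set, Tuple

GroundAtomKey = Tuple[str, Tuple[str, ...]]  # (predicate name, object names)

def rebuild_vlm_labels(all_vlm_atoms: List[GroundAtomKey],
                       abstract_state: Set[GroundAtomKey]) -> List[str]:
    """Turn the True-atom set into one 'Atom: True/False' line per query."""
    labels = []
    for pred_name, objs in sorted(all_vlm_atoms):
        truth = (pred_name, objs) in abstract_state
        labels.append(f"{pred_name}({', '.join(objs)}): {truth}")
    return labels

atoms = [("VLMOn", ("cup", "table")), ("Holding", ("cup",))]
abstract = {("Holding", ("cup",))}
assert rebuild_vlm_labels(atoms, abstract) == [
    "Holding(cup): True",
    "VLMOn(cup, table): False",
]
```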
@@ -777,9 +824,9 @@ def _create_state(self) -> State:
             return DefaultState
         # Build the continuous part of the state.
         assert self._robot is not None
-        # table = Object("table", _immovable_object_type)
+        table = Object("table", _immovable_object_type)
         cup = Object("cup", _movable_object_type)
-        # pan = Object("pan", _container_type)
+        pan = Object("pan", _container_type)
         # bread = Object("bread", _movable_object_type)
         # toaster = Object("toaster", _immovable_object_type)
         # microwave = Object("microwave", _movable_object_type)
@@ -795,21 +842,21 @@
                 "qy": 0,
                 "qz": 0,
             },
-            # table: {
-            #     "x": 0,
-            #     "y": 0,
-            #     "z": 0,
-            #     "qw": 0,
-            #     "qx": 0,
-            #     "qy": 0,
-            #     "qz": 0,
-            #     "shape": 0,
-            #     "height": 0,
-            #     "width" : 0,
-            #     "length": 0,
-            #     "object_id": 1,
-            #     "flat_top_surface": 1
-            # },
+            table: {
+                "x": 0,
+                "y": 0,
+                "z": 0,
+                "qw": 0,
+                "qx": 0,
+                "qy": 0,
+                "qz": 0,
+                "shape": 0,
+                "height": 0,
+                "width" : 0,
+                "length": 0,
+                "object_id": 1,
+                "flat_top_surface": 1
+            },
             cup: {
                 "x": 0,
                 "y": 0,
@@ -905,29 +952,32 @@
             #     "object_id": 1,
             #     "flat_top_surface": 1
             # },
-            # pan: {
-            #     "x": 0,
-            #     "y": 0,
-            #     "z": 0,
-            #     "qw": 0,
-            #     "qx": 0,
-            #     "qy": 0,
-            #     "qz": 0,
-            #     "shape": 0,
-            #     "height": 0,
-            #     "width" : 0,
-            #     "length": 0,
-            #     "object_id": 3,
-            #     "placeable": 1,
-            #     "held": 0,
-            #     "lost": 0,
-            #     "in_hand_view": 0,
-            #     "in_view": 1,
-            #     "is_sweeper": 0
-            # }
+            pan: {
+                "x": 0,
+                "y": 0,
+                "z": 0,
+                "qw": 0,
+                "qx": 0,
+                "qy": 0,
+                "qz": 0,
+                "shape": 0,
+                "height": 0,
+                "width" : 0,
+                "length": 0,
+                "object_id": 3,
+                "placeable": 1,
+                "held": 0,
+                "lost": 0,
+                "in_hand_view": 0,
+                "in_view": 1,
+                "is_sweeper": 0
+            }
         }
         state_dict = {k: list(v.values()) for k, v in state_dict.items()}
-        ret_state = State(state_dict)
-        ret_state.simulator_state = {}
-        ret_state.simulator_state["images"] = []
-        return ret_state
+        state = State(state_dict)
+        state.simulator_state = {}
+        state.simulator_state["images"] = []
+        state.simulator_state["state_history"] = []
+        state.simulator_state["skill_history"] = []
+        state.simulator_state["vlm_atoms_history"] = []
+        return state
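One detail of `_create_state()` worth calling out: the per-object feature dicts are flattened with `state_dict = {k: list(v.values()) for k, v in state_dict.items()}`, so each object's value vector is ordered by dict insertion order and must line up with its type's feature names. A small illustrative sketch (stand-in types, not the real `predicators.structs.State`):

```python
# Hypothetical sketch of the dict-of-features -> State conversion done at the
# end of _create_state(). A stand-in type shows why feature *insertion order*
# matters: the value list for each object is indexed positionally against its
# type's feature names.
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class ToyType:
    name: str
    feature_names: List[str]

movable = ToyType("movable", ["x", "y", "z", "held"])

def build_state(raw: Dict[str, Dict[str, float]]) -> Dict[str, List[float]]:
    # Mirrors: state_dict = {k: list(v.values()) for k, v in state_dict.items()}
    return {obj: list(feats.values()) for obj, feats in raw.items()}

raw = {"cup": {"x": 0.0, "y": 1.0, "z": 0.5, "held": 0.0}}
state = build_state(raw)
# Feature lookup is positional, so a reordered dict would silently
# misalign features with values.
assert state["cup"][movable.feature_names.index("y")] == 1.0
```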
diff --git a/predicators/spot_utils/perception/spot_cameras.py b/predicators/spot_utils/perception/spot_cameras.py
index 9f85bcc83..f39dbc0cc 100644
--- a/predicators/spot_utils/perception/spot_cameras.py
+++ b/predicators/spot_utils/perception/spot_cameras.py
@@ -26,8 +26,8 @@
     # "hand_color_image": "hand_depth_in_hand_color_frame",
     # "left_fisheye_image": "left_depth_in_visual_frame",
     # "right_fisheye_image": "right_depth_in_visual_frame",
-    "frontleft_fisheye_image": "frontleft_depth_in_visual_frame",
-    # "frontright_fisheye_image": "frontright_depth_in_visual_frame",
+    # "frontleft_fisheye_image": "frontleft_depth_in_visual_frame",
+    "frontright_fisheye_image": "frontright_depth_in_visual_frame",
     # "back_fisheye_image": "back_depth_in_visual_frame"
 }
diff --git a/predicators/utils.py b/predicators/utils.py
index 9d3f3f723..d41f2c09a 100644
--- a/predicators/utils.py
+++ b/predicators/utils.py
@@ -2603,6 +2603,7 @@ def query_vlm_for_atom_vals(
     vlm_output_str = vlm_output[0]
     print(f"VLM output: {vlm_output_str}")
     all_vlm_responses = vlm_output_str.strip().split("\n")
+    # import pdb; pdb.set_trace()
     # NOTE: this assumption is likely too brittle; if this is breaking, feel
     # free to remove/adjust this and change the below parsing loop accordingly!
     assert len(atom_queries_list) == len(all_vlm_responses)
@@ -2615,7 +2616,7 @@
                 ":"):period_idx].lower().strip() == "true":
             true_atoms.add(vlm_atoms[i])

-    breakpoint()
+    # breakpoint()
     # Add the text of the VLM's response to the state, to be used in the future!
     # REMOVE THIS -> AND PUT IT IN THE PERCEIVER.
     # The perceiver calls utils.abstract() once and puts the result in the
     # state history, so the state-history appending here will be removed.
     # The options will also query the VLM, but they will instead just pull the
     # abstract state from the simulator-state field that already has it.
     # The appending of the VLM atom history is currently done in
     # query_vlm_for_atom_vals() in utils.py, which utils.abstract() calls.
-    state.simulator_state["vlm_atoms_history"].append(all_vlm_responses)
+    # state.simulator_state["vlm_atoms_history"].append(all_vlm_responses)

     return true_atoms

@@ -2652,6 +2653,7 @@ def abstract(state: State,
     for pred in vlm_preds:
         for choice in get_object_combinations(list(state), pred.types):
             vlm_atoms.add(GroundAtom(pred, choice))
+    # import pdb; pdb.set_trace()
     true_vlm_atoms = query_vlm_for_atom_vals(vlm_atoms, state, vlm)
     atoms |= true_vlm_atoms
     return atoms
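For reference, the parsing contract that `assert len(atom_queries_list) == len(all_vlm_responses)` enforces above: the VLM must answer with exactly one `<atom>: True/False.` line per queried atom, in order. A self-contained sketch of that loop under the same brittle assumption (stand-in names, not the exact repository code):

```python
# Hypothetical sketch of the per-line parsing contract: one
# "<atom>: True." / "<atom>: False." line per queried atom, in order.
from typing import List, Set

def parse_vlm_responses(atom_queries: List[str],
                        vlm_output_str: str) -> Set[str]:
    """Return the subset of queried atoms the VLM labeled True."""
    responses = vlm_output_str.strip().split("\n")
    # Mirrors the brittle one-line-per-atom assumption noted in the code.
    assert len(atom_queries) == len(responses)
    true_atoms: Set[str] = set()
    for query, response in zip(atom_queries, responses):
        # Strip the echoed atom text and the trailing period, then compare.
        period_idx = response.rfind(".")
        label = response[len(query + ":"):period_idx].lower().strip()
        if label == "true":
            true_atoms.add(query)
    return true_atoms

out = "VLMOn(cup, table): True.\nHolding(cup): False."
assert parse_vlm_responses(
    ["VLMOn(cup, table)", "Holding(cup)"], out) == {"VLMOn(cup, table)"}
```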