
Commit

added stuff
wmcclinton committed Oct 17, 2024
1 parent c153fab commit 81a0859
Showing 18 changed files with 108 additions and 45 deletions.
Binary file added hand_image_0.png
Binary file added image_0.png
Binary file added image_1.png
Binary file added image_2.png
Binary file added image_3.png
Binary file added image_4.png
Binary file added image_5.png
Binary file added image_i.png
Binary file added my_image.png
Binary file added my_image2.png
2 changes: 2 additions & 0 deletions predicators/cogman.py
@@ -80,6 +80,8 @@ def step(self, observation: Observation) -> Optional[Action]:
logging.info("[CogMan] Termination triggered.")
return None
# Check if we should replan.
self._exec_monitor.perceiver = self._perceiver
self._exec_monitor.env_task = self._current_env_task
if self._exec_monitor.step(state):
logging.info("[CogMan] Replanning triggered.")
assert self._current_goal is not None
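The two assignments added above hand the perceiver and the current environment task to the execution monitor before its step() is called, so the monitor can re-derive the goal from the latest state (see the expected_atoms_monitor.py change below). A minimal sketch of the new call order inside CogMan.step(), with plain names standing in for the private attributes; this is illustrative only, not the repository's exact code:

def cogman_step_sketch(exec_monitor, perceiver, env_task, state):
    exec_monitor.perceiver = perceiver   # newly wired on every step
    exec_monitor.env_task = env_task     # newly wired on every step
    if exec_monitor.step(state):         # the monitor may now consult the goal
        return "replan"
    return "continue"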
47 changes: 24 additions & 23 deletions predicators/envs/spot_env.py
@@ -1641,22 +1641,22 @@ def _get_sweeping_surface_for_container(container: Object,


# NOTE: Expressing the belief-space three-valued predicate with state-space binary predicates is tricky, because the system supports neither NOT (negation) nor OR (disjunction), so it cannot express "NotKnownAsTrue OR NotKnownAsFalse".
# E.g., _ContainingWaterKnownAsTrue won't work.
_ContainingWaterKnown = VLMPredicate(
"ContainingWaterKnown", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if you know whether the container contains water or not. If you don't know, answer [no]."
# E.g., _ContainingFoodKnownAsTrue won't work.
_ContainingFoodKnown = VLMPredicate(
"ContainingFoodKnown", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if you know whether the steel bowl has anything in it or not. If you don't know, answer [no]."
)
_ContainingWaterUnknown = VLMPredicate(
"ContainingWaterUnknown", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if you do not know whether the container contains water or not. If you know, answer [no]."
_ContainingFoodUnknown = VLMPredicate(
"ContainingFoodUnknown", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if you do not know whether the steel bowl has anything in it or not. If you know, answer [no]."
)
_ContainingWater = VLMPredicate(
"ContainingWater", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if the container has water in it. If you know it doesn't have water, answer [no]."
_ContainingFood = VLMPredicate(
"ContainingFood", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if the steel bowl has anything in it. If you know it doesn't have anything, answer [no]."
)
_NotContainingWater = VLMPredicate(
"NotContainingWater", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if the container does not have water in it. If it has water, answer [no]."
_NotContainingFood = VLMPredicate(
"NotContainingFood", [_container_type],
prompt="[Answer: yes/no only] This predicate is true (answer [yes]) if the steel bowl does not have anything in it. If it has anything, answer [no]."
)

_InHandViewFromTop = VLMPredicate(
@@ -1667,10 +1667,10 @@ def _get_sweeping_surface_for_container(container: Object,
_ALL_PREDICATES.update({
_DoorOpenKnownTrue,
_DoorOpenKnownFalse,
_ContainingWaterKnown,
_ContainingWaterUnknown,
_ContainingWater,
_NotContainingWater,
_ContainingFoodKnown,
_ContainingFoodUnknown,
_ContainingFood,
_NotContainingFood,
_InHandViewFromTop # TODO check why missing
})

@@ -1745,18 +1745,18 @@ def _create_operators() -> Iterator[STRIPSOperator]:
LiftedAtom(_InHandViewFromTop, [robot, cup]), # TODO comment
LiftedAtom(_HandEmpty, [robot]),
LiftedAtom(_NotHolding, [robot, cup]),
LiftedAtom(_ContainingWaterUnknown, [cup]),
LiftedAtom(_ContainingFoodUnknown, [cup]),
}
# NOTE: Determinized effect: both Containing and NotContaining
# The belief state will be updated after execution
add_effs = {
LiftedAtom(_ContainingWaterKnown, [cup]),
LiftedAtom(_ContainingWater, [cup]),
LiftedAtom(_ContainingFoodKnown, [cup]),
LiftedAtom(_ContainingFood, [cup]),
# TODO add not containing water
LiftedAtom(_NotContainingWater, [cup])
LiftedAtom(_NotContainingFood, [cup])
}
del_effs = {
LiftedAtom(_ContainingWaterUnknown, [cup])
LiftedAtom(_ContainingFoodUnknown, [cup])
}
# TODO check ignore effs
ignore_effs = {_Reachable, _InHandViewFromTop, _InView, _RobotReadyForSweeping}
@@ -3472,7 +3472,8 @@ def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]:
# NOTE: we view cup as container;
cup = Object("cup", _container_type)
# cup_detection = LanguageObjectDetectionID("green bowl/greenish bowl")
cup_detection = LanguageObjectDetectionID("orange cup/orange cylinder/orange-ish mug")
# cup_detection = LanguageObjectDetectionID("steel bowl/metal bowl/shiny-metallic bowl")
cup_detection = LanguageObjectDetectionID("blue cup/blue cylinder/blue-ish mug")
# TODO test
# cup_detection = LanguageObjectDetectionID("spam box/spam container/spam-ish box")
# cup_detection = LanguageObjectDetectionID("yellow apple/yellowish apple")
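The predicate block above encodes a three-valued belief (unknown / known true / known false) with binary predicates only, since the planner supports neither negation nor disjunction, and the observation operator is determinized: it adds ContainingFoodKnown plus both ContainingFood and NotContainingFood, deletes ContainingFoodUnknown, and leaves the perceiver/VLM to keep whichever resolved value actually holds after execution. A self-contained sketch of that pattern using plain strings (illustrative names, not the repository's data structures):

# Belief-space three-valued predicate expressed with binary literals only.
preconds = {"ContainingFoodUnknown(cup)"}
add_effs = {"ContainingFoodKnown(cup)", "ContainingFood(cup)", "NotContainingFood(cup)"}
del_effs = {"ContainingFoodUnknown(cup)"}

def resolve_after_execution(vlm_says_contains_food: bool) -> set:
    # After the robot looks into the container, keep only the consistent literal.
    resolved = {"ContainingFoodKnown(cup)"}
    resolved.add("ContainingFood(cup)" if vlm_says_contains_food
                 else "NotContainingFood(cup)")
    return resolved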
5 changes: 5 additions & 0 deletions predicators/execution_monitoring/expected_atoms_monitor.py
@@ -31,6 +31,11 @@ def step(self, state: State) -> bool:
# If the expected atoms are a subset of the current atoms, then
# we don't have to replan.
unsat_atoms = {a for a in next_expected_atoms if not a.holds(state)}
# Check goal
assert self.perceiver is not None and self.env_task is not None
goal = self.perceiver._create_goal(state, self.env_task.goal_description)
import ipdb; ipdb.set_trace()
#
if not unsat_atoms:
return False
logging.info(
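The monitor now asserts that CogMan has set perceiver and env_task, re-derives the goal for the current state, and then stops at an ipdb breakpoint, so how the recomputed goal should feed into the replanning decision is left open in this commit. One plausible completion, stated purely as an assumption for illustration and not as what the commit implements:

def should_replan_sketch(next_expected_atoms, goal, state):
    unsat_expected = {a for a in next_expected_atoms if not a.holds(state)}
    unsat_goal = {a for a in goal if not a.holds(state)}
    # Replan if the plan's expectations are violated, or if the freshly
    # derived goal contains atoms the current plan never expects to achieve.
    return bool(unsat_expected) or bool(unsat_goal - set(next_expected_atoms))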
4 changes: 2 additions & 2 deletions predicators/ground_truth_models/spot_env/nsrts.py
@@ -98,8 +98,8 @@ def _move_to_hand_view_object_from_top_sampler(state: State, goal: Set[GroundAto
# Parameters are relative distance, dyaw (to the object you're moving to).
del goal

min_dist = 1.2
max_dist = 1.8
min_dist = 0.9
max_dist = 1.2

robot_obj = objs[0]
obj_to_nav_to = objs[1]
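The move-to-view-from-the-top sampler now draws the relative approach distance from 0.9-1.2 m instead of 1.2-1.8 m, bringing the robot closer before it looks down at the object. A sketch of how such bounds are typically consumed, assuming a uniform draw for distance and yaw (the sampler body itself is not shown in this diff, and the yaw range is an assumption):

import numpy as np

def sample_distance_and_yaw(rng: np.random.Generator,
                            min_dist: float = 0.9,
                            max_dist: float = 1.2):
    distance = rng.uniform(min_dist, max_dist)   # matches the new bounds above
    dyaw = rng.uniform(-np.pi, np.pi)            # assumed range, not from the diff
    return distance, dyaw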
7 changes: 5 additions & 2 deletions predicators/ground_truth_models/spot_env/options.py
@@ -38,7 +38,8 @@
from predicators.spot_utils.utils import DEFAULT_HAND_DROP_OBJECT_POSE, \
DEFAULT_HAND_LOOK_STRAIGHT_DOWN_POSE, DEFAULT_HAND_POST_DUMP_POSE, \
DEFAULT_HAND_PRE_DUMP_LIFT_POSE, DEFAULT_HAND_PRE_DUMP_POSE, \
get_relative_se2_from_se3, load_spot_metadata, object_to_top_down_geom
get_relative_se2_from_se3, load_spot_metadata, object_to_top_down_geom, \
DEFAULT_HAND_LOOK_FROM_TOP
from predicators.structs import Action, Array, Object, ParameterizedOption, \
Predicate, SpotActionExtraInfo, State, Type

@@ -531,7 +532,9 @@ def _move_to_hand_view_object_from_above_policy(state: State, memory: Dict,
yaw_param_idx = 1
robot_obj_idx = 0
target_obj_idx = 1
do_gaze = True
do_gaze = False
robot, localizer, _ = get_robot()
move_hand_to_relative_pose(robot, DEFAULT_HAND_LOOK_FROM_TOP)
return _move_to_target_policy(name, distance_param_idx, yaw_param_idx,
robot_obj_idx, target_obj_idx, do_gaze,
state, memory, objects, params)
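Two related edits here: the import list gains DEFAULT_HAND_LOOK_FROM_TOP (defined in spot_utils/utils.py below), and the move-to-hand-view-from-above policy switches do_gaze off, instead pre-positioning the hand in the top-down viewing pose before the base move. A compact sketch of the new order of operations, using only names that appear in this diff (argument details assumed from context):

# Sketch of the policy's new behavior, not a standalone script.
robot, localizer, _ = get_robot()
move_hand_to_relative_pose(robot, DEFAULT_HAND_LOOK_FROM_TOP)  # hand looks down first
# With do_gaze = False, _move_to_target_policy then only drives the base,
# so the hand stays in the top-down viewing pose during the approach.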
82 changes: 64 additions & 18 deletions predicators/perception/spot_perceiver.py
@@ -3,7 +3,7 @@
import logging
import time
from pathlib import Path
from typing import Dict, List, Optional, Set
from typing import Dict, List, Optional, Set, Collection

import imageio.v2 as iio
import numpy as np
@@ -14,7 +14,7 @@
from predicators.envs import BaseEnv, get_or_create_env
from predicators.envs.spot_env import HANDEMPTY_GRIPPER_THRESHOLD, \
SpotCubeEnv, SpotRearrangementEnv, _drafting_table_type, \
_PartialPerceptionState, _SpotObservation, in_general_view_classifier
_PartialPerceptionState, _SpotObservation, in_general_view_classifier, _ALL_TYPES
from predicators.perception.base_perceiver import BasePerceiver
from predicators.settings import CFG
from predicators.spot_utils.utils import _container_type, \
@@ -23,6 +23,7 @@
from predicators.structs import Action, DefaultState, EnvironmentTask, \
GoalDescription, GroundAtom, Object, Observation, Predicate, \
SpotActionExtraInfo, State, Task, Video
from predicators.spot_utils.perception.object_perception import vlm


class SpotPerceiver(BasePerceiver):
@@ -311,7 +312,6 @@ def _create_state(self) -> State:

def _create_goal(self, state: State,
goal_description: GoalDescription) -> Set[GroundAtom]:
del state # not used
# Unfortunate hack to deal with the fact that the state is actually
# not yet set. Hopefully one day other cleanups will enable cleaning.
assert self._curr_env is not None
@@ -550,30 +550,34 @@ def _create_goal(self, state: State,
if goal_description == "know container not as empty":
# container = Object("container", _container_type)
cup = Object("cup", _container_type)
ContainingWaterKnown = pred_name_to_pred["ContainingWaterKnown"]
ContainingWater = pred_name_to_pred["ContainingWater"]
ContainingFoodKnown = pred_name_to_pred["ContainingFoodKnown"]
ContainingFood = pred_name_to_pred["ContainingFood"]
return {
GroundAtom(ContainingWaterKnown, [cup]),
GroundAtom(ContainingWater, [cup]),
GroundAtom(ContainingFoodKnown, [cup]),
GroundAtom(ContainingFood, [cup]),
}
if goal_description == "place empty cup into the box":
cup = Object("cup", _container_type)
plastic_bin = Object("plastic_bin", _container_type)
ContainingWaterKnown = pred_name_to_pred["ContainingWaterKnown"]
NotContainingWater = pred_name_to_pred["NotContainingWater"]
ContainingFoodKnown = pred_name_to_pred["ContainingFoodKnown"]
NotContainingFood = pred_name_to_pred["NotContainingFood"]
Inside = pred_name_to_pred["Inside"]
return {
#GroundAtom(ContainingWaterKnown, [cup]),
#GroundAtom(NotContainingWater, [cup]),
GroundAtom(Inside, [cup, plastic_bin]),
}
if state.data == {}:
return {
#GroundAtom(ContainingFoodKnown, [cup]),
#GroundAtom(NotContainingFood, [cup]),
GroundAtom(Inside, [cup, plastic_bin]),
}
object_name_to_object = {}
self._parse_vlm_goal_from_state(state, goal_description, object_name_to_object)
import ipdb; ipdb.set_trace()
if goal_description == "know container as empty":
cup = Object("cup", _container_type)
ContainingWaterKnown = pred_name_to_pred["ContainingWaterKnown"]
NotContainingWater = pred_name_to_pred["NotContainingWater"]
ContainingFoodKnown = pred_name_to_pred["ContainingFoodKnown"]
NotContainingFood = pred_name_to_pred["NotContainingFood"]
return {
GroundAtom(ContainingWaterKnown, [cup]),
GroundAtom(NotContainingWater, [cup]),
GroundAtom(ContainingFoodKnown, [cup]),
GroundAtom(NotContainingFood, [cup]),
}
if goal_description == "put the cup into the plastic bin on floor":
cup = Object("cup", _container_type)
@@ -695,3 +699,45 @@ def render_mental_images(self, observation: Observation,
logging.info(f"Wrote out to {outfile}")
plt.close()
return [img]

def _get_language_goal_prompt_prefix(self,
object_names: Collection[str]) -> str:
# pylint:disable=line-too-long
available_predicates = ", ".join([p for p in sorted([pred.pretty_str()[1] for pred in self._curr_env.goal_predicates])])
available_object_types = ", ".join(sorted([t.name for t in _ALL_TYPES]))
# We could extract the object names, but this is simpler.
prompt = f"""# The available predicates are: {available_predicates}
# The available object types are: {available_object_types}
# Use the available predicates and object types to convert natural language goals into JSON goals.
# I want a sandwich with a patty, cheese, and lettuce, and get ready to give me a glass of milk.
{{"Holding": [["robot", "milk"]], "On": [["bread0", "board"], ["bread1", "lettuce0"], ["lettuce0", "cheese0"], ["cheese0", "patty0"], ["patty0", "bread0"]]}}
"""
return prompt

def _parse_vlm_goal_from_state(
self, state: State, language_goal: str,
id_to_obj: Dict[str, Object]) -> Set[GroundAtom]:
"""Helper for parsing language-based goals from JSON task specs."""
object_names = set(id_to_obj)
prompt_prefix = self._get_language_goal_prompt_prefix(object_names)
prompt = prompt_prefix + f"\n# {language_goal}"
import ipdb; ipdb.set_trace()
image_list = [
PIL.Image.fromarray(v.rotated_rgb) for _, v in rgbds.items()
]
responses = vlm.sample_completions(
prompt=prompt,
imgs=image_list,
temperature=0.1,
seed=int(time.time()),
num_completions=1,
)
response = responses[0]
import ipdb; ipdb.set_trace()
# Currently assumes that the LLM is perfect. In the future, will need
# to handle various errors and perhaps query the LLM for multiple
# responses until we find one that can be parsed.
goal_spec = json.loads(response)
return self._curr_env._parse_goal_from_json(goal_spec, id_to_obj)
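The new helpers build a few-shot prompt listing the environment's goal predicates and object types, query the VLM with the current camera images, and parse the reply as a JSON goal specification via _parse_goal_from_json. As written, the code also relies on json, PIL, and an rgbds image dictionary that are not imported or defined in this diff, and it still contains ipdb breakpoints. An illustrative sketch of the intended round trip with assumed values (not repository output):

import json

prompt = (
    "# The available predicates are: ContainingFood, Inside, ...\n"
    "# The available object types are: container, robot, ...\n"
    "# Use the available predicates and object types to convert natural "
    "language goals into JSON goals.\n"
    "# put the cup into the plastic bin on floor"
)
# A well-formed VLM reply maps predicate names to lists of argument names:
response = '{"Inside": [["cup", "plastic_bin"]]}'
goal_spec = json.loads(response)  # {"Inside": [["cup", "plastic_bin"]]}
# _parse_goal_from_json would then ground these names into GroundAtoms.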

1 change: 1 addition & 0 deletions predicators/spot_utils/perception/object_perception.py
@@ -216,6 +216,7 @@ def query_vlm(full_prompt, image_list):
votes = [result[i] for result in results]
final_results.append(votes.count(True) > votes.count(False))

import ipdb; ipdb.set_trace()
return final_results


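For context around the new breakpoint in object_perception.py: each VLM predicate query is sampled several times and settled by majority vote, which is what final_results collects just above. A tiny worked example of that aggregation with made-up votes:

# Illustrative values only: three VLM samples over two predicate queries.
results = [[True, False], [True, True], [False, False]]   # one inner list per sample
final_results = []
for i in range(2):
    votes = [result[i] for result in results]             # query 0 -> [True, True, False]
    final_results.append(votes.count(True) > votes.count(False))
print(final_results)  # [True, False]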
5 changes: 5 additions & 0 deletions predicators/spot_utils/utils.py
@@ -47,6 +47,11 @@
DEFAULT_HAND_POST_DUMP_POSE = math_helpers.SE3Pose(
x=0.80, y=0.0, z=0.25, rot=math_helpers.Quat.from_pitch(np.pi / 2))
DEFAULT_SIM_ROBOT_Z_OFFSET = 0.6
DEFAULT_HAND_LOOK_FROM_TOP = math_helpers.SE3Pose(x=0.80,
y=0.0,
z=0.75,
rot=math_helpers.Quat.from_pitch(
5*np.pi / 12))


# Spot-specific types.
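DEFAULT_HAND_LOOK_FROM_TOP places the hand 0.80 m ahead of and 0.75 m above the pose's reference frame (presumably the robot body frame, as with the other hand poses in this file), pitched by 5*pi/12 rad. That is 75 degrees, just short of the straight-down pitch of pi/2 used by DEFAULT_HAND_POST_DUMP_POSE above, so the gripper camera looks steeply down in front of the robot. A quick check of that angle (plain math, no Spot SDK dependency):

import math
pitch_rad = 5 * math.pi / 12
print(math.degrees(pitch_rad))  # 75.0 -> hand pitched 75 degrees toward the floor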
