Skip to content

Commit

Permalink
Get sweeping demo mostly working.
Browse files Browse the repository at this point in the history
  • Loading branch information
ashay-bdai committed Sep 19, 2024
1 parent ba8d070 commit 29242a8
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 50 deletions.
9 changes: 6 additions & 3 deletions predicators/approaches/bilevel_planning_approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]:
self._last_atoms_seq = atoms_seq
policy = utils.nsrt_plan_to_greedy_policy(nsrt_plan, task.goal,
self._rng)
logging.debug("Current Task Plan:")
for act in nsrt_plan:
logging.debug(act)
# logging.debug("Current Task Plan:")
# for act in nsrt_plan:
# logging.debug(act)
logging.info("Current Task Plan:")
for n in nsrt_plan:
logging.info(f"{n.name}, {n.objects}")

# Run full bilevel planning.
else:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
You are a vision system for a robot. You are provided with two images corresponding to the states before and after a particular skill is executed. You are given a list of predicates below, and you are given the values of these predicates in the image before the skill is executed. Your job is to output the values of the following predicates in the image after the skill is executed. Pay careful attention to the visual changes between the two images to figure out which predicates change and which predicates do not change. For the predicates that change, list these separately at the end of your response. Note that in some scenes, there might be no changes. First, output a description of what changes you expect to happen based on the skill that was just run, explicitly noting the skill that was run. Second, output a description of what visual changes you see happen between the before and after images, looking specifically at the objects involved in the skill's arguments, noting what objects these are. Next, output each predicate value in the after image as a bulleted list with each predicate and value on a different line. For each predicate value, provide an explanation as to why you labelled this predicate as having this particular value. Use the format: <predicate>: <truth_value>. <explanation>.
You are a vision system for a robot. You are provided with two images corresponding to the states before and after a particular skill is executed. You are given a list of predicates below, and you are given the values of these predicates in the image before the skill is executed. Your job is to output the values of the following predicates in the image after the skill is executed. Pay careful attention to the visual changes between the two images to figure out which predicates change and which predicates do not change. Note that some or all of the predicates don't necessarily have to change. First, output a description of what changes you expect to happen based on the skill that was just run, explicitly noting the skill that was run. Second, output a description of what visual changes you see happen between the before and after images, looking specifically at the objects involved in the skill's arguments, noting what objects these are. Next, output each predicate value in the after image as a bulleted list (use '*' for the bullets) with each predicate and value on a different line. Ensure there is a period ('.') after the truth value of the predicate. For each predicate value, provide an explanation as to why you labelled this predicate as having this particular value. Use the format: `* <predicate>: <truth_value>. <explanation>`. When labeling the value of a predicate, if you don't see the objects involved in that predicate, retain its truth value from the previous timestep.

The current task plan involves the following actions in sequence:
Pick1, (robot:robot, dustpan:dustpan)
PlaceNextTo, (robot:robot, dustpan:dustpan, wrappers:wrappers)
Pick2, (robot:robot, broom:broom)
Sweep, (robot:robot, broom:broom, wrappers:wrappers, dustpan:dustpan)
PlaceOnFloor, (robot:robot, broom:broom)
Pick1, (robot:robot, dustpan:dustpan)

Your response should have three sections. Here is an outline of what your response should look like:
[START OUTLINE]
# Expected changes based on the executed skill
[insert your analysis on the expected changes you will see based on the skill that was executed]

# Visual changes observed between the images
[insert your analysis on the visual changes observed between the images]

# Predicate values in the after image
[insert your bulleted list of `* <predicate>: <truth value>. <explanation>`]
[END OUTLINE]

Predicates:
35 changes: 18 additions & 17 deletions predicators/envs/spot_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from predicators.spot_utils.spot_localization import SpotLocalizer
from predicators.spot_utils.utils import _base_object_type, _container_type, \
_immovable_object_type, _movable_object_type, _robot_type, \
_broom_type, _dustpan_type, _wrappers_type, \
construct_state_given_pbrspot, get_allowed_map_regions, \
get_graph_nav_dir, get_robot_gripper_open_percentage, get_spot_home_pose, \
load_spot_metadata, object_to_top_down_geom, update_pbrspot_given_state, \
Expand Down Expand Up @@ -1517,11 +1518,11 @@ def _get_vlm_query_str(pred_name: str, objects: Sequence[Object]) -> str:
# long = "Is the dustpan oriented such that a single sweeping motion with a broom would move the mess into the dustpan?"
# long = "Touching(dustpan, mess)"
_Touching = utils.create_vlm_predicate(
"Touching", [_movable_object_type, _movable_object_type],
lambda o: _get_vlm_query_str("Touching(dustpan, mess)", o))
"Touching", [_dustpan_type, _wrappers_type],
lambda o: _get_vlm_query_str("Touching", o))
_Inside = utils.create_vlm_predicate(
"Inside", [_movable_object_type, _movable_object_type],
lambda o: _get_vlm_query_str("Inside(mess, dustpan)", o))
"Inside", [_wrappers_type, _dustpan_type],
lambda o: _get_vlm_query_str("Inside", o))

_ALL_PREDICATES = {
_NEq, _On, _TopAbove, _NotInsideAnyContainer, _FitsInXY,
Expand Down Expand Up @@ -2487,8 +2488,8 @@ def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]:
# Object("chair", _movable_object_type),
# Object("bowl", _movable_object_type),
# Object("table", _movable_object_type),
Object("wrappers", _movable_object_type),
Object("dustpan", _movable_object_type),
Object("wrappers", _wrappers_type),
Object("dustpan", _dustpan_type),
}
for o in objects:
detection_id = LanguageObjectDetectionID(o.name)
Expand Down Expand Up @@ -2536,7 +2537,7 @@ def _create_operators(self) -> Iterator[STRIPSOperator]:
##########################################
# Pick(robot, dustpan)
robot = Variable("?robot", _robot_type)
dustpan = Variable("?dustpan", _movable_object_type)
dustpan = Variable("?dustpan", _dustpan_type)
parameters = [robot, dustpan]
preconds: Set[LiftedAtom] = {
LiftedAtom(_HandEmpty, [robot]),
Expand All @@ -2553,8 +2554,8 @@ def _create_operators(self) -> Iterator[STRIPSOperator]:

# Place(robot, dustpan, mess)
robot = Variable("?robot", _robot_type)
dustpan = Variable("?dustpan", _movable_object_type)
mess = Variable("?mess", _movable_object_type)
dustpan = Variable("?dustpan", _dustpan_type)
mess = Variable("?mess", _wrappers_type)
parameters = [robot, dustpan, mess]
preconds: Set[LiftedAtom] = {LiftedAtom(_Holding, [robot, dustpan])}
add_effs: Set[LiftedAtom] = {
Expand All @@ -2564,12 +2565,12 @@ def _create_operators(self) -> Iterator[STRIPSOperator]:
}
del_effs: Set[LiftedAtom] = {LiftedAtom(_Holding, [robot, dustpan])}
ignore_effs: Set[LiftedAtom] = set()
yield STRIPSOperator("Place1", parameters, preconds, add_effs, del_effs,
yield STRIPSOperator("PlaceNextTo", parameters, preconds, add_effs, del_effs,
ignore_effs)

# Pick(robot, broom)
robot = Variable("?robot", _robot_type)
broom = Variable("?broom", _movable_object_type)
broom = Variable("?broom", _broom_type)
parameters = [robot, broom]
preconds: Set[LiftedAtom] = {
LiftedAtom(_HandEmpty, [robot]),
Expand All @@ -2586,9 +2587,9 @@ def _create_operators(self) -> Iterator[STRIPSOperator]:

# Sweep(robot, broom, mess, dustpan)
robot = Variable("?robot", _robot_type)
broom = Variable("?broom", _movable_object_type)
mess = Variable("?mess", _movable_object_type)
dustpan = Variable("?dustpan", _movable_object_type)
broom = Variable("?broom", _broom_type)
mess = Variable("?mess", _wrappers_type)
dustpan = Variable("?dustpan", _dustpan_type)
parameters = [robot, broom, mess, dustpan]
preconds: Set[LiftedAtom] = {
LiftedAtom(_Holding, [robot, broom]),
Expand All @@ -2603,7 +2604,7 @@ def _create_operators(self) -> Iterator[STRIPSOperator]:

# Place(robot, broom)
robot = Variable("?robot", _robot_type)
broom = Variable("?broom", _movable_object_type)
broom = Variable("?broom", _broom_type)
parameters = [robot, broom]
preconds: Set[LiftedAtom] = {LiftedAtom(_Holding, [robot, broom])}
add_effs: Set[LiftedAtom] = {
Expand All @@ -2612,7 +2613,7 @@ def _create_operators(self) -> Iterator[STRIPSOperator]:
}
del_effs: Set[LiftedAtom] = {LiftedAtom(_Holding, [robot, broom])}
ignore_effs: Set[LiftedAtom] = set()
yield STRIPSOperator("Place2", parameters, preconds, add_effs, del_effs,
yield STRIPSOperator("PlaceOnFloor", parameters, preconds, add_effs, del_effs,
ignore_effs)

# def _generate_train_tasks(self) -> List[EnvironmentTask]:
Expand Down Expand Up @@ -2645,7 +2646,7 @@ def __init__(self, use_gui: bool = True) -> None:
# Create constant objects.
self._spot_object = Object("robot", _robot_type)
op_to_name = {o.name: o for o in self._create_operators()}
op_names_to_keep = {"Pick1", "Place1", "Pick2", "Sweep", "Place2"}
op_names_to_keep = {"Pick1", "PlaceNextTo", "Pick2", "Sweep", "PlaceOnFloor"}
self._strips_operators = {op_to_name[o] for o in op_names_to_keep}
self._train_tasks = []
self._test_tasks = []
Expand Down
4 changes: 2 additions & 2 deletions predicators/ground_truth_models/spot_env/nsrts.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,10 @@ def get_nsrts(env_name: str, types: Dict[str, Type],
"DropNotPlaceableObject": utils.null_sampler,
"MoveToReadySweep": utils.null_sampler,
"Pick1": utils.null_sampler,
"Place1": utils.null_sampler,
"PlaceNextTo": utils.null_sampler,
"Pick2": utils.null_sampler,
"Sweep": utils.null_sampler,
"Place2": utils.null_sampler
"PlaceOnFloor": utils.null_sampler
}

# If we're doing proper bilevel planning with a simulator, then
Expand Down
8 changes: 4 additions & 4 deletions predicators/ground_truth_models/spot_env/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,10 +958,10 @@ def _teleop(robot: Robot, lease_client: LeaseClient):
"DropNotPlaceableObject": Box(0, 1, (0, )), # empty
"MoveToReadySweep": Box(0, 1, (0, )), # empty
"Pick1": Box(0, 1, (0, )), # empty
"Place1": Box(0, 1, (0, )), # empty
"PlaceNextTo": Box(0, 1, (0, )), # empty
"Pick2": Box(0, 1, (0, )), # empty
"Sweep": Box(0, 1, (0, )), # empty
"Place2": Box(0, 1, (0, )) # empty
"PlaceOnFloor": Box(0, 1, (0, )) # empty
}

# NOTE: the policies MUST be unique because they output actions with extra info
Expand All @@ -986,10 +986,10 @@ def _teleop(robot: Robot, lease_client: LeaseClient):
"DropNotPlaceableObject": _drop_not_placeable_object_policy,
"MoveToReadySweep": _move_to_ready_sweep_policy,
"Pick1": _teleop_policy,
"Place1": _teleop_policy,
"PlaceNextTo": _teleop_policy,
"Pick2": _teleop_policy,
"Sweep": _teleop_policy,
"Place2": _teleop_policy
"PlaceOnFloor": _teleop_policy
}


Expand Down
30 changes: 26 additions & 4 deletions predicators/perception/spot_perceiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
detect_objects, visualize_all_artifacts
from predicators.spot_utils.utils import _container_type, \
_immovable_object_type, _movable_object_type, _robot_type, \
_broom_type, _dustpan_type, _wrappers_type, \
get_allowed_map_regions, load_spot_metadata, object_to_top_down_geom
from predicators.structs import Action, DefaultState, EnvironmentTask, \
GoalDescription, GroundAtom, Object, Observation, Predicate, \
Expand Down Expand Up @@ -659,8 +660,8 @@ def _create_goal(self, state: State,
return goal
if goal_description == "put the mess in the dustpan":
robot = Object("robot", _robot_type)
dustpan = Object("dustpan", _movable_object_type)
wrappers = Object("wrappers", _movable_object_type)
dustpan = Object("dustpan", _dustpan_type)
wrappers = Object("wrappers", _wrappers_type)
goal = {
GroundAtom(Inside, [wrappers, dustpan]),
GroundAtom(Holding, [robot, dustpan])
Expand Down Expand Up @@ -809,8 +810,9 @@ def _create_state(self) -> State:
# table = Object("table", _movable_object_type)
# cup = Object("cup", _movable_object_type)
# pan = Object("pan", _container_type)
wrappers = Object("wrappers", _movable_object_type)
dustpan = Object("dustpan", _movable_object_type)
wrappers = Object("wrappers", _wrappers_type)
dustpan = Object("dustpan", _dustpan_type)
broom = Object("broom", _broom_type)
# bread = Object("bread", _movable_object_type)
# toaster = Object("toaster", _immovable_object_type)
# microwave = Object("microwave", _movable_object_type)
Expand Down Expand Up @@ -1000,6 +1002,26 @@ def _create_state(self) -> State:
"in_hand_view": 0,
"in_view": 1,
"is_sweeper": 0
},
broom: {
"x": 0,
"y": 0,
"z": 0,
"qw": 0,
"qx": 0,
"qy": 0,
"qz": 0,
"shape": 0,
"height": 0,
"width" : 0,
"length": 0,
"object_id": 2,
"placeable": 1,
"held": 0,
"lost": 0,
"in_hand_view": 0,
"in_view": 1,
"is_sweeper": 0
}
}
state_dict = {k: list(v.values()) for k, v in state_dict.items()}
Expand Down
9 changes: 9 additions & 0 deletions predicators/spot_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,15 @@ class _Spot3DShape(Enum):
_container_type = Type("container",
list(_movable_object_type.feature_names),
parent=_movable_object_type)
_dustpan_type = Type("dustpan",
list(_movable_object_type.feature_names),
parent=_movable_object_type)
_broom_type = Type("broom",
list(_movable_object_type.feature_names),
parent=_movable_object_type)
_wrappers_type = Type("wrappers",
list(_movable_object_type.feature_names),
parent=_movable_object_type)


def get_collision_geoms_for_nav(state: State) -> List[_Geom2D]:
Expand Down
53 changes: 34 additions & 19 deletions predicators/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1489,6 +1489,7 @@ def _option_policy(state: State) -> _Option:
raise OptionExecutionFailure(
"Executing the NSRT failed to achieve the necessary atoms.")
cur_nsrt = nsrt_queue.pop(0)
logging.info(f"Running nsrt: {cur_nsrt.name}, {cur_nsrt.objects}")
cur_option = cur_nsrt.sample_option(state, goal, rng)
logging.debug(f"Using option {cur_option.name}{cur_option.objects} "
"from NSRT plan.")
Expand Down Expand Up @@ -2511,13 +2512,13 @@ def get_prompt_for_vlm_state_labelling(
"/predicators/datasets/vlm_input_data_prompts/atom_labelling/"
try:
with open(filepath_prefix +
CFG.grammar_search_vlm_atom_label_prompt_type + ".txt",
prompt_type + ".txt",
"r",
encoding="utf-8") as f:
prompt = f.read()
except FileNotFoundError:
raise ValueError("Unknown VLM prompting option " +
f"{CFG.grammar_search_vlm_atom_label_prompt_type}")
f"{prompt_type}")
# The prompt ends with a section for 'Predicates', so list these.
for atom_str in atoms_list:
prompt += f"\n{atom_str}"
Expand All @@ -2537,7 +2538,8 @@ def get_prompt_for_vlm_state_labelling(
# and beyond.
curr_prompt = prompt[:]
curr_prompt_imgs = [
imgs_timestep for imgs_timestep in imgs_history[-1]
imgs_history[-2][0],
imgs_history[-1][0]
]
if CFG.vlm_include_cropped_images:
if CFG.env in ["burger", "burger_no_move"]: # pragma: no cover
Expand All @@ -2553,12 +2555,13 @@ def get_prompt_for_vlm_state_labelling(
curr_prompt += "\n\nPredicate values in the first scene, " \
"before the skill was executed: \n"
curr_prompt += label_history[-1]
# import pdb; pdb.set_trace()
return (curr_prompt, curr_prompt_imgs)
else:
# NOTE: we rip out only the first image from each trajectory
# which is fine for most domains, but will be problematic for
# situations in which there is more than one image per state.
return (prompt, [imgs_history[-1][0]])
return (prompt, imgs_history[-1])


def query_vlm_for_atom_vals(
Expand Down Expand Up @@ -2606,7 +2609,6 @@ def query_vlm_for_atom_vals(
vlm_query_str, imgs = get_prompt_for_vlm_state_labelling(
CFG.vlm_test_time_atom_label_prompt_type, atom_queries_list,
label_history, images_history, [], skill_history)
import pdb; pdb.set_trace()
if vlm is None:
vlm = create_vlm_by_name(CFG.vlm_model_name) # pragma: no cover.
vlm_input_imgs = \
Expand All @@ -2618,20 +2620,33 @@ def query_vlm_for_atom_vals(
num_completions=1)
assert len(vlm_output) == 1
vlm_output_str = vlm_output[0]
print(f"VLM output: {vlm_output_str}")
all_vlm_responses = vlm_output_str.strip().split("\n")
# NOTE: this assumption is likely too brittle; if this is breaking, feel
# free to remove/adjust this and change the below parsing loop accordingly!
assert len(atom_queries_list) == len(all_vlm_responses)
for i, (atom_query, curr_vlm_output_line) in enumerate(
zip(atom_queries_list, all_vlm_responses)):
assert atom_query + ":" in curr_vlm_output_line
assert "." in curr_vlm_output_line
period_idx = curr_vlm_output_line.find(".")
# value = curr_vlm_output_line[len(atom_query + ":"):period_idx].lower().strip()
value = curr_vlm_output_line.split(': ')[-1].strip('.').lower()
if value == "true":
true_atoms.add(vlm_atoms[i])
print(f"VLM output: \n{vlm_output_str}")

# ALTERNATIVE WAY TO PARSE
if len(label_history) > 0:
truth_values = re.findall(r'\* (.*): (True|False)', vlm_output_str)
import pdb; pdb.set_trace()
for i, (atom_query, pred_label) in enumerate(zip(atom_queries_list, truth_values)):
pred, label = pred_label
assert pred in atom_query
label = label.lower()
if label == "true":
true_atoms.add(vlm_atoms[i])
else:
# import pdb; pdb.set_trace()
all_vlm_responses = vlm_output_str.strip().split("\n")
# NOTE: this assumption is likely too brittle; if this is breaking, feel
# free to remove/adjust this and change the below parsing loop accordingly!
assert len(atom_queries_list) == len(all_vlm_responses)
for i, (atom_query, curr_vlm_output_line) in enumerate(
zip(atom_queries_list, all_vlm_responses)):
assert atom_query + ":" in curr_vlm_output_line
assert "." in curr_vlm_output_line
period_idx = curr_vlm_output_line.find(".")
# value = curr_vlm_output_line[len(atom_query + ":"):period_idx].lower().strip()
value = curr_vlm_output_line.split(': ')[-1].strip('.').lower()
if value == "true":
true_atoms.add(vlm_atoms[i])

# breakpoint()
# Add the text of the VLM's response to the state, to be used in the future!
Expand Down

0 comments on commit 29242a8

Please sign in to comment.