
Commit

…Systems/predicators into merge-with-upstream-vlm
nkumar-bdai committed May 21, 2024
2 parents a8151a2 + 415c55c commit 5a171c7
Showing 86 changed files with 3,560 additions and 609 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,6 +1,7 @@
__pycache__
*.pyc
.DS_Store
.vscode
*.egg-info
*.pkl
*~
@@ -4,7 +4,7 @@ This approach is primarily useful for inventing predicates via program synthesis

An example command for running the approach from that paper is:
```
python predicators/main.py --env cover --approach grammar_search_invention --excluded_predicates all --num_train_tasks 50
python predicators/main.py --env cover --approach grammar_search_invention --excluded_predicates all --num_train_tasks 50 --seed 0
```

Last updated: 04/28/2024
@@ -64,9 +64,9 @@ apple_coring__vlm_demos__456__2
### Running predicate invention using these image demos
To use the Gemini VLM, you need to set the `GOOGLE_API_KEY` environment variable in your terminal. You can make/get an API key [here](https://aistudio.google.com/app/apikey).

Example command: `python predicators/main.py --env apple_coring --seed 456 --approach grammar_search_invention --excluded_predicates all --num_train_tasks 1 --num_test_tasks 0 --offline_data_method img_demos --vlm_trajs_folder_name apple_coring__vlm_demos__456__1`
Example command: `python predicators/main.py --env apple_coring --seed 456 --approach grammar_search_invention --excluded_predicates all --num_train_tasks 1 --num_test_tasks 0 --offline_data_method saved_vlm_img_demos_folder --vlm_trajs_folder_name apple_coring__vlm_demos__456__1`

The important flags here are the `--offline_data_method img_demos` and the `--vlm_trajs_folder_name apple_coring__vlm_demos__456__1`. The latter should point to the folder housing the demonstration set of interest!
The important flags here are the `--offline_data_method saved_vlm_img_demos_folder` and the `--vlm_trajs_folder_name apple_coring__vlm_demos__456__1`. The latter should point to the folder housing the demonstration set of interest!

Note that VLM responses are always cached, so if you run the command on a demonstration set and then rerun it, it should be much faster since it's using cached responses!
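
Putting the pieces above together, here is an illustrative sketch (not part of the repository) that sets the API key for the current process and launches the example command in a subprocess; the key value is a placeholder, and the folder name matches the example above:
```
import os
import subprocess

# Placeholder value; create your own key at
# https://aistudio.google.com/app/apikey
os.environ.setdefault("GOOGLE_API_KEY", "your-api-key-here")

# The subprocess inherits the environment, including GOOGLE_API_KEY.
subprocess.run(
    [
        "python", "predicators/main.py", "--env", "apple_coring",
        "--seed", "456", "--approach", "grammar_search_invention",
        "--excluded_predicates", "all", "--num_train_tasks", "1",
        "--num_test_tasks", "0", "--offline_data_method",
        "saved_vlm_img_demos_folder", "--vlm_trajs_folder_name",
        "apple_coring__vlm_demos__456__1",
    ],
    check=True,
)
```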

8 changes: 6 additions & 2 deletions predicators/approaches/grammar_search_invention_approach.py
@@ -341,7 +341,8 @@ def _quantified_types(self) -> List[Type]:
]

def _classify_object(self, s: State, obj: Object) -> bool:
assert obj.type == self.body.types[self.free_variable_idx]
# assert obj.type == self.body.types[self.free_variable_idx]
assert obj.is_instance(self.body.types[self.free_variable_idx])
for o in utils.get_object_combinations(set(s), self._quantified_types):
o_lst = list(o)
o_lst.insert(self.free_variable_idx, obj)
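
For context, here is a standalone sketch (illustrative only, not the predicators API) of why a subtype-aware check such as `is_instance` differs from direct type equality once types form a hierarchy; all names below are made up:
```
from dataclasses import dataclass
from typing import Optional


@dataclass(frozen=True)
class Type:
    """Illustrative stand-in for a type with an optional parent."""
    name: str
    parent: Optional["Type"] = None


@dataclass(frozen=True)
class Object:
    """Illustrative stand-in for a typed object."""
    name: str
    type: Type

    def is_instance(self, t: Type) -> bool:
        """Return True if this object's type is t or a descendant of t."""
        cur: Optional[Type] = self.type
        while cur is not None:
            if cur == t:
                return True
            cur = cur.parent
        return False


movable = Type("movable")
block = Type("block", parent=movable)
block0 = Object("block0", block)

assert block0.type != movable       # direct equality misses the parent type
assert block0.is_instance(movable)  # subtype-aware check accepts it
```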
@@ -995,7 +996,10 @@ def _parse_atom_dataset_from_annotated_dataset(
return (atom_dataset, candidates)

def learn_from_offline_dataset(self, dataset: Dataset) -> None:
if not CFG.offline_data_method == "demo+labelled_atoms":
if not CFG.offline_data_method in [
"demo+labelled_atoms", "saved_vlm_img_demos_folder",
"demo_with_vlm_imgs"
]:
atom_dataset, candidates = self._generate_atom_dataset_via_grammar(
dataset)
else:
24 changes: 24 additions & 0 deletions predicators/approaches/nsrt_learning_approach.py
@@ -74,6 +74,30 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
ground_atom_dataset = utils.create_ground_atom_dataset(
trajectories, self._get_current_predicates())
utils.save_ground_atom_dataset(ground_atom_dataset, dataset_fname)
elif CFG.offline_data_method in [
"demo+labelled_atoms", "saved_vlm_img_demos_folder",
"demo_with_vlm_imgs"
]:
# In this case, the annotations are basically ground atoms!
# We can use these to make GroundAtomTrajectories.
assert annotations is not None
assert len(annotations) == len(trajectories)
ground_atom_dataset = []
annotations_with_only_selected_preds = []
selected_preds = self._get_current_predicates()
for atoms_traj in annotations:
curr_selected_preds_atoms_traj = []
for atoms_set in atoms_traj:
curr_selected_preds_atoms_set = set(
atom for atom in atoms_set
if atom.predicate in selected_preds)
curr_selected_preds_atoms_traj.append(
curr_selected_preds_atoms_set)
annotations_with_only_selected_preds.append(
curr_selected_preds_atoms_traj)
for ll_traj, atoms in zip(trajectories,
annotations_with_only_selected_preds):
ground_atom_dataset.append((ll_traj, atoms))
self._nsrts, self._segmented_trajs, self._seg_to_nsrt = \
learn_nsrts_from_data(trajectories,
self._train_tasks,
1 change: 1 addition & 0 deletions predicators/args.py
@@ -28,6 +28,7 @@ def create_arg_parser(env_required: bool = True,
parser.add_argument("--make_failure_videos", action="store_true")
parser.add_argument("--make_interaction_videos", action="store_true")
parser.add_argument("--make_demo_videos", action="store_true")
parser.add_argument("--make_demo_images", action="store_true")
parser.add_argument("--make_cogman_videos", action="store_true")
parser.add_argument("--load_approach", action="store_true")
# In the case of online learning approaches, load_approach by itself
118 changes: 116 additions & 2 deletions predicators/cogman.py
@@ -9,16 +9,20 @@
The name "CogMan" is due to Leslie Kaelbling.
"""
import logging
from typing import Callable, List, Optional, Sequence, Set
import time
from collections import defaultdict
from typing import Callable, List, Optional, Sequence, Set, Tuple
from typing import Type as TypingType

from predicators import utils
from predicators.approaches import BaseApproach
from predicators.envs import BaseEnv
from predicators.execution_monitoring import BaseExecutionMonitor
from predicators.perception import BasePerceiver
from predicators.settings import CFG
from predicators.structs import Action, Dataset, EnvironmentTask, GroundAtom, \
InteractionRequest, InteractionResult, LowLevelTrajectory, Metrics, \
Observation, State, Task, Video
Observation, State, Task, Video, _Option


class CogMan:
@@ -177,3 +181,113 @@ def _reset_policy(self, task: Task) -> None:
else:
self._current_policy = self._approach.solve(task,
timeout=CFG.timeout)


def run_episode_and_get_observations(
cogman: CogMan,
env: BaseEnv,
train_or_test: str,
task_idx: int,
max_num_steps: int,
do_env_reset: bool = True,
terminate_on_goal_reached: bool = True,
exceptions_to_break_on: Optional[Set[TypingType[Exception]]] = None,
monitor: Optional[utils.LoggingMonitor] = None
) -> Tuple[Tuple[List[Observation], List[Action]], bool, Metrics]:
"""Execute cogman starting from the initial state of a train or test task
in the environment.
Note that the environment and cogman internal states are updated.
Terminates when any of these conditions hold: (1) cogman.step
returns None, indicating termination; (2) max_num_steps is reached;
(3) cogman or env raises an exception whose type is in
exceptions_to_break_on; (4) terminate_on_goal_reached is True and the
env goal is reached. Note that in the case where the exception is
raised in step, we exclude the last action from the returned
trajectory to maintain the invariant that the trajectory states are
of length one greater than the actions. Ideally, this method would
live in utils.py, but that results in import errors with this file.
So we keep it here for now. It might be moved in the future.
"""
if do_env_reset:
env.reset(train_or_test, task_idx)
if monitor is not None:
monitor.reset(train_or_test, task_idx)
obs = env.get_observation()
observations = [obs]
actions: List[Action] = []
curr_option: Optional[_Option] = None
metrics: Metrics = defaultdict(float)
metrics["policy_call_time"] = 0.0
metrics["num_options_executed"] = 0.0
exception_raised_in_step = False
if not (terminate_on_goal_reached and env.goal_reached()):
for _ in range(max_num_steps):
monitor_observed = False
exception_raised_in_step = False
try:
start_time = time.perf_counter()
act = cogman.step(obs)
metrics["policy_call_time"] += time.perf_counter() - start_time
if act is None:
break
if act.has_option() and act.get_option() != curr_option:
curr_option = act.get_option()
metrics["num_options_executed"] += 1
# Note: it's important to call monitor.observe() before
# env.step(), because the monitor may, for example, call
# env.render(), which outputs images of the current env
# state. If we instead called env.step() first, we would
# mistakenly record images of the next time step instead of
# the current one.
if monitor is not None:
monitor.observe(obs, act)
monitor_observed = True
obs = env.step(act)
actions.append(act)
observations.append(obs)
except Exception as e:
if exceptions_to_break_on is not None and \
any(issubclass(type(e), c) for c in exceptions_to_break_on):
if monitor_observed:
exception_raised_in_step = True
break
if monitor is not None and not monitor_observed:
monitor.observe(obs, None)
raise e
if terminate_on_goal_reached and env.goal_reached():
break
if monitor is not None and not exception_raised_in_step:
monitor.observe(obs, None)
cogman.finish_episode(obs)
traj = (observations, actions)
solved = env.goal_reached()
return traj, solved, metrics


def run_episode_and_get_states(
cogman: CogMan,
env: BaseEnv,
train_or_test: str,
task_idx: int,
max_num_steps: int,
do_env_reset: bool = True,
terminate_on_goal_reached: bool = True,
exceptions_to_break_on: Optional[Set[TypingType[Exception]]] = None,
monitor: Optional[utils.LoggingMonitor] = None
) -> Tuple[LowLevelTrajectory, bool, Metrics]:
"""Execute cogman starting from the initial state of a train or test task
in the environment.
Return a trajectory involving States (which come from running a
perceiver on observations). Having states instead of observations is
useful for downstream learning (e.g. predicates, operators,
samplers, etc.) Note that the only difference between this and the
above run_episode_and_get_observations is that this method returns a
trajectory of states instead of one of observations.
"""
_, solved, metrics = run_episode_and_get_observations(
cogman, env, train_or_test, task_idx, max_num_steps, do_env_reset,
terminate_on_goal_reached, exceptions_to_break_on, monitor)
ll_traj = cogman.get_current_history()
return ll_traj, solved, metrics
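
For orientation, a minimal usage sketch of the new helper (illustrative only: it assumes `cogman` and `env` have already been constructed, e.g. the way `predicators/main.py` sets them up, and the step budget is arbitrary):
```
from predicators.cogman import run_episode_and_get_states

# `cogman` and `env` are assumed to be an existing CogMan and BaseEnv.
ll_traj, solved, metrics = run_episode_and_get_states(
    cogman,
    env,
    train_or_test="test",
    task_idx=0,
    max_num_steps=100,  # illustrative step budget
    terminate_on_goal_reached=True,
)
print(f"solved={solved}, "
      f"policy calls took {metrics['policy_call_time']:.3f}s, "
      f"{int(metrics['num_options_executed'])} options executed")
```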
42 changes: 35 additions & 7 deletions predicators/datasets/__init__.py
@@ -7,16 +7,18 @@
from predicators.datasets.demo_only import create_demo_data
from predicators.datasets.demo_replay import create_demo_replay_data
from predicators.datasets.generate_atom_trajs_with_vlm import \
create_ground_atom_data_from_img_trajs, \
create_ground_atom_data_from_labelled_txt
create_ground_atom_data_from_generated_demos, \
create_ground_atom_data_from_labelled_txt, \
create_ground_atom_data_from_saved_img_trajs
from predicators.datasets.ground_atom_data import create_ground_atom_data
from predicators.envs import BaseEnv
from predicators.settings import CFG
from predicators.structs import Dataset, ParameterizedOption, Task
from predicators.structs import Dataset, ParameterizedOption, Predicate, Task


def create_dataset(env: BaseEnv, train_tasks: List[Task],
known_options: Set[ParameterizedOption]) -> Dataset:
known_options: Set[ParameterizedOption],
known_predicates: Set[Predicate]) -> Dataset:
"""Create offline datasets for training, given a set of training tasks for
an environment.
@@ -43,16 +45,42 @@ def create_dataset(env: BaseEnv, train_tasks: List[Task],
n = int(CFG.teacher_dataset_num_examples)
assert n >= 1, "Must have at least 1 example of each predicate"
return create_ground_atom_data(env, base_dataset, excluded_preds, n)
if CFG.offline_data_method == "demo_with_vlm_imgs": # pragma: no cover # pylint:disable=line-too-long
# NOTE: this below method is tested separately; it's just that testing
# it by calling the above function is painful because a VLM is
# instantiated and called from inside this method, but when testing,
# we want to instantiate our own 'dummy' VLM.
# NOTE: this data generation method is currently not compatible with
# option learning because it will modify dataset trajectories to
# remove a number of intermediate states when an option was being
# executed. Thus, we assert this before doing anything further.
assert CFG.option_learner == "no_learning", \
("offline data method demo_with_vlm_imgs only compatible with the"
"'no_learning' option learner.")
# First, we call create_demo_data to create a dataset.
demo_data = create_demo_data(env,
train_tasks,
known_options,
annotate_with_gt_ops=False)
assert len(demo_data.trajectories) == len(train_tasks), (
"Cannot run "
"VLM-based predicate invention if we don't have one demo per "
"training task; ensure there are no failures in demonstration "
"generation.")
# Second, we add annotations to these trajectories by leveraging
# a VLM.
return create_ground_atom_data_from_generated_demos(
demo_data, env, known_predicates, train_tasks)
if CFG.offline_data_method == "demo+labelled_atoms":
return create_ground_atom_data_from_labelled_txt(
env, train_tasks, known_options)
if CFG.offline_data_method == "img_demos": # pragma: no cover.
if CFG.offline_data_method == "saved_vlm_img_demos_folder": # pragma: no cover # pylint:disable=line-too-long
# NOTE: this below method is tested separately; it's just that testing
# it by calling the above function is painful because a VLM is
# instantiated and called from inside this method, but when testing,
# we want to instantiate our own 'dummy' VLM.
return create_ground_atom_data_from_img_trajs(env, train_tasks,
known_options)
return create_ground_atom_data_from_saved_img_trajs(
env, train_tasks, known_predicates, known_options)
if CFG.offline_data_method == "empty":
return Dataset([])
raise NotImplementedError("Unrecognized dataset method.")