
Commit

…Systems/predicators into merge-with-upstream-vlm
nkumar-bdai committed May 21, 2024
2 parents a8151a2 + 415c55c commit 5a171c7
Showing 86 changed files with 3,560 additions and 609 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,6 +1,7 @@
__pycache__
*.pyc
.DS_Store
.vscode
*.egg-info
*.pkl
*~
@@ -4,7 +4,7 @@ This approach is primarily useful for inventing predicates via program synthesis

An example command for running the approach from that paper is:
```
python predicators/main.py --env cover --approach grammar_search_invention --excluded_predicates all --num_train_tasks 50
python predicators/main.py --env cover --approach grammar_search_invention --excluded_predicates all --num_train_tasks 50 --seed 0
```

Last updated: 04/28/2024
@@ -64,9 +64,9 @@ apple_coring__vlm_demos__456__2
### Running predicate invention using these image demos
To use the Gemini VLM, you need to set the `GOOGLE_API_KEY` environment variable in your terminal. You can make/get an API key [here](https://aistudio.google.com/app/apikey).

Example command: `python predicators/main.py --env apple_coring --seed 456 --approach grammar_search_invention --excluded_predicates all --num_train_tasks 1 --num_test_tasks 0 --offline_data_method img_demos --vlm_trajs_folder_name apple_coring__vlm_demos__456__1`
Example command: `python predicators/main.py --env apple_coring --seed 456 --approach grammar_search_invention --excluded_predicates all --num_train_tasks 1 --num_test_tasks 0 --offline_data_method saved_vlm_img_demos_folder --vlm_trajs_folder_name apple_coring__vlm_demos__456__1`

The important flags here are the `--offline_data_method img_demos` and the `--vlm_trajs_folder_name apple_coring__vlm_demos__456__1`. The latter should point to the folder housing the demonstration set of interest!
The important flags here are the `--offline_data_method saved_vlm_img_demos_folder` and the `--vlm_trajs_folder_name apple_coring__vlm_demos__456__1`. The latter should point to the folder housing the demonstration set of interest!

Note that VLM responses are always cached, so if you run the command on a demonstration set and then rerun it, it should be much faster since it's using cached responses!
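
Putting the pieces above together, here is an illustrative sketch (not part of the repository) that sets the API key for the current process and launches the example command in a subprocess; the key value is a placeholder, and the folder name matches the example above:
```
import os
import subprocess

# Placeholder value; create your own key at
# https://aistudio.google.com/app/apikey
os.environ.setdefault("GOOGLE_API_KEY", "your-api-key-here")

# The subprocess inherits the environment, including GOOGLE_API_KEY.
subprocess.run(
    [
        "python", "predicators/main.py", "--env", "apple_coring",
        "--seed", "456", "--approach", "grammar_search_invention",
        "--excluded_predicates", "all", "--num_train_tasks", "1",
        "--num_test_tasks", "0", "--offline_data_method",
        "saved_vlm_img_demos_folder", "--vlm_trajs_folder_name",
        "apple_coring__vlm_demos__456__1",
    ],
    check=True,
)
```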

8 changes: 6 additions & 2 deletions predicators/approaches/grammar_search_invention_approach.py
@@ -341,7 +341,8 @@ def _quantified_types(self) -> List[Type]:
]

def _classify_object(self, s: State, obj: Object) -> bool:
assert obj.type == self.body.types[self.free_variable_idx]
# assert obj.type == self.body.types[self.free_variable_idx]
assert obj.is_instance(self.body.types[self.free_variable_idx])
for o in utils.get_object_combinations(set(s), self._quantified_types):
o_lst = list(o)
o_lst.insert(self.free_variable_idx, obj)
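
For context, here is a standalone sketch (illustrative only, not the predicators API) of why a subtype-aware check such as `is_instance` differs from direct type equality once types form a hierarchy; all names below are made up:
```
from dataclasses import dataclass
from typing import Optional


@dataclass(frozen=True)
class Type:
    """Illustrative stand-in for a type with an optional parent."""
    name: str
    parent: Optional["Type"] = None


@dataclass(frozen=True)
class Object:
    """Illustrative stand-in for a typed object."""
    name: str
    type: Type

    def is_instance(self, t: Type) -> bool:
        """Return True if this object's type is t or a descendant of t."""
        cur: Optional[Type] = self.type
        while cur is not None:
            if cur == t:
                return True
            cur = cur.parent
        return False


movable = Type("movable")
block = Type("block", parent=movable)
block0 = Object("block0", block)

assert block0.type != movable       # direct equality misses the parent type
assert block0.is_instance(movable)  # subtype-aware check accepts it
```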
@@ -995,7 +996,10 @@ def _parse_atom_dataset_from_annotated_dataset(
return (atom_dataset, candidates)

def learn_from_offline_dataset(self, dataset: Dataset) -> None:
if not CFG.offline_data_method == "demo+labelled_atoms":
if not CFG.offline_data_method in [
"demo+labelled_atoms", "saved_vlm_img_demos_folder",
"demo_with_vlm_imgs"
]:
atom_dataset, candidates = self._generate_atom_dataset_via_grammar(
dataset)
else:
24 changes: 24 additions & 0 deletions predicators/approaches/nsrt_learning_approach.py
@@ -74,6 +74,30 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
ground_atom_dataset = utils.create_ground_atom_dataset(
trajectories, self._get_current_predicates())
utils.save_ground_atom_dataset(ground_atom_dataset, dataset_fname)
elif CFG.offline_data_method in [
"demo+labelled_atoms", "saved_vlm_img_demos_folder",
"demo_with_vlm_imgs"
]:
# In this case, the annotations are basically ground atoms!
# We can use these to make GroundAtomTrajectories.
assert annotations is not None
assert len(annotations) == len(trajectories)
ground_atom_dataset = []
annotations_with_only_selected_preds = []
selected_preds = self._get_current_predicates()
for atoms_traj in annotations:
curr_selected_preds_atoms_traj = []
for atoms_set in atoms_traj:
curr_selected_preds_atoms_set = set(
atom for atom in atoms_set
if atom.predicate in selected_preds)
curr_selected_preds_atoms_traj.append(
curr_selected_preds_atoms_set)
annotations_with_only_selected_preds.append(
curr_selected_preds_atoms_traj)
for ll_traj, atoms in zip(trajectories,
annotations_with_only_selected_preds):
ground_atom_dataset.append((ll_traj, atoms))
self._nsrts, self._segmented_trajs, self._seg_to_nsrt = \
learn_nsrts_from_data(trajectories,
self._train_tasks,
1 change: 1 addition & 0 deletions predicators/args.py
@@ -28,6 +28,7 @@ def create_arg_parser(env_required: bool = True,
parser.add_argument("--make_failure_videos", action="store_true")
parser.add_argument("--make_interaction_videos", action="store_true")
parser.add_argument("--make_demo_videos", action="store_true")
parser.add_argument("--make_demo_images", action="store_true")
parser.add_argument("--make_cogman_videos", action="store_true")
parser.add_argument("--load_approach", action="store_true")
# In the case of online learning approaches, load_approach by itself
118 changes: 116 additions & 2 deletions predicators/cogman.py
@@ -9,16 +9,20 @@
The name "CogMan" is due to Leslie Kaelbling.
"""
import logging
from typing import Callable, List, Optional, Sequence, Set
import time
from collections import defaultdict
from typing import Callable, List, Optional, Sequence, Set, Tuple
from typing import Type as TypingType

from predicators import utils
from predicators.approaches import BaseApproach
from predicators.envs import BaseEnv
from predicators.execution_monitoring import BaseExecutionMonitor
from predicators.perception import BasePerceiver
from predicators.settings import CFG
from predicators.structs import Action, Dataset, EnvironmentTask, GroundAtom, \
InteractionRequest, InteractionResult, LowLevelTrajectory, Metrics, \
Observation, State, Task, Video
Observation, State, Task, Video, _Option


class CogMan:
@@ -177,3 +181,113 @@ def _reset_policy(self, task: Task) -> None:
else:
self._current_policy = self._approach.solve(task,
timeout=CFG.timeout)


def run_episode_and_get_observations(
cogman: CogMan,
env: BaseEnv,
train_or_test: str,
task_idx: int,
max_num_steps: int,
do_env_reset: bool = True,
terminate_on_goal_reached: bool = True,
exceptions_to_break_on: Optional[Set[TypingType[Exception]]] = None,
monitor: Optional[utils.LoggingMonitor] = None
) -> Tuple[Tuple[List[Observation], List[Action]], bool, Metrics]:
"""Execute cogman starting from the initial state of a train or test task
in the environment.
Note that the environment and cogman internal states are updated.
Terminates when any of these conditions hold: (1) cogman.step
returns None, indicating termination; (2) max_num_steps is reached;
(3) cogman or env raises an exception whose type is in
exceptions_to_break_on; (4) terminate_on_goal_reached is True and the
env goal is reached. Note that in the case where the exception is
raised in step, we exclude the last action from the returned
trajectory to maintain the invariant that the trajectory states are
of length one greater than the actions. Ideally, this method would
live in utils.py, but that results in import errors with this file.
So we keep it here for now. It might be moved in the future.
"""
if do_env_reset:
env.reset(train_or_test, task_idx)
if monitor is not None:
monitor.reset(train_or_test, task_idx)
obs = env.get_observation()
observations = [obs]
actions: List[Action] = []
curr_option: Optional[_Option] = None
metrics: Metrics = defaultdict(float)
metrics["policy_call_time"] = 0.0
metrics["num_options_executed"] = 0.0
exception_raised_in_step = False
if not (terminate_on_goal_reached and env.goal_reached()):
for _ in range(max_num_steps):
monitor_observed = False
exception_raised_in_step = False
try:
start_time = time.perf_counter()
act = cogman.step(obs)
metrics["policy_call_time"] += time.perf_counter() - start_time
if act is None:
break
if act.has_option() and act.get_option() != curr_option:
curr_option = act.get_option()
metrics["num_options_executed"] += 1
# Note: it's important to call monitor.observe() before
# env.step(), because the monitor may, for example, call
# env.render(), which outputs images of the current env
# state. If we instead called env.step() first, we would
# mistakenly record images of the next time step instead of
# the current one.
if monitor is not None:
monitor.observe(obs, act)
monitor_observed = True
obs = env.step(act)
actions.append(act)
observations.append(obs)
except Exception as e:
if exceptions_to_break_on is not None and \
any(issubclass(type(e), c) for c in exceptions_to_break_on):
if monitor_observed:
exception_raised_in_step = True
break
if monitor is not None and not monitor_observed:
monitor.observe(obs, None)
raise e
if terminate_on_goal_reached and env.goal_reached():
break
if monitor is not None and not exception_raised_in_step:
monitor.observe(obs, None)
cogman.finish_episode(obs)
traj = (observations, actions)
solved = env.goal_reached()
return traj, solved, metrics


def run_episode_and_get_states(
cogman: CogMan,
env: BaseEnv,
train_or_test: str,
task_idx: int,
max_num_steps: int,
do_env_reset: bool = True,
terminate_on_goal_reached: bool = True,
exceptions_to_break_on: Optional[Set[TypingType[Exception]]] = None,
monitor: Optional[utils.LoggingMonitor] = None
) -> Tuple[LowLevelTrajectory, bool, Metrics]:
"""Execute cogman starting from the initial state of a train or test task
in the environment.
Return a trajectory involving States (which come from running a
perceiver on observations). Having states instead of observations is
useful for downstream learning (e.g. predicates, operators,
samplers, etc.) Note that the only difference between this and the
above run_episode_and_get_observations is that this method returns a
trajectory of states instead of one of observations.
"""
_, solved, metrics = run_episode_and_get_observations(
cogman, env, train_or_test, task_idx, max_num_steps, do_env_reset,
terminate_on_goal_reached, exceptions_to_break_on, monitor)
ll_traj = cogman.get_current_history()
return ll_traj, solved, metrics
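
For orientation, a minimal usage sketch of the new helper (illustrative only: it assumes `cogman` and `env` have already been constructed, e.g. the way `predicators/main.py` sets them up, and the step budget is arbitrary):
```
from predicators.cogman import run_episode_and_get_states

# `cogman` and `env` are assumed to be an existing CogMan and BaseEnv.
ll_traj, solved, metrics = run_episode_and_get_states(
    cogman,
    env,
    train_or_test="test",
    task_idx=0,
    max_num_steps=100,  # illustrative step budget
    terminate_on_goal_reached=True,
)
print(f"solved={solved}, "
      f"policy calls took {metrics['policy_call_time']:.3f}s, "
      f"{int(metrics['num_options_executed'])} options executed")
```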
42 changes: 35 additions & 7 deletions predicators/datasets/__init__.py
@@ -7,16 +7,18 @@
from predicators.datasets.demo_only import create_demo_data
from predicators.datasets.demo_replay import create_demo_replay_data
from predicators.datasets.generate_atom_trajs_with_vlm import \
create_ground_atom_data_from_img_trajs, \
create_ground_atom_data_from_labelled_txt
create_ground_atom_data_from_generated_demos, \
create_ground_atom_data_from_labelled_txt, \
create_ground_atom_data_from_saved_img_trajs
from predicators.datasets.ground_atom_data import create_ground_atom_data
from predicators.envs import BaseEnv
from predicators.settings import CFG
from predicators.structs import Dataset, ParameterizedOption, Task
from predicators.structs import Dataset, ParameterizedOption, Predicate, Task


def create_dataset(env: BaseEnv, train_tasks: List[Task],
known_options: Set[ParameterizedOption]) -> Dataset:
known_options: Set[ParameterizedOption],
known_predicates: Set[Predicate]) -> Dataset:
"""Create offline datasets for training, given a set of training tasks for
an environment.
@@ -43,16 +45,42 @@ def create_dataset(env: BaseEnv, train_tasks: List[Task],
n = int(CFG.teacher_dataset_num_examples)
assert n >= 1, "Must have at least 1 example of each predicate"
return create_ground_atom_data(env, base_dataset, excluded_preds, n)
if CFG.offline_data_method == "demo_with_vlm_imgs": # pragma: no cover # pylint:disable=line-too-long
# NOTE: this below method is tested separately; it's just that testing
# it by calling the above function is painful because a VLM is
# instantiated and called from inside this method, but when testing,
# we want to instantiate our own 'dummy' VLM.
# NOTE: this data generation method is currently not compatible with
# option learning because it will modify dataset trajectories to
# remove a number of intermediate states when an option was being
# executed. Thus, we assert this before doing anything further.
assert CFG.option_learner == "no_learning", \
("offline data method demo_with_vlm_imgs only compatible with the"
"'no_learning' option learner.")
# First, we call create_demo_data to create a dataset.
demo_data = create_demo_data(env,
train_tasks,
known_options,
annotate_with_gt_ops=False)
assert len(demo_data.trajectories) == len(train_tasks), (
"Cannot run "
"VLM-based predicate invention if we don't have one demo per "
"training task; ensure there are no failures in demonstration "
"generation.")
# Second, we add annotations to these trajectories by leveraging
# a VLM.
return create_ground_atom_data_from_generated_demos(
demo_data, env, known_predicates, train_tasks)
if CFG.offline_data_method == "demo+labelled_atoms":
return create_ground_atom_data_from_labelled_txt(
env, train_tasks, known_options)
if CFG.offline_data_method == "img_demos": # pragma: no cover.
if CFG.offline_data_method == "saved_vlm_img_demos_folder": # pragma: no cover # pylint:disable=line-too-long
# NOTE: this below method is tested separately; it's just that testing
# it by calling the above function is painful because a VLM is
# instantiated and called from inside this method, but when testing,
# we want to instantiate our own 'dummy' VLM.
return create_ground_atom_data_from_img_trajs(env, train_tasks,
known_options)
return create_ground_atom_data_from_saved_img_trajs(
env, train_tasks, known_predicates, known_options)
if CFG.offline_data_method == "empty":
return Dataset([])
raise NotImplementedError("Unrecognized dataset method.")