Merge branch 'master' into add-text2position
nkumar-bdai authored Jul 18, 2023
2 parents 1f6e2b2 + 9a9b61b commit 96d31a1
Showing 42 changed files with 1,316 additions and 397 deletions.
2 changes: 1 addition & 1 deletion predicators/approaches/bilevel_planning_approach.py
@@ -150,7 +150,7 @@ def _save_metrics(self, metrics: Metrics, nsrts: Set[NSRT],
for metric in [
"num_samples", "num_skeletons_optimized",
"num_failures_discovered", "num_nodes_expanded",
"num_nodes_created", "plan_length"
"num_nodes_created", "plan_length", "refinement_time"
]:
self._metrics[f"total_{metric}"] += metrics[metric]
self._metrics["total_num_nsrts"] += len(nsrts)
23 changes: 23 additions & 0 deletions predicators/envs/cover.py
@@ -996,6 +996,29 @@ def _Covers_holds(state: State, objects: Sequence[Object]) -> bool:
(by - bh == 0)


class CoverEnvPlaceHard(CoverEnv):
"""A cover environment where the only thing that's hard is placing.
Specifically, there is only one block and one target, and the default grasp
sampler always picks up the block directly in the middle. The robot is
allowed to place anywhere, and the default sampler tries placing in a
region that's 2x bigger than the target, often missing the target. The only
thing that needs to be learned is how to place to correctly cover the
target.
This environment is specifically useful for testing various aspects
of different sampler learning approaches.
"""
_allow_free_space_placing: ClassVar[bool] = True

@classmethod
def get_name(cls) -> str:
return "cover_place_hard"

def _get_hand_regions(self, state: State) -> List[Tuple[float, float]]:
# Allow placing anywhere!
return [(0.0, 1.0)]
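
The new class only overrides the environment name and the hand regions; everything else is inherited from CoverEnv. Below is a minimal sketch of how a placement region twice as wide as the target, as described in the docstring, could be sampled. The helper and its parameter names (`sample_place_pose`, `target_center`, `target_width`, `place_scale`) are hypothetical and for illustration only, not identifiers from this repository.

```python
from typing import Optional

import numpy as np


def sample_place_pose(target_center: float, target_width: float,
                      place_scale: float = 2.0,
                      rng: Optional[np.random.Generator] = None) -> float:
    """Sample a placement x-coordinate from a region `place_scale` times wider
    than the target, mirroring the behavior described in the CoverEnvPlaceHard
    docstring. (Hypothetical helper, not code from this repository.)"""
    rng = rng if rng is not None else np.random.default_rng()
    half_width = (target_width * place_scale) / 2.0
    return rng.uniform(target_center - half_width, target_center + half_width)
```

With `place_scale=2.0`, roughly half of the sampled poses miss the target, which is what makes learning a better placement sampler the interesting part of this environment.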


class BumpyCoverEnv(CoverEnvRegrasp):
"""A variation on the cover regrasp environment where some blocks are
'bumpy', as indicated by a new feature of blocks.
39 changes: 7 additions & 32 deletions predicators/envs/exit_garage.py
@@ -51,7 +51,7 @@ class ExitGarageEnv(BaseEnv):
robot_starting_y: ClassVar[float] = 0.8
obstacle_area_left_padding: ClassVar[float] = 0.4
obstacle_area_right_padding: ClassVar[float] = 0.1
obstacle_area_vertical_padding: ClassVar[float] = 0.1
obstacle_area_vertical_padding: ClassVar[float] = 0.05
car_starting_x: ClassVar[float] = 0.15
car_starting_y: ClassVar[float] = 0.3

@@ -64,7 +64,7 @@ class ExitGarageEnv(BaseEnv):
_robot_type = Type("robot", ["x", "y", "carrying"]) # carrying: bool
_obstacle_type = Type("obstacle", ["x", "y", "carried"]) # carried: bool
# Convenience type for storage area, storing number of obstacles in it
# This is used in the StoreObstacle option to calculate where to place the
# This is used in the ClearObstacle option to calculate where to place the
# a new obstacle in the storage area.
_storage_type = Type("storage", ["num_stored"])

@@ -73,12 +73,6 @@ def __init__(self, use_gui: bool = True) -> None:
# Predicates
self._CarHasExited = Predicate("CarHasExited", [self._car_type],
self._CarHasExited_holds)
self._CarryingObstacle = Predicate(
"CarryingObstacle", [self._robot_type, self._obstacle_type],
self._CarryingObstacle_holds)
self._NotCarryingObstacle = Predicate("NotCarryingObstacle",
[self._robot_type],
self._NotCarryingObstacle_holds)
self._ObstacleCleared = Predicate("ObstacleCleared",
[self._obstacle_type],
self._ObstacleCleared_holds)
@@ -150,8 +144,7 @@ def simulate(self, state: State, action: Action) -> State:
else:
# Place the current obstacle if in storage area and there is
# no collision caused by doing so
if ry > 1.0 - self.storage_area_height and not \
self._placed_object_collides(state, rx, ry):
if ry > 1.0 - self.storage_area_height:
next_state.set(carried_obstacle, "x", rx)
next_state.set(carried_obstacle, "y", ry)
next_state.set(carried_obstacle, "carried", 0)
@@ -179,9 +172,7 @@ def _generate_test_tasks(self) -> List[EnvironmentTask]:
@property
def predicates(self) -> Set[Predicate]:
return {
self._CarHasExited, self._CarryingObstacle,
self._NotCarryingObstacle, self._ObstacleCleared,
self._ObstacleNotCleared
self._CarHasExited, self._ObstacleCleared, self._ObstacleNotCleared
}

@property
@@ -232,6 +223,7 @@ def render_state_plt(
self._exit_geom.plot(ax, color=exit_color)

# Draw obstacles
carried_obstacle_geom: Optional[utils.Circle] = None
for obstacle in state.get_objects(self._obstacle_type):
if state.get(obstacle, "carried") == 1:
# Obstacle is being carried, so draw it under the robot instead
@@ -240,11 +232,12 @@
robot_y = state.get(self._robot, "y")
carried_obstacle_geom = utils.Circle(robot_x, robot_y,
self.obstacle_radius)
carried_obstacle_geom.plot(ax, color=carried_color)
else:
# Obstacle is not being carried, just draw normally
obstacle_geom = self._object_to_geom(obstacle, state)
obstacle_geom.plot(ax, color=obstacle_color)
if carried_obstacle_geom:
carried_obstacle_geom.plot(ax, color=carried_color)

# Draw robot
robot_geom = self._object_to_geom(self._robot, state)
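
The deferred plot call above means the carried obstacle is drawn after the loop over stored obstacles, so it ends up on top of anything it overlaps. A small standalone matplotlib sketch of the draw-order behavior this relies on (colors and radii are hypothetical, and it uses plain matplotlib patches rather than the repository's utils.Circle):

```python
import matplotlib.pyplot as plt
import matplotlib.patches as patches

fig, ax = plt.subplots()
# With equal zorder, artists added later are drawn on top, so plotting the
# "carried" circle last makes it occlude the stored obstacle it overlaps.
ax.add_patch(patches.Circle((0.5, 0.5), 0.2, color="gray"))  # stored obstacle
ax.add_patch(patches.Circle((0.6, 0.5), 0.2, color="red"))   # carried obstacle, drawn last
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.show()
```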
@@ -333,18 +326,6 @@ def _CarHasExited_holds(self, state: State,
car_geom = self._object_to_geom(car, state)
return car_geom.intersects(self._exit_geom)

def _CarryingObstacle_holds(self, state: State,
objects: Sequence[Object]) -> bool:
robot, obstacle = objects
robot_carrying_something = state.get(robot, "carrying") == 1
obstacle_is_carried = state.get(obstacle, "carried") == 1
return robot_carrying_something and obstacle_is_carried

def _NotCarryingObstacle_holds(self, state: State,
objects: Sequence[Object]) -> bool:
robot, = objects
return state.get(robot, "carrying") == 0

def _ObstacleCleared_holds(self, state: State,
objects: Sequence[Object]) -> bool:
obstacle, = objects
@@ -398,12 +379,6 @@ def get_car_collision_object(cls, state: State) -> Optional[Object]:
return obstacle
return None

@classmethod
def _placed_object_collides(cls, state: State, new_x: float,
new_y: float) -> bool:
"""Returns True if an obstacle placed at (new_x, new_y) would collide
with an existing obstacle in the storage area."""

@classmethod
def _robot_carrying_obstacle(cls, state: State) -> Optional[Object]:
"""If the robot is currently carrying an obstacle, return it; else
64 changes: 40 additions & 24 deletions predicators/explorers/active_sampler_explorer.py
@@ -5,15 +5,15 @@
import os
import re
import time
from typing import Callable, Dict, List, Optional, Set
from typing import Callable, Dict, Iterator, List, Optional, Set

import dill as pkl
import numpy as np
from gym.spaces import Box

from predicators import utils
from predicators.explorers.base_explorer import BaseExplorer
from predicators.planning import run_task_plan_once
from predicators.planning import PlanningFailure, run_task_plan_once
from predicators.settings import CFG
from predicators.structs import NSRT, ExplorationStrategy, GroundAtom, \
NSRTSampler, ParameterizedOption, Predicate, State, Task, Type, \
@@ -116,25 +116,47 @@ def _option_policy(state: State) -> _Option:
if current_policy is None:
# If the assigned goal hasn't yet been reached, try for it.
if not assigned_task_goal_reached:
goal = assigned_task.goal
logging.info(
f"[Explorer] Pursuing assigned task goal: {goal}")
logging.info("[Explorer] Pursuing assigned task goal")

def generate_goals() -> Iterator[Set[GroundAtom]]:
# Just a single goal.
yield assigned_task.goal

# Otherwise, practice.
else:
# If there are no ground NSRTs that we've tried so far,
# just wait until we have tried to solve some task.
if len(self._ground_op_hist) == 0:
raise utils.OptionExecutionFailure(
"No ground operators to practice yet")
next_practice_nsrt = self._get_practice_ground_nsrt()
logging.info("[Explorer] Pursuing NRST preconditions "
f"{next_practice_nsrt.name}"
f"{next_practice_nsrt.objects}")
goal = next_practice_nsrt.preconditions
task = Task(state, goal)
logging.info(f"[Explorer] Replanning to {task.goal}")
current_policy = self._get_option_policy_for_task(task)
logging.info("[Explorer] Pursuing NSRT preconditions")

def generate_goals() -> Iterator[Set[GroundAtom]]:
nonlocal next_practice_nsrt
# Generate goals sorted by their descending score.
for op in sorted(self._ground_op_hist,
key=self._score_ground_op,
reverse=True):
nsrt = [
n for n in self._nsrts if n.op == op.parent
][0]
# NOTE: setting nonlocal variable.
next_practice_nsrt = nsrt.ground(op.objects)
yield next_practice_nsrt.preconditions

# Try to plan to each goal until a task plan is found.
for goal in generate_goals():
task = Task(state, goal)
logging.info(f"[Explorer] Replanning to {task.goal}")
try:
current_policy = self._get_option_policy_for_task(task)
# Not covering this case because the intention of this
# explorer is to be used in environments where any goal can
# be reached from anywhere, but we still don't want to
# crash in case that assumption is not met.
except PlanningFailure: # pragma: no cover
continue
logging.info("[Explorer] Plan found.")
break
# Terminate early if no goal could be found.
else:
logging.info("[Explorer] No reachable goal found.")
raise utils.RequestActPolicyFailure("Failed to find goal.")
# Query the current policy.
assert current_policy is not None
try:
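
The new control flow above lazily generates candidate goals (the assigned task goal, or NSRT preconditions ranked by descending score), commits to the first one that task planning can actually solve, and falls back with a failure only if the generator is exhausted. A stripped-down sketch of that generate-then-try pattern is below; `plan_to_goal`, `candidate_goals`, and the local `PlanningFailure` class are hypothetical stand-ins for the explorer's planner call, goal generator, and the real exception imported from predicators.planning.

```python
from typing import Callable, Iterable, Optional


class PlanningFailure(Exception):
    """Stand-in for the planner's failure exception."""


def choose_reachable_goal(
        candidate_goals: Iterable[frozenset],
        plan_to_goal: Callable[[frozenset], list]) -> Optional[tuple]:
    """Return (goal, plan) for the first plannable candidate goal, or None if
    every candidate fails -- mirroring the for/else logic in the explorer."""
    for goal in candidate_goals:
        try:
            plan = plan_to_goal(goal)
        except PlanningFailure:
            continue  # This goal is unreachable; try the next candidate.
        return goal, plan
    # The explorer's for-loop "else" branch corresponds to reaching this point:
    # no candidate goal was reachable.
    return None
```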
@@ -224,12 +246,6 @@ def _update_ground_op_hist(self, state: State) -> None:
with open(f"{prefix}{datapoint_id}.data", "wb") as f:
pkl.dump(data, f)

def _get_practice_ground_nsrt(self) -> _GroundNSRT:
best_op = max(self._ground_op_hist, key=self._score_ground_op)
logging.info(f"[Explorer] Practicing {best_op.name}{best_op.objects}")
nsrt = [n for n in self._nsrts if n.op == best_op.parent][0]
return nsrt.ground(best_op.objects)

def _get_option_policy_for_task(self,
task: Task) -> Callable[[State], _Option]:
# Run task planning and then greedily execute.
9 changes: 5 additions & 4 deletions predicators/gnn/gnn.py
@@ -45,18 +45,19 @@ def _aggregation_func(graph: Dict) -> Tuple[torch.Tensor, Array]:

def _prepare_receiver_matrix(graph: Dict) -> torch.Tensor:
num_nodes = graph['nodes'].size()[0]
columns = torch.arange(0, num_nodes).long()
columns = torch.arange(0, num_nodes).long().to(graph['nodes'].device)
rec_m = graph['receivers'].view(-1)[:, None] == columns
return rec_m.float()


def _aggregate_globals(graph: Dict, global_node_idxs: Array,
global_edge_idxs: Array) -> torch.Tensor:
num_graphs = graph['globals'].size()[0]
columns = torch.arange(0, num_graphs).long()
device = graph['globals'].device
columns = torch.arange(0, num_graphs).long().to(device)

node_idxs = torch.LongTensor(global_node_idxs)[:, None]
edge_idxs = torch.LongTensor(global_edge_idxs)[:, None]
node_idxs = torch.LongTensor(global_node_idxs)[:, None].to(device)
edge_idxs = torch.LongTensor(global_edge_idxs)[:, None].to(device)

nodes_agg = torch.mm(graph['nodes'].t(),
(node_idxs == columns).float()).t()
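
The changes above make the index tensors follow the device of the graph tensors, so the same code paths work whether the graphs live on CPU or GPU. A minimal sketch of the device-alignment idiom, assuming PyTorch is installed; the helper below is illustrative and is not the repository's `_prepare_receiver_matrix`:

```python
import torch


def receiver_matrix(receivers: torch.Tensor, num_nodes: int) -> torch.Tensor:
    """Build a one-hot receiver matrix on the same device as `receivers`,
    so CPU and GPU inputs both work without explicit .cuda() calls."""
    columns = torch.arange(num_nodes, device=receivers.device)
    return (receivers.view(-1)[:, None] == columns).float()


# Works on CPU...
rec_cpu = receiver_matrix(torch.tensor([0, 2, 1]), num_nodes=3)
# ...and on GPU, if one is available.
if torch.cuda.is_available():
    rec_gpu = receiver_matrix(torch.tensor([0, 2, 1], device="cuda"), num_nodes=3)
```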