Source code for torchrl.data.map.tree
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations
import weakref
from collections import deque
from typing import Any, Callable, Dict, List, Literal, Tuple
import torch
from tensordict import (
merge_tensordicts,
NestedKey,
TensorClass,
TensorDict,
TensorDictBase,
unravel_key,
)
from torchrl.data.map.tdstorage import TensorDictMap
from torchrl.data.map.utils import _plot_plotly_box, _plot_plotly_tree
from torchrl.data.replay_buffers.storages import ListStorage
from torchrl.data.tensor_specs import Composite
from torchrl.envs.common import EnvBase
[docs]class Tree(TensorClass["nocast"]):
"""Representation of a single MCTS (Monte Carlo Tree Search) Tree.
This class encapsulates the data and behavior of a tree node in an MCTS algorithm.
It includes attributes for storing information about the node, such as its children,
visit count, and rollout data. Methods are provided for traversing the tree,
computing statistics, and visualizing the tree structure.
It is somewhat indistinguishable from a node or a vertex - we use the term "Tree" when talking about
a node with children, "node" or "vertex" when talking about a place in the tree where a branching occurs.
A node in the tree is defined primarily by its ``hash`` value. Usually, a ``hash`` is determined by a unique
combination of state (or observation) and action. If one observation (found in the ``node`` attribute) has more than
one action associated, each branch will be stored in the ``subtree`` attribute as a stack of ``Tree`` instances.
Attributes:
count (int): The number of visits to this node.
index (torch.Tensor): Indices of the child nodes in the data map.
hash (torch.Tensor): A hash value for this node.
It may be the case that ``hash`` is ``None`` in the specific case where the root of the tree
has more than one action associated. In that case, each subtree branch will have a different action
associated and a hash correspoding to the ``(observation, action)`` pair.
node_id (int): A unique identifier for this node.
rollout (TensorDict): Rollout data following the observation encoded in this node, in a TED format.
If there are multiple actions taken at this node, subtrees are stored in the corresponding
entry. Rollouts can be reconstructed using the :meth:`~.rollout_from_path` method.
node (TensorDict): Data defining this node (e.g., observations) before the next branching.
Entries usually matches the ``in_keys`` in ``MCTSForest.node_map``.
subtree (Tree): A stack of subtrees produced when actions are taken.
num_children (int): The number of child nodes (read-only).
is_terminal (bool): whether the tree has children nodes (read-only).
If the tree is compact, ``is_terminal == True`` means that there are more than one child node in
``self.subtree``.
Methods:
__contains__: Whether another tree can be found in the tree.
vertices: Returns a dictionary containing all vertices in the tree. Keys must be paths, ids or hashes.
num_vertices: Returns the total number of vertices in the tree, with or without duplicates.
edges: Returns a list of edges in the tree.
valid_paths: Yields all valid paths in the tree.
max_length: Returns the maximum length of any path in the tree.
rollout_from_path: Reconstructs a rollout from a given path.
plot: Visualizes the tree using a specified backend and figure type.
get_node_by_id: returns the vertex given by its id in the tree.
get_node_by_hash: returns the vertex given by its hash in the forest.
"""
count: int | torch.Tensor = None
wins: int | torch.Tensor = None
index: torch.Tensor | None = None
# The hash is None if the node has more than one action associated
hash: int | None = None
node_id: int | None = None
# rollout following the observation encoded in node, in a TorchRL (TED) format
rollout: TensorDict | None = None
# The data specifying the node (typically an observation or a set of observations)
node_data: TensorDict | None = None
# Stack of subtrees. A subtree is produced when an action is taken.
subtree: "Tree" = None
# weakrefs to the parent(s) of the node
_parent: weakref.ref | List[weakref.ref] | None = None
# Specs: contains information such as action or observation keys and spaces.
# If present, they should be structured like env specs are:
# Composite(input_spec=Composite(full_state_spec=..., full_action_spec=...),
# output_spec=Composite(full_observation_spec=..., full_reward_spec=..., full_done_spec=...))
# where every leaf component is optional.
specs: Composite | None = None
[docs] @classmethod
def make_node(
cls,
data: TensorDictBase,
*,
device: torch.device | None = None,
batch_size: torch.Size | None = None,
specs: Composite | None = None,
) -> Tree:
"""Creates a new node given some data."""
if "next" in data.keys():
rollout = data
if not rollout.ndim:
rollout = rollout.unsqueeze(0)
subtree = TensorDict.lazy_stack([cls.make_node(data["next"][..., -1])])
else:
rollout = None
subtree = None
if device is None:
device = data.device
return cls(
count=torch.zeros(()),
wins=torch.zeros(()),
node=data.exclude("action", "next"),
rollout=rollout,
subtree=subtree,
device=device,
batch_size=batch_size,
)
# Specs
@property
def full_observation_spec(self):
"""The observation spec of the tree.
This is an alias for `Tree.specs['output_spec', 'full_observation_spec']`.
"""
return self.specs["output_spec", "full_observation_spec"]
@property
def full_reward_spec(self):
"""The reward spec of the tree.
This is an alias for `Tree.specs['output_spec', 'full_reward_spec']`.
"""
return self.specs["output_spec", "full_reward_spec"]
@property
def full_done_spec(self):
"""The done spec of the tree.
This is an alias for `Tree.specs['output_spec', 'full_done_spec']`.
"""
return self.specs["output_spec", "full_done_spec"]
@property
def full_state_spec(self):
"""The state spec of the tree.
This is an alias for `Tree.specs['input_spec', 'full_state_spec']`.
"""
return self.specs["input_spec", "full_state_spec"]
@property
def full_action_spec(self):
"""The action spec of the tree.
This is an alias for `Tree.specs['input_spec', 'full_action_spec']`.
"""
return self.specs["input_spec", "full_action_spec"]
@property
def selected_actions(self) -> torch.Tensor | TensorDictBase | None:
"""Returns a tensor containing all the selected actions branching out from this node."""
if self.subtree is None:
return None
return self.subtree.rollout[..., 0]["action"]
@property
def prev_action(self) -> torch.Tensor | TensorDictBase | None:
"""The action undertaken just before this node's observation was generated.
Returns:
a tensor, tensordict or None if the node has no parent.
.. seealso:: This will be equal to :class:`~torchrl.data.Tree.branching_action` whenever the rollout data contains a single step.
.. seealso:: :class:`All actions associated with a given node (or observation) in the tree <~torchrl.data.Tree.selected_action>`.
"""
if self.rollout is None:
return None
return self.rollout[..., -1]["action"]
@property
def branching_action(self) -> torch.Tensor | TensorDictBase | None:
"""Returns the action that branched out to this particular node.
Returns:
a tensor, tensordict or None if the node has no parent.
.. seealso:: This will be equal to :class:`~torchrl.data.Tree.prev_action` whenever the rollout data contains a single step.
.. seealso:: :class:`All actions associated with a given node (or observation) in the tree <~torchrl.data.Tree.selected_action>`.
"""
if self.rollout is None:
return None
return self.rollout[..., 0]["action"]
@property
def node_observation(self) -> torch.Tensor | TensorDictBase:
"""Returns the observation associated with this particular node.
This is the observation (or bag of observations) that defines the node before a branching occurs.
If the node contains a :attr:`~.rollout` attribute, the node observation is typically identical to the
observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.
If more than one observation key is associated with the tree specs, a :class:`~tensordict.TensorDict` instance
is returned instead.
For a more consistent representation, see :attr:`~.node_observations`.
"""
# TODO: implement specs
return self.node_data["observation"]
@property
def node_observations(self) -> torch.Tensor | TensorDictBase:
"""Returns the observations associated with this particular node in a TensorDict format.
This is the observation (or bag of observations) that defines the node before a branching occurs.
If the node contains a :attr:`~.rollout` attribute, the node observation is typically identical to the
observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.
If more than one observation key is associated with the tree specs, a :class:`~tensordict.TensorDict` instance
is returned instead.
For a more consistent representation, see :attr:`~.node_observations`.
"""
# TODO: implement specs
return self.node_data.select("observation")
@property
def visits(self) -> int | torch.Tensor:
"""Returns the number of visits associated with this particular node.
This is an alias for the :attr:`~.count` attribute.
"""
return self.count
@visits.setter
def visits(self, count):
self.count = count
def __setattr__(self, name: str, value: Any) -> None:
if name == "subtree" and value is not None:
wr = weakref.ref(self._tensordict)
if value._parent is None:
value._parent = wr
elif isinstance(value._parent, list):
value._parent.append(wr)
else:
value._parent = [value._parent, wr]
return super().__setattr__(name, value)
@property
def parent(self) -> Tree | None:
"""The parent of the node.
If the node has a parent and this object is still present in the python workspace, it will be returned by this
property.
For re-branching trees, this property may return a stack of trees where every index of the stack corresponds to
a different parent.
.. note:: the ``parent`` attribute will match in content but not in identity: the tensorclass object is recustructed
using the same tensors (i.e., tensors that point to the same memory locations).
Returns:
A ``Tree`` containing the parent data or ``None`` if the parent data is out of scope or the node is the root.
"""
parent = self._parent
if parent is not None:
# Check that all parents match
queue = [parent]
def maybe_flatten_list(maybe_nested_list):
if isinstance(maybe_nested_list, list):
for p in maybe_nested_list:
if isinstance(p, list):
queue.append(p)
else:
yield p()
else:
yield maybe_nested_list()
parent_result = None
while len(queue):
local_result = None
for r in maybe_flatten_list(queue.pop()):
if local_result is None:
local_result = r
elif r is not None and r is not local_result:
if isinstance(local_result, list):
local_result.append(r)
else:
local_result = [local_result, r]
if local_result is None:
continue
# replicate logic at macro level
if parent_result is None:
parent_result = local_result
else:
if isinstance(local_result, list):
local_result = [
r for r in local_result if r not in parent_result
]
else:
local_result = [local_result]
if isinstance(parent_result, list):
parent_result.extend(local_result)
else:
parent_result = [parent_result, *local_result]
if isinstance(parent_result, list):
return TensorDict.lazy_stack(
[self._from_tensordict(r) for r in parent_result]
)
return self._from_tensordict(parent_result)
@property
def num_children(self) -> int:
"""Number of children of this node.
Equates to the number of elements in the ``self.subtree`` stack.
"""
return len(self.subtree) if self.subtree is not None else 0
@property
def is_terminal(self) -> bool | torch.Tensor:
"""Returns True if the tree has no children nodes."""
if self.rollout is not None:
return self.rollout[..., -1]["next", "done"].squeeze(-1)
# If there is no rollout, there is no preceding data - either this is a root or it's a floating node.
# In either case, we assume that the node is not terminal.
return False
[docs] def fully_expanded(self, env: EnvBase) -> bool:
"""Returns True if the number of children is equal to the environment cardinality."""
cardinality = env.cardinality(self.node_data)
num_actions = self.num_children
return cardinality == num_actions
[docs] def get_vertex_by_id(self, id: int) -> Tree:
"""Goes through the tree and returns the node corresponding the given id."""
q = deque()
q.append(self)
while len(q):
tree = q.popleft()
if tree.node_id == id:
return tree
if tree.subtree is not None:
q.extend(tree.subtree.unbind(0))
raise ValueError(f"Node with id {id} not found.")
[docs] def get_vertex_by_hash(self, hash: int) -> Tree:
"""Goes through the tree and returns the node corresponding the given hash."""
q = deque()
q.append(self)
while len(q):
tree = q.popleft()
if tree.hash == hash:
return tree
if tree.subtree is not None:
q.extend(tree.subtree.unbind(0))
raise ValueError(f"Node with hash {hash} not found.")
def __contains__(self, other: Tree) -> bool:
hash = other.hash
for vertex in self.vertices().values():
if vertex.hash == hash:
return True
else:
return False
[docs] def vertices(
self, *, key_type: Literal["id", "hash", "path"] = "hash"
) -> Dict[int | Tuple[int], Tree]:
"""Returns a map containing the vertices of the Tree.
Keyword args:
key_type (Literal["id", "hash", "path"], optional): Specifies the type of key to use for the vertices.
- "id": Use the vertex ID as the key.
- "hash": Use a hash of the vertex as the key.
- "path": Use the path to the vertex as the key. This may lead to a dictionary with a longer length than
when ``"id"`` or ``"hash"`` are used as the same node may be part of multiple trajectories.
Defaults to ``"hash"``.
Defaults to an empty string, which may imply a default behavior.
Returns:
Dict[int | Tuple[int], Tree]: A dictionary mapping keys to Tree vertices.
"""
memo = set()
result = {}
q = deque()
cur_path = ()
q.append((self, cur_path))
use_hash = key_type == "hash"
use_id = key_type == "id"
use_path = key_type == "path"
while len(q):
tree, cur_path = q.popleft()
h = tree.hash
if h in memo and not use_path:
continue
memo.add(h)
if use_path:
result[cur_path] = tree
elif use_id:
result[tree.node_id] = tree
elif use_hash:
result[tree.node_id] = tree
else:
raise ValueError(
f"key_type must be either 'hash', 'id' or 'path'. Got {key_type}."
)
n = int(tree.num_children)
for i in range(n):
cur_path_tree = cur_path + (i,)
q.append((tree.subtree[i], cur_path_tree))
return result
[docs] def num_vertices(self, *, count_repeat: bool = False) -> int:
"""Returns the number of unique vertices in the Tree.
Keyword Args:
count_repeat (bool, optional): Determines whether to count repeated vertices.
- If ``False``, counts each unique vertex only once.
- If ``True``, counts vertices multiple times if they appear in different paths.
Defaults to ``False``.
Returns:
int: The number of unique vertices in the Tree.
"""
return len(
{
v.node_id
for v in self.vertices(
key_type="hash" if not count_repeat else "path"
).values()
}
)
[docs] def edges(self) -> List[Tuple[int, int]]:
"""Retrieves a list of edges in the tree.
Each edge is represented as a tuple of two node IDs: the parent node ID and the child node ID.
The tree is traversed using Breadth-First Search (BFS) to ensure all edges are visited.
Returns:
A list of tuples, where each tuple contains a parent node ID and a child node ID.
"""
result = []
q = deque()
parent = self.node_id
q.append((self, parent))
while len(q):
tree, parent = q.popleft()
n = int(tree.num_children)
for i in range(n):
node = tree.subtree[i]
node_id = node.node_id
result.append((parent, node_id))
q.append((node, node_id))
return result
[docs] def valid_paths(self):
"""Generates all valid paths in the tree.
A valid path is a sequence of child indices that starts at the root node and ends at a leaf node.
Each path is represented as a tuple of integers, where each integer corresponds to the index of a child node.
Yields:
tuple: A valid path in the tree.
"""
# Initialize a queue with the current tree node and an empty path
q = deque()
cur_path = ()
q.append((self, cur_path))
# Perform BFS traversal of the tree
while len(q):
# Dequeue the next tree node and its current path
tree, cur_path = q.popleft()
# Get the number of child nodes
n = int(tree.num_children)
# If this is a leaf node, yield the current path
if not n:
yield cur_path
# Iterate over the child nodes
for i in range(n):
cur_path_tree = cur_path + (i,)
q.append((tree.subtree[i], cur_path_tree))
[docs] def max_length(self):
"""Returns the maximum length of all valid paths in the tree.
The length of a path is defined as the number of nodes in the path.
If the tree is empty, returns 0.
Returns:
int: The maximum length of all valid paths in the tree.
"""
lengths = tuple(len(path) for path in self.valid_paths())
if len(lengths) == 0:
return 0
elif len(lengths) == 1:
return lengths[0]
return max(*lengths)
[docs] def rollout_from_path(self, path: Tuple[int]) -> TensorDictBase | None:
"""Retrieves the rollout data along a given path in the tree.
The rollout data is concatenated along the last dimension (dim=-1) for each node in the path.
If no rollout data is found along the path, returns ``None``.
Args:
path: A tuple of integers representing the path in the tree.
Returns:
The concatenated rollout data along the path, or None if no data is found.
"""
r = self.rollout
tree = self
rollouts = []
if r is not None:
rollouts.append(r)
for i in path:
tree = tree.subtree[i]
r = tree.rollout
if r is not None:
rollouts.append(r)
if rollouts:
return torch.cat(rollouts, dim=-1)
@staticmethod
def _label(info: List[str], tree: "Tree", root=False):
labels = []
for key in info:
if key == "hash":
hash = tree.hash
if hash is not None:
hash = hash.item()
v = f"hash={hash}"
elif root:
v = f"{key}=None"
else:
v = f"{key}={tree.rollout[key].mean().item()}"
labels.append(v)
return ", ".join(labels)
[docs] def plot(
self: Tree,
backend: str = "plotly",
figure: str = "tree",
info: List[str] = None,
make_labels: Callable[[Any, ...], Any] | None = None,
):
"""Plots a visualization of the tree using the specified backend and figure type.
Args:
backend: The plotting backend to use. Currently only supports 'plotly'.
figure: The type of figure to plot. Can be either 'tree' or 'box'.
info: A list of additional information to include in the plot (not currently used).
make_labels: An optional function to generate custom labels for the plot.
Raises:
NotImplementedError: If an unsupported backend or figure type is specified.
"""
if backend == "plotly":
if figure == "box":
_plot_plotly_box(self)
return
elif figure == "tree":
_plot_plotly_tree(self, make_labels=make_labels)
return
else:
pass
raise NotImplementedError(
f"Unknown plotting backend {backend} with figure {figure}."
)
[docs]class MCTSForest:
"""A collection of MCTS trees.
.. warning:: This class is currently under active development. Expect frequent API changes.
The class is aimed at storing rollouts in a storage, and produce trees based on a given root
in that dataset.
Keyword Args:
data_map (TensorDictMap, optional): the storage to use to store the data
(observation, reward, states etc). If not provided, it is lazily
initialized using :meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair`
using the list of :attr:`observation_keys` and :attr:`action_keys` as ``in_keys``.
node_map (TensorDictMap, optional): a map from the observation space to the index space.
Internally, the node map is used to gather all possible branches coming out of
a given node. For example, if an observation has two associated actions and outcomes
in the data map, then the :attr:`node_map` will return a data structure containing the
two indices in the :attr:`data_map` that correspond to these two outcomes.
If not provided, it is lazily initialized using
:meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair` using the list of
:attr:`observation_keys` as ``in_keys`` and the :class:`~torchrl.data.QueryModule` as
``out_keys``.
max_size (int, optional): the size of the maps.
If not provided, defaults to ``data_map.max_size`` if this can be found, then
``node_map.max_size``. If none of these are provided, defaults to `1000`.
done_keys (list of NestedKey, optional): the done keys of the environment. If not provided,
defaults to ``("done", "terminated", "truncated")``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
action_keys (list of NestedKey, optional): the action keys of the environment. If not provided,
defaults to ``("action",)``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
reward_keys (list of NestedKey, optional): the reward keys of the environment. If not provided,
defaults to ``("reward",)``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
observation_keys (list of NestedKey, optional): the observation keys of the environment. If not provided,
defaults to ``("observation",)``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
excluded_keys (list of NestedKey, optional): a list of keys to exclude from the data storage.
consolidated (bool, optional): if ``True``, the data_map storage will be consolidated on disk.
Defaults to ``False``.
Examples:
>>> from torchrl.envs import GymEnv
>>> import torch
>>> from tensordict import TensorDict, LazyStackedTensorDict
>>> from torchrl.data import TensorDictMap, ListStorage
>>> from torchrl.data.map.tree import MCTSForest
>>>
>>> from torchrl.envs import PendulumEnv, CatTensors, UnsqueezeTransform, StepCounter
>>> # Create the MCTS Forest
>>> forest = MCTSForest()
>>> # Create an environment. We're using a stateless env to be able to query it at any given state (like an oracle)
>>> env = PendulumEnv()
>>> obs_keys = list(env.observation_spec.keys(True, True))
>>> state_keys = set(env.full_state_spec.keys(True, True)) - set(obs_keys)
>>> # Appending transforms to get an "observation" key that concatenates the observations together
>>> env = env.append_transform(
... UnsqueezeTransform(
... in_keys=obs_keys,
... out_keys=[("unsqueeze", key) for key in obs_keys],
... dim=-1
... )
... )
>>> env = env.append_transform(
... CatTensors([("unsqueeze", key) for key in obs_keys], "observation")
... )
>>> env = env.append_transform(StepCounter())
>>> env.set_seed(0)
>>> # Get a reset state, then make a rollout out of it
>>> reset_state = env.reset()
>>> rollout0 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
>>> # Append the rollout to the forest. We're removing the state entries for clarity
>>> rollout0 = rollout0.copy()
>>> rollout0.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
>>> forest.extend(rollout0)
>>> # The forest should have 6 elements (the length of the rollout)
>>> assert len(forest) == 6
>>> # Let's make another rollout from the same reset state
>>> rollout1 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
>>> rollout1.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
>>> forest.extend(rollout1)
>>> assert len(forest) == 12
>>> # Let's make another final rollout from an intermediate step in the second rollout
>>> rollout1b = env.rollout(6, auto_reset=False, tensordict=rollout1[3].exclude("next"))
>>> rollout1b.exclude(*state_keys, inplace=True)
>>> rollout1b.get("next").exclude(*state_keys, inplace=True)
>>> forest.extend(rollout1b)
>>> assert len(forest) == 18
>>> # Since we have 2 rollouts starting at the same state, our tree should have two
>>> # branches if we produce it from the reset entry. Take the state, and call `get_tree`:
>>> r = rollout0[0]
>>> # Let's get the compact tree that follows the initial reset. A compact tree is
>>> # a tree where nodes that have a single child are collapsed.
>>> tree = forest.get_tree(r)
>>> print(tree.max_length())
2
>>> print(list(tree.valid_paths()))
[(0,), (1, 0), (1, 1)]
>>> from tensordict import assert_close
>>> # We can manually rebuild the tree
>>> assert_close(
... rollout1,
... torch.cat([tree.subtree[1].rollout, tree.subtree[1].subtree[0].rollout]),
... intersection=True,
... )
True
>>> # Or we can rebuild it using the dedicated method
>>> assert_close(
... rollout1,
... tree.rollout_from_path((1, 0)),
... intersection=True,
... )
True
>>> tree.plot()
>>> tree = forest.get_tree(r, compact=False)
>>> print(tree.max_length())
9
>>> print(list(tree.valid_paths()))
[(0, 0, 0, 0, 0, 0), (1, 0, 0, 0, 0, 0), (1, 0, 0, 1, 0, 0, 0, 0, 0)]
>>> assert_close(
... rollout1,
... tree.rollout_from_path((1, 0, 0, 0, 0, 0)),
... intersection=True,
... )
True
"""
def __init__(
self,
*,
data_map: TensorDictMap | None = None,
node_map: TensorDictMap | None = None,
max_size: int | None = None,
done_keys: List[NestedKey] | None = None,
reward_keys: List[NestedKey] = None,
observation_keys: List[NestedKey] = None,
action_keys: List[NestedKey] = None,
excluded_keys: List[NestedKey] = None,
consolidated: bool | None = None,
):
self.data_map = data_map
self.node_map = node_map
if max_size is None:
if data_map is not None:
max_size = data_map.max_size
if max_size != getattr(node_map, "max_size", max_size):
raise ValueError(
f"Conflicting max_size: got data_map.max_size={data_map.max_size} and node_map.max_size={node_map.max_size}."
)
elif node_map is not None:
max_size = node_map.max_size
else:
max_size = None
elif data_map is not None and max_size != getattr(
data_map, "max_size", max_size
):
raise ValueError(
f"Conflicting max_size: got data_map.max_size={data_map.max_size} and max_size={max_size}."
)
elif node_map is not None and max_size != getattr(
node_map, "max_size", max_size
):
raise ValueError(
f"Conflicting max_size: got node_map.max_size={node_map.max_size} and max_size={max_size}."
)
self.max_size = max_size
self.done_keys = done_keys
self.action_keys = action_keys
self.reward_keys = reward_keys
self.observation_keys = observation_keys
self.excluded_keys = excluded_keys
self.consolidated = consolidated
@property
def done_keys(self) -> List[NestedKey]:
"""Done Keys.
Returns the keys used to indicate that an episode has ended.
The default done keys are "done", "terminated", and "truncated". These keys can be
used in the environment's output to signal the end of an episode.
Returns:
A list of strings representing the done keys.
"""
done_keys = getattr(self, "_done_keys", None)
if done_keys is None:
self._done_keys = done_keys = ["done", "terminated", "truncated"]
return done_keys
@done_keys.setter
def done_keys(self, value):
self._done_keys = _make_list_of_nestedkeys(value, "done_keys")
@property
def reward_keys(self) -> List[NestedKey]:
"""Reward Keys.
Returns the keys used to retrieve rewards from the environment's output.
The default reward key is "reward".
Returns:
A list of strings or tuples representing the reward keys.
"""
reward_keys = getattr(self, "_reward_keys", None)
if reward_keys is None:
self._reward_keys = reward_keys = ["reward"]
return reward_keys
@reward_keys.setter
def reward_keys(self, value):
self._reward_keys = _make_list_of_nestedkeys(value, "reward_keys")
@property
def action_keys(self) -> List[NestedKey]:
"""Action Keys.
Returns the keys used to retrieve actions from the environment's input.
The default action key is "action".
Returns:
A list of strings or tuples representing the action keys.
"""
action_keys = getattr(self, "_action_keys", None)
if action_keys is None:
self._action_keys = action_keys = ["action"]
return action_keys
@action_keys.setter
def action_keys(self, value):
self._action_keys = _make_list_of_nestedkeys(value, "action_keys")
@property
def observation_keys(self) -> List[NestedKey]:
"""Observation Keys.
Returns the keys used to retrieve observations from the environment's output.
The default observation key is "observation".
Returns:
A list of strings or tuples representing the observation keys.
"""
observation_keys = getattr(self, "_observation_keys", None)
if observation_keys is None:
self._observation_keys = observation_keys = ["observation"]
return observation_keys
@observation_keys.setter
def observation_keys(self, value):
self._observation_keys = _make_list_of_nestedkeys(value, "observation_keys")
@property
def excluded_keys(self) -> List[NestedKey] | None:
return self._excluded_keys
@excluded_keys.setter
def excluded_keys(self, value):
self._excluded_keys = _make_list_of_nestedkeys(value, "excluded_keys")
[docs] def get_keys_from_env(self, env: EnvBase):
"""Writes missing done, action and reward keys to the Forest given an environment.
Existing keys are not overwritten.
"""
if getattr(self, "_reward_keys", None) is None:
self.reward_keys = env.reward_keys
if getattr(self, "_done_keys", None) is None:
self.done_keys = env.done_keys
if getattr(self, "_action_keys", None) is None:
self.action_keys = env.action_keys
if getattr(self, "_observation_keys", None) is None:
self.observation_keys = env.observation_keys
@classmethod
def _write_fn_stack(cls, new, old=None):
# This function updates the old values by adding the new ones
# if and only if the new ones are not there.
# If the old value is not provided, we assume there are none and the
# `new` is just prepared.
# This involves unsqueezing the last dim (since we'll be stacking tensors
# and calling unique).
# The update involves calling cat along the last dim + unique
# which will keep only the new values that were unknown to
# the storage.
# We use this method to track all the indices that are associated with
# an observation. Every time a new index is obtained, it is stacked alongside
# the others.
if old is None:
# we unsqueeze the values to stack them along dim -1
result = new.apply(lambda x: x.unsqueeze(-1), filter_empty=False)
result.set(
"count", torch.ones(result.shape, dtype=torch.int, device=result.device)
)
else:
def cat(name, x, y):
if name == "count":
return x
if y.ndim < x.ndim:
y = y.unsqueeze(-1)
result = torch.cat([x, y], -1)
# Breaks on mps
if result.device.type == "mps":
result = result.cpu()
result = result.unique(dim=-1, sorted=False)
result = result.to("mps")
else:
result = result.unique(dim=-1, sorted=False)
return result
result = old.named_apply(cat, new, default=None)
result.set_("count", old.get("count") + 1)
return result
def _make_data_map(self, source, dest):
try:
kwargs = {}
if self.max_size is not None:
kwargs["max_size"] = self.max_size
self.data_map = TensorDictMap.from_tensordict_pair(
source,
dest,
in_keys=[*self.observation_keys, *self.action_keys],
consolidated=self.consolidated,
**kwargs,
)
if self.max_size is None:
self.max_size = self.data_map.max_size
except KeyError as err:
raise KeyError(
"A KeyError occurred during data map creation. This could be due to the wrong setting of a key in the MCTSForest constructor. Scroll up for more info."
) from err
def _make_node_map(self, source, dest):
kwargs = {}
if self.max_size is not None:
kwargs["max_size"] = self.max_size
self.node_map = TensorDictMap.from_tensordict_pair(
source,
dest,
in_keys=[*self.observation_keys],
out_keys=[
*self.data_map.query_module.out_keys, # hash and index
# *self.action_keys,
# *[("next", rk) for rk in self.reward_keys],
"count",
],
storage_constructor=ListStorage,
collate_fn=TensorDict.lazy_stack,
write_fn=self._write_fn_stack,
**kwargs,
)
if self.max_size is None:
self.max_size = self.data_map.max_size
def extend(self, rollout, *, return_node: bool = False):
source, dest = (
rollout.exclude("next").copy(),
rollout.select("next", *self.action_keys).copy(),
)
if self.excluded_keys is not None:
dest = dest.exclude(*self.excluded_keys, inplace=True)
dest.get("next").exclude(*self.excluded_keys, inplace=True)
if self.data_map is None:
self._make_data_map(source, dest)
# We need to set the action somewhere to keep track of what action lead to what child
# # Set the action in the 'next'
# dest[1:] = source[:-1].exclude(*self.done_keys)
# Add ('observation', 'action') -> ('next, observation')
self.data_map[source] = dest
value = source
if self.node_map is None:
self._make_node_map(source, dest)
# map ('observation',) -> ('indices',)
self.node_map[source] = TensorDict.lazy_stack(value.unbind(0))
if return_node:
return self.get_tree(rollout)
def add(self, step, *, return_node: bool = False):
source, dest = (
step.exclude("next").copy(),
step.select("next", *self.action_keys).copy(),
)
if self.data_map is None:
self._make_data_map(source, dest)
# We need to set the action somewhere to keep track of what action lead to what child
# # Set the action in the 'next'
# dest[1:] = source[:-1].exclude(*self.done_keys)
# Add ('observation', 'action') -> ('next, observation')
self.data_map[source] = dest
value = source
if self.node_map is None:
self._make_node_map(source, dest)
# map ('observation',) -> ('indices',)
self.node_map[source] = value
if return_node:
return self.get_tree(step)
def get_child(self, root: TensorDictBase) -> TensorDictBase:
return self.data_map[root]
def _make_local_tree(
self,
root: TensorDictBase,
index: torch.Tensor | None = None,
compact: bool = True,
) -> Tuple[Tree, torch.Tensor | None, torch.Tensor | None]:
root = root.select(*self.node_map.in_keys)
node_meta = None
if root in self.node_map:
node_meta = self.node_map[root]
if index is None:
node_meta = self.node_map[root]
index = node_meta["_index"]
elif index is not None:
pass
else:
return None
steps = []
while index.numel() <= 1:
index = index.squeeze()
d = self.data_map.storage[index]
# Rebuild rollout step
steps.append(merge_tensordicts(d, root, callback_exist=lambda *x: None))
d = d["next"]
if d in self.node_map:
root = d.select(*self.node_map.in_keys)
node_meta = self.node_map[root]
index = node_meta["_index"]
if not compact:
break
else:
# If the root is provided and not gathered from the storage, it could be that its
# device doesn't match the data_map storage device.
root = steps[-1]["next"].select(*self.node_map.in_keys)
device = getattr(self.data_map.storage, "device", None)
if root.device != device:
if device is not None:
root = root.to(self.data_map.storage.device)
else:
root.clear_device_()
index = None
break
rollout = None
if steps:
rollout = torch.stack(steps, -1)
# Will be populated later
hash = node_meta["_hash"]
return (
Tree(
rollout=rollout,
count=torch.zeros((), dtype=torch.int32),
wins=torch.zeros(()),
node_data=root,
index=index,
hash=None,
# We do this to avoid raising an exception as rollout and subtree must be provided together
subtree=None,
),
index,
hash,
)
# The recursive implementation is slower and less compatible with compile
# def _make_tree(self, root: TensorDictBase, index: torch.Tensor|None=None)->Tree:
# tree, indices = self._make_local_tree(root, index=index)
# subtrees = []
# if indices is not None:
# for i in indices:
# subtree = self._make_tree(tree.node, index=i)
# subtrees.append(subtree)
# subtrees = TensorDict.lazy_stack(subtrees)
# tree.subtree = subtrees
# return tree
def _make_tree_iter(
self, root, index=None, max_depth: int | None = None, compact: bool = True
):
q = deque()
memo = {}
tree, indices, hash = self._make_local_tree(root, index=index, compact=compact)
tree.node_id = 0
result = tree
depth = 0
counter = 1
if indices is not None:
q.append((tree, indices, hash, depth))
while len(q):
tree, indices, hash, depth = q.popleft()
extend = max_depth is None or depth < max_depth
subtrees = []
for i, h in zip(indices, hash):
# TODO: remove the .item()
h = h.item()
subtree, subtree_indices, subtree_hash = memo.get(h, (None,) * 3)
if subtree is None:
subtree, subtree_indices, subtree_hash = self._make_local_tree(
tree.node_data,
index=i,
compact=compact,
)
subtree.node_id = counter
counter += 1
subtree.hash = h
memo[h] = (subtree, subtree_indices, subtree_hash)
else:
# We just need to save the two (or more) rollouts
subtree_bis, _, _ = self._make_local_tree(
tree.node_data,
index=i,
compact=compact,
)
if subtree.rollout.ndim == subtree_bis.rollout.ndim:
subtree.rollout = TensorDict.stack(
[subtree.rollout, subtree_bis.rollout]
)
else:
subtree.rollout = TensorDict.stack(
[*subtree.rollout, subtree_bis.rollout]
)
subtrees.append(subtree)
if extend and subtree_indices is not None:
q.append((subtree, subtree_indices, subtree_hash, depth + 1))
subtrees = TensorDict.lazy_stack(subtrees)
tree.subtree = subtrees
return result
def get_tree(
self,
root,
*,
max_depth: int | None = None,
compact: bool = True,
) -> Tree:
return self._make_tree_iter(root=root, max_depth=max_depth, compact=compact)
@classmethod
def valid_paths(cls, tree: Tree):
yield from tree.valid_paths()
def __len__(self):
return len(self.data_map)
def _make_list_of_nestedkeys(obj: Any, attr: str) -> List[NestedKey]:
if obj is None:
return obj
if isinstance(obj, (str, tuple)):
return [obj]
if not isinstance(obj, list):
raise ValueError(
f"{attr} must be a list of NestedKeys or a NestedKey, got {obj}."
)
return [unravel_key(key) for key in obj]