Source code for torchrl.data.replay_buffers.replay_buffers
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations
import collections
import contextlib
import json
import multiprocessing
import textwrap
import threading
import warnings
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Any, Callable, Dict, List, Sequence, Tuple, Union
import numpy as np
import torch
try:
from torch.compiler import is_dynamo_compiling
except ImportError:
from torch._dynamo import is_compiling as is_dynamo_compiling
from tensordict import (
is_tensor_collection,
is_tensorclass,
LazyStackedTensorDict,
NestedKey,
TensorDict,
TensorDictBase,
unravel_key,
)
from tensordict.nn.utils import _set_dispatch_td_nn_modules
from tensordict.utils import expand_as_right, expand_right
from torch import Tensor
from torch.utils._pytree import tree_map
from torchrl._utils import _make_ordinal_device, accept_remote_rref_udf_invocation
from torchrl.data.replay_buffers.samplers import (
PrioritizedSampler,
RandomSampler,
Sampler,
SamplerEnsemble,
)
from torchrl.data.replay_buffers.storages import (
_get_default_collate,
_stack_anything,
ListStorage,
Storage,
StorageEnsemble,
)
from torchrl.data.replay_buffers.utils import (
_is_int,
_reduce,
_to_numpy,
_to_torch,
INT_CLASSES,
pin_memory_output,
)
from torchrl.data.replay_buffers.writers import (
RoundRobinWriter,
TensorDictRoundRobinWriter,
Writer,
WriterEnsemble,
)
from torchrl.data.utils import DEVICE_TYPING
from torchrl.envs.transforms.transforms import _InvertTransform
[docs]class ReplayBuffer:
"""A generic, composable replay buffer class.
Keyword Args:
storage (Storage, optional): the storage to be used. If none is provided
a default :class:`~torchrl.data.replay_buffers.ListStorage` with
``max_size`` of ``1_000`` will be created.
sampler (Sampler, optional): the sampler to be used. If none is provided,
a default :class:`~torchrl.data.replay_buffers.RandomSampler`
will be used.
writer (Writer, optional): the writer to be used. If none is provided
a default :class:`~torchrl.data.replay_buffers.RoundRobinWriter`
will be used.
collate_fn (callable, optional): merges a list of samples to form a
mini-batch of Tensor(s)/outputs. Used when using batched
loading from a map-style dataset. The default value will be decided
based on the storage type.
pin_memory (bool): whether pin_memory() should be called on the rb
samples.
prefetch (int, optional): number of next batches to be prefetched
using multithreading. Defaults to None (no prefetching).
transform (Transform, optional): Transform to be executed when
:meth:`~.sample` is called.
To chain transforms use the :class:`~torchrl.envs.Compose` class.
Transforms should be used with :class:`tensordict.TensorDict`
content. A generic callable can also be passed if the replay buffer
is used with PyTree structures (see example below).
batch_size (int, optional): the batch size to be used when sample() is
called.
.. note::
The batch-size can be specified at construction time via the
``batch_size`` argument, or at sampling time. The former should
be preferred whenever the batch-size is consistent across the
experiment. If the batch-size is likely to change, it can be
passed to the :meth:`~.sample` method. This option is
incompatible with prefetching (since this requires to know the
batch-size in advance) as well as with samplers that have a
``drop_last`` argument.
dim_extend (int, optional): indicates the dim to consider for
extension when calling :meth:`~.extend`. Defaults to ``storage.ndim-1``.
When using ``dim_extend > 0``, we recommend using the ``ndim``
argument in the storage instantiation if that argument is
available, to let storages know that the data is
multi-dimensional and keep consistent notions of storage-capacity
and batch-size during sampling.
.. note:: This argument has no effect on :meth:`~.add` and
therefore should be used with caution when both :meth:`~.add`
and :meth:`~.extend` are used in a codebase. For example:
>>> data = torch.zeros(3, 4)
>>> rb = ReplayBuffer(
... storage=LazyTensorStorage(10, ndim=2),
... dim_extend=1)
>>> # these two approaches are equivalent:
>>> for d in data.unbind(1):
... rb.add(d)
>>> rb.extend(data)
generator (torch.Generator, optional): a generator to use for sampling.
Using a dedicated generator for the replay buffer can allow a fine-grained control
over seeding, for instance keeping the global seed different but the RB seed identical
for distributed jobs.
Defaults to ``None`` (global default generator).
.. warning:: As of now, the generator has no effect on the transforms.
shared (bool, optional): whether the buffer will be shared using multiprocessing or not.
Defaults to ``False``.
compilable (bool, optional): whether the writer is compilable.
If ``True``, the writer cannot be shared between multiple processes.
Defaults to ``False``.
Examples:
>>> import torch
>>>
>>> from torchrl.data import ReplayBuffer, ListStorage
>>>
>>> torch.manual_seed(0)
>>> rb = ReplayBuffer(
... storage=ListStorage(max_size=1000),
... batch_size=5,
... )
>>> # populate the replay buffer and get the item indices
>>> data = range(10)
>>> indices = rb.extend(data)
>>> # sample will return as many elements as specified in the constructor
>>> sample = rb.sample()
>>> print(sample)
tensor([4, 9, 3, 0, 3])
>>> # Passing the batch-size to the sample method overrides the one in the constructor
>>> sample = rb.sample(batch_size=3)
>>> print(sample)
tensor([9, 7, 3])
>>> # one cans sample using the ``sample`` method or iterate over the buffer
>>> for i, batch in enumerate(rb):
... print(i, batch)
... if i == 3:
... break
0 tensor([7, 3, 1, 6, 6])
1 tensor([9, 8, 6, 6, 8])
2 tensor([4, 3, 6, 9, 1])
3 tensor([4, 4, 1, 9, 9])
Replay buffers accept *any* kind of data. Not all storage types
will work, as some expect numerical data only, but the default
:class:`~torchrl.data.ListStorage` will:
Examples:
>>> torch.manual_seed(0)
>>> buffer = ReplayBuffer(storage=ListStorage(100), collate_fn=lambda x: x)
>>> indices = buffer.extend(["a", 1, None])
>>> buffer.sample(3)
[None, 'a', None]
The :class:`~torchrl.data.replay_buffers.TensorStorage`, :class:`~torchrl.data.replay_buffers.LazyMemmapStorage`
and :class:`~torchrl.data.replay_buffers.LazyTensorStorage` also work
with any PyTree structure (a PyTree is a nested structure of arbitrary depth made of dicts,
lists or tuples where the leaves are tensors) provided that it only contains
tensor data.
Examples:
>>> from torch.utils._pytree import tree_map
>>> def transform(x):
... # Zeros all the data in the pytree
... return tree_map(lambda y: y * 0, x)
>>> rb = ReplayBuffer(storage=LazyMemmapStorage(100), transform=transform)
>>> data = {
... "a": torch.randn(3),
... "b": {"c": (torch.zeros(2), [torch.ones(1)])},
... 30: -torch.ones(()),
... }
>>> rb.add(data)
>>> # The sample has a similar structure to the data (with a leading dimension of 10 for each tensor)
>>> s = rb.sample(10)
>>> # let's check that our transform did its job:
>>> def assert0(x):
>>> assert (x == 0).all()
>>> tree_map(assert0, s)
"""
def __init__(
self,
*,
storage: Storage | None = None,
sampler: Sampler | None = None,
writer: Writer | None = None,
collate_fn: Callable | None = None,
pin_memory: bool = False,
prefetch: int | None = None,
transform: "Transform" | None = None, # noqa-F821
batch_size: int | None = None,
dim_extend: int | None = None,
checkpointer: "StorageCheckpointerBase" | None = None, # noqa: F821
generator: torch.Generator | None = None,
shared: bool = False,
compilable: bool = None,
) -> None:
self._storage = (
storage
if storage is not None
else ListStorage(max_size=1_000, compilable=compilable)
)
self._storage.attach(self)
self._sampler = sampler if sampler is not None else RandomSampler()
self._writer = (
writer
if writer is not None
else RoundRobinWriter(compilable=bool(compilable))
)
self._writer.register_storage(self._storage)
self._get_collate_fn(collate_fn)
self._pin_memory = pin_memory
self._prefetch = bool(prefetch)
self._prefetch_cap = prefetch or 0
self._prefetch_queue = collections.deque()
if self._prefetch_cap:
self._prefetch_executor = ThreadPoolExecutor(max_workers=self._prefetch_cap)
self.shared = shared
self.share(self.shared)
self._replay_lock = threading.RLock()
self._futures_lock = threading.RLock()
from torchrl.envs.transforms.transforms import (
_CallableTransform,
Compose,
Transform,
)
if transform is None:
transform = Compose()
elif not isinstance(transform, Compose):
if not isinstance(transform, Transform) and callable(transform):
transform = _CallableTransform(transform)
elif not isinstance(transform, Transform):
raise RuntimeError(
"transform must be either a Transform instance or a callable."
)
transform = Compose(transform)
transform.eval()
self._transform = transform
if batch_size is None and prefetch:
raise ValueError(
"Dynamic batch-size specification is incompatible "
"with multithreaded sampling. "
"When using prefetch, the batch-size must be specified in "
"advance. "
)
if (
batch_size is None
and hasattr(self._sampler, "drop_last")
and self._sampler.drop_last
):
raise ValueError(
"Samplers with drop_last=True must work with a predictible batch-size. "
"Please pass the batch-size to the ReplayBuffer constructor."
)
self._batch_size = batch_size
if dim_extend is not None and dim_extend < 0:
raise ValueError("dim_extend must be a positive value.")
self.dim_extend = dim_extend
self._storage.checkpointer = checkpointer
self.set_rng(generator=generator)
def share(self, shared: bool = True):
self.shared = shared
if self.shared:
self._write_lock = multiprocessing.Lock()
else:
self._write_lock = contextlib.nullcontext()
def set_rng(self, generator):
self._rng = generator
self._storage._rng = generator
self._sampler._rng = generator
self._writer._rng = generator
@property
def dim_extend(self):
return self._dim_extend
@dim_extend.setter
def dim_extend(self, value):
if (
hasattr(self, "_dim_extend")
and self._dim_extend is not None
and self._dim_extend != value
):
raise RuntimeError(
"dim_extend cannot be reset. Please create a new replay buffer."
)
if value is None:
if self._storage is not None:
ndim = self._storage.ndim
value = ndim - 1
else:
value = 1
self._dim_extend = value
def _transpose(self, data):
if is_tensor_collection(data):
return data.transpose(self.dim_extend, 0)
return tree_map(lambda x: x.transpose(self.dim_extend, 0), data)
def _get_collate_fn(self, collate_fn):
self._collate_fn = (
collate_fn
if collate_fn is not None
else _get_default_collate(
self._storage, _is_tensordict=isinstance(self, TensorDictReplayBuffer)
)
)
[docs] def set_storage(self, storage: Storage, collate_fn: Callable | None = None):
"""Sets a new storage in the replay buffer and returns the previous storage.
Args:
storage (Storage): the new storage for the buffer.
collate_fn (callable, optional): if provided, the collate_fn is set to this
value. Otherwise it is reset to a default value.
"""
prev_storage = self._storage
self._storage = storage
self._get_collate_fn(collate_fn)
return prev_storage
[docs] def set_writer(self, writer: Writer):
"""Sets a new writer in the replay buffer and returns the previous writer."""
prev_writer = self._writer
self._writer = writer
self._writer.register_storage(self._storage)
return prev_writer
[docs] def set_sampler(self, sampler: Sampler):
"""Sets a new sampler in the replay buffer and returns the previous sampler."""
prev_sampler = self._sampler
self._sampler = sampler
return prev_sampler
def __len__(self) -> int:
with self._replay_lock:
return len(self._storage)
@property
def write_count(self):
"""The total number of items written so far in the buffer through add and extend."""
return self._writer._write_count
def __repr__(self) -> str:
from torchrl.envs.transforms import Compose
storage = textwrap.indent(f"storage={self._storage}", " " * 4)
writer = textwrap.indent(f"writer={self._writer}", " " * 4)
sampler = textwrap.indent(f"sampler={self._sampler}", " " * 4)
if self._transform is not None and not (
isinstance(self._transform, Compose) and not len(self._transform)
):
transform = textwrap.indent(f"transform={self._transform}", " " * 4)
transform = f"\n{self._transform}, "
else:
transform = ""
batch_size = textwrap.indent(f"batch_size={self._batch_size}", " " * 4)
collate_fn = textwrap.indent(f"collate_fn={self._collate_fn}", " " * 4)
return f"{self.__class__.__name__}(\n{storage}, \n{sampler}, \n{writer}, {transform}\n{batch_size}, \n{collate_fn})"
@pin_memory_output
def __getitem__(self, index: int | torch.Tensor | NestedKey) -> Any:
if isinstance(index, str) or (isinstance(index, tuple) and unravel_key(index)):
return self[:][index]
if isinstance(index, tuple):
if len(index) == 1:
return self[index[0]]
else:
return self[:][index]
index = _to_numpy(index)
if self.dim_extend > 0:
index = (slice(None),) * self.dim_extend + (index,)
with self._replay_lock:
data = self._storage[index]
data = self._transpose(data)
else:
with self._replay_lock:
data = self._storage[index]
if not isinstance(index, INT_CLASSES):
data = self._collate_fn(data)
if self._transform is not None and len(self._transform):
with data.unlock_() if is_tensor_collection(
data
) else contextlib.nullcontext():
data = self._transform(data)
return data
def __setitem__(self, index, value) -> None:
if isinstance(index, str) or (isinstance(index, tuple) and unravel_key(index)):
self[:][index] = value
return
if isinstance(index, tuple):
if len(index) == 1:
self[index[0]] = value
else:
self[:][index] = value
return
index = _to_numpy(index)
if self._transform is not None and len(self._transform):
value = self._transform.inv(value)
if self.dim_extend > 0:
index = (slice(None),) * self.dim_extend + (index,)
with self._replay_lock:
self._storage[index] = self._transpose(value)
else:
with self._replay_lock:
self._storage[index] = value
return
def state_dict(self) -> Dict[str, Any]:
return {
"_storage": self._storage.state_dict(),
"_sampler": self._sampler.state_dict(),
"_writer": self._writer.state_dict(),
"_transforms": self._transform.state_dict(),
"_batch_size": self._batch_size,
"_rng": (self._rng.get_state().clone(), str(self._rng.device))
if self._rng is not None
else None,
}
def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
self._storage.load_state_dict(state_dict["_storage"])
self._sampler.load_state_dict(state_dict["_sampler"])
self._writer.load_state_dict(state_dict["_writer"])
self._transform.load_state_dict(state_dict["_transforms"])
self._batch_size = state_dict["_batch_size"]
rng = state_dict.get("_rng")
if rng is not None:
state, device = rng
rng = torch.Generator(device=device)
rng.set_state(state)
self.set_rng(generator=rng)
[docs] def dumps(self, path):
"""Saves the replay buffer on disk at the specified path.
Args:
path (Path or str): path where to save the replay buffer.
Examples:
>>> import tempfile
>>> import tqdm
>>> from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer
>>> from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler
>>> import torch
>>> from tensordict import TensorDict
>>> # Build and populate the replay buffer
>>> S = 1_000_000
>>> sampler = PrioritizedSampler(S, 1.1, 1.0)
>>> # sampler = RandomSampler()
>>> storage = LazyMemmapStorage(S)
>>> rb = TensorDictReplayBuffer(storage=storage, sampler=sampler)
>>>
>>> for _ in tqdm.tqdm(range(100)):
... td = TensorDict({"obs": torch.randn(100, 3, 4), "next": {"obs": torch.randn(100, 3, 4)}, "td_error": torch.rand(100)}, [100])
... rb.extend(td)
... sample = rb.sample(32)
... rb.update_tensordict_priority(sample)
>>> # save and load the buffer
>>> with tempfile.TemporaryDirectory() as tmpdir:
... rb.dumps(tmpdir)
...
... sampler = PrioritizedSampler(S, 1.1, 1.0)
... # sampler = RandomSampler()
... storage = LazyMemmapStorage(S)
... rb_load = TensorDictReplayBuffer(storage=storage, sampler=sampler)
... rb_load.loads(tmpdir)
... assert len(rb) == len(rb_load)
"""
path = Path(path).absolute()
path.mkdir(exist_ok=True)
self._storage.dumps(path / "storage")
self._sampler.dumps(path / "sampler")
self._writer.dumps(path / "writer")
if self._rng is not None:
rng_state = TensorDict(
rng_state=self._rng.get_state().clone(),
device=self._rng.device,
)
rng_state.memmap(path / "rng_state")
# fall back on state_dict for transforms
transform_sd = self._transform.state_dict()
if transform_sd:
torch.save(transform_sd, path / "transform.t")
with open(path / "buffer_metadata.json", "w") as file:
json.dump({"batch_size": self._batch_size}, file)
[docs] def loads(self, path):
"""Loads a replay buffer state at the given path.
The buffer should have matching components and be saved using :meth:`~.dumps`.
Args:
path (Path or str): path where the replay buffer was saved.
See :meth:`~.dumps` for more info.
"""
path = Path(path).absolute()
self._storage.loads(path / "storage")
self._sampler.loads(path / "sampler")
self._writer.loads(path / "writer")
if (path / "rng_state").exists():
rng_state = TensorDict.load_memmap(path / "rng_state")
rng = torch.Generator(device=rng_state.device)
rng.set_state(rng_state["rng_state"])
self.set_rng(rng)
# fall back on state_dict for transforms
if (path / "transform.t").exists():
self._transform.load_state_dict(torch.load(path / "transform.t"))
with open(path / "buffer_metadata.json", "r") as file:
metadata = json.load(file)
self._batch_size = metadata["batch_size"]
[docs] def save(self, *args, **kwargs):
"""Alias for :meth:`~.dumps`."""
return self.dumps(*args, **kwargs)
[docs] def dump(self, *args, **kwargs):
"""Alias for :meth:`~.dumps`."""
return self.dumps(*args, **kwargs)
[docs] def load(self, *args, **kwargs):
"""Alias for :meth:`~.loads`."""
return self.loads(*args, **kwargs)
[docs] def register_save_hook(self, hook: Callable[[Any], Any]):
"""Registers a save hook for the storage.
.. note:: Hooks are currently not serialized when saving a replay buffer: they must
be manually re-initialized every time the buffer is created.
"""
self._storage.register_save_hook(hook)
[docs] def register_load_hook(self, hook: Callable[[Any], Any]):
"""Registers a load hook for the storage.
.. note:: Hooks are currently not serialized when saving a replay buffer: they must
be manually re-initialized every time the buffer is created.
"""
self._storage.register_load_hook(hook)
[docs] def add(self, data: Any) -> int:
"""Add a single element to the replay buffer.
Args:
data (Any): data to be added to the replay buffer
Returns:
index where the data lives in the replay buffer.
"""
if self._transform is not None and len(self._transform):
with _set_dispatch_td_nn_modules(is_tensor_collection(data)):
data = self._transform.inv(data)
if data is None:
return torch.zeros((0, self._storage.ndim), dtype=torch.long)
return self._add(data)
def _add(self, data):
with self._replay_lock, self._write_lock:
index = self._writer.add(data)
self._sampler.add(index)
return index
def _extend(self, data: Sequence) -> torch.Tensor:
is_compiling = is_dynamo_compiling()
nc = contextlib.nullcontext()
with self._replay_lock if not is_compiling else nc, self._write_lock if not is_compiling else nc:
if self.dim_extend > 0:
data = self._transpose(data)
index = self._writer.extend(data)
self._sampler.extend(index)
return index
[docs] def extend(self, data: Sequence) -> torch.Tensor:
"""Extends the replay buffer with one or more elements contained in an iterable.
If present, the inverse transforms will be called.`
Args:
data (iterable): collection of data to be added to the replay
buffer.
Returns:
Indices of the data added to the replay buffer.
.. warning:: :meth:`~torchrl.data.replay_buffers.ReplayBuffer.extend` can have an
ambiguous signature when dealing with lists of values, which should be interpreted
either as PyTree (in which case all elements in the list will be put in a slice
in the stored PyTree in the storage) or a list of values to add one at a time.
To solve this, TorchRL makes the clear-cut distinction between list and tuple:
a tuple will be viewed as a PyTree, a list (at the root level) will be interpreted
as a stack of values to add one at a time to the buffer.
For :class:`~torchrl.data.replay_buffers.ListStorage` instances, only
unbound elements can be provided (no PyTrees).
"""
if self._transform is not None and len(self._transform):
with _set_dispatch_td_nn_modules(is_tensor_collection(data)):
data = self._transform.inv(data)
if data is None:
return torch.zeros((0, self._storage.ndim), dtype=torch.long)
return self._extend(data)
def update_priority(
self,
index: Union[int, torch.Tensor, Tuple[torch.Tensor]],
priority: Union[int, torch.Tensor],
) -> None:
if isinstance(index, tuple):
index = torch.stack(index, -1)
priority = torch.as_tensor(priority)
if self.dim_extend > 0 and priority.ndim > 1:
priority = self._transpose(priority).flatten()
# priority = priority.flatten()
with self._replay_lock, self._write_lock:
self._sampler.update_priority(index, priority, storage=self.storage)
@pin_memory_output
def _sample(self, batch_size: int) -> Tuple[Any, dict]:
with self._replay_lock if not is_dynamo_compiling() else contextlib.nullcontext():
index, info = self._sampler.sample(self._storage, batch_size)
info["index"] = index
data = self._storage.get(index)
if not isinstance(index, INT_CLASSES):
data = self._collate_fn(data)
if self._transform is not None and len(self._transform):
is_td = is_tensor_collection(data)
with data.unlock_() if is_td else contextlib.nullcontext(), _set_dispatch_td_nn_modules(
is_td
):
data = self._transform(data)
return data, info
[docs] def empty(self):
"""Empties the replay buffer and reset cursor to 0."""
self._writer._empty()
self._sampler._empty()
self._storage._empty()
[docs] def sample(self, batch_size: int | None = None, return_info: bool = False) -> Any:
"""Samples a batch of data from the replay buffer.
Uses Sampler to sample indices, and retrieves them from Storage.
Args:
batch_size (int, optional): size of data to be collected. If none
is provided, this method will sample a batch-size as indicated
by the sampler.
return_info (bool): whether to return info. If True, the result
is a tuple (data, info). If False, the result is the data.
Returns:
A batch of data selected in the replay buffer.
A tuple containing this batch and info if return_info flag is set to True.
"""
if (
batch_size is not None
and self._batch_size is not None
and batch_size != self._batch_size
):
warnings.warn(
f"Got conflicting batch_sizes in constructor ({self._batch_size}) "
f"and `sample` ({batch_size}). Refer to the ReplayBuffer documentation "
"for a proper usage of the batch-size arguments. "
"The batch-size provided to the sample method "
"will prevail."
)
elif batch_size is None and self._batch_size is not None:
batch_size = self._batch_size
elif batch_size is None:
raise RuntimeError(
"batch_size not specified. You can specify the batch_size when "
"constructing the replay buffer, or pass it to the sample method. "
"Refer to the ReplayBuffer documentation "
"for a proper usage of the batch-size arguments."
)
if not self._prefetch:
result = self._sample(batch_size)
else:
with self._futures_lock:
while (
len(self._prefetch_queue)
< min(self._sampler._remaining_batches, self._prefetch_cap)
and not self._sampler.ran_out
) or not len(self._prefetch_queue):
fut = self._prefetch_executor.submit(self._sample, batch_size)
self._prefetch_queue.append(fut)
result = self._prefetch_queue.popleft().result()
if return_info:
out, info = result
if getattr(self.storage, "device", None) is not None:
device = self.storage.device
info = tree_map(lambda x: x.to(device) if hasattr(x, "to") else x, info)
return out, info
return result[0]
def mark_update(self, index: Union[int, torch.Tensor]) -> None:
self._sampler.mark_update(index, storage=self._storage)
[docs] def append_transform(
self, transform: "Transform", *, invert: bool = False # noqa-F821
) -> ReplayBuffer: # noqa: D417
"""Appends transform at the end.
Transforms are applied in order when `sample` is called.
Args:
transform (Transform): The transform to be appended
Keyword Args:
invert (bool, optional): if ``True``, the transform will be inverted (forward calls will be called
during writing and inverse calls during reading). Defaults to ``False``.
Example:
>>> rb = ReplayBuffer(storage=LazyMemmapStorage(10), batch_size=4)
>>> data = TensorDict({"a": torch.zeros(10)}, [10])
>>> def t(data):
... data += 1
... return data
>>> rb.append_transform(t, invert=True)
>>> rb.extend(data)
>>> assert (data == 1).all()
"""
from torchrl.envs.transforms.transforms import _CallableTransform, Transform
if not isinstance(transform, Transform) and callable(transform):
transform = _CallableTransform(transform)
if invert:
transform = _InvertTransform(transform)
transform.eval()
self._transform.append(transform)
return self
[docs] def insert_transform(
self,
index: int,
transform: "Transform", # noqa-F821
*,
invert: bool = False,
) -> ReplayBuffer: # noqa: D417
"""Inserts transform.
Transforms are executed in order when `sample` is called.
Args:
index (int): Position to insert the transform.
transform (Transform): The transform to be appended
Keyword Args:
invert (bool, optional): if ``True``, the transform will be inverted (forward calls will be called
during writing and inverse calls during reading). Defaults to ``False``.
"""
transform.eval()
if invert:
transform = _InvertTransform(transform)
self._transform.insert(index, transform)
return self
def __iter__(self):
if self._sampler.ran_out:
self._sampler.ran_out = False
if self._batch_size is None:
raise RuntimeError(
"Cannot iterate over the replay buffer. "
"Batch_size was not specified during construction of the replay buffer."
)
while not self._sampler.ran_out or (
self._prefetch and len(self._prefetch_queue)
):
yield self.sample()
def __getstate__(self) -> Dict[str, Any]:
state = self.__dict__.copy()
if self._rng is not None:
rng_state = TensorDict(
rng_state=self._rng.get_state().clone(),
device=self._rng.device,
)
state["_rng"] = rng_state
_replay_lock = state.pop("_replay_lock", None)
_futures_lock = state.pop("_futures_lock", None)
if _replay_lock is not None:
state["_replay_lock_placeholder"] = None
if _futures_lock is not None:
state["_futures_lock_placeholder"] = None
return state
def __setstate__(self, state: Dict[str, Any]):
rngstate = None
if "_rng" in state:
rngstate = state["_rng"]
if rngstate is not None:
rng = torch.Generator(device=rngstate.device)
rng.set_state(rngstate["rng_state"])
if "_replay_lock_placeholder" in state:
state.pop("_replay_lock_placeholder")
_replay_lock = threading.RLock()
state["_replay_lock"] = _replay_lock
if "_futures_lock_placeholder" in state:
state.pop("_futures_lock_placeholder")
_futures_lock = threading.RLock()
state["_futures_lock"] = _futures_lock
self.__dict__.update(state)
if rngstate is not None:
self.set_rng(rng)
@property
def sampler(self):
"""The sampler of the replay buffer.
The sampler must be an instance of :class:`~torchrl.data.replay_buffers.Sampler`.
"""
return self._sampler
@property
def writer(self):
"""The writer of the replay buffer.
The writer must be an instance of :class:`~torchrl.data.replay_buffers.Writer`.
"""
return self._writer
@property
def storage(self):
"""The storage of the replay buffer.
The storage must be an instance of :class:`~torchrl.data.replay_buffers.Storage`.
"""
return self._storage
[docs]class PrioritizedReplayBuffer(ReplayBuffer):
"""Prioritized replay buffer.
All arguments are keyword-only arguments.
Presented in
"Schaul, T.; Quan, J.; Antonoglou, I.; and Silver, D. 2015.
Prioritized experience replay."
(https://arxiv.org/abs/1511.05952)
Args:
alpha (float): exponent α determines how much prioritization is used,
with α = 0 corresponding to the uniform case.
beta (float): importance sampling negative exponent.
eps (float): delta added to the priorities to ensure that the buffer
does not contain null priorities.
storage (Storage, optional): the storage to be used. If none is provided
a default :class:`~torchrl.data.replay_buffers.ListStorage` with
``max_size`` of ``1_000`` will be created.
collate_fn (callable, optional): merges a list of samples to form a
mini-batch of Tensor(s)/outputs. Used when using batched
loading from a map-style dataset. The default value will be decided
based on the storage type.
pin_memory (bool): whether pin_memory() should be called on the rb
samples.
prefetch (int, optional): number of next batches to be prefetched
using multithreading. Defaults to None (no prefetching).
transform (Transform, optional): Transform to be executed when
sample() is called.
To chain transforms use the :class:`~torchrl.envs.Compose` class.
Transforms should be used with :class:`tensordict.TensorDict`
content. If used with other structures, the transforms should be
encoded with a ``"data"`` leading key that will be used to
construct a tensordict from the non-tensordict content.
batch_size (int, optional): the batch size to be used when sample() is
called.
.. note::
The batch-size can be specified at construction time via the
``batch_size`` argument, or at sampling time. The former should
be preferred whenever the batch-size is consistent across the
experiment. If the batch-size is likely to change, it can be
passed to the :meth:`~.sample` method. This option is
incompatible with prefetching (since this requires to know the
batch-size in advance) as well as with samplers that have a
``drop_last`` argument.
dim_extend (int, optional): indicates the dim to consider for
extension when calling :meth:`~.extend`. Defaults to ``storage.ndim-1``.
When using ``dim_extend > 0``, we recommend using the ``ndim``
argument in the storage instantiation if that argument is
available, to let storages know that the data is
multi-dimensional and keep consistent notions of storage-capacity
and batch-size during sampling.
.. note:: This argument has no effect on :meth:`~.add` and
therefore should be used with caution when both :meth:`~.add`
and :meth:`~.extend` are used in a codebase. For example:
>>> data = torch.zeros(3, 4)
>>> rb = ReplayBuffer(
... storage=LazyTensorStorage(10, ndim=2),
... dim_extend=1)
>>> # these two approaches are equivalent:
>>> for d in data.unbind(1):
... rb.add(d)
>>> rb.extend(data)
.. note::
Generic prioritized replay buffers (ie. non-tensordict backed) require
calling :meth:`~.sample` with the ``return_info`` argument set to
``True`` to have access to the indices, and hence update the priority.
Using :class:`tensordict.TensorDict` and the related
:class:`~torchrl.data.TensorDictPrioritizedReplayBuffer` simplifies this
process.
Examples:
>>> import torch
>>>
>>> from torchrl.data import ListStorage, PrioritizedReplayBuffer
>>>
>>> torch.manual_seed(0)
>>>
>>> rb = PrioritizedReplayBuffer(alpha=0.7, beta=0.9, storage=ListStorage(10))
>>> data = range(10)
>>> rb.extend(data)
>>> sample = rb.sample(3)
>>> print(sample)
tensor([1, 0, 1])
>>> # get the info to find what the indices are
>>> sample, info = rb.sample(5, return_info=True)
>>> print(sample, info)
tensor([2, 7, 4, 3, 5]) {'_weight': array([1., 1., 1., 1., 1.], dtype=float32), 'index': array([2, 7, 4, 3, 5])}
>>> # update priority
>>> priority = torch.ones(5) * 5
>>> rb.update_priority(info["index"], priority)
>>> # and now a new sample, the weights should be updated
>>> sample, info = rb.sample(5, return_info=True)
>>> print(sample, info)
tensor([2, 5, 2, 2, 5]) {'_weight': array([0.36278465, 0.36278465, 0.36278465, 0.36278465, 0.36278465],
dtype=float32), 'index': array([2, 5, 2, 2, 5])}
"""
def __init__(
self,
*,
alpha: float,
beta: float,
eps: float = 1e-8,
dtype: torch.dtype = torch.float,
storage: Storage | None = None,
collate_fn: Callable | None = None,
pin_memory: bool = False,
prefetch: int | None = None,
transform: "Transform" | None = None, # noqa-F821
batch_size: int | None = None,
dim_extend: int | None = None,
) -> None:
if storage is None:
storage = ListStorage(max_size=1_000)
sampler = PrioritizedSampler(storage.max_size, alpha, beta, eps, dtype)
super(PrioritizedReplayBuffer, self).__init__(
storage=storage,
sampler=sampler,
collate_fn=collate_fn,
pin_memory=pin_memory,
prefetch=prefetch,
transform=transform,
batch_size=batch_size,
dim_extend=dim_extend,
)
[docs]class TensorDictReplayBuffer(ReplayBuffer):
"""TensorDict-specific wrapper around the :class:`~torchrl.data.ReplayBuffer` class.
Keyword Args:
storage (Storage, optional): the storage to be used. If none is provided
a default :class:`~torchrl.data.replay_buffers.ListStorage` with
``max_size`` of ``1_000`` will be created.
sampler (Sampler, optional): the sampler to be used. If none is provided
a default RandomSampler() will be used.
writer (Writer, optional): the writer to be used. If none is provided
a default :class:`~torchrl.data.replay_buffers.RoundRobinWriter`
will be used.
collate_fn (callable, optional): merges a list of samples to form a
mini-batch of Tensor(s)/outputs. Used when using batched
loading from a map-style dataset. The default value will be decided
based on the storage type.
pin_memory (bool): whether pin_memory() should be called on the rb
samples.
prefetch (int, optional): number of next batches to be prefetched
using multithreading. Defaults to None (no prefetching).
transform (Transform, optional): Transform to be executed when
sample() is called.
To chain transforms use the :class:`~torchrl.envs.Compose` class.
Transforms should be used with :class:`tensordict.TensorDict`
content. If used with other structures, the transforms should be
encoded with a ``"data"`` leading key that will be used to
construct a tensordict from the non-tensordict content.
batch_size (int, optional): the batch size to be used when sample() is
called.
.. note::
The batch-size can be specified at construction time via the
``batch_size`` argument, or at sampling time. The former should
be preferred whenever the batch-size is consistent across the
experiment. If the batch-size is likely to change, it can be
passed to the :meth:`~.sample` method. This option is
incompatible with prefetching (since this requires to know the
batch-size in advance) as well as with samplers that have a
``drop_last`` argument.
priority_key (str, optional): the key at which priority is assumed to
be stored within TensorDicts added to this ReplayBuffer.
This is to be used when the sampler is of type
:class:`~torchrl.data.PrioritizedSampler`.
Defaults to ``"td_error"``.
dim_extend (int, optional): indicates the dim to consider for
extension when calling :meth:`~.extend`. Defaults to ``storage.ndim-1``.
When using ``dim_extend > 0``, we recommend using the ``ndim``
argument in the storage instantiation if that argument is
available, to let storages know that the data is
multi-dimensional and keep consistent notions of storage-capacity
and batch-size during sampling.
.. note:: This argument has no effect on :meth:`~.add` and
therefore should be used with caution when both :meth:`~.add`
and :meth:`~.extend` are used in a codebase. For example:
>>> data = torch.zeros(3, 4)
>>> rb = ReplayBuffer(
... storage=LazyTensorStorage(10, ndim=2),
... dim_extend=1)
>>> # these two approaches are equivalent:
>>> for d in data.unbind(1):
... rb.add(d)
>>> rb.extend(data)
generator (torch.Generator, optional): a generator to use for sampling.
Using a dedicated generator for the replay buffer can allow a fine-grained control
over seeding, for instance keeping the global seed different but the RB seed identical
for distributed jobs.
Defaults to ``None`` (global default generator).
.. warning:: As of now, the generator has no effect on the transforms.
shared (bool, optional): whether the buffer will be shared using multiprocessing or not.
Defaults to ``False``.
Examples:
>>> import torch
>>>
>>> from torchrl.data import LazyTensorStorage, TensorDictReplayBuffer
>>> from tensordict import TensorDict
>>>
>>> torch.manual_seed(0)
>>>
>>> rb = TensorDictReplayBuffer(storage=LazyTensorStorage(10), batch_size=5)
>>> data = TensorDict({"a": torch.ones(10, 3), ("b", "c"): torch.zeros(10, 1, 1)}, [10])
>>> rb.extend(data)
>>> sample = rb.sample(3)
>>> # samples keep track of the index
>>> print(sample)
TensorDict(
fields={
a: Tensor(shape=torch.Size([3, 3]), device=cpu, dtype=torch.float32, is_shared=False),
b: TensorDict(
fields={
c: Tensor(shape=torch.Size([3, 1, 1]), device=cpu, dtype=torch.float32, is_shared=False)},
batch_size=torch.Size([3]),
device=cpu,
is_shared=False),
index: Tensor(shape=torch.Size([3]), device=cpu, dtype=torch.int32, is_shared=False)},
batch_size=torch.Size([3]),
device=cpu,
is_shared=False)
>>> # we can iterate over the buffer
>>> for i, data in enumerate(rb):
... print(i, data)
... if i == 2:
... break
0 TensorDict(
fields={
a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False),
b: TensorDict(
fields={
c: Tensor(shape=torch.Size([5, 1, 1]), device=cpu, dtype=torch.float32, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False),
index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int32, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False)
1 TensorDict(
fields={
a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False),
b: TensorDict(
fields={
c: Tensor(shape=torch.Size([5, 1, 1]), device=cpu, dtype=torch.float32, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False),
index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int32, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False)
"""
def __init__(self, *, priority_key: str = "td_error", **kwargs) -> None:
writer = kwargs.get("writer", None)
if writer is None:
kwargs["writer"] = TensorDictRoundRobinWriter()
super().__init__(**kwargs)
self.priority_key = priority_key
def _get_priority_item(self, tensordict: TensorDictBase) -> float:
priority = tensordict.get(self.priority_key, None)
if self._storage.ndim > 1:
# We have to flatten the priority otherwise we'll be aggregating
# the priority across batches
priority = priority.flatten(0, self._storage.ndim - 1)
if priority is None:
return self._sampler.default_priority
try:
if priority.numel() > 1:
priority = _reduce(priority, self._sampler.reduction)
else:
priority = priority.item()
except ValueError:
raise ValueError(
f"Found a priority key of size"
f" {tensordict.get(self.priority_key).shape} but expected "
f"scalar value"
)
if self._storage.ndim > 1:
priority = priority.unflatten(0, tensordict.shape[: self._storage.ndim])
return priority
def _get_priority_vector(self, tensordict: TensorDictBase) -> torch.Tensor:
priority = tensordict.get(self.priority_key, None)
if priority is None:
return torch.tensor(
self._sampler.default_priority,
dtype=torch.float,
device=tensordict.device,
).expand(tensordict.shape[0])
if self._storage.ndim > 1 and priority.ndim >= self._storage.ndim:
# We have to flatten the priority otherwise we'll be aggregating
# the priority across batches
priority = priority.flatten(0, self._storage.ndim - 1)
priority = priority.reshape(priority.shape[0], -1)
priority = _reduce(priority, self._sampler.reduction, dim=1)
if self._storage.ndim > 1:
priority = priority.unflatten(0, tensordict.shape[: self._storage.ndim])
return priority
[docs] def add(self, data: TensorDictBase) -> int:
if self._transform is not None:
with _set_dispatch_td_nn_modules(is_tensor_collection(data)):
data = self._transform.inv(data)
if data is None:
return torch.zeros((0, self._storage.ndim), dtype=torch.long)
index = super()._add(data)
if index is not None:
if is_tensor_collection(data):
self._set_index_in_td(data, index)
self.update_tensordict_priority(data)
return index
[docs] def extend(self, tensordicts: TensorDictBase) -> torch.Tensor:
if not isinstance(tensordicts, TensorDictBase):
raise ValueError(
f"{self.__class__.__name__} only accepts TensorDictBase subclasses. tensorclasses "
f"and other types are not compatible with that class. "
"Please use a regular `ReplayBuffer` instead."
)
if self._transform is not None:
tensordicts = self._transform.inv(tensordicts)
if tensordicts is None:
return torch.zeros((0, self._storage.ndim), dtype=torch.long)
index = super()._extend(tensordicts)
# TODO: to be usable directly, the indices should be flipped but the issue
# is that just doing this results in indices that are not sorted like the original data
# so the actualy indices will have to be used on the _storage directly (not on the buffer)
self._set_index_in_td(tensordicts, index)
# TODO: in principle this is a good idea but currently it doesn't work + it re-writes a priority that has just been written
# self.update_tensordict_priority(tensordicts)
return index
def _set_index_in_td(self, tensordict, index):
if index is None:
return
if _is_int(index):
index = torch.as_tensor(index, device=tensordict.device)
elif index.ndim == 2 and index.shape[:1] != tensordict.shape[:1]:
for dim in range(2, tensordict.ndim + 1):
if index.shape[:1].numel() == tensordict.shape[:dim].numel():
# if index has 2 dims and is in a non-zero format
index = index.unflatten(0, tensordict.shape[:dim])
break
else:
raise RuntimeError(
f"could not find how to reshape index with shape {index.shape} to fit in tensordict with shape {tensordict.shape}"
)
tensordict.set("index", index)
return
tensordict.set("index", expand_as_right(index, tensordict))
def update_tensordict_priority(self, data: TensorDictBase) -> None:
if not isinstance(self._sampler, PrioritizedSampler):
return
if data.ndim:
priority = self._get_priority_vector(data)
else:
priority = torch.as_tensor(self._get_priority_item(data))
index = data.get("index")
if self._storage.ndim > 1 and index.ndim == 2:
index = index.unbind(-1)
else:
while index.shape != priority.shape:
# reduce index
index = index[..., 0]
return self.update_priority(index, priority)
[docs] def sample(
self,
batch_size: int | None = None,
return_info: bool = False,
include_info: bool = None,
) -> TensorDictBase:
"""Samples a batch of data from the replay buffer.
Uses Sampler to sample indices, and retrieves them from Storage.
Args:
batch_size (int, optional): size of data to be collected. If none
is provided, this method will sample a batch-size as indicated
by the sampler.
return_info (bool): whether to return info. If True, the result
is a tuple (data, info). If False, the result is the data.
Returns:
A tensordict containing a batch of data selected in the replay buffer.
A tuple containing this tensordict and info if return_info flag is set to True.
"""
if include_info is not None:
warnings.warn(
"include_info is going to be deprecated soon."
"The default behavior has changed to `include_info=True` "
"to avoid bugs linked to wrongly preassigned values in the "
"output tensordict."
)
data, info = super().sample(batch_size, return_info=True)
is_tc = is_tensor_collection(data)
if is_tc and not is_tensorclass(data) and include_info in (True, None):
is_locked = data.is_locked
if is_locked:
data.unlock_()
for key, val in info.items():
if key == "index" and isinstance(val, tuple):
val = torch.stack(val, -1)
try:
val = _to_torch(val, data.device)
if val.ndim < data.ndim:
val = expand_as_right(val, data)
data.set(key, val)
except RuntimeError:
raise RuntimeError(
"Failed to set the metadata (e.g., indices or weights) in the sampled tensordict within TensorDictReplayBuffer.sample. "
"This is probably caused by a shape mismatch (one of the transforms has proably modified "
"the shape of the output tensordict). "
"You can always recover these items from the `sample` method from a regular ReplayBuffer "
"instance with the 'return_info' flag set to True."
)
if is_locked:
data.lock_()
elif not is_tc and include_info in (True, None):
raise RuntimeError("Cannot include info in non-tensordict data")
if return_info:
return data, info
return data
@pin_memory_output
def _sample(self, batch_size: int) -> Tuple[Any, dict]:
with self._replay_lock:
index, info = self._sampler.sample(self._storage, batch_size)
info["index"] = index
data = self._storage.get(index)
if not isinstance(index, INT_CLASSES):
data = self._collate_fn(data)
if self._transform is not None and len(self._transform):
with data.unlock_(), _set_dispatch_td_nn_modules(True):
data = self._transform(data)
return data, info
[docs]class TensorDictPrioritizedReplayBuffer(TensorDictReplayBuffer):
"""TensorDict-specific wrapper around the :class:`~torchrl.data.PrioritizedReplayBuffer` class.
This class returns tensordicts with a new key ``"index"`` that represents
the index of each element in the replay buffer. It also provides the
:meth:`~.update_tensordict_priority` method that only requires for the
tensordict to be passed to it with its new priority value.
Keyword Args:
alpha (float): exponent α determines how much prioritization is used,
with α = 0 corresponding to the uniform case.
beta (float): importance sampling negative exponent.
eps (float): delta added to the priorities to ensure that the buffer
does not contain null priorities.
storage (Storage, optional): the storage to be used. If none is provided
a default :class:`~torchrl.data.replay_buffers.ListStorage` with
``max_size`` of ``1_000`` will be created.
collate_fn (callable, optional): merges a list of samples to form a
mini-batch of Tensor(s)/outputs. Used when using batched
loading from a map-style dataset. The default value will be decided
based on the storage type.
pin_memory (bool): whether pin_memory() should be called on the rb
samples.
prefetch (int, optional): number of next batches to be prefetched
using multithreading. Defaults to None (no prefetching).
transform (Transform, optional): Transform to be executed when
sample() is called.
To chain transforms use the :class:`~torchrl.envs.Compose` class.
Transforms should be used with :class:`tensordict.TensorDict`
content. If used with other structures, the transforms should be
encoded with a ``"data"`` leading key that will be used to
construct a tensordict from the non-tensordict content.
batch_size (int, optional): the batch size to be used when sample() is
called.
.. note::
The batch-size can be specified at construction time via the
``batch_size`` argument, or at sampling time. The former should
be preferred whenever the batch-size is consistent across the
experiment. If the batch-size is likely to change, it can be
passed to the :meth:`~.sample` method. This option is
incompatible with prefetching (since this requires to know the
batch-size in advance) as well as with samplers that have a
``drop_last`` argument.
priority_key (str, optional): the key at which priority is assumed to
be stored within TensorDicts added to this ReplayBuffer.
This is to be used when the sampler is of type
:class:`~torchrl.data.PrioritizedSampler`.
Defaults to ``"td_error"``.
reduction (str, optional): the reduction method for multidimensional
tensordicts (ie stored trajectories). Can be one of "max", "min",
"median" or "mean".
dim_extend (int, optional): indicates the dim to consider for
extension when calling :meth:`~.extend`. Defaults to ``storage.ndim-1``.
When using ``dim_extend > 0``, we recommend using the ``ndim``
argument in the storage instantiation if that argument is
available, to let storages know that the data is
multi-dimensional and keep consistent notions of storage-capacity
and batch-size during sampling.
.. note:: This argument has no effect on :meth:`~.add` and
therefore should be used with caution when both :meth:`~.add`
and :meth:`~.extend` are used in a codebase. For example:
>>> data = torch.zeros(3, 4)
>>> rb = ReplayBuffer(
... storage=LazyTensorStorage(10, ndim=2),
... dim_extend=1)
>>> # these two approaches are equivalent:
>>> for d in data.unbind(1):
... rb.add(d)
>>> rb.extend(data)
generator (torch.Generator, optional): a generator to use for sampling.
Using a dedicated generator for the replay buffer can allow a fine-grained control
over seeding, for instance keeping the global seed different but the RB seed identical
for distributed jobs.
Defaults to ``None`` (global default generator).
.. warning:: As of now, the generator has no effect on the transforms.
shared (bool, optional): whether the buffer will be shared using multiprocessing or not.
Defaults to ``False``.
Examples:
>>> import torch
>>>
>>> from torchrl.data import LazyTensorStorage, TensorDictPrioritizedReplayBuffer
>>> from tensordict import TensorDict
>>>
>>> torch.manual_seed(0)
>>>
>>> rb = TensorDictPrioritizedReplayBuffer(alpha=0.7, beta=1.1, storage=LazyTensorStorage(10), batch_size=5)
>>> data = TensorDict({"a": torch.ones(10, 3), ("b", "c"): torch.zeros(10, 3, 1)}, [10])
>>> rb.extend(data)
>>> print("len of rb", len(rb))
len of rb 10
>>> sample = rb.sample(5)
>>> print(sample)
TensorDict(
fields={
_weight: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.float32, is_shared=False),
a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False),
b: TensorDict(
fields={
c: Tensor(shape=torch.Size([5, 3, 1]), device=cpu, dtype=torch.float32, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False),
index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int64, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False)
>>> print("index", sample["index"])
index tensor([9, 5, 2, 2, 7])
>>> # give a high priority to these samples...
>>> sample.set("td_error", 100*torch.ones(sample.shape))
>>> # and update priority
>>> rb.update_tensordict_priority(sample)
>>> # the new sample should have a high overlap with the previous one
>>> sample = rb.sample(5)
>>> print(sample)
TensorDict(
fields={
_weight: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.float32, is_shared=False),
a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False),
b: TensorDict(
fields={
c: Tensor(shape=torch.Size([5, 3, 1]), device=cpu, dtype=torch.float32, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False),
index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int64, is_shared=False)},
batch_size=torch.Size([5]),
device=cpu,
is_shared=False)
>>> print("index", sample["index"])
index tensor([2, 5, 5, 9, 7])
"""
def __init__(
self,
*,
alpha: float,
beta: float,
priority_key: str = "td_error",
eps: float = 1e-8,
storage: Storage | None = None,
collate_fn: Callable | None = None,
pin_memory: bool = False,
prefetch: int | None = None,
transform: "Transform" | None = None, # noqa-F821
reduction: str = "max",
batch_size: int | None = None,
dim_extend: int | None = None,
generator: torch.Generator | None = None,
shared: bool = False,
) -> None:
if storage is None:
storage = ListStorage(max_size=1_000)
sampler = PrioritizedSampler(
storage.max_size, alpha, beta, eps, reduction=reduction
)
super(TensorDictPrioritizedReplayBuffer, self).__init__(
priority_key=priority_key,
storage=storage,
sampler=sampler,
collate_fn=collate_fn,
pin_memory=pin_memory,
prefetch=prefetch,
transform=transform,
batch_size=batch_size,
dim_extend=dim_extend,
generator=generator,
shared=shared,
)
@accept_remote_rref_udf_invocation
class RemoteTensorDictReplayBuffer(TensorDictReplayBuffer):
"""A remote invocation friendly ReplayBuffer class. Public methods can be invoked by remote agents using `torch.rpc` or called locally as normal."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def sample(
self,
batch_size: int | None = None,
include_info: bool = None,
return_info: bool = False,
) -> TensorDictBase:
return super().sample(
batch_size=batch_size, include_info=include_info, return_info=return_info
)
def add(self, data: TensorDictBase) -> int:
return super().add(data)
def extend(self, tensordicts: Union[List, TensorDictBase]) -> torch.Tensor:
return super().extend(tensordicts)
def update_priority(
self, index: Union[int, torch.Tensor], priority: Union[int, torch.Tensor]
) -> None:
return super().update_priority(index, priority)
def update_tensordict_priority(self, data: TensorDictBase) -> None:
return super().update_tensordict_priority(data)
class InPlaceSampler:
"""A sampler to write tennsordicts in-place.
.. warning:: This class is deprecated and will be removed in v0.7.
To be used cautiously as this may lead to unexpected behavior (i.e. tensordicts
overwritten during execution).
"""
def __init__(self, device: DEVICE_TYPING | None = None):
warnings.warn(
"InPlaceSampler has been deprecated and will be removed in v0.7.",
category=DeprecationWarning,
)
self.out = None
if device is None:
device = "cpu"
self.device = _make_ordinal_device(torch.device(device))
def __call__(self, list_of_tds):
if self.out is None:
self.out = torch.stack(list_of_tds, 0).contiguous()
if self.device is not None:
self.out = self.out.to(self.device)
else:
torch.stack(list_of_tds, 0, out=self.out)
return self.out
def stack_tensors(list_of_tensor_iterators: List) -> Tuple[torch.Tensor]:
"""Zips a list of iterables containing tensor-like objects and stacks the resulting lists of tensors together.
Args:
list_of_tensor_iterators (list): Sequence containing similar iterators,
where each element of the nested iterator is a tensor whose
shape match the tensor of other iterators that have the same index.
Returns:
Tuple of stacked tensors.
Examples:
>>> list_of_tensor_iterators = [[torch.ones(3), torch.zeros(1,2)]
... for _ in range(4)]
>>> stack_tensors(list_of_tensor_iterators)
(tensor([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]]), tensor([[[0., 0.]],
<BLANKLINE>
[[0., 0.]],
<BLANKLINE>
[[0., 0.]],
<BLANKLINE>
[[0., 0.]]]))
"""
return tuple(torch.stack(tensors, 0) for tensors in zip(*list_of_tensor_iterators))
[docs]class ReplayBufferEnsemble(ReplayBuffer):
"""An ensemble of replay buffers.
This class allows to read and sample from multiple replay buffers at once.
It automatically composes ensemble of storages (:class:`~torchrl.data.replay_buffers.storages.StorageEnsemble`),
writers (:class:`~torchrl.data.replay_buffers.writers.WriterEnsemble`) and
samplers (:class:`~torchrl.data.replay_buffers.samplers.SamplerEnsemble`).
.. note::
Writing directly to this class is forbidden, but it can be indexed to retrieve
the nested nested-buffer and extending it.
There are two distinct ways of constructing a :class:`~torchrl.data.ReplayBufferEnsemble`:
one can either pass a list of replay buffers, or directly pass the components
(storage, writers and samplers) like it is done for other replay buffer subclasses.
Args:
rbs (sequence of ReplayBuffer instances, optional): the replay buffers to ensemble.
storages (StorageEnsemble, optional): the ensemble of storages, if the replay
buffers are not passed.
samplers (SamplerEnsemble, optional): the ensemble of samplers, if the replay
buffers are not passed.
writers (WriterEnsemble, optional): the ensemble of writers, if the replay
buffers are not passed.
transform (Transform, optional): if passed, this will be the transform
of the ensemble of replay buffers. Individual transforms for each
replay buffer is retrieved from its parent replay buffer, or directly
written in the :class:`~torchrl.data.replay_buffers.storages.StorageEnsemble`
object.
batch_size (int, optional): the batch-size to use during sampling.
collate_fn (callable, optional): the function to use to collate the
data after each individual collate_fn has been called and the data
is placed in a list (along with the buffer id).
collate_fns (list of callables, optional): collate_fn of each nested
replay buffer. Retrieved from the :class:`~ReplayBuffer` instances
if not provided.
p (list of float or Tensor, optional): a list of floating numbers
indicating the relative weight of each replay buffer. Can also
be passed to torchrl.data.replay_buffers.samplers.SamplerEnsemble`
if the buffer is built explicitely.
sample_from_all (bool, optional): if ``True``, each dataset will be sampled
from. This is not compatible with the ``p`` argument. Defaults to ``False``.
Can also be passed to torchrl.data.replay_buffers.samplers.SamplerEnsemble`
if the buffer is built explicitely.
num_buffer_sampled (int, optional): the number of buffers to sample.
if ``sample_from_all=True``, this has no effect, as it defaults to the
number of buffers. If ``sample_from_all=False``, buffers will be
sampled according to the probabilities ``p``. Can also
be passed to torchrl.data.replay_buffers.samplers.SamplerEnsemble`
if the buffer is built explicitely.
generator (torch.Generator, optional): a generator to use for sampling.
Using a dedicated generator for the replay buffer can allow a fine-grained control
over seeding, for instance keeping the global seed different but the RB seed identical
for distributed jobs.
Defaults to ``None`` (global default generator).
.. warning:: As of now, the generator has no effect on the transforms.
shared (bool, optional): whether the buffer will be shared using multiprocessing or not.
Defaults to ``False``.
Examples:
>>> from torchrl.envs import Compose, ToTensorImage, Resize, RenameTransform
>>> from torchrl.data import TensorDictReplayBuffer, ReplayBufferEnsemble, LazyMemmapStorage
>>> from tensordict import TensorDict
>>> import torch
>>> rb0 = TensorDictReplayBuffer(
... storage=LazyMemmapStorage(10),
... transform=Compose(
... ToTensorImage(in_keys=["pixels", ("next", "pixels")]),
... Resize(32, in_keys=["pixels", ("next", "pixels")]),
... RenameTransform([("some", "key")], ["renamed"]),
... ),
... )
>>> rb1 = TensorDictReplayBuffer(
... storage=LazyMemmapStorage(10),
... transform=Compose(
... ToTensorImage(in_keys=["pixels", ("next", "pixels")]),
... Resize(32, in_keys=["pixels", ("next", "pixels")]),
... RenameTransform(["another_key"], ["renamed"]),
... ),
... )
>>> rb = ReplayBufferEnsemble(
... rb0,
... rb1,
... p=[0.5, 0.5],
... transform=Resize(33, in_keys=["pixels"], out_keys=["pixels33"]),
... )
>>> print(rb)
ReplayBufferEnsemble(
storages=StorageEnsemble(
storages=(<torchrl.data.replay_buffers.storages.LazyMemmapStorage object at 0x13a2ef430>, <torchrl.data.replay_buffers.storages.LazyMemmapStorage object at 0x13a2f9310>),
transforms=[Compose(
ToTensorImage(keys=['pixels', ('next', 'pixels')]),
Resize(w=32, h=32, interpolation=InterpolationMode.BILINEAR, keys=['pixels', ('next', 'pixels')]),
RenameTransform(keys=[('some', 'key')])), Compose(
ToTensorImage(keys=['pixels', ('next', 'pixels')]),
Resize(w=32, h=32, interpolation=InterpolationMode.BILINEAR, keys=['pixels', ('next', 'pixels')]),
RenameTransform(keys=['another_key']))]),
samplers=SamplerEnsemble(
samplers=(<torchrl.data.replay_buffers.samplers.RandomSampler object at 0x13a2f9220>, <torchrl.data.replay_buffers.samplers.RandomSampler object at 0x13a2f9f70>)),
writers=WriterEnsemble(
writers=(<torchrl.data.replay_buffers.writers.TensorDictRoundRobinWriter object at 0x13a2d9b50>, <torchrl.data.replay_buffers.writers.TensorDictRoundRobinWriter object at 0x13a2f95b0>)),
batch_size=None,
transform=Compose(
Resize(w=33, h=33, interpolation=InterpolationMode.BILINEAR, keys=['pixels'])),
collate_fn=<built-in method stack of type object at 0x128648260>)
>>> data0 = TensorDict(
... {
... "pixels": torch.randint(255, (10, 244, 244, 3)),
... ("next", "pixels"): torch.randint(255, (10, 244, 244, 3)),
... ("some", "key"): torch.randn(10),
... },
... batch_size=[10],
... )
>>> data1 = TensorDict(
... {
... "pixels": torch.randint(255, (10, 64, 64, 3)),
... ("next", "pixels"): torch.randint(255, (10, 64, 64, 3)),
... "another_key": torch.randn(10),
... },
... batch_size=[10],
... )
>>> rb[0].extend(data0)
>>> rb[1].extend(data1)
>>> for _ in range(2):
... sample = rb.sample(10)
... assert sample["next", "pixels"].shape == torch.Size([2, 5, 3, 32, 32])
... assert sample["pixels"].shape == torch.Size([2, 5, 3, 32, 32])
... assert sample["pixels33"].shape == torch.Size([2, 5, 3, 33, 33])
... assert sample["renamed"].shape == torch.Size([2, 5])
"""
_collate_fn_val = None
def __init__(
self,
*rbs,
storages: StorageEnsemble | None = None,
samplers: SamplerEnsemble | None = None,
writers: WriterEnsemble | None = None,
transform: "Transform" | None = None, # noqa: F821
batch_size: int | None = None,
collate_fn: Callable | None = None,
collate_fns: List[Callable] | None = None,
p: Tensor = None,
sample_from_all: bool = False,
num_buffer_sampled: int | None = None,
generator: torch.Generator | None = None,
shared: bool = False,
**kwargs,
):
if collate_fn is None:
collate_fn = _stack_anything
if rbs:
if storages is not None or samplers is not None or writers is not None:
raise RuntimeError
storages = StorageEnsemble(
*[rb._storage for rb in rbs], transforms=[rb._transform for rb in rbs]
)
samplers = SamplerEnsemble(
*[rb._sampler for rb in rbs],
p=p,
sample_from_all=sample_from_all,
num_buffer_sampled=num_buffer_sampled,
)
writers = WriterEnsemble(*[rb._writer for rb in rbs])
if collate_fns is None:
collate_fns = [rb._collate_fn for rb in rbs]
else:
rbs = None
if collate_fns is None:
collate_fns = [
_get_default_collate(storage) for storage in storages._storages
]
self._rbs = rbs
self._collate_fns = collate_fns
super().__init__(
storage=storages,
sampler=samplers,
writer=writers,
transform=transform,
batch_size=batch_size,
collate_fn=collate_fn,
generator=generator,
shared=shared,
**kwargs,
)
def _sample(self, *args, **kwargs):
sample, info = super()._sample(*args, **kwargs)
if isinstance(sample, TensorDictBase):
buffer_ids = info.get(("index", "buffer_ids"))
info.set(
("index", "buffer_ids"), expand_right(buffer_ids, sample.batch_size)
)
if isinstance(info, LazyStackedTensorDict):
for _info, _sample in zip(
info.unbind(info.stack_dim), sample.unbind(info.stack_dim)
):
_info.batch_size = _sample.batch_size
info = torch.stack(info.tensordicts, info.stack_dim)
else:
info.batch_size = sample.batch_size
sample.update(info)
return sample, info
@property
def _collate_fn(self):
def new_collate(samples):
samples = [self._collate_fns[i](sample) for (i, sample) in samples]
return self._collate_fn_val(samples)
return new_collate
@_collate_fn.setter
def _collate_fn(self, value):
self._collate_fn_val = value
_INDEX_ERROR = "Expected an index of type torch.Tensor, range, np.ndarray, int, slice or ellipsis, got {} instead."
def __getitem__(
self, index: Union[int, torch.Tensor, Tuple, np.ndarray, List, slice, Ellipsis]
) -> Any:
# accepts inputs:
# (int | 1d tensor | 1d list | 1d array | slice | ellipsis | range, int | tensor | list | array | slice | ellipsis | range)
# tensor
if isinstance(index, tuple):
if index[0] is Ellipsis:
index = (slice(None), index[1:])
rb = self[index[0]]
if len(index) > 1:
if rb is self:
# then index[0] is an ellipsis/slice(None)
sample = [
(i, storage[index[1:]])
for i, storage in enumerate(self._storage._storages)
]
return self._collate_fn(sample)
if isinstance(rb, ReplayBufferEnsemble):
new_index = (slice(None), *index[1:])
return rb[new_index]
return rb[index[1:]]
return rb
if isinstance(index, slice) and index == slice(None):
return self
if isinstance(index, (list, range, np.ndarray)):
index = torch.as_tensor(index)
if isinstance(index, torch.Tensor):
if index.ndim > 1:
raise RuntimeError(
f"Cannot index a {type(self)} with tensor indices that have more than one dimension."
)
if index.is_floating_point():
raise TypeError(
"A floating point index was recieved when an integer dtype was expected."
)
if self._rbs is not None and (
isinstance(index, int) or (not isinstance(index, slice) and len(index) == 0)
):
try:
index = int(index)
except Exception:
raise IndexError(self._INDEX_ERROR.format(type(index)))
try:
return self._rbs[index]
except IndexError:
raise IndexError(self._INDEX_ERROR.format(type(index)))
if self._rbs is not None:
if isinstance(index, torch.Tensor):
index = index.tolist()
rbs = [self._rbs[i] for i in index]
_collate_fns = [self._collate_fns[i] for i in index]
else:
try:
# slice
rbs = self._rbs[index]
_collate_fns = self._collate_fns[index]
except IndexError:
raise IndexError(self._INDEX_ERROR.format(type(index)))
p = self._sampler._p[index] if self._sampler._p is not None else None
return ReplayBufferEnsemble(
*rbs,
transform=self._transform,
batch_size=self._batch_size,
collate_fn=self._collate_fn_val,
collate_fns=_collate_fns,
sample_from_all=self._sampler.sample_from_all,
num_buffer_sampled=self._sampler.num_buffer_sampled,
p=p,
)
try:
samplers = self._sampler[index]
writers = self._writer[index]
storages = self._storage[index]
if isinstance(index, torch.Tensor):
_collate_fns = [self._collate_fns[i] for i in index.tolist()]
else:
_collate_fns = self._collate_fns[index]
p = self._sampler._p[index] if self._sampler._p is not None else None
except IndexError:
raise IndexError(self._INDEX_ERROR.format(type(index)))
return ReplayBufferEnsemble(
samplers=samplers,
writers=writers,
storages=storages,
transform=self._transform,
batch_size=self._batch_size,
collate_fn=self._collate_fn_val,
collate_fns=_collate_fns,
sample_from_all=self._sampler.sample_from_all,
num_buffer_sampled=self._sampler.num_buffer_sampled,
p=p,
)
def __len__(self):
return len(self._storage)
def __repr__(self):
storages = textwrap.indent(f"storages={self._storage}", " " * 4)
writers = textwrap.indent(f"writers={self._writer}", " " * 4)
samplers = textwrap.indent(f"samplers={self._sampler}", " " * 4)
return f"ReplayBufferEnsemble(\n{storages}, \n{samplers}, \n{writers}, \nbatch_size={self._batch_size}, \ntransform={self._transform}, \ncollate_fn={self._collate_fn_val})"