# Source code for torchrl.data.replay_buffers.utils
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# import tree
from __future__ import annotations
import contextlib
import itertools
import math
import operator
import os
import typing
from pathlib import Path
from typing import Any, Callable, Union
import numpy as np
import torch
from tensordict import (
LazyStackedTensorDict,
MemoryMappedTensor,
NonTensorData,
TensorDict,
TensorDictBase,
unravel_key,
)
from torch import Tensor
from torch.nn import functional as F
from torch.utils._pytree import LeafSpec, tree_flatten, tree_unflatten
from torchrl._utils import implement_for, logger as torchrl_logger
# Identifier used for the stored entry when a buffer contains a single tensor.
# It can be overridden with the environment variable of the same name.
SINGLE_TENSOR_BUFFER_NAME = os.environ.get(
    "SINGLE_TENSOR_BUFFER_NAME", "_-single-tensor-_"
)

# Integer-like types, both as a typing alias and as a tuple for isinstance().
INT_CLASSES_TYPING = Union[int, np.integer]
# typing.get_args does not exist on python 3.7, hence the hard-coded fallback.
INT_CLASSES = (
    typing.get_args(INT_CLASSES_TYPING)
    if hasattr(typing, "get_args")
    else (int, np.integer)
)
def _to_numpy(data: Tensor) -> np.ndarray:
    """Convert a torch tensor to a numpy array; anything else is returned as-is."""
    if not isinstance(data, torch.Tensor):
        return data
    # detach from autograd and move to host memory before the numpy conversion
    host_tensor = data.detach().cpu()
    return host_tensor.numpy()
def _to_torch(
    data: Tensor, device, pin_memory: bool = False, non_blocking: bool = False
) -> torch.Tensor:
    """Convert ``data`` to a torch tensor placed on ``device``.

    Args:
        data: a numpy scalar, numpy array, torch tensor or anything accepted
            by ``torch.as_tensor``.
        device: destination device; ``None`` leaves the tensor where it is.
        pin_memory: if ``True``, ``pin_memory()`` is called before the device
            transfer. Numpy scalars return early and are never pinned.
        non_blocking: forwarded to ``Tensor.to``.

    Returns:
        The resulting tensor.
    """
    # numpy scalars take a short path: no pinning, no extra ``.to`` call
    if isinstance(data, np.generic):
        return torch.as_tensor(data, device=device)

    if isinstance(data, np.ndarray):
        tensor = torch.from_numpy(data)
    elif isinstance(data, Tensor):
        tensor = data
    else:
        tensor = torch.as_tensor(data, device=device)

    if pin_memory:
        tensor = tensor.pin_memory()
    if device is None:
        return tensor
    return tensor.to(device, non_blocking=non_blocking)
def pin_memory_output(fun) -> Callable:
    """Calls pin_memory on outputs of decorated function if they have such method.

    The decorated callable must be a method of an object exposing a
    ``_pin_memory`` attribute: when that attribute is falsy the output is
    returned untouched. Tuple outputs are pinned element-wise and returned as
    a plain tuple; any other output is pinned directly.

    Args:
        fun: the method to wrap.

    Returns:
        The wrapped method, carrying the name and docstring of ``fun``.
    """
    import functools

    # wraps keeps __name__/__doc__ of the decorated method for introspection
    @functools.wraps(fun)
    def decorated_fun(self, *args, **kwargs):
        output = fun(self, *args, **kwargs)
        if not self._pin_memory:
            return output
        if isinstance(output, tuple):
            return tuple(_pin_memory(_output) for _output in output)
        return _pin_memory(output)

    return decorated_fun
def _pin_memory(output: Any) -> Any:
    """Pin ``output`` if it has a ``pin_memory`` method and lives on CPU.

    Objects without a ``pin_memory`` attribute, or located on another device,
    are returned unchanged.
    """
    can_pin = hasattr(output, "pin_memory") and output.device == torch.device("cpu")
    if not can_pin:
        return output
    return output.pin_memory()
def _reduce(
    tensor: torch.Tensor, reduction: str, dim: int | None = None
) -> Union[float, torch.Tensor]:
    """Reduces a tensor given the reduction method.

    Args:
        tensor: the tensor to reduce.
        reduction: one of ``"max"``, ``"min"``, ``"mean"``, ``"median"`` or
            ``"sum"``.
        dim: dimension to reduce over. If ``None``, the whole tensor is
            reduced and a Python scalar is returned.

    Returns:
        A Python scalar when ``dim`` is ``None``, otherwise the reduced
        tensor (values only: indices returned by max/min/median are dropped).

    Raises:
        NotImplementedError: if ``reduction`` is not a supported method.
    """
    if reduction not in ("max", "min", "mean", "median", "sum"):
        raise NotImplementedError(f"Unknown reduction method {reduction}")
    reducer = getattr(tensor, reduction)
    if dim is None:
        # max/min/median only define ``dim`` as an int: passing ``dim=None``
        # is not a valid overload, so the global reduction is called bare.
        return reducer().item()
    result = reducer(dim=dim)
    if isinstance(result, tuple):
        # (values, indices) named tuple returned by max/min/median
        result = result[0]
    return result
def _is_int(index):
    """Tell whether ``index`` is an integer or a zero-dimensional array/tensor."""
    if isinstance(index, INT_CLASSES):
        return True
    # arrays and tensors count as "int" only when they hold a single scalar
    return isinstance(index, (np.ndarray, torch.Tensor)) and index.ndim == 0
class TED2Flat:
    """A storage saving hook to serialize TED data in a compact format.

    Entries found under ``"next"`` that are neither done nor reward entries
    are merged with their root counterpart in a single memory-mapped tensor
    holding one extra row per trajectory. Done and reward entries are read
    from ``"next"`` and written once at the root of the output.

    Args:
        done_key (NestedKey, optional): the key where the done states should be read.
            Defaults to ``("next", "done")``.
        shift_key (NestedKey, optional): the key where the shift will be written.
            Defaults to "shift".
        is_full_key (NestedKey, optional): the key where the is_full attribute will be written.
            Defaults to "is_full".
        done_keys (Tuple[NestedKey], optional): a tuple of nested keys indicating the done entries.
            Defaults to ("done", "truncated", "terminated")
        reward_keys (Tuple[NestedKey], optional): a tuple of nested keys indicating the reward entries.
            Defaults to ("reward",)

    Examples:
        >>> import tempfile
        >>>
        >>> from tensordict import TensorDict
        >>>
        >>> from torchrl.collectors import SyncDataCollector
        >>> from torchrl.data import ReplayBuffer, TED2Flat, LazyMemmapStorage
        >>> from torchrl.envs import GymEnv
        >>> import torch
        >>>
        >>> env = GymEnv("CartPole-v1")
        >>> env.set_seed(0)
        >>> torch.manual_seed(0)
        >>> collector = SyncDataCollector(env, policy=env.rand_step, total_frames=200, frames_per_batch=200)
        >>> rb = ReplayBuffer(storage=LazyMemmapStorage(200))
        >>> rb.register_save_hook(TED2Flat())
        >>> with tempfile.TemporaryDirectory() as tmpdir:
        ...     for i, data in enumerate(collector):
        ...         rb.extend(data)
        ...         rb.dumps(tmpdir)
        ...     # load the data to represent it
        ...     td = TensorDict.load(tmpdir + "/storage/")
        ...     print(td)
        TensorDict(
            fields={
                action: MemoryMappedTensor(shape=torch.Size([200, 2]), device=cpu, dtype=torch.int64, is_shared=True),
                collector: TensorDict(
                    fields={
                        traj_ids: MemoryMappedTensor(shape=torch.Size([200]), device=cpu, dtype=torch.int64, is_shared=True)},
                    batch_size=torch.Size([]),
                    device=cpu,
                    is_shared=False),
                done: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=True),
                observation: MemoryMappedTensor(shape=torch.Size([220, 4]), device=cpu, dtype=torch.float32, is_shared=True),
                reward: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.float32, is_shared=True),
                terminated: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=True),
                truncated: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=True)},
            batch_size=torch.Size([]),
            device=cpu,
            is_shared=False)

    """

    # Both attributes are expected to be assigned (through the properties
    # below) before the hook is called.
    _shift: int = None
    _is_full: bool = None

    def __init__(
        self,
        done_key=("next", "done"),
        shift_key="shift",
        is_full_key="is_full",
        done_keys=("done", "truncated", "terminated"),
        reward_keys=("reward",),
    ):
        self.done_key = done_key
        self.shift_key = shift_key
        self.is_full_key = is_full_key
        self.done_keys = {unravel_key(key) for key in done_keys}
        self.reward_keys = {unravel_key(key) for key in reward_keys}

    @property
    def shift(self):
        """The shift (cursor position) used when serializing the data."""
        return self._shift

    @shift.setter
    def shift(self, value: int):
        self._shift = value

    @property
    def is_full(self):
        """Whether the storage being serialized is full."""
        return self._is_full

    @is_full.setter
    def is_full(self, value: bool):
        self._is_full = value

    def __call__(self, data: TensorDictBase, path: Path = None):
        """Serialize ``data`` to a flat, memory-mapped tensordict.

        Args:
            data: the TED-formatted data to serialize.
            path: location passed to ``memmap_`` for the output tensordict.

        Returns:
            The memory-mapped output tensordict. It also stores ``is_full``,
            ``shift`` and the shape of ``data`` as non-tensor entries.
        """
        # Get the done state
        shift = self.shift
        is_full = self.is_full

        # Create an output storage
        output = TensorDict()
        output.set_non_tensor(self.is_full_key, is_full)
        output.set_non_tensor(self.shift_key, shift)
        output.set_non_tensor("_storage_shape", tuple(data.shape))
        output.memmap_(path)

        # Preallocate the output
        done = data.get(self.done_key).squeeze(-1).clone()
        if not is_full:
            # shift is the cursor place
            done[shift - 1] = True
        else:
            done = done.roll(-shift, dims=0)
            done[-1] = True
        ntraj = done.sum()

        # Keys that are only read from "next" (computed once, reused below)
        next_only_keys = self.done_keys.union(self.reward_keys)
        next_keys = set(data.get("next").keys(True, True))

        # Get the keys that require extra storage
        keys_to_expand = next_keys - next_only_keys

        total_keys = set(data.exclude("next").keys(True, True)).union(next_keys)

        len_with_offset = data.numel() + ntraj  # + done[0].numel()
        for key in total_keys:
            if key in next_only_keys:
                entry = data.get(("next", key))
            else:
                entry = data.get(key)

            # expanded entries get one extra row per trajectory
            first_dim = len_with_offset if key in keys_to_expand else data.numel()
            shape = torch.Size([first_dim, *entry.shape[data.ndim :]])
            output.make_memmap(key, shape=shape, dtype=entry.dtype)

        if data.ndim == 1:
            return self._call(
                data=data,
                output=output,
                is_full=is_full,
                shift=shift,
                done=done,
                total_keys=total_keys,
                keys_to_expand=keys_to_expand,
            )

        with data.flatten(1, -1) if data.ndim > 2 else contextlib.nullcontext(
            data
        ) as data_flat:
            if data.ndim > 2:
                done = done.flatten(1, -1)
            traj_per_dim = done.sum(0)
            nsteps = data_flat.shape[0]

            start = 0
            start_with_offset = start
            stop_with_offset = 0
            stop = 0

            # Each element along dim 1 is written in its own contiguous
            # region of the flat output.
            for data_slice, done_slice, traj_for_dim in zip(
                data_flat.unbind(1), done.unbind(1), traj_per_dim
            ):
                stop_with_offset = stop_with_offset + nsteps + traj_for_dim
                cur_slice_offset = slice(start_with_offset, stop_with_offset)
                start_with_offset = stop_with_offset

                stop = stop + data.shape[0]
                cur_slice = slice(start, stop)
                start = stop

                # default arguments bind the current slices to the closure
                def _index(
                    key,
                    val,
                    keys_to_expand=keys_to_expand,
                    cur_slice=cur_slice,
                    cur_slice_offset=cur_slice_offset,
                ):
                    if key in keys_to_expand:
                        return val[cur_slice_offset]
                    return val[cur_slice]

                out_slice = output.named_apply(_index, nested_keys=True)
                self._call(
                    data=data_slice,
                    output=out_slice,
                    is_full=is_full,
                    shift=shift,
                    done=done_slice,
                    total_keys=total_keys,
                    keys_to_expand=keys_to_expand,
                )
        return output

    def _call(self, *, data, output, is_full, shift, done, total_keys, keys_to_expand):
        """Write a one-dimensional ``data`` into the preallocated ``output``."""
        # capture for each item in data where the observation should be written
        idx = torch.arange(data.shape[0])
        idx_done = (idx + done.cumsum(0))[done]
        idx += torch.nn.functional.pad(done, [1, 0])[:-1].cumsum(0)

        next_only_keys = self.done_keys.union(self.reward_keys)
        # A full storage with a zero shift needs no rolling: a plain copy is
        # equivalent, and _roll_inplace cannot split the data on a 0 shift.
        needs_roll = bool(is_full) and shift != 0

        for key in total_keys:
            if key in next_only_keys:
                entry = data.get(("next", key))
            else:
                entry = data.get(key)

            mmap = output.get(key)
            if key in keys_to_expand:
                shifted_next = data.get(("next", key))
                if needs_roll:
                    _roll_inplace(entry, shift=-shift, out=mmap, index_dest=idx)
                    _roll_inplace(
                        shifted_next,
                        shift=-shift,
                        out=mmap,
                        index_dest=idx_done,
                        index_source=done,
                    )
                else:
                    mmap[idx] = entry
                    mmap[idx_done] = shifted_next[done]
            elif needs_roll:
                _roll_inplace(entry, shift=-shift, out=mmap)
            else:
                mmap.copy_(entry)
        return output
class Flat2TED:
    """A storage loading hook to deserialize flattened TED data to TED format.

    Args:
        done_key (NestedKey, optional): the key where the done states should be read.
            Defaults to ``("next", "done")``.
        shift_key (NestedKey, optional): the key where the shift will be written.
            Defaults to "shift".
        is_full_key (NestedKey, optional): the key where the is_full attribute will be written.
            Defaults to "is_full".
        done_keys (Tuple[NestedKey], optional): a tuple of nested keys indicating the done entries.
            Defaults to ("done", "truncated", "terminated")
        reward_keys (Tuple[NestedKey], optional): a tuple of nested keys indicating the reward entries.
            Defaults to ("reward",)

    Examples:
        >>> import tempfile
        >>>
        >>> from tensordict import TensorDict
        >>>
        >>> from torchrl.collectors import SyncDataCollector
        >>> from torchrl.data import ReplayBuffer, TED2Flat, LazyMemmapStorage, Flat2TED
        >>> from torchrl.envs import GymEnv
        >>> import torch
        >>>
        >>> env = GymEnv("CartPole-v1")
        >>> env.set_seed(0)
        >>> torch.manual_seed(0)
        >>> collector = SyncDataCollector(env, policy=env.rand_step, total_frames=200, frames_per_batch=200)
        >>> rb = ReplayBuffer(storage=LazyMemmapStorage(200))
        >>> rb.register_save_hook(TED2Flat())
        >>> with tempfile.TemporaryDirectory() as tmpdir:
        ...     for i, data in enumerate(collector):
        ...         rb.extend(data)
        ...         rb.dumps(tmpdir)
        ...     # load the data to represent it
        ...     td = TensorDict.load(tmpdir + "/storage/")
        ...
        ...     rb_load = ReplayBuffer(storage=LazyMemmapStorage(200))
        ...     rb_load.register_load_hook(Flat2TED())
        ...     rb_load.load(tmpdir)
        ...     print("storage after loading", rb_load[:])
        ...     assert (rb[:] == rb_load[:]).all()
        storage after loading TensorDict(
            fields={
                action: MemoryMappedTensor(shape=torch.Size([200, 2]), device=cpu, dtype=torch.int64, is_shared=False),
                collector: TensorDict(
                    fields={
                        traj_ids: MemoryMappedTensor(shape=torch.Size([200]), device=cpu, dtype=torch.int64, is_shared=False)},
                    batch_size=torch.Size([200]),
                    device=cpu,
                    is_shared=False),
                done: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                next: TensorDict(
                    fields={
                        done: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                        observation: MemoryMappedTensor(shape=torch.Size([200, 4]), device=cpu, dtype=torch.float32, is_shared=False),
                        reward: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.float32, is_shared=False),
                        terminated: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                        truncated: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
                    batch_size=torch.Size([200]),
                    device=cpu,
                    is_shared=False),
                observation: MemoryMappedTensor(shape=torch.Size([200, 4]), device=cpu, dtype=torch.float32, is_shared=False),
                terminated: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                truncated: MemoryMappedTensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
            batch_size=torch.Size([200]),
            device=cpu,
            is_shared=False)

    """

    def __init__(
        self,
        done_key="done",
        shift_key="shift",
        is_full_key="is_full",
        done_keys=("done", "truncated", "terminated"),
        reward_keys=("reward",),
    ):
        self.done_key = done_key
        self.shift_key = shift_key
        self.is_full_key = is_full_key
        self.done_keys = {unravel_key(key) for key in done_keys}
        self.reward_keys = {unravel_key(key) for key in reward_keys}

    def __call__(self, data: TensorDictBase, out: TensorDictBase = None):
        """Rebuild TED-formatted data from its flat representation.

        Args:
            data: the flat tensordict. The storage shape, shift and is_full
                metadata are read from its non-tensor entries when present.
            out: optional destination tensordict. If ``None``, a new one is
                created.

        Returns:
            The TED-formatted tensordict.

        Raises:
            RuntimeError: if ``out`` is ``None`` and the stored shape has more
                than 2 dimensions.
        """
        _storage_shape = data.get_non_tensor("_storage_shape", default=None)
        if isinstance(_storage_shape, int):
            _storage_shape = torch.Size([_storage_shape])
        shift = data.get_non_tensor(self.shift_key, default=None)
        is_full = data.get_non_tensor(self.is_full_key, default=None)

        if _storage_shape is None or len(_storage_shape) <= 1:
            # One-dimensional (or metadata-free) data is handled by _call,
            # which copes with missing shift / is_full values on its own.
            return self._call(
                data=data,
                out=out,
                is_full=is_full,
                shift=shift,
                _storage_shape=_storage_shape,
            )

        # The done mask is only needed to split multi-dimensional storages,
        # so it is built here rather than before the shape check.
        done = (
            data.get("done")
            .reshape((*_storage_shape[1:], -1))
            .contiguous()
            .permute(-1, *range(0, len(_storage_shape) - 1))
            .clone()
        )
        if not is_full:
            # shift is the cursor place
            done[shift - 1] = True
        else:
            done[-1] = True

        # iterate over data and allocate
        if out is None:
            out = TensorDict(batch_size=_storage_shape)
            for i in range(1, out.ndim):
                if i >= 2:
                    # FLattening the lazy stack will make the data unavailable - we need to find a way to make this
                    # possible.
                    raise RuntimeError(
                        "Checkpointing an uninitialized buffer with more than 2 dimensions is currently not supported. "
                        "Please file an issue on GitHub to ask for this feature!"
                    )
                out_list = [
                    out._get_sub_tensordict((slice(None),) * i + (j,))
                    for j in range(out.shape[i])
                ]
                out = LazyStackedTensorDict(*out_list, stack_dim=i)

        # Create a function that reads slices of the input data
        with out.flatten(1, -1) if out.ndim > 2 else contextlib.nullcontext(
            out
        ) as out_flat:
            nsteps = done.shape[0]
            n_elt_batch = done.shape[1:].numel()
            traj_per_dim = done.sum(0)

            start = 0
            start_with_offset = start
            stop_with_offset = 0
            stop = 0

            for out_unbound, traj_for_dim in zip(out_flat.unbind(-1), traj_per_dim):
                stop_with_offset = stop_with_offset + nsteps + traj_for_dim
                cur_slice_offset = slice(start_with_offset, stop_with_offset)
                start_with_offset = stop_with_offset

                stop = stop + nsteps
                cur_slice = slice(start, stop)
                start = stop

                # default arguments bind the current slices to the closure
                def _index(
                    key,
                    val,
                    cur_slice=cur_slice,
                    nsteps=nsteps,
                    n_elt_batch=n_elt_batch,
                    cur_slice_offset=cur_slice_offset,
                ):
                    # entries with a different length carry extra rows
                    if val.shape[0] != (nsteps * n_elt_batch):
                        return val[cur_slice_offset]
                    return val[cur_slice]

                data_slice = data.named_apply(
                    _index, nested_keys=True, batch_size=[]
                )
                self._call(
                    data=data_slice,
                    out=out_unbound,
                    is_full=is_full,
                    shift=shift,
                    _storage_shape=_storage_shape,
                )
        return out

    def _call(self, *, data, out, _storage_shape, shift, is_full):
        """Rebuild a one-dimensional TED tensordict from flat ``data``."""
        done = data.get(self.done_key)
        done = done.clone()

        nsteps = done.shape[0]

        # capture for each item in data where the observation should be written
        idx = torch.arange(done.shape[0])
        padded_done = F.pad(done.squeeze(-1), [1, 0])
        root_idx = idx + padded_done[:-1].cumsum(0)
        next_idx = root_idx + 1

        if out is None:
            out = TensorDict(batch_size=[nsteps])

        # Rolling by 0 is the identity, and _roll_inplace cannot split the
        # data on a zero shift, so that case takes the plain-copy path.
        needs_roll = bool(is_full) and shift is not None and shift != 0

        def maybe_roll(entry, out=None):
            # Returns None when the result was written in-place in ``out``
            if needs_roll:
                if out is not None:
                    _roll_inplace(entry, shift=shift, out=out)
                    return
                return entry.roll(shift, dims=0)
            if out is not None:
                out.copy_(entry)
                return
            return entry

        root_idx = maybe_roll(root_idx)
        next_idx = maybe_roll(next_idx)
        if not is_full:
            next_idx = next_idx[:-1]

        next_only_keys = self.done_keys.union(self.reward_keys)
        for key, entry in data.items(True, True):
            if entry.shape[0] == nsteps:
                if key in next_only_keys:
                    # reward entries only live under "next"
                    if key not in self.reward_keys and key not in out.keys(
                        True, True
                    ):
                        # Create a done state at the root full of 0s
                        out.set(key, torch.zeros_like(entry), inplace=True)
                    entry = maybe_roll(entry, out=out.get(("next", key), None))
                    if entry is not None:
                        out.set(("next", key), entry, inplace=True)
                else:
                    # action and similar
                    entry = maybe_roll(entry, out=out.get(key, default=None))
                    if entry is not None:
                        # then out is not locked
                        out.set(key, entry, inplace=True)
            else:
                # expanded entry: holds both the root and the "next" values
                dest_next = out.get(("next", key), None)
                if dest_next is not None:
                    if not is_full:
                        dest_next = dest_next[:-1]
                    dest_next.copy_(entry[next_idx])
                elif not is_full:
                    val = entry[next_idx]
                    # pad with zeros for the last, missing step
                    val = torch.cat([val, torch.zeros_like(val[:1])])
                    out.set(("next", key), val, inplace=True)
                else:
                    out.set(("next", key), entry[next_idx], inplace=True)

                dest = out.get(key, None)
                if dest is not None:
                    dest.copy_(entry[root_idx])
                else:
                    out.set(key, entry[root_idx], inplace=True)
        return out
class TED2Nested(TED2Flat):
    """Converts a TED-formatted dataset into a tensordict populated with nested tensors where each row is a trajectory.

    Raises:
        ValueError: at construction, if the installed torch does not expose
            ``torch._nested_compute_contiguous_strides_offsets``.
    """

    _shift: int = None
    _is_full: bool = None

    def __init__(self, *args, **kwargs):
        if not hasattr(torch, "_nested_compute_contiguous_strides_offsets"):
            raise ValueError(
                f"Unsupported torch version {torch.__version__}. "
                f"torch>=2.4 is required for {type(self).__name__} to be used."
            )
        super().__init__(*args, **kwargs)

    @staticmethod
    def _nested_shape(lengths, val):
        """Stack per-trajectory lengths with the trailing dims of ``val``."""
        trailing = torch.tensor(val.shape[1:], dtype=torch.long).repeat(
            lengths.numel(), 1
        )
        return torch.cat([lengths, trailing], -1)

    def __call__(self, data: TensorDictBase, path: Path = None):
        """Serialize ``data`` with one row per trajectory.

        The data is first flattened by :class:`TED2Flat`; the resulting
        storages are then re-registered under per-trajectory shapes.

        Args:
            data: the TED-formatted data to serialize.
            path: location passed to ``memmap_`` for the output tensordict.

        Returns:
            A memory-mapped tensordict whose batch size is the number of
            trajectories.
        """
        data = super().__call__(data, path=path)

        shift = self.shift
        is_full = self.is_full
        storage_shape = data.get_non_tensor("_storage_shape", (-1,))
        # place time at the end
        storage_shape = (*storage_shape[1:], storage_shape[0])

        done = data.get("done")
        done = done.squeeze(-1).clone()
        if not is_full:
            done.view(storage_shape)[..., shift - 1] = True
        # the last step always closes a trajectory
        done.view(storage_shape)[..., -1] = True

        ntraj = done.sum()

        nz = done.nonzero(as_tuple=True)[0]
        traj_lengths = torch.cat([nz[:1] + 1, nz.diff()])

        # Entries whose length differs from done's carry extra rows.
        # An explicit loop also works when there is nothing to split.
        keys_to_expand = []
        keys_to_keep = []
        for key, val in data.items(True, True):
            if val.shape[0] != done.shape[0]:
                keys_to_expand.append(key)
            else:
                keys_to_keep.append(key)

        out = TensorDict(batch_size=[ntraj])
        out.update(dict(data.non_tensor_items()))

        out.memmap_(path)

        traj_lengths = traj_lengths.unsqueeze(-1)
        if not is_full:
            # Increment by one only the trajectories that are not terminal
            traj_lengths_expand = traj_lengths + (
                traj_lengths.cumsum(0) % storage_shape[-1] != 0
            )
        else:
            traj_lengths_expand = traj_lengths + 1

        for key in keys_to_expand:
            val = data.get(key)
            # This works because the storage location is the same as the previous one - no copy is done
            # but a new shape is written
            out.make_memmap_from_storage(
                key,
                val.untyped_storage(),
                dtype=val.dtype,
                shape=self._nested_shape(traj_lengths_expand, val),
            )
        for key in keys_to_keep:
            val = data.get(key)
            out.make_memmap_from_storage(
                key,
                val.untyped_storage(),
                dtype=val.dtype,
                shape=self._nested_shape(traj_lengths, val),
            )
        return out
class Nested2TED(Flat2TED):
    """Converts a nested tensordict where each row is a trajectory into the TED format."""

    def __call__(self, data, out: TensorDictBase = None):
        """Flatten the nested entries of ``data`` and defer to :class:`Flat2TED`.

        Args:
            data: the nested tensordict to convert.
            out: optional destination tensordict, forwarded to the parent.

        Returns:
            The TED-formatted tensordict built by ``Flat2TED.__call__``.
        """

        # Get a flat representation of data
        def flatten_het_dim(tensor):
            # Dims 0 and 1 are merged into one; the remaining dims are kept.
            # The tensor is rebuilt from the raw storage of the input.
            shape = [tensor.size(i) for i in range(2, tensor.ndim)]
            tensor = torch.tensor(tensor.untyped_storage(), dtype=tensor.dtype).view(
                -1, *shape
            )
            return tensor

        data = data.apply(flatten_het_dim, batch_size=[])
        data.auto_batch_size_()
        return super().__call__(data, out=out)
class H5Split(TED2Flat):
    """Splits a dataset prepared with TED2Nested into a TensorDict where each trajectory is stored as views on their parent nested tensors."""

    _shift: int = None
    _is_full: bool = None

    def __call__(self, data):
        """Split ``data`` along its first dimension.

        Args:
            data: the tensordict to split, one row per trajectory.

        Returns:
            A tensordict with one ``"traj_<index>"`` entry per row (indices
            are zero-padded to a common width), plus the non-tensor entries
            of ``data``.
        """
        n_traj = data.shape[0]
        # Width of the largest index. Counting digits rather than taking
        # log10 keeps an empty dataset from raising a math domain error.
        nzeros = len(str(max(n_traj - 1, 0)))
        result = TensorDict(
            {
                f"traj_{str(i).zfill(nzeros)}": _data
                for i, _data in enumerate(data.filter_non_tensor_data().unbind(0))
            }
        ).update(dict(data.non_tensor_items()))
        return result
class H5Combine:
    """Combines trajectories in a persistent tensordict into a single standing tensordict stored in filesystem."""

    def __call__(self, data, out=None):
        """Gather every ``traj*`` entry of ``data`` into one tensordict.

        Args:
            data: tensordict whose entries starting with ``"traj"`` are the
                trajectories; all other entries are treated as metadata.
            out: currently ignored.

        Returns:
            A memory-mapped tensordict holding the metadata as non-tensor
            data and, for each leaf, the trajectories written row by row.
            If ``data`` has no trajectory, only the metadata is returned.
        """
        # TODO: this load the entire H5 in memory, which can be problematic
        # Ideally we would want to load it on a memmap tensordict
        # We currently ignore out in this call but we should leverage that
        values = [val for key, val in data.items() if key.startswith("traj")]
        metadata_keys = [key for key in data.keys() if not key.startswith("traj")]
        result = TensorDict({key: NonTensorData(data[key]) for key in metadata_keys})

        # Create a memmap in file system (no files associated)
        result.memmap_()

        if not values:
            # nothing to combine: avoid indexing an empty list below
            return result

        first, others = values[0], values[1:]

        # Create each entry
        def initialize(key, *x):
            # one shape row per trajectory
            result.make_memmap(
                key,
                shape=torch.stack([torch.tensor(_x.shape) for _x in x]),
                dtype=x[0].dtype,
            )
            return

        first.named_apply(
            initialize,
            *others,
            nested_keys=True,
            batch_size=[],
            filter_empty=True,
        )

        # Populate the entries
        def populate(key, *x):
            dest = result.get(key)
            for i, _x in enumerate(x):
                dest[i].copy_(_x)

        first.named_apply(
            populate,
            *others,
            nested_keys=True,
            batch_size=[],
            filter_empty=True,
        )
        return result
@implement_for("torch", "2.3", None)
def _path2str(path, default_name=None):
    """Build a ``"/"``-separated string from a pytree key path.

    Args:
        path: a pytree key (``MappingKey`` / ``SequenceKey``) or a tuple of them.
        default_name: name returned for an empty path. Defaults to
            ``SINGLE_TENSOR_BUFFER_NAME``.

    Returns:
        The string form of ``path``, or ``None`` for a key of any other type.

    Raises:
        ValueError: if a mapping key is not an int, str or bytes.
        RuntimeError: if a mapping key is equal to the default name.
    """
    # Uses the Keys defined in pytree to build a path
    from torch.utils._pytree import MappingKey, SequenceKey

    fallback = SINGLE_TENSOR_BUFFER_NAME if default_name is None else default_name
    if not path:
        return fallback
    if isinstance(path, tuple):
        parts = (_path2str(_sub, default_name=fallback) for _sub in path)
        return "/".join(parts)
    if isinstance(path, SequenceKey):
        return str(path.idx)
    if isinstance(path, MappingKey):
        key = path.key
        if not isinstance(key, (int, str, bytes)):
            raise ValueError("Values must be of type int, str or bytes in PyTree maps.")
        name = str(key)
        if name != fallback:
            return name
        raise RuntimeError(
            "A tensor had the same identifier as the default name used when the buffer contains "
            f"a single tensor (name={fallback}). This behavior is not allowed. Please rename your "
            f"tensor in the map/dict or set a new default name with the environment variable SINGLE_TENSOR_BUFFER_NAME."
        )
    return None
@implement_for("torch", None, "2.3")
def _path2str(path, default_name=None):  # noqa: F811
    """Placeholder for torch versions where pytree key paths are unavailable.

    Raises:
        RuntimeError: always.
    """
    # The working implementation imports MappingKey / SequenceKey from
    # torch.utils._pytree and is registered for torch 2.3 onwards.
    raise RuntimeError(
        "_path2str relies on torch.utils._pytree key paths and requires torch>=2.3."
    )
def _save_pytree_common(tensor_path, path, tensor, metadata):
    """Write one pytree leaf to disk and record it in ``metadata``.

    Args:
        tensor_path: ``"/"``-separated location of the leaf in the pytree.
        path: root directory (path-like supporting ``/``) of the serialized
            structure.
        tensor: the leaf tensor to save.
        metadata: dict updated in-place with the dtype and shape of the leaf,
            under the dotted form of ``tensor_path``.

    Raises:
        KeyError: if another leaf was already recorded under the same key.
    """
    # str.replace returns a new string: the result must be kept, otherwise a
    # dot inside a name would later be read as a separator in the dotted key.
    tensor_path = tensor_path.replace(".", "_<dot>_")
    total_tensor_path = path / (tensor_path + ".memmap")
    if os.path.exists(total_tensor_path):
        # overwrite the content of the existing file
        MemoryMappedTensor.from_filename(
            shape=tensor.shape,
            filename=total_tensor_path,
            dtype=tensor.dtype,
        ).copy_(tensor)
    else:
        os.makedirs(total_tensor_path.parent, exist_ok=True)
        MemoryMappedTensor.from_tensor(
            tensor,
            filename=total_tensor_path,
            copy_existing=True,
            copy_data=True,
        )
    key = tensor_path.replace("/", ".")
    if key in metadata:
        raise KeyError(
            "At least two values have conflicting representations in "
            f"the data structure to be serialized: {key}."
        )
    metadata[key] = {
        "dtype": str(tensor.dtype),
        "shape": list(tensor.shape),
    }
@implement_for("torch", "2.3", None)
def _save_pytree(_storage, metadata, path):
    """Save every leaf of the ``_storage`` pytree under ``path``.

    ``metadata`` is updated in-place with one entry per saved leaf.
    """
    from torch.utils._pytree import tree_map_with_path

    def save_tensor(
        tensor_path: tuple, tensor: torch.Tensor, metadata=metadata, path=path
    ):
        # turn the key path into a string before handing over to the common code
        leaf_name = _path2str(tensor_path)
        _save_pytree_common(leaf_name, path, tensor, metadata)

    # called for its side effects only: the mapped tree is discarded
    tree_map_with_path(save_tensor, _storage)
@implement_for("torch", None, "2.3")
def _save_pytree(_storage, metadata, path):  # noqa: F811
    """Save every leaf of the ``_storage`` pytree under ``path``.

    Leaf locations are derived from the tree spec. ``metadata`` is updated
    in-place with one entry per saved leaf.
    """
    leaves, spec = tree_flatten(_storage)
    leaf_paths = _get_paths(spec)
    for leaf, leaf_path in zip(leaves, leaf_paths):
        _save_pytree_common(leaf_path, path, leaf, metadata)
def _get_paths(spec, cumulpath=""):
    """Yield the ``"/"``-separated path of every leaf described by ``spec``.

    Args:
        spec: a pytree spec.
        cumulpath: path accumulated so far (used by the recursion).

    Yields:
        One string per leaf. A leaf reached with an empty path yields
        ``SINGLE_TENSOR_BUFFER_NAME``.
    """
    # alternative way to build a path without the keys
    if isinstance(spec, LeafSpec):
        yield cumulpath or SINGLE_TENSOR_BUFFER_NAME
    # no early return: execution falls through to the children loop below
    labels = spec.context
    child_specs = spec.children_specs
    if labels is None:
        # no context available: children are identified by their position
        labels = range(len(child_specs))
    for label, child in zip(labels, child_specs):
        child_path = f"{cumulpath}/{label!s}" if cumulpath else str(label)
        yield from _get_paths(child, child_path)
def _init_pytree_common(tensor_path, scratch_dir, max_size_fn, tensor):
    """Allocate an empty memory-mapped tensor for one pytree leaf.

    Args:
        tensor_path: ``"/"``-separated location of the leaf in the pytree.
        scratch_dir: directory where the file is created. If ``None``, no
            filename is passed to the memory-mapped tensor.
        max_size_fn: callable mapping the shape of ``tensor`` to the shape of
            the storage to allocate.
        tensor: example leaf, used for its shape and dtype.

    Returns:
        The newly created ``MemoryMappedTensor``.

    Raises:
        RuntimeError: if the target file already exists.
    """
    # str.replace returns a new string: keep it so that dots are escaped
    tensor_path = tensor_path.replace(".", "_<dot>_")
    if scratch_dir is not None:
        total_tensor_path = Path(scratch_dir) / (tensor_path + ".memmap")
        if os.path.exists(total_tensor_path):
            raise RuntimeError(
                f"The storage of tensor {total_tensor_path} already exists. "
                f"To load an existing replay buffer, use storage.loads. "
                f"Choose a different path to store your buffer or delete the existing files."
            )
        os.makedirs(total_tensor_path.parent, exist_ok=True)
    else:
        total_tensor_path = None
    out = MemoryMappedTensor.empty(
        shape=max_size_fn(tensor.shape),
        filename=total_tensor_path,
        dtype=tensor.dtype,
    )
    try:
        # The size reported is that of the file just created (``out``),
        # not of the example tensor.
        filesize = os.path.getsize(out.filename) / 1024 / 1024
        torchrl_logger.debug(
            f"The storage was created in {out.filename} and occupies {filesize} Mb of storage."
        )
    except (RuntimeError, AttributeError, TypeError, OSError):
        # best-effort logging only: a storage without an accessible file
        # must not make the allocation fail
        pass
    return out
@implement_for("torch", "2.3", None)
def _init_pytree(scratch_dir, max_size_fn, data):
    """Create a pytree of empty memory-mapped tensors with the structure of ``data``."""
    from torch.utils._pytree import tree_map_with_path

    # If not a tensorclass/tensordict, it must be a tensor(-like) or a PyTree
    # if Tensor, we just create a MemoryMappedTensor of the desired shape, device and dtype
    def save_tensor(tensor_path: tuple, tensor: torch.Tensor):
        leaf_name = _path2str(tensor_path)
        return _init_pytree_common(leaf_name, scratch_dir, max_size_fn, tensor)

    return tree_map_with_path(save_tensor, data)
@implement_for("torch", None, "2.3")
def _init_pytree(scratch_dir, max_size, data):  # noqa: F811
    """Create a pytree of empty memory-mapped tensors with the structure of ``data``.

    Leaf locations are derived from the tree spec.
    """
    leaves, spec = tree_flatten(data)
    leaf_paths = list(_get_paths(spec))
    # If not a tensorclass/tensordict, it must be a tensor(-like) or a PyTree
    # if Tensor, we just create a MemoryMappedTensor of the desired shape, device and dtype
    buffers = [
        _init_pytree_common(leaf_path, scratch_dir, max_size, leaf)
        for leaf, leaf_path in zip(leaves, leaf_paths)
    ]
    return tree_unflatten(buffers, spec)
def _roll_inplace(tensor, shift, out, index_dest=None, index_source=None):
    """Write ``tensor`` rolled by ``shift`` along dim 0 into ``out``.

    The roll is done in two chunks to avoid materializing the rolled tensor.

    Args:
        tensor: the source tensor.
        shift: number of positions to roll by (same sign convention as
            ``torch.roll``). A zero shift is a plain copy.
        out: destination tensor, modified in-place.
        index_dest: optional indices of ``out`` where the rolled elements are
            written, in rolled order. If ``None``, ``out`` is filled
            contiguously.
        index_source: optional mask selecting which rolled elements are
            written (appears to be expressed in rolled order - confirm with
            callers).

    Returns:
        ``out``.
    """
    if shift == 0:
        # ``tensor[:-0]`` is empty while ``out[-0:]`` is the whole of ``out``:
        # the two-chunk logic below cannot express the identity roll.
        source = tensor if index_source is None else tensor[index_source]
        if index_dest is not None:
            out[index_dest] = source
        else:
            out.copy_(source)
        return out

    # slice 0
    source0 = tensor[:-shift]
    if index_source is not None:
        source0 = source0[index_source[shift:]]

    slice0_shift = source0.shape[0]
    if index_dest is not None:
        out[index_dest[-slice0_shift:]] = source0
    else:
        slice0 = out[-slice0_shift:]
        slice0.copy_(source0)

    # slice 1
    source1 = tensor[-shift:]
    if index_source is not None:
        source1 = source1[index_source[:shift]]
    if index_dest is not None:
        out[index_dest[:-slice0_shift]] = source1
    else:
        slice1 = out[:-slice0_shift]
        slice1.copy_(source1)
    return out
# Copy-paste of unravel-index for PT 2.0
def _unravel_index(
    indices: Tensor, shape: Union[int, typing.Sequence[int], torch.Size]
) -> typing.Tuple[Tensor, ...]:
    """Convert flat ``indices`` into a tuple of coordinate tensors for ``shape``.

    The coordinates are computed stacked along the last dimension, then split
    into one tensor per dimension of ``shape``.
    """
    return _unravel_index_impl(indices, shape).unbind(-1)
def _unravel_index_impl(
    indices: Tensor, shape: Union[int, typing.Sequence[int]]
) -> Tensor:
    """Compute the coordinates of flat ``indices`` in an array of shape ``shape``.

    Args:
        indices: tensor of flat indices.
        shape: the shape to unravel into; a single int is treated as a
            one-dimensional shape.

    Returns:
        A tensor with one extra trailing dimension of size ``len(shape)``
        holding the coordinates.
    """
    if isinstance(shape, (int, torch.SymInt)):
        dims = torch.Size([shape])
    else:
        dims = torch.Size(shape)

    # strides[i] is the product of the sizes of all dimensions after i:
    # built right-to-left, starting from 1 for the last dimension.
    strides = [1]
    for extent in reversed(dims[1:]):
        strides.append(strides[-1] * extent)
    strides.reverse()

    divisors = torch.tensor(strides, device=indices.device, dtype=torch.int64)
    extents = torch.tensor(dims, device=indices.device, dtype=torch.int64)
    return indices.unsqueeze(-1).floor_divide(divisors) % extents
# Variant backed by the local re-implementation; presumably selected by
# implement_for for torch versions below 2.2 - confirm against implement_for.
@implement_for("torch", None, "2.2")
def unravel_index(indices, shape):
    """A version-compatible wrapper around torch.unravel_index."""
    return _unravel_index(indices, shape)


# Variant delegating to the native torch.unravel_index; presumably selected
# for torch 2.2 and above - confirm against implement_for.
@implement_for("torch", "2.2")
def unravel_index(indices, shape):  # noqa: F811
    """A version-compatible wrapper around torch.unravel_index."""
    return torch.unravel_index(indices, shape)
# Three variants of the same generator, registered through implement_for with
# different version bounds (presumably [None, 2.3), [2.3, 2.4) and 2.4+ -
# confirm against implement_for). All of them yield the leaves of ``pytree``.
@implement_for("torch", None, "2.3")
def tree_iter(pytree):
    """A version-compatible wrapper around tree_iter."""
    # flatten the tree and iterate over the resulting list of leaves
    flat_tree, _ = torch.utils._pytree.tree_flatten(pytree)
    yield from flat_tree


@implement_for("torch", "2.3", "2.4")
def tree_iter(pytree):  # noqa: F811
    """A version-compatible wrapper around tree_iter."""
    yield from torch.utils._pytree.tree_leaves(pytree)


@implement_for("torch", "2.4")
def tree_iter(pytree):  # noqa: F811
    """A version-compatible wrapper around tree_iter."""
    # delegates to the tree_iter function of torch.utils._pytree
    yield from torch.utils._pytree.tree_iter(pytree)
def _auto_device() -> torch.device:
    """Pick a default device: ``cuda:0`` if available, else ``mps:0``, else CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    # torch.backends.mps.is_available is the long-standing availability check;
    # torch.mps.is_available is absent from older torch releases.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps:0")
    return torch.device("cpu")