Source code for torchrl.envs.libs.unity_mlagents
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations
import importlib.util
from typing import Dict, List, Optional
import torch
from tensordict import TensorDict, TensorDictBase
from torchrl.data.tensor_specs import (
BoundedContinuous,
Categorical,
Composite,
MultiCategorical,
MultiOneHot,
Unbounded,
)
from torchrl.envs.common import _EnvWrapper
from torchrl.envs.utils import _classproperty, check_marl_grouping, MarlGroupMapType
_has_unity_mlagents = importlib.util.find_spec("mlagents_envs") is not None
def _get_registered_envs():
if not _has_unity_mlagents:
raise ImportError(
"mlagents_envs not found. Consider downloading and installing "
f"mlagents from {UnityMLAgentsWrapper.git_url}."
)
from mlagents_envs.registry import default_registry
return list(default_registry.keys())
class UnityMLAgentsWrapper(_EnvWrapper):
"""Unity ML-Agents environment wrapper.
GitHub: https://github.com/Unity-Technologies/ml-agents
Documentation: https://unity-technologies.github.io/ml-agents/Python-LLAPI/
Args:
env (mlagents_envs.environment.UnityEnvironment): the ML-Agents
environment to wrap.
Keyword Args:
device (torch.device, optional): if provided, the device on which the data
is to be cast. Defaults to ``None``.
batch_size (torch.Size, optional): the batch size of the environment.
Defaults to ``torch.Size([])``.
allow_done_after_reset (bool, optional): if ``True``, it is tolerated
for envs to be ``done`` just after :meth:`~.reset` is called.
Defaults to ``False``.
group_map (MarlGroupMapType or Dict[str, List[str]], optional): how to
group agents in tensordicts for input/output. See
:class:`~torchrl.envs.utils.MarlGroupMapType` for more info. If not
specified, agents are grouped according to the group ID given by the
Unity environment. Defaults to ``None``. See the grouping example
below.
categorical_actions (bool, optional): if ``True``, discrete action specs
are converted to the TorchRL categorical equivalent
(:class:`torchrl.data.Categorical`); otherwise, a one-hot encoding
is used (:class:`torchrl.data.OneHot`). Defaults to ``False``.
Attributes:
available_envs: list of registered environments available to build
Examples:
>>> from mlagents_envs.environment import UnityEnvironment
>>> base_env = UnityEnvironment()
>>> from torchrl.envs import UnityMLAgentsWrapper
>>> env = UnityMLAgentsWrapper(base_env)
>>> td = env.reset()
>>> td = env.step(td.update(env.full_action_spec.rand()))
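The agent grouping and the action encoding can be customized through the
keyword arguments. A minimal sketch, assuming a freshly created Unity
environment:
>>> from torchrl.envs.utils import MarlGroupMapType
>>> base_env = UnityEnvironment()
>>> env = UnityMLAgentsWrapper(
...     base_env,
...     group_map=MarlGroupMapType.ONE_GROUP_PER_AGENT,
...     categorical_actions=True,
... )
>>> td = env.reset()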
"""
git_url = "https://github.com/Unity-Technologies/ml-agents"
libname = "mlagents_envs"
_lib = None
@_classproperty
def lib(cls):
if cls._lib is not None:
return cls._lib
import mlagents_envs
import mlagents_envs.environment
cls._lib = mlagents_envs
return mlagents_envs
def __init__(
self,
env=None,
*,
group_map: MarlGroupMapType | Dict[str, List[str]] | None = None,
categorical_actions: bool = False,
**kwargs,
):
if env is not None:
kwargs["env"] = env
self.group_map = group_map
self.categorical_actions = categorical_actions
super().__init__(**kwargs)
def _check_kwargs(self, kwargs: Dict):
mlagents_envs = self.lib
if "env" not in kwargs:
raise TypeError("Could not find environment key 'env' in kwargs.")
env = kwargs["env"]
if not isinstance(env, mlagents_envs.environment.UnityEnvironment):
raise TypeError(
"env is not of type 'mlagents_envs.environment.UnityEnvironment'"
)
def _build_env(self, env, requires_grad: bool = False, **kwargs):
self.requires_grad = requires_grad
return env
def _init_env(self):
self._update_action_mask()
# Collects each agent's behavior name and group ID, as reported by the
# Unity environment in both the decision and terminal steps.
def _collect_agents(self, env):
agent_name_to_behavior_map = {}
agent_name_to_group_id_map = {}
for steps_idx in [0, 1]:
for behavior in env.behavior_specs.keys():
steps = env.get_steps(behavior)[steps_idx]
is_terminal = steps_idx == 1
agent_ids = steps.agent_id
group_ids = steps.group_id
for agent_id, group_id in zip(agent_ids, group_ids):
agent_name = f"agent_{agent_id}"
if agent_name in agent_name_to_behavior_map:
# Sometimes in an MLAgents environment, an agent may
# show up in both the decision steps and the terminal
# steps. When that happens, just skip the duplicate.
assert is_terminal
continue
agent_name_to_behavior_map[agent_name] = behavior
agent_name_to_group_id_map[agent_name] = group_id
return (
agent_name_to_behavior_map,
agent_name_to_group_id_map,
)
# Creates a group map where agents are grouped by their group_id.
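# For example (hypothetical values), an input of
# {"agent_0": 0, "agent_1": 0, "agent_2": 1} yields
# {"group_0": ["agent_0", "agent_1"], "group_1": ["agent_2"]}.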
def _make_default_group_map(self, agent_name_to_group_id_map):
group_map = {}
for agent_name, group_id in agent_name_to_group_id_map.items():
group_name = f"group_{group_id}"
if group_name not in group_map:
group_map[group_name] = []
group_map[group_name].append(agent_name)
return group_map
def _make_group_map(self, group_map, agent_name_to_group_id_map):
if group_map is None:
group_map = self._make_default_group_map(agent_name_to_group_id_map)
elif isinstance(group_map, MarlGroupMapType):
group_map = group_map.get_group_map(agent_name_to_group_id_map.keys())
check_marl_grouping(group_map, agent_name_to_group_id_map.keys())
agent_name_to_group_name_map = {}
for group_name, agents in group_map.items():
for agent_name in agents:
agent_name_to_group_name_map[agent_name] = group_name
return group_map, agent_name_to_group_name_map
def _make_specs(
self, env: "mlagents_envs.environment.UnityEnvironment" # noqa: F821
) -> None:
# NOTE: We need to reset here because mlagents only initializes the
# agents and behaviors after reset. In order to build specs, we make the
# following assumptions about the mlagents environment:
# * all behaviors are defined on the first step
# * all agents request an action on the first step
# However, mlagents allows you to break these assumptions, so we probably
# will need to detect changes to the behaviors and agents on each step.
env.reset()
(
self.agent_name_to_behavior_map,
self.agent_name_to_group_id_map,
) = self._collect_agents(env)
(self.group_map, self.agent_name_to_group_name_map) = self._make_group_map(
self.group_map, self.agent_name_to_group_id_map
)
action_spec = {}
observation_spec = {}
reward_spec = {}
done_spec = {}
for group_name, agents in self.group_map.items():
group_action_spec = {}
group_observation_spec = {}
group_reward_spec = {}
group_done_spec = {}
for agent_name in agents:
behavior = self.agent_name_to_behavior_map[agent_name]
behavior_spec = env.behavior_specs[behavior]
# Create action spec
agent_action_spec = Composite()
env_action_spec = behavior_spec.action_spec
discrete_branches = env_action_spec.discrete_branches
continuous_size = env_action_spec.continuous_size
if len(discrete_branches) > 0:
discrete_action_spec_cls = (
MultiCategorical if self.categorical_actions else MultiOneHot
)
agent_action_spec["discrete_action"] = discrete_action_spec_cls(
discrete_branches,
dtype=torch.int32,
device=self.device,
)
if continuous_size > 0:
# In mlagents, continuous actions can take values between -1
# and 1 by default:
# https://github.com/Unity-Technologies/ml-agents/blob/22a59aad34ef46a5de05469735426feed758f8f5/ml-agents-envs/mlagents_envs/base_env.py#L395
agent_action_spec["continuous_action"] = BoundedContinuous(
-1, 1, (continuous_size,), self.device, torch.float32
)
group_action_spec[agent_name] = agent_action_spec
# Create observation spec
agent_observation_spec = Composite()
for obs_idx, env_observation_spec in enumerate(
behavior_spec.observation_specs
):
if len(env_observation_spec.name) == 0:
obs_name = f"observation_{obs_idx}"
else:
obs_name = env_observation_spec.name
agent_observation_spec[obs_name] = Unbounded(
env_observation_spec.shape,
dtype=torch.float32,
device=self.device,
)
group_observation_spec[agent_name] = agent_observation_spec
# Create reward spec
agent_reward_spec = Composite()
agent_reward_spec["reward"] = Unbounded(
(1,),
dtype=torch.float32,
device=self.device,
)
agent_reward_spec["group_reward"] = Unbounded(
(1,),
dtype=torch.float32,
device=self.device,
)
group_reward_spec[agent_name] = agent_reward_spec
# Create done spec
agent_done_spec = Composite()
for done_key in ["done", "terminated", "truncated"]:
agent_done_spec[done_key] = Categorical(
2, (1,), dtype=torch.bool, device=self.device
)
group_done_spec[agent_name] = agent_done_spec
action_spec[group_name] = group_action_spec
observation_spec[group_name] = group_observation_spec
reward_spec[group_name] = group_reward_spec
done_spec[group_name] = group_done_spec
self.action_spec = Composite(action_spec)
self.observation_spec = Composite(observation_spec)
self.reward_spec = Composite(reward_spec)
self.done_spec = Composite(done_spec)
def _set_seed(self, seed):
if seed is not None:
raise NotImplementedError("This environment has no seed.")
def _check_agent_exists(self, agent_name, group_id):
if agent_name not in self.agent_name_to_group_id_map:
raise RuntimeError(
(
"Unity environment added a new agent. This is not yet "
"supported in torchrl."
)
)
if self.agent_name_to_group_id_map[agent_name] != group_id:
raise RuntimeError(
(
"Unity environment changed the group of an agent. This "
"is not yet supported in torchrl."
)
)
def _update_action_mask(self):
for behavior, behavior_spec in self._env.behavior_specs.items():
env_action_spec = behavior_spec.action_spec
discrete_branches = env_action_spec.discrete_branches
if len(discrete_branches) > 0:
steps = self._env.get_steps(behavior)[0]
env_action_mask = steps.action_mask
if env_action_mask is not None:
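# In mlagents, a True entry in the action mask marks an unavailable
# action, whereas TorchRL masks mark allowed actions, hence the
# logical_not below.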
combined_action_mask = torch.cat(
[
torch.tensor(m, device=self.device, dtype=torch.bool)
for m in env_action_mask
],
dim=-1,
).logical_not()
for agent_id, group_id, agent_action_mask in zip(
steps.agent_id, steps.group_id, combined_action_mask
):
agent_name = f"agent_{agent_id}"
self._check_agent_exists(agent_name, group_id)
group_name = self.agent_name_to_group_name_map[agent_name]
self.full_action_spec[
group_name, agent_name, "discrete_action"
].update_mask(agent_action_mask)
def _make_td_out(self, tensordict_in, is_reset=False):
source = {}
for behavior, behavior_spec in self._env.behavior_specs.items():
for idx, steps in enumerate(self._env.get_steps(behavior)):
is_terminal = idx == 1
for steps_idx, (agent_id, group_id) in enumerate(
zip(steps.agent_id, steps.group_id)
):
agent_name = f"agent_{agent_id}"
self._check_agent_exists(agent_name, group_id)
group_name = self.agent_name_to_group_name_map[agent_name]
if group_name not in source:
source[group_name] = {}
if agent_name not in source[group_name]:
source[group_name][agent_name] = {}
# Add observations
for obs_idx, (
behavior_observation,
env_observation_spec,
) in enumerate(zip(steps.obs, behavior_spec.observation_specs)):
observation = torch.tensor(
behavior_observation[steps_idx],
device=self.device,
dtype=torch.float32,
)
if len(env_observation_spec.name) == 0:
obs_name = f"observation_{obs_idx}"
else:
obs_name = env_observation_spec.name
source[group_name][agent_name][obs_name] = observation
# Add rewards
if not is_reset:
source[group_name][agent_name]["reward"] = torch.tensor(
steps.reward[steps_idx],
device=self.device,
dtype=torch.float32,
)
source[group_name][agent_name]["group_reward"] = torch.tensor(
steps.group_reward[steps_idx],
device=self.device,
dtype=torch.float32,
)
# Add done
done = is_terminal and not is_reset
source[group_name][agent_name]["done"] = torch.tensor(
done, device=self.device, dtype=torch.bool
)
source[group_name][agent_name]["truncated"] = torch.tensor(
done and steps.interrupted[steps_idx],
device=self.device,
dtype=torch.bool,
)
source[group_name][agent_name]["terminated"] = torch.tensor(
done and not steps.interrupted[steps_idx],
device=self.device,
dtype=torch.bool,
)
if tensordict_in is not None:
# In MLAgents, a given step will only contain information for agents
# which either terminated or requested a decision during the step.
# Some agents may have neither terminated nor requested a decision,
# so we need to fill in their information from the previous step.
for group_name, agents in self.group_map.items():
for agent_name in agents:
if group_name not in source.keys():
source[group_name] = {}
if agent_name not in source[group_name].keys():
agent_dict = {}
agent_behavior = self.agent_name_to_behavior_map[agent_name]
behavior_spec = self._env.behavior_specs[agent_behavior]
td_agent_in = tensordict_in[group_name, agent_name]
# Add observations
for obs_idx, env_observation_spec in enumerate(behavior_spec.observation_specs):
if len(env_observation_spec.name) == 0:
obs_name = f"observation_{obs_idx}"
else:
obs_name = env_observation_spec.name
agent_dict[obs_name] = td_agent_in[obs_name]
# Add rewards
if not is_reset:
# Since the agent didn't request a decision, the
# reward is 0
agent_dict["reward"] = torch.zeros(
(1,), device=self.device, dtype=torch.float32
)
agent_dict["group_reward"] = torch.zeros(
(1,), device=self.device, dtype=torch.float32
)
# Add done
agent_dict["done"] = torch.tensor(
False, device=self.device, dtype=torch.bool
)
agent_dict["terminated"] = torch.tensor(
False, device=self.device, dtype=torch.bool
)
agent_dict["truncated"] = torch.tensor(
False, device=self.device, dtype=torch.bool
)
source[group_name][agent_name] = agent_dict
tensordict_out = TensorDict(
source=source,
batch_size=self.batch_size,
device=self.device,
)
return tensordict_out
def _get_action_from_tensor(self, tensor):
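# When actions are one-hot encoded, convert them to categorical indices;
# categorical actions pass through unchanged.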
if not self.categorical_actions:
action = torch.argmax(tensor, dim=-1)
else:
action = tensor
return action
def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
# Apply actions
for behavior, behavior_spec in self._env.behavior_specs.items():
env_action_spec = behavior_spec.action_spec
steps = self._env.get_steps(behavior)[0]
for agent_id, group_id in zip(steps.agent_id, steps.group_id):
agent_name = f"agent_{agent_id}"
self._check_agent_exists(agent_name, group_id)
group_name = self.agent_name_to_group_name_map[agent_name]
agent_action_spec = self.full_action_spec[group_name, agent_name]
action_tuple = self.lib.base_env.ActionTuple()
discrete_branches = env_action_spec.discrete_branches
continuous_size = env_action_spec.continuous_size
if len(discrete_branches) > 0:
discrete_spec = agent_action_spec["discrete_action"]
discrete_action = tensordict[
group_name, agent_name, "discrete_action"
]
if not self.categorical_actions:
discrete_action = discrete_spec.to_categorical(discrete_action)
action_tuple.add_discrete(discrete_action[None, ...].numpy())
if continuous_size > 0:
continuous_action = tensordict[
group_name, agent_name, "continuous_action"
]
action_tuple.add_continuous(continuous_action[None, ...].numpy())
self._env.set_action_for_agent(behavior, agent_id, action_tuple)
self._env.step()
self._update_action_mask()
return self._make_td_out(tensordict)
def _to_tensor(self, value):
return torch.tensor(value, device=self.device, dtype=torch.float32)
def _reset(
self, tensordict: TensorDictBase | None = None, **kwargs
) -> TensorDictBase:
self._env.reset()
return self._make_td_out(tensordict, is_reset=True)
def close(self):
self._env.close()
@_classproperty
def available_envs(cls):
if not _has_unity_mlagents:
return []
return _get_registered_envs()
class UnityMLAgentsEnv(UnityMLAgentsWrapper):
"""Unity ML-Agents environment wrapper.
GitHub: https://github.com/Unity-Technologies/ml-agents
Documentation: https://unity-technologies.github.io/ml-agents/Python-LLAPI/
This class can be given any of the optional initialization arguments that
the :class:`mlagents_envs.environment.UnityEnvironment` class provides. For a
list of these arguments, see:
https://unity-technologies.github.io/ml-agents/Python-LLAPI-Documentation/#__init__
If both ``file_name`` and ``registered_name`` are given, an error is raised.
If neither ``file_name`` nor ``registered_name`` is given, the environment
setup waits on a localhost port, and the user must launch a Unity ML-Agents
environment binary that connects to it.
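For example, a hypothetical launch from a local build (the path below is a
placeholder for a user-compiled binary, not one shipped with this package):
>>> env = UnityMLAgentsEnv(file_name="path/to/your/UnityBuild")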
Args:
file_name (str, optional): if provided, the path to the Unity
environment binary. Defaults to ``None``.
registered_name (str, optional): if provided, the Unity environment
binary is loaded from the default ML-Agents registry. The list of
registered environments is in :attr:`~.available_envs`. Defaults to
``None``.
Keyword Args:
device (torch.device, optional): if provided, the device on which the data
is to be cast. Defaults to ``None``.
batch_size (torch.Size, optional): the batch size of the environment.
Defaults to ``torch.Size([])``.
allow_done_after_reset (bool, optional): if ``True``, it is tolerated
for envs to be ``done`` just after :meth:`~.reset` is called.
Defaults to ``False``.
group_map (MarlGroupMapType or Dict[str, List[str]], optional): how to
group agents in tensordicts for input/output. See
:class:`~torchrl.envs.utils.MarlGroupMapType` for more info. If not
specified, agents are grouped according to the group ID given by the
Unity environment. Defaults to ``None``.
categorical_actions (bool, optional): if ``True``, discrete action specs
are converted to the TorchRL categorical equivalent
(:class:`torchrl.data.Categorical`); otherwise, a one-hot encoding
is used (:class:`torchrl.data.OneHot`). Defaults to ``False``.
Attributes:
available_envs: list of registered environments available to build
Examples:
>>> from torchrl.envs import UnityMLAgentsEnv
>>> env = UnityMLAgentsEnv(registered_name='3DBall')
>>> td = env.reset()
>>> td = env.step(td.update(env.full_action_spec.rand()))
>>> td
TensorDict(
fields={
group_0: TensorDict(
fields={
agent_0: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_10: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_11: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_1: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_2: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_3: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_4: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_5: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_6: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_7: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_8: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_9: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
continuous_action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
next: TensorDict(
fields={
group_0: TensorDict(
fields={
agent_0: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_10: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_11: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_1: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_2: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_3: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_4: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_5: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_6: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_7: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_8: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False),
agent_9: TensorDict(
fields={
VectorSensor_size8: Tensor(shape=torch.Size([8]), device=cpu, dtype=torch.float32, is_shared=False),
done: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
group_reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False),
terminated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False),
truncated: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.bool, is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False)},
batch_size=torch.Size([]),
device=None,
is_shared=False)
"""
def __init__(
self,
file_name: Optional[str] = None,
registered_name: Optional[str] = None,
*,
group_map: MarlGroupMapType | Dict[str, List[str]] | None = None,
categorical_actions=False,
**kwargs,
):
kwargs["file_name"] = file_name
kwargs["registered_name"] = registered_name
super().__init__(
group_map=group_map,
categorical_actions=categorical_actions,
**kwargs,
)
def _build_env(
self,
file_name: Optional[str],
registered_name: Optional[str],
**kwargs,
) -> "mlagents_envs.environment.UnityEnvironment": # noqa: F821
if not _has_unity_mlagents:
raise ImportError(
"mlagents_envs not found, unable to create environment. "
"Consider downloading and installing mlagents from "
f"{self.git_url}"
)
# Pop kwargs that belong to the wrapper before forwarding the rest to Unity.
requires_grad = kwargs.pop("requires_grad", False)
if file_name is not None and registered_name is not None:
raise ValueError(
"Both `file_name` and `registered_name` were specified, which "
"is not allowed. Specify one of them or neither."
)
elif registered_name is not None:
from mlagents_envs.registry import default_registry
env = default_registry[registered_name].make(**kwargs)
else:
env = self.lib.environment.UnityEnvironment(file_name, **kwargs)
return super()._build_env(
env,
requires_grad=requires_grad,
)
@property
def file_name(self):
return self._constructor_kwargs["file_name"]
@property
def registered_name(self):
return self._constructor_kwargs["registered_name"]
def _check_kwargs(self, kwargs: Dict):
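# Unlike the wrapper, this class constructs the UnityEnvironment itself,
# so there is no pre-built `env` kwarg to validate here.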
pass
def __repr__(self) -> str:
if self.registered_name is not None:
env_name = self.registered_name
else:
env_name = self.file_name
return f"{self.__class__.__name__}(env={env_name}, batch_size={self.batch_size}, device={self.device})"