Shortcuts

Source code for torchx.specs

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
This contains the TorchX AppDef and related component definitions. These are
used by components to define the apps which can then be launched via a TorchX
scheduler or pipeline adapter.
"""
import difflib
from typing import Callable, Dict, Optional

from torchx.specs.named_resources_aws import NAMED_RESOURCES as AWS_NAMED_RESOURCES
from torchx.specs.named_resources_generic import (
    NAMED_RESOURCES as GENERIC_NAMED_RESOURCES,
)
from torchx.util.entrypoints import load_group

from .api import (  # noqa: F401 F403
    ALL,
    AppDef,
    AppDryRunInfo,
    AppHandle,
    AppState,
    AppStatus,
    BindMount,
    CfgVal,
    DeviceMount,
    get_type_name,
    InvalidRunConfigException,
    is_terminal,
    macros,
    MalformedAppHandleException,
    MISSING,
    NONE,
    NULL_RESOURCE,
    parse_app_handle,
    ReplicaState,
    ReplicaStatus,
    Resource,
    RetryPolicy,
    Role,
    RoleStatus,
    runopt,
    runopts,
    UnknownAppException,
    UnknownSchedulerException,
    VolumeMount,
)
from .builders import make_app_handle, materialize_appdef, parse_mounts  # noqa

# Integer constant with value 1024.
# NOTE(review): presumably "one GiB expressed in MB", matching the ``memMB``
# unit used by ``resource()`` below (e.g. ``memMB=64 * GiB``) -- confirm.
GiB: int = 1024


def _load_named_resources() -> Dict[str, Callable[[], Resource]]:
    """
    Builds the table of named resource factories.

    The table is assembled from three sources, applied in this order so that
    later sources override earlier ones on a name clash:

    1. ``GENERIC_NAMED_RESOURCES``
    2. ``AWS_NAMED_RESOURCES``
    3. whatever ``load_group`` returns for the ``torchx.named_resources`` group

    The reserved names ``NULL`` and ``MISSING`` are written last, so they always
    resolve to a factory returning ``NULL_RESOURCE`` regardless of what the
    sources above registered under those names.

    Returns:
        A mapping of resource name to a zero-argument factory.
    """
    plugin_factories = load_group("torchx.named_resources", default={})

    factories: Dict[str, Callable[[], Resource]] = {}
    for source in (GENERIC_NAMED_RESOURCES, AWS_NAMED_RESOURCES, plugin_factories):
        factories.update(source)

    # reserved names are assigned after the merge so they cannot be overridden
    for reserved_name in ("NULL", "MISSING"):
        factories[reserved_name] = lambda: NULL_RESOURCE

    return factories


# Name -> zero-argument factory table, built once when this module is imported.
# ``_NamedResourcesLibrary`` reads from this table on every lookup.
_named_resource_factories: Dict[str, Callable[[], Resource]] = _load_named_resources()


class _NamedResourcesLibrary:
    """
    Subscriptable facade over the module-level ``_named_resource_factories`` table.

    Supports ``library[name]`` and ``name in library``. Iteration is
    deliberately unsupported.
    """

    def __getitem__(self, key: str) -> Resource:
        """
        Calls the factory registered under ``key`` and returns its result.

        Raises:
            KeyError: if ``key`` is not registered. The message carries the
                closest registered name when one exists, otherwise the full
                list of registered names.
        """
        if key in _named_resource_factories:
            return _named_resource_factories[key]()

        # unknown name: build the most helpful hint we can for the error message
        suggestions = difflib.get_close_matches(key, _named_resource_factories, n=1)
        hint = (
            f"Did you mean `{suggestions[0]}`?"
            if suggestions
            else f"Registered named resources: {list(_named_resource_factories)}"
        )
        raise KeyError(f"No named resource found for `{key}`. {hint}")

    def __contains__(self, key: str) -> bool:
        """Returns ``True`` if a factory is registered under ``key``."""
        return key in _named_resource_factories

    def __iter__(self) -> None:
        """Always raises ``NotImplementedError``; iteration is not supported."""
        raise NotImplementedError("named resources doesn't support iterating")


# Module-level lookup object, used as ``named_resources["<name>"]`` or
# ``"<name>" in named_resources``; ``get_named_resources()`` delegates to it.
named_resources: _NamedResourcesLibrary = _NamedResourcesLibrary()


def resource(
    cpu: Optional[int] = None,
    gpu: Optional[int] = None,
    memMB: Optional[int] = None,
    h: Optional[str] = None,
) -> Resource:
    """
    Convenience method to create a ``Resource`` object from either the
    raw resource specs (cpu, gpu, memMB) or the registered named resource (``h``).
    Note that the (cpu, gpu, memMB) is mutually exclusive with ``h``,
    with ``h`` taking precedence if specified.

    If ``h`` is specified then it is used to look up the
    resource specs from the list of registered named resources.
    See `registering named resource <https://pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.

    Otherwise a ``Resource`` object is created from the raw resource specs.
    Any spec that is ``None`` (or otherwise falsy, e.g. ``0``) is replaced by
    its default: ``cpu=2``, ``gpu=0``, ``memMB=1024``.

    Example:

    .. code-block:: python

         resource(cpu=1) # returns Resource(cpu=1, gpu=0, memMB=1024)
         resource(h="foobar") # returns registered named resource "foobar"
         resource(cpu=1, h="foobar") # returns registered named resource "foobar" (cpu=1 ignored)
         resource() # returns default resource values
         resource(cpu=None, gpu=None, memMB=None) # same as resource(): default resource values

    Args:
        cpu: number of cpus, defaults to 2 when not given
        gpu: number of gpus, defaults to 0 when not given
        memMB: memory in MB, defaults to 1024 when not given
        h: name of a registered named resource; when non-empty the raw specs are ignored

    Returns:
        The named resource registered under ``h`` if ``h`` is given, otherwise
        a ``Resource`` built from the raw specs and defaults.

    Raises:
        KeyError: if ``h`` is given but no named resource is registered under it.
    """

    if h:
        return get_named_resources(h)
    else:
        # could make these defaults customizable via entrypoint
        # not doing that now since its not a requested feature and may just over complicate things
        # keeping these defaults method local so that no one else takes a dep on it
        DEFAULT_CPU = 2
        DEFAULT_GPU = 0
        DEFAULT_MEM_MB = 1024

        # NOTE: ``or`` treats an explicit 0 the same as None (falls back to the default)
        return Resource(
            cpu=cpu or DEFAULT_CPU,
            gpu=gpu or DEFAULT_GPU,
            memMB=memMB or DEFAULT_MEM_MB,
        )
def get_named_resources(res: str) -> Resource:
    """
    Get resource object based on the string definition registered via entrypoints.txt.

    TorchX implements ``named_resource`` registration mechanism, which consists of
    the following steps:

    1. Create a module and define your resource retrieval function:

    .. code-block:: python

     # my_module.resources
     from torchx.specs import Resource

     def gpu_x_1() -> Resource:
         return Resource(cpu=2, memMB=64 * 1024, gpu=2)

    2. Register resource retrieval in the entrypoints section:

    ::

     [torchx.named_resources]
     gpu_x_1 = my_module.resources:gpu_x_1

    The ``gpu_x_1`` can be used as string argument to this function:

    ::

     from torchx.specs import get_named_resources
     resource = get_named_resources("gpu_x_1")

    which is equivalent to subscripting the ``named_resources`` library:

    ::

     from torchx.specs import named_resources
     resource = named_resources["gpu_x_1"]

    Args:
        res: name of the registered named resource

    Returns:
        The ``Resource`` produced by the factory registered under ``res``.

    Raises:
        KeyError: if no named resource is registered under ``res``.
    """
    return named_resources[res]

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources