Shortcuts

Source code for torchx.specs

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

"""
This contains the TorchX AppDef and related component definitions. These are
used by components to define the apps which can then be launched via a TorchX
scheduler or pipeline adapter.
"""
import difflib
from typing import Callable, Dict, Optional

from torchx.specs.named_resources_aws import NAMED_RESOURCES as AWS_NAMED_RESOURCES
from torchx.specs.named_resources_generic import (
    NAMED_RESOURCES as GENERIC_NAMED_RESOURCES,
)
from torchx.util.entrypoints import load_group

from .api import (  # noqa: F401 F403
    ALL,
    AppDef,
    AppDryRunInfo,
    AppHandle,
    AppState,
    AppStatus,
    BindMount,
    CfgVal,
    DeviceMount,
    get_type_name,
    InvalidRunConfigException,
    is_terminal,
    macros,
    MalformedAppHandleException,
    MISSING,
    NONE,
    NULL_RESOURCE,
    parse_app_handle,
    ReplicaState,
    ReplicaStatus,
    Resource,
    RetryPolicy,
    Role,
    RoleStatus,
    runopt,
    runopts,
    UnknownAppException,
    UnknownSchedulerException,
    VolumeMount,
)
from .builders import make_app_handle, materialize_appdef, parse_mounts  # noqa

# NOTE(review): presumably the number of MB in one GiB, intended for building
# ``memMB`` values (e.g. ``64 * GiB``). It is not referenced in the visible
# part of this module -- confirm against callers.
GiB: int = 1024


def _load_named_resources() -> Dict[str, Callable[[], Resource]]:
    """
    Builds the mapping of named resource name -> zero-arg ``Resource`` factory.

    Sources are merged in increasing order of precedence (later entries
    override earlier ones that share a name):

    1. ``GENERIC_NAMED_RESOURCES``
    2. ``AWS_NAMED_RESOURCES``
    3. whatever ``load_group`` returns for the ``torchx.named_resources`` group

    The special names ``NULL`` and ``MISSING`` are set last, so they always
    resolve to ``NULL_RESOURCE`` regardless of what the sources above define.
    """
    # an empty dict is used when the entry point group yields nothing
    entrypoint_factories = load_group("torchx.named_resources", default={})

    factories: Dict[str, Callable[[], Resource]] = {
        **GENERIC_NAMED_RESOURCES,
        **AWS_NAMED_RESOURCES,
        **entrypoint_factories,
    }

    # reserved names; written after the merge so they cannot be overridden
    factories.update(
        {
            "NULL": lambda: NULL_RESOURCE,
            "MISSING": lambda: NULL_RESOURCE,
        }
    )
    return factories


# Registry of named resource factories (name -> zero-arg callable returning a
# ``Resource``). Populated once, when this module is imported, by calling
# ``_load_named_resources()``.
_named_resource_factories: Dict[str, Callable[[], Resource]] = _load_named_resources()


class _NamedResourcesLibrary:
    """
    Lookup facade over the module-level ``_named_resource_factories`` registry.

    Supports item access (``lib[name]``) and membership tests (``name in lib``)
    only. Item access calls the registered factory on every lookup and returns
    the ``Resource`` it produces.
    """

    def __getitem__(self, key: str) -> Resource:
        """
        Returns the ``Resource`` produced by the factory registered under ``key``.

        Raises:
            KeyError: if ``key`` is not registered. The message suggests the
                closest registered name when there is one, otherwise it lists
                every registered name.
        """
        if key in _named_resource_factories:
            factory = _named_resource_factories[key]
            return factory()

        # unknown name: help the caller by pointing at the most similar one
        known_names = _named_resource_factories.keys()
        suggestions = difflib.get_close_matches(key, known_names, n=1)
        hint = (
            f"Did you mean `{suggestions[0]}`?"
            if suggestions
            else f"Registered named resources: {list(known_names)}"
        )
        raise KeyError(f"No named resource found for `{key}`. {hint}")

    def __contains__(self, key: str) -> bool:
        """Returns ``True`` if a factory is registered under ``key``."""
        return key in _named_resource_factories

    def __iter__(self) -> None:
        """
        Always raises ``NotImplementedError``; iteration is not supported.

        NOTE(review): presumably defined explicitly so that ``iter()`` does not
        fall back to calling ``__getitem__`` with integer indices -- confirm.
        """
        raise NotImplementedError("named resources doesn't support iterating")

# Module-level lookup object for named resources, used as
# ``named_resources["<name>"]`` or ``"<name>" in named_resources``.
named_resources: _NamedResourcesLibrary = _NamedResourcesLibrary()


def resource(
    cpu: Optional[int] = None,
    gpu: Optional[int] = None,
    memMB: Optional[int] = None,
    h: Optional[str] = None,
) -> Resource:
    """
    Convenience method to create a ``Resource`` object from either the
    raw resource specs (cpu, gpu, memMB) or the registered named resource (``h``).
    Note that the (cpu, gpu, memMB) is mutually exclusive with ``h``,
    with ``h`` taking precedence if specified.

    If ``h`` is specified then it is used to look up the
    resource specs from the list of registered named resources.
    See `registering named resource <https://pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.

    Otherwise a ``Resource`` object is created from the raw resource specs.
    Each spec that is ``None`` (or ``0``) falls back to its default value:
    2 CPUs, 0 GPUs and 1024 MB of memory.

    Example:

    .. code-block:: python

         resource(cpu=1) # returns a Resource with cpu=1 and default gpu and memMB
         resource(h="foobar") # returns registered named resource "foobar"
         resource(cpu=1, h="foobar") # returns registered named resource "foobar" (cpu=1 ignored)
         resource() # returns default resource values
         resource(cpu=None, gpu=None, memMB=None) # same as resource()

    Args:
        cpu: number of CPUs; ignored when ``h`` is given.
        gpu: number of GPUs; ignored when ``h`` is given.
        memMB: memory in MB; ignored when ``h`` is given.
        h: name of a registered named resource. ``None`` and the empty
            string both mean "not specified".

    Returns:
        The named resource registered under ``h`` if ``h`` is specified,
        otherwise a ``Resource`` built from the raw specs and the defaults.

    Raises:
        KeyError: if ``h`` is specified but no named resource is registered
            under that name.
    """

    if h:
        return get_named_resources(h)
    else:
        # could make these defaults customizable via entrypoint
        # not doing that now since its not a requested feature and may just over complicate things
        # keeping these defaults method local so that no one else takes a dep on it
        DEFAULT_CPU = 2
        DEFAULT_GPU = 0
        DEFAULT_MEM_MB = 1024

        # ``or`` (rather than an ``is None`` check) means an explicit 0 is
        # also replaced by the default
        return Resource(
            cpu=cpu or DEFAULT_CPU,
            gpu=gpu or DEFAULT_GPU,
            memMB=memMB or DEFAULT_MEM_MB,
        )
def get_named_resources(res: str) -> Resource:
    """
    Get resource object based on the string definition registered via entrypoints.txt.

    TorchX implements ``named_resource`` registration mechanism, which consists of
    the following steps:

    1. Create a module and define your resource retrieval function:

    .. code-block:: python

     # my_module.resources
     from torchx.specs import Resource

     def gpu_x_1() -> Resource:
         return Resource(cpu=2, memMB=64 * 1024, gpu=2)

    2. Register resource retrieval in the entrypoints section:

    ::

     [torchx.named_resources]
     gpu_x_1 = my_module.resources:gpu_x_1

    The ``gpu_x_1`` can be used as string argument to this function:

    ::

     from torchx.specs import get_named_resources
     resource = get_named_resources("gpu_x_1")

    which is equivalent to indexing the ``named_resources`` object directly:

    ::

     from torchx.specs import named_resources
     resource = named_resources["gpu_x_1"]

    Args:
        res: name of the registered named resource.

    Returns:
        The ``Resource`` produced by the factory registered under ``res``.

    Raises:
        KeyError: if no named resource is registered under ``res``.
    """
    return named_resources[res]

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources