Source code for torch.distributed.elastic.agent.server.health_check_server
#!/usr/bin/env python3# Copyright (c) Facebook, Inc. and its affiliates.# All rights reserved.## This source code is licensed under the BSD-style license found in the# LICENSE file in the root directory of this source tree.fromtypingimportCallablefromtorch.distributed.elastic.utils.loggingimportget_loggerlog=get_logger(__name__)__all__=["HealthCheckServer","create_healthcheck_server"]
class HealthCheckServer:
    """
    Interface for a health check monitoring server.

    This base implementation is a no-op: it records its configuration and
    only emits log messages from ``start`` and ``stop``. It can be extended
    by starting a tcp/http server on the specified port.

    Args:
        alive_callback: Callable[[], int], callback to last progress time of agent
        port: int, port number to start tcp/http server
        timeout: int, timeout seconds to decide agent is alive/dead
    """

    # Configuration captured at construction time; not read by this base class.
    _alive_callback: Callable[[], int]
    _port: int
    _timeout: int

    def __init__(
        self,
        alive_callback: Callable[[], int],
        port: int,
        timeout: int,
    ) -> None:
        """Store the callback, port and timeout on the instance."""
        self._alive_callback, self._port, self._timeout = (
            alive_callback,
            port,
            timeout,
        )

    def start(self) -> None:
        """
        Unsupported functionality for PyTorch: no health check server is
        started, only a warning is logged.
        """
        log.warning("No health check server started")

    def stop(self) -> None:
        """
        Stop the health check server; in this no-op implementation it only
        logs an informational message.
        """
        log.info("Stopping noop health check server.")
[docs]defcreate_healthcheck_server(alive_callback:Callable[[],int],port:int,timeout:int,)->HealthCheckServer:""" creates health check server object """returnHealthCheckServer(alive_callback,port,timeout)
Docs
Access comprehensive developer documentation for PyTorch
To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: Cookies Policy.