# mypy: allow-untyped-defs
r"""This package adds support for NVIDIA Tools Extension (NVTX) used in profiling."""

from contextlib import contextmanager


try:
    from torch._C import _nvtx
except ImportError:

    class _NVTXStub:
        """Stand-in for ``torch._C._nvtx`` used when that binding cannot be imported.

        Every NVTX entry point this module calls is bound to ``_fail``, so any
        use raises the same descriptive ``RuntimeError``.
        """

        @staticmethod
        def _fail(*args, **kwargs):
            """Reject the call, whatever arguments it was given.

            Raises:
                RuntimeError: always.
            """
            raise RuntimeError(
                "NVTX functions not installed. Are you sure you have a CUDA build?"
            )

        rangePushA = _fail
        rangePop = _fail
        markA = _fail
        # range_start/range_end and the _device_range_* helpers call these
        # bindings too; without stubs they would surface a bare AttributeError
        # instead of the explanatory RuntimeError above.
        rangeStartA = _fail
        rangeEnd = _fail
        deviceRangeStart = _fail
        deviceRangeEnd = _fail

    _nvtx = _NVTXStub()  # type: ignore[assignment]

__all__ = ["range_push", "range_pop", "range_start", "range_end", "mark", "range"]
def range_push(msg):
    """
    Push a new range onto the stack of nested range spans.

    Args:
        msg (str): ASCII message to associate with range

    Returns:
        The zero-based depth of the range that is started.
    """
    depth = _nvtx.rangePushA(msg)
    return depth
def range_pop():
    """
    Pop a range off the stack of nested range spans.

    Returns:
        The zero-based depth of the range that is ended.
    """
    depth = _nvtx.rangePop()
    return depth
def range_start(msg) -> int:
    """
    Mark the start of a range with a string message.

    Unlike range_push/range_pop, the range_start/range_end pair supports
    ranges that span threads: a range may be started on one thread and
    ended on another.

    Args:
        msg (str): ASCII message to associate with the range.

    Returns:
        A unique range handle (uint64_t) to pass to the corresponding
        call to range_end().
    """
    handle = _nvtx.rangeStartA(msg)
    return handle


def range_end(range_id) -> None:
    """
    Mark the end of the range identified by ``range_id``.

    Args:
        range_id (int): a unique handle for the start range.
    """
    _nvtx.rangeEnd(range_id)


def _device_range_start(msg: str, stream: int = 0) -> object:
    """
    Mark the start of a range with a string message, ordered on a CUDA stream.

    Whereas range_start marks the range right away, this function marks the
    start of the range as soon as all the tasks on the CUDA stream are
    completed.

    Args:
        msg (str): ASCII message to associate with the range.
        stream (int): CUDA stream id.

    Returns:
        An opaque heap-allocated handle that should be passed to
        _device_range_end().
    """
    handle = _nvtx.deviceRangeStart(msg, stream)
    return handle


def _device_range_end(range_handle: object, stream: int = 0) -> None:
    """
    Mark the end of the range identified by ``range_handle``.

    The end is marked as soon as all the tasks on the CUDA stream are
    completed.

    Args:
        range_handle: a unique handle for the start range.
        stream (int): CUDA stream id.
    """
    _nvtx.deviceRangeEnd(range_handle, stream)
def mark(msg):
    """
    Describe an instantaneous event that occurred at some point.

    Args:
        msg (str): ASCII message to associate with the event.
    """
    outcome = _nvtx.markA(msg)
    return outcome
@contextmanager
def range(msg, *args, **kwargs):
    """
    Context manager / decorator that wraps its scope in an NVTX range.

    A range is pushed at the beginning of the scope and popped at the end.
    If extra arguments are given, they are passed as arguments to
    msg.format().

    Args:
        msg (str): message to associate with the range
    """
    # The message is formatted before the push, so a formatting error
    # propagates without a range having been opened.
    label = msg.format(*args, **kwargs)
    range_push(label)
    try:
        yield
    finally:
        range_pop()
Docs
Access comprehensive developer documentation for PyTorch
To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: Cookies Policy.