Source code for torch_tensorrt.ts._compile_spec

from __future__ import annotations

from copy import deepcopy
from typing import Any, Dict, List, Optional, Set

import torch
import torch_tensorrt._C.ts as _ts_C
from torch_tensorrt import _C
from torch_tensorrt._Device import Device
from torch_tensorrt._enums import DeviceType, EngineCapability, dtype
from torch_tensorrt._Input import Input
from torch_tensorrt.ts._Device import TorchScriptDevice
from torch_tensorrt.ts._Input import TorchScriptInput
from torch_tensorrt.ts.logging import Level, log

import tensorrt as trt


def _internal_input_to_torch_class_input(i: _C.Input) -> torch.classes.tensorrt._Input:
    clone = torch.classes.tensorrt._Input()
    clone._set_min(i.min)
    clone._set_opt(i.opt)
    clone._set_max(i.max)
    clone._set_dtype(i.dtype)
    clone._set_tensor_domain(i.tensor_domain)
    clone._set_format(i.format)
    clone._set_input_is_dynamic(i.input_is_dynamic)
    clone._set_explicit_set_dtype(i._explicit_set_dtype)
    return clone


def _supported_input_size_type(input_size: Any) -> bool:
    if isinstance(input_size, torch.Size):
        return True
    elif isinstance(input_size, tuple):
        return True
    elif isinstance(input_size, list):
        return True
    else:
        raise TypeError(
            "Input sizes for inputs are required to be a List, tuple or torch.Size or a Dict of three sizes (min, opt, max), found type: "
            + str(type(input_size))
        )


def _parse_op_precision(precision: Any) -> _C.dtype:
    return dtype._from(precision).to(_C.dtype)


def _parse_enabled_precisions(precisions: Any) -> Set[_C.dtype]:
    parsed_precisions = set()
    if any(isinstance(precisions, type) for type in [list, tuple, set]):
        for p in precisions:
            parsed_precisions.add(_parse_op_precision(p))
    else:
        parsed_precisions.add(_parse_op_precision(precisions))
    return parsed_precisions


def _parse_device_type(device: Any) -> _C.DeviceType:
    return DeviceType._from(device).to(_C.DeviceType)


def _parse_device(device_info: Any) -> _C.Device:
    if isinstance(device_info, dict):
        info = _C.Device()
        if "device_type" not in device_info:
            raise KeyError("Device type is required parameter")
        else:
            info.device_type = _parse_device_type(device_info["device_type"])

        if "gpu_id" in device_info:
            assert isinstance(device_info["gpu_id"], int)
            info.gpu_id = device_info["gpu_id"]

        if "dla_core" in device_info:
            assert isinstance(device_info["dla_core"], int)
            info.dla_core = device_info["dla_core"]

        if "allow_gpu_fallback" in device_info:
            assert isinstance(device_info["allow_gpu_fallback"], bool)
            info.allow_gpu_fallback = device_info["allow_gpu_fallback"]

        return info
    elif isinstance(device_info, Device):
        return TorchScriptDevice._from(device_info)._to_internal()
    elif isinstance(device_info, TorchScriptDevice):
        return device_info._to_internal()
    elif isinstance(device_info, torch.device):
        return TorchScriptDevice._from(device_info)._to_internal()
    else:
        raise ValueError(
            "Unsupported data for device specification. Expected either a dict, torch_tensorrt.Device or torch.Device"
        )


def _parse_torch_fallback(fallback_info: Dict[str, Any]) -> _ts_C.TorchFallback:
    info = _ts_C.TorchFallback()
    if "enabled" not in fallback_info:
        raise KeyError("Enabled is required parameter")
    else:
        assert isinstance(fallback_info["enabled"], bool)
        info.enabled = fallback_info["enabled"]
    if "min_block_size" in fallback_info:
        assert isinstance(fallback_info["min_block_size"], int)
        info.min_block_size = fallback_info["min_block_size"]

    if "forced_fallback_ops" in fallback_info:
        assert isinstance(fallback_info["forced_fallback_ops"], list)
        info.forced_fallback_operators = fallback_info["forced_fallback_ops"]

    if "forced_fallback_modules" in fallback_info:
        assert isinstance(fallback_info["forced_fallback_modules"], list)
        info.forced_fallback_modules = fallback_info["forced_fallback_modules"]

    return info


def _parse_input_signature(input_signature: Any, depth: int = 0) -> Any:
    if depth > 2:
        raise AssertionError(
            "Input nesting depth exceeds max supported depth, use 1 level: [A, B], or 2 level: [A, (B, C)]"
        )

    if isinstance(input_signature, tuple):
        input_list = []
        for item in input_signature:
            input = _parse_input_signature(item, depth + 1)
            input_list.append(input)
        return tuple(input_list)
    elif isinstance(input_signature, list):
        input_list = []
        for item in input_signature:
            input = _parse_input_signature(item, depth + 1)
            input_list.append(input)
        return input_list
    elif isinstance(input_signature, (Input, torch.Tensor)):
        i = (
            Input.from_tensor(input_signature)
            if isinstance(input_signature, torch.Tensor)
            else input_signature
        )

        if not i.dtype.try_to(trt.DataType, use_default=True):
            raise TypeError(
                "Using non-TRT input types ({}) with input_signature is not currently ".format(
                    i.dtype
                )
                + "supported. Please specify inputs individually to use "
                + "non-TRT types."
            )

        ts_i = i
        if i.shape_mode == Input._ShapeMode.STATIC:
            ts_i = TorchScriptInput(shape=i.shape, dtype=i.dtype, format=i.format)
        elif i.shape_mode == Input._ShapeMode.DYNAMIC:
            if isinstance(i.shape, dict):
                ts_i = TorchScriptInput(
                    min_shape=i.shape["min_shape"],
                    opt_shape=i.shape["opt_shape"],
                    max_shape=i.shape["max_shape"],
                    dtype=i.dtype,
                    format=i.format,
                )
            else:
                raise ValueError(
                    f"Input set as dynamic, expected dictionary of shapes but found {i.shape}"
                )
        else:
            raise ValueError(
                "Invalid shape mode detected for input while parsing the input_signature"
            )

        clone = _internal_input_to_torch_class_input(ts_i._to_internal())
        return clone
    else:
        raise KeyError(
            "Input signature contains an unsupported type {}".format(
                type(input_signature)
            )
        )


def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
    # TODO: Use deepcopy to support partial compilation of collections
    compile_spec = deepcopy(compile_spec_)
    info = _ts_C.CompileSpec()

    if len(compile_spec["inputs"]) > 0:
        if not all(
            isinstance(i, (torch.Tensor, Input)) for i in compile_spec["inputs"]
        ):
            raise KeyError(
                "Input specs should be either torch_tensorrt.Input or torch.Tensor, found types: {}".format(
                    [type(i) for i in compile_spec["inputs"]]
                )
            )

        inputs = [
            Input.from_tensor(i) if isinstance(i, torch.Tensor) else i
            for i in compile_spec["inputs"]
        ]
        ts_inputs = []
        for i in inputs:
            if i.shape_mode == Input._ShapeMode.STATIC:
                ts_inputs.append(
                    TorchScriptInput(
                        shape=i.shape,
                        dtype=i.dtype.to(_C.dtype),
                        format=i.format.to(_C.TensorFormat),
                    )._to_internal()
                )
            elif i.shape_mode == Input._ShapeMode.DYNAMIC:
                ts_inputs.append(
                    TorchScriptInput(
                        min_shape=i.shape["min_shape"],
                        opt_shape=i.shape["opt_shape"],
                        max_shape=i.shape["max_shape"],
                        dtype=i.dtype.to(_C.dtype),
                        format=i.format.to(_C.TensorFormat),
                    )._to_internal()
                )
        info.inputs = ts_inputs

    elif compile_spec["input_signature"] is not None:
        log(
            Level.Warning,
            "Input signature parsing is an experimental feature, behavior and APIs may change",
        )
        signature = _parse_input_signature(compile_spec["input_signature"])
        info.input_signature = _C.InputSignature(signature)  # py_object

    else:
        raise KeyError(
            'Module input definitions are required to compile module. Provide a list of torch_tensorrt.Input keyed to "inputs" in the compile spec'
        )

    if "enabled_precisions" in compile_spec:
        info.enabled_precisions = _parse_enabled_precisions(
            compile_spec["enabled_precisions"]
        )

    if "calibrator" in compile_spec and compile_spec["calibrator"]:
        info.ptq_calibrator = compile_spec["calibrator"]

    if "sparse_weights" in compile_spec:
        assert isinstance(compile_spec["sparse_weights"], bool)
        info.sparse_weights = compile_spec["sparse_weights"]

    if "disable_tf32" in compile_spec:
        assert isinstance(compile_spec["disable_tf32"], bool)
        info.disable_tf32 = compile_spec["disable_tf32"]

    if "refit" in compile_spec:
        assert isinstance(compile_spec["refit"], bool)
        info.refit = compile_spec["refit"]

    if "debug" in compile_spec:
        assert isinstance(compile_spec["debug"], bool)
        info.debug = compile_spec["debug"]

    if "allow_shape_tensors" in compile_spec:
        assert isinstance(compile_spec["allow_shape_tensors"], bool)
        info.allow_shape_tensors = compile_spec["allow_shape_tensors"]

    if "device" in compile_spec:
        info.device = _parse_device(compile_spec["device"])

    if "capability" in compile_spec:
        capability = EngineCapability._from(compile_spec["capability"]).to(
            _C.EngineCapability
        )
        info.capability = capability

    if "num_avg_timing_iters" in compile_spec:
        assert type(compile_spec["num_avg_timing_iters"]) is int
        info.num_avg_timing_iters = compile_spec["num_avg_timing_iters"]

    if "workspace_size" in compile_spec:
        assert type(compile_spec["workspace_size"]) is int
        info.workspace_size = compile_spec["workspace_size"]

    if "dla_sram_size" in compile_spec:
        assert type(compile_spec["dla_sram_size"]) is int
        info.dla_sram_size = compile_spec["dla_sram_size"]

    if "dla_local_dram_size" in compile_spec:
        assert type(compile_spec["dla_local_dram_size"]) is int
        info.dla_local_dram_size = compile_spec["dla_local_dram_size"]

    if "dla_global_dram_size" in compile_spec:
        assert type(compile_spec["dla_global_dram_size"]) is int
        info.dla_global_dram_size = compile_spec["dla_global_dram_size"]

    if "truncate_long_and_double" in compile_spec:
        assert type(compile_spec["truncate_long_and_double"]) is bool
        info.truncate_long_and_double = compile_spec["truncate_long_and_double"]

    if "torch_fallback" in compile_spec:
        info.torch_fallback = _parse_torch_fallback(compile_spec["torch_fallback"])

    log(Level.Debug, str(info))

    return info


[docs]def TensorRTCompileSpec(
    inputs: Optional[List[torch.Tensor | Input]] = None,
    input_signature: Optional[Any] = None,
    device: Optional[torch.device | Device] = None,
    disable_tf32: bool = False,
    sparse_weights: bool = False,
    enabled_precisions: Optional[Set[torch.dtype | dtype]] = None,
    refit: bool = False,
    debug: bool = False,
    capability: EngineCapability = EngineCapability.STANDARD,
    num_avg_timing_iters: int = 1,
    workspace_size: int = 0,
    dla_sram_size: int = 1048576,
    dla_local_dram_size: int = 1073741824,
    dla_global_dram_size: int = 536870912,
    truncate_long_and_double: bool = False,
    calibrator: object = None,
    allow_shape_tensors: bool = False,
) -> torch.classes.tensorrt.CompileSpec:
    """Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend

    Keyword Args:
        inputs (List[Union(torch_tensorrt.Input, torch.Tensor)]): **Required** List of specifications of input shape, dtype and memory layout for inputs to the module. This argument is required. Input Sizes can be specified as torch sizes, tuples or lists. dtypes can be specified using
            torch datatypes or torch_tensorrt datatypes and you can use either torch devices or the torch_tensorrt device type enum
            to select device type. ::

                input=[
                    torch_tensorrt.Input((1, 3, 224, 224)), # Static NCHW input shape for input #1
                    torch_tensorrt.Input(
                        min_shape=(1, 224, 224, 3),
                        opt_shape=(1, 512, 512, 3),
                        max_shape=(1, 1024, 1024, 3),
                        dtype=torch.int32
                        format=torch.channel_last
                    ), # Dynamic input shape for input #2
                    torch.randn((1, 3, 224, 244)) # Use an example tensor and let torch_tensorrt infer settings
                ]

        device (Union(torch_tensorrt.Device, torch.device, dict)): Target device for TensorRT engines to run on ::

            device=torch_tensorrt.Device("dla:1", allow_gpu_fallback=True)

        disable_tf32 (bool): Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
        sparse_weights (bool): Enable sparsity for convolution and fully connected layers.
        enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
        refit (bool): Enable refitting
        debug (bool): Enable debuggable engine
        capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
        num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
        workspace_size (int): Maximum size of workspace given to TensorRT
        truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
        calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
        allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT

      Returns:
        torch.classes.tensorrt.CompileSpec: List of methods and formatted spec objects to be provided to ``torch._C._jit_to_tensorrt``
    """

    compile_spec = {
        "inputs": inputs if inputs is not None else [],
        # "input_signature": input_signature,
        "device": Device._current_device() if device is None else device,
        "disable_tf32": disable_tf32,  # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
        "sparse_weights": sparse_weights,  # Enable sparsity for convolution and fully connected layers.
        "enabled_precisions": (
            enabled_precisions if enabled_precisions is not None else set()
        ),  # Enabling FP16 kernels
        "refit": refit,  # enable refit
        "debug": debug,  # enable debuggable engine
        "capability": capability,  # Restrict kernel selection to safe gpu kernels or safe dla kernels
        "num_avg_timing_iters": num_avg_timing_iters,  # Number of averaging timing iterations used to select kernels
        "workspace_size": workspace_size,  # Maximum size of workspace given to TensorRT
        "dla_sram_size": dla_sram_size,  # Fast software managed RAM used by DLA to communicate within a layer.
        "dla_local_dram_size": dla_local_dram_size,  # Host RAM used by DLA to share intermediate tensor data across operations
        "dla_global_dram_size": dla_global_dram_size,  # Host RAM used by DLA to store weights and metadata for execution
        "calibrator": calibrator,
        "truncate_long_and_double": truncate_long_and_double,
        "allow_shape_tensors": allow_shape_tensors,
    }

    parsed_spec = _parse_compile_spec(compile_spec)

    backend_spec = torch.classes.tensorrt.CompileSpec()

    if input_signature is not None:
        raise ValueError(
            "Input signature parsing is not currently supported in the TorchScript backend integration"
        )

    for i in parsed_spec.inputs:
        clone = _internal_input_to_torch_class_input(i)
        backend_spec._append_input(clone)

    d = torch.classes.tensorrt._Device()
    d._set_device_type(int(parsed_spec.device.device_type))
    d._set_gpu_id(parsed_spec.device.gpu_id)
    d._set_dla_core(parsed_spec.device.dla_core)
    d._set_allow_gpu_fallback(parsed_spec.device.allow_gpu_fallback)

    if parsed_spec.torch_fallback.enabled:
        raise RuntimeError(
            "Partial module compilation is not currently supported via the PyTorch TensorRT backend. If you need partial compilation, use torch_tensorrt.compile"
        )

    torch_fallback = torch.classes.tensorrt._TorchFallback()
    torch_fallback._set_enabled(parsed_spec.torch_fallback.enabled)
    torch_fallback._set_min_block_size(parsed_spec.torch_fallback.min_block_size)
    torch_fallback._set_forced_fallback_operators(
        parsed_spec.torch_fallback.forced_fallback_operators
    )
    torch_fallback._set_forced_fallback_modules(
        parsed_spec.torch_fallback.forced_fallback_modules
    )

    backend_spec._set_device(d)
    backend_spec._set_torch_fallback(torch_fallback)
    backend_spec._set_precisions([int(i) for i in parsed_spec.enabled_precisions])

    backend_spec._set_disable_tf32(parsed_spec.disable_tf32)
    backend_spec._set_refit(parsed_spec.refit)
    backend_spec._set_debug(parsed_spec.debug)
    backend_spec._set_refit(parsed_spec.refit)
    backend_spec._set_capability(int(parsed_spec.capability))
    backend_spec._set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters)
    backend_spec._set_workspace_size(parsed_spec.workspace_size)
    backend_spec._set_dla_sram_size(parsed_spec.dla_sram_size)
    backend_spec._set_dla_local_dram_size(parsed_spec.dla_local_dram_size)
    backend_spec._set_dla_global_dram_size(parsed_spec.dla_global_dram_size)
    backend_spec._set_truncate_long_and_double(parsed_spec.truncate_long_and_double)
    backend_spec._set_allow_shape_tensors(parsed_spec.allow_shape_tensors)
    backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle())

    return backend_spec
Source code for torch_tensorrt.ts._compile_spec

Docs

Tutorials

Resources