Source code for torch.quantization.fake_quantize

import torch
from torch.nn import Module
from .observer import MovingAverageMinMaxObserver, HistogramObserver, MovingAveragePerChannelMinMaxObserver, _with_args
import re
from abc import ABC, abstractmethod

def _is_per_channel(qscheme: 'torch.qscheme') -> bool:
    return qscheme in [torch.per_channel_symmetric, torch.per_channel_affine]

def _is_per_tensor(qscheme: 'torch.qscheme') -> bool:
    return qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]

class FakeQuantizeBase(ABC, Module):
    r""" Base fake quantize module
    Any fake quantize implementation should derive from this class.

    Concrete fake quantize module should follow the same API. In forward, they will update
    the statistics of the observed Tensor and fake quantize the input. They should also provide a
    `calculate_qparams` function that computes the quantization parameters given
    the collected statistics.

    Attributes:
        fake_quant_enabled (Tensor): one-element uint8 buffer; 1 means fake
            quantization is switched on, 0 means it is switched off.
        observer_enabled (Tensor): one-element uint8 buffer; 1 means the
            observer is switched on, 0 means it is switched off.
    """

    fake_quant_enabled: torch.Tensor
    observer_enabled: torch.Tensor

    def __init__(self):
        # Module.__init__ must run first: register_buffer needs the module's
        # internal buffer registry to exist.
        super().__init__()
        # fake_quant_enabled and observer_enabled are buffers to support their
        # replication in DDP. Data type is uint8 because NCCL does not support
        # bool tensors.
        self.register_buffer('fake_quant_enabled', torch.tensor([1], dtype=torch.uint8))
        self.register_buffer('observer_enabled', torch.tensor([1], dtype=torch.uint8))

    @abstractmethod
    def forward(self, x):
        """Process ``x``; must be implemented by every concrete subclass."""
        pass

    @abstractmethod
    def calculate_qparams(self, **kwargs):
        """Return the quantization parameters; must be implemented by subclasses."""
        pass

    # The methods below are exported so that they remain callable on a scripted
    # module even though ``forward`` never calls them.
    @torch.jit.export
    def enable_fake_quant(self, enabled: bool = True) -> None:
        """Set the ``fake_quant_enabled`` flag to 1 if ``enabled`` else 0."""
        self.fake_quant_enabled[0] = 1 if enabled else 0

    @torch.jit.export
    def disable_fake_quant(self):
        """Set the ``fake_quant_enabled`` flag to 0."""
        self.enable_fake_quant(False)

    @torch.jit.export
    def enable_observer(self, enabled: bool = True) -> None:
        """Set the ``observer_enabled`` flag to 1 if ``enabled`` else 0."""
        self.observer_enabled[0] = 1 if enabled else 0

    @torch.jit.export
    def disable_observer(self):
        """Set the ``observer_enabled`` flag to 0."""
        self.enable_observer(False)

    with_args = classmethod(_with_args)

class FakeQuantize(FakeQuantizeBase):
    r""" Simulate the quantize and dequantize operations in training time.
    The output of this module is given by

    x_out = (clamp(round(x/scale + zero_point), quant_min, quant_max)-zero_point)*scale

    * :attr:`scale` defines the scale factor used for quantization.

    * :attr:`zero_point` specifies the quantized value to which 0 in floating point maps to

    * :attr:`quant_min` specifies the minimum allowable quantized value.

    * :attr:`quant_max` specifies the maximum allowable quantized value.

    * :attr:`fake_quant_enabled` controls the application of fake quantization on tensors, note that
      statistics can still be updated.

    * :attr:`observer_enabled` controls statistics collection on tensors

    * :attr:`dtype` specifies the quantized dtype that is being emulated with fake-quantization,
      allowable values are torch.qint8 and torch.quint8. The values of quant_min and
      quant_max should be chosen to be consistent with the dtype

    Args:
        observer (module): Module for observing statistics on input tensors and calculating scale
                           and zero-point.
        quant_min (int): The minimum allowable quantized value.
        quant_max (int): The maximum allowable quantized value.
        observer_kwargs (optional): Arguments for the observer module

    Attributes:
        activation_post_process (Module): Instance built from the user provided ``observer``; it
                           collects statistics on the input tensor and provides a method to
                           calculate scale and zero-point.

    """

    scale: torch.Tensor
    zero_point: torch.Tensor

    def __init__(self, observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255, **observer_kwargs):
        super().__init__()
        assert quant_min <= quant_max, \
            'quant_min must be less than or equal to quant_max'
        self.quant_min = quant_min
        self.quant_max = quant_max
        self.activation_post_process = observer(**observer_kwargs)
        # The requested range has to fit inside the integer range of the observer's dtype.
        assert torch.iinfo(self.activation_post_process.dtype).min <= quant_min, 'quant_min out of bound'
        assert quant_max <= torch.iinfo(self.activation_post_process.dtype).max, 'quant_max out of bound'
        self.register_buffer('scale', torch.tensor([1.0]))
        self.register_buffer('zero_point', torch.tensor([0]))
        self.dtype = self.activation_post_process.dtype
        self.qscheme = self.activation_post_process.qscheme
        # Observers without a channel axis get -1 as a placeholder.
        self.ch_axis = self.activation_post_process.ch_axis \
            if hasattr(self.activation_post_process, 'ch_axis') else -1
        assert _is_per_channel(self.qscheme) or \
            _is_per_tensor(self.qscheme), \
            'Only per channel and per tensor quantization are supported in fake quantize' + \
            ' got qscheme: ' + str(self.qscheme)
        self.is_per_channel = _is_per_channel(self.qscheme)

    @torch.jit.export
    def calculate_qparams(self):
        """Return the quantization parameters computed by the wrapped observer."""
        return self.activation_post_process.calculate_qparams()

    def forward(self, X):
        """Optionally observe ``X``, refresh scale/zero_point, and fake quantize ``X``.

        When the observer flag is 1, the observer sees a detached copy of ``X``
        and the ``scale`` / ``zero_point`` buffers are overwritten with the
        freshly computed parameters. When the fake-quant flag is 1, ``X`` is
        replaced by its fake-quantized version; otherwise it is returned as is.
        """
        if self.observer_enabled[0] == 1:
            self.activation_post_process(X.detach())
            _scale, _zero_point = self.calculate_qparams()
            # Move the new parameters onto the buffers' devices before copying.
            _scale, _zero_point = _scale.to(self.scale.device), _zero_point.to(self.zero_point.device)
            # The buffers are resized first because the computed parameters may
            # have a different shape from the current buffers.
            self.scale.resize_(_scale.shape)
            self.scale.copy_(_scale)
            self.zero_point.resize_(_zero_point.shape)
            self.zero_point.copy_(_zero_point)

        if self.fake_quant_enabled[0] == 1:
            if self.is_per_channel:
                X = torch.fake_quantize_per_channel_affine(X, self.scale, self.zero_point,
                                                           self.ch_axis, self.quant_min, self.quant_max)
            else:
                X = torch.fake_quantize_per_tensor_affine(X, float(self.scale),
                                                          int(self.zero_point), self.quant_min,
                                                          self.quant_max)
        return X

    @torch.jit.export
    def extra_repr(self):
        """Return a summary string of the flags, range, dtype, scheme and qparams."""
        return 'fake_quant_enabled={}, observer_enabled={}, ' \
               'quant_min={}, quant_max={}, dtype={}, qscheme={}, ch_axis={}, ' \
               'scale={}, zero_point={}'.format(
                   self.fake_quant_enabled, self.observer_enabled,
                   self.quant_min, self.quant_max,
                   self.dtype, self.qscheme, self.ch_axis, self.scale, self.zero_point)

    def _save_to_state_dict(self, destination, prefix, keep_vars):
        # We cannot currently register scalar values as buffers, so need to manually
        # specify serialization here.
        super(FakeQuantize, self)._save_to_state_dict(destination, prefix, keep_vars)
        destination[prefix + 'scale'] = self.scale
        destination[prefix + 'zero_point'] = self.zero_point

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        # Removing this function throws an error that the size of the loaded tensor does not match the original size
        # i.e., These buffers start out with numel 0 and become numel 1 once they have their first forward pass.
        local_state = ['scale', 'zero_point']
        for name in local_state:
            key = prefix + name
            if key in state_dict:
                # Rebind the attribute to the loaded tensor before the parent
                # implementation runs.
                val = state_dict[key]
                setattr(self, name, val)
            elif strict:
                missing_keys.append(key)
        super(FakeQuantize, self)._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                                        missing_keys, unexpected_keys, error_msgs)
class FixedQParamsFakeQuantize(FakeQuantizeBase):
    """ Simulate quantize and dequantize with fixed quantization
    parameters in training time. Only per tensor quantization
    is supported.

    Args:
        `scale` (float): fixed scale for the fake quantize module
        `zero_point` (int): fixed zero point for the fake quantize module
        `dtype`, `qscheme`, `quant_min`, `quant_max`
    """

    scale: torch.Tensor
    zero_point: torch.Tensor

    def __init__(self,
                 scale,
                 zero_point,
                 dtype=torch.quint8,
                 qscheme=torch.per_tensor_affine,
                 quant_min=0,
                 quant_max=255):
        super().__init__()
        assert quant_min <= quant_max, 'quant_min should be less than or equal to quant_max'
        self.quant_min = quant_min
        self.quant_max = quant_max
        # The fixed parameters are stored as one-element buffers.
        self.register_buffer('scale', torch.tensor([scale]))
        self.register_buffer('zero_point', torch.tensor([zero_point]))
        self.dtype = dtype
        self.qscheme = qscheme
        assert _is_per_tensor(self.qscheme), 'Only per tensor quantization is supported' + \
            ' FixedQParamsFakeQuantize module, got qscheme:' + str(self.qscheme)

    def forward(self, X):
        """Fake quantize ``X`` with the fixed parameters when the flag is 1, else return it unchanged."""
        if self.fake_quant_enabled[0] == 1:
            X = torch.fake_quantize_per_tensor_affine(X, float(self.scale),
                                                      int(self.zero_point), self.quant_min,
                                                      self.quant_max)
        return X

    @torch.jit.export
    def calculate_qparams(self):
        """Return the fixed ``(scale, zero_point)`` buffers."""
        return self.scale, self.zero_point

    @torch.jit.export
    def extra_repr(self):
        """Return a summary string of the flags, qparams, dtype, range and scheme."""
        return 'fake_quant_enabled={}, observer_enabled={}, scale={}, zero_point={}, ' \
               'dtype={}, quant_min={}, quant_max={}, qscheme={}'.format(
                   self.fake_quant_enabled, self.observer_enabled,
                   self.scale, self.zero_point, self.dtype,
                   self.quant_min, self.quant_max, self.qscheme)


# quint8, per-tensor affine, range [0, 255], moving-average min/max observer.
default_fake_quant = FakeQuantize.with_args(observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255,
                                            dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=True)

# qint8, per-tensor symmetric, range [-128, 127], moving-average min/max observer.
default_weight_fake_quant = FakeQuantize.with_args(observer=MovingAverageMinMaxObserver, quant_min=-128, quant_max=127,
                                                   dtype=torch.qint8, qscheme=torch.per_tensor_symmetric,
                                                   reduce_range=False)

# TODO(future PR): remove these defaults and enforce activation functions
# to explicitly specify their output range
default_symmetric_fixed_qparams_fake_quant = FixedQParamsFakeQuantize.with_args(
    scale=2.0 / 256.0, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255)
default_affine_fixed_qparams_fake_quant = FixedQParamsFakeQuantize.with_args(
    scale=1.0 / 256.0, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255)

# qint8, per-channel symmetric along axis 0, range [-128, 127].
default_per_channel_weight_fake_quant = FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver,
                                                               quant_min=-128,
                                                               quant_max=127,
                                                               dtype=torch.qint8,
                                                               qscheme=torch.per_channel_symmetric,
                                                               reduce_range=False,
                                                               ch_axis=0)

# quint8, per-tensor affine, range [0, 255], histogram observer.
default_histogram_fake_quant = FakeQuantize.with_args(observer=HistogramObserver,
                                                      quant_min=0,
                                                      quant_max=255,
                                                      dtype=torch.quint8,
                                                      qscheme=torch.per_tensor_affine,
                                                      reduce_range=True)


def _is_fake_quant_script_module(mod):
    ''' Returns true if given mod is an instance of FakeQuantize script module.
    '''
    if isinstance(mod, torch.jit.RecursiveScriptModule):
        # qualified name looks like '__torch__.torch.quantization.fake_quantize.___torch_mangle_2.FakeQuantize'
        # Drop the leading component, then strip any '.___torch_mangle_<N>' segment.
        suffix = mod._c.qualified_name.split('.', 1)[1]
        name = re.sub(r'\.___torch_mangle_\d+', '', suffix)
        return name == 'torch.quantization.fake_quantize.FakeQuantize'
    return False


def disable_fake_quant(mod):
    """Call ``mod.disable_fake_quant()`` if ``mod`` is a fake quantize module (eager or scripted)."""
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.disable_fake_quant()


def enable_fake_quant(mod):
    """Call ``mod.enable_fake_quant()`` if ``mod`` is a fake quantize module (eager or scripted)."""
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.enable_fake_quant()


def disable_observer(mod):
    """Call ``mod.disable_observer()`` if ``mod`` is a fake quantize module (eager or scripted)."""
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.disable_observer()


def enable_observer(mod):
    """Call ``mod.enable_observer()`` if ``mod`` is a fake quantize module (eager or scripted)."""
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.enable_observer()


Access comprehensive developer documentation for PyTorch

View Docs


Get in-depth tutorials for beginners and advanced developers

View Tutorials


Find development resources and get your questions answered

View Resources