Shortcuts

# Source code for torch.nn.modules.normalization

# mypy: allow-untyped-defs
import numbers
from typing import List, Optional, Tuple, Union

import torch
from torch import Size, Tensor
from torch.nn import functional as F, init
from torch.nn.parameter import Parameter

from ._functions import CrossMapLRN2d as _cross_map_lrn2d
from .module import Module

__all__ = ["LocalResponseNorm", "CrossMapLRN2d", "LayerNorm", "GroupNorm", "RMSNorm"]

[docs]class LocalResponseNorm(Module):
r"""Applies local response normalization over an input signal.

The input signal is composed of several input planes, where channels occupy the second dimension.
Applies normalization across channels.

.. math::
b_{c} = a_{c}\left(k + \frac{\alpha}{n}
\sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

Args:
size: amount of neighbouring channels used for normalization
alpha: multiplicative factor. Default: 0.0001
beta: exponent. Default: 0.75

Shape:
- Input: :math:(N, C, *)
- Output: :math:(N, C, *) (same shape as input)

Examples::

>>> lrn = nn.LocalResponseNorm(2)
>>> signal_2d = torch.randn(32, 5, 24, 24)
>>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
>>> output_2d = lrn(signal_2d)
>>> output_4d = lrn(signal_4d)

"""

__constants__ = ["size", "alpha", "beta", "k"]
size: int
alpha: float
beta: float
k: float

def __init__(
self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1.0
) -> None:
super().__init__()
self.size = size
self.alpha = alpha
self.beta = beta
self.k = k

def forward(self, input: Tensor) -> Tensor:
return F.local_response_norm(input, self.size, self.alpha, self.beta, self.k)

def extra_repr(self):
return "{size}, alpha={alpha}, beta={beta}, k={k}".format(**self.__dict__)

class CrossMapLRN2d(Module):
size: int
alpha: float
beta: float
k: float

def __init__(
self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1
) -> None:
super().__init__()
self.size = size
self.alpha = alpha
self.beta = beta
self.k = k

def forward(self, input: Tensor) -> Tensor:
return _cross_map_lrn2d.apply(input, self.size, self.alpha, self.beta, self.k)

def extra_repr(self) -> str:
return "{size}, alpha={alpha}, beta={beta}, k={k}".format(**self.__dict__)

_shape_t = Union[int, List[int], Size]

[docs]class LayerNorm(Module):
r"""Applies Layer Normalization over a mini-batch of inputs.

This layer implements the operation as described in
the paper Layer Normalization <https://arxiv.org/abs/1607.06450>__

.. math::
y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

The mean and standard-deviation are calculated over the last D dimensions, where D
is the dimension of :attr:normalized_shape. For example, if :attr:normalized_shape
is (3, 5) (a 2-dimensional shape), the mean and standard-deviation are computed over
the last 2 dimensions of the input (i.e. input.mean((-2, -1))).
:math:\gamma and :math:\beta are learnable affine transform parameters of
:attr:normalized_shape if :attr:elementwise_affine is True.
The standard-deviation is calculated via the biased estimator, equivalent to
torch.var(input, unbiased=False).

.. note::
Unlike Batch Normalization and Instance Normalization, which applies
scalar scale and bias for each entire channel/plane with the
:attr:affine option, Layer Normalization applies per-element scale and
bias with :attr:elementwise_affine.

This layer uses statistics computed from input data in both training and
evaluation modes.

Args:
normalized_shape (int or list or torch.Size): input shape from an expected input
of size

.. math::
[* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
\times \ldots \times \text{normalized\_shape}[-1]]

If a single integer is used, it is treated as a singleton list, and this module will
normalize over the last dimension which is expected to be of that specific size.
eps: a value added to the denominator for numerical stability. Default: 1e-5
elementwise_affine: a boolean value that when set to True, this module
has learnable per-element affine parameters initialized to ones (for weights)
and zeros (for biases). Default: True.
bias: If set to False, the layer will not learn an additive bias (only relevant if
:attr:elementwise_affine is True). Default: True.

Attributes:
weight: the learnable weights of the module of shape
:math:\text{normalized\_shape} when :attr:elementwise_affine is set to True.
The values are initialized to 1.
bias:   the learnable bias of the module of shape
:math:\text{normalized\_shape} when :attr:elementwise_affine is set to True.
The values are initialized to 0.

Shape:
- Input: :math:(N, *)
- Output: :math:(N, *) (same shape as input)

Examples::

>>> # NLP Example
>>> batch, sentence_length, embedding_dim = 20, 5, 10
>>> embedding = torch.randn(batch, sentence_length, embedding_dim)
>>> layer_norm = nn.LayerNorm(embedding_dim)
>>> # Activate module
>>> layer_norm(embedding)
>>>
>>> # Image Example
>>> N, C, H, W = 20, 5, 10, 10
>>> input = torch.randn(N, C, H, W)
>>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
>>> # as shown in the image below
>>> layer_norm = nn.LayerNorm([C, H, W])
>>> output = layer_norm(input)

.. image:: ../_static/img/nn/layer_norm.jpg
:scale: 50 %

"""

__constants__ = ["normalized_shape", "eps", "elementwise_affine"]
normalized_shape: Tuple[int, ...]
eps: float
elementwise_affine: bool

def __init__(
self,
normalized_shape: _shape_t,
eps: float = 1e-5,
elementwise_affine: bool = True,
bias: bool = True,
device=None,
dtype=None,
) -> None:
factory_kwargs = {"device": device, "dtype": dtype}
super().__init__()
if isinstance(normalized_shape, numbers.Integral):
# mypy error: incompatible types in assignment
normalized_shape = (normalized_shape,)  # type: ignore[assignment]
self.normalized_shape = tuple(normalized_shape)  # type: ignore[arg-type]
self.eps = eps
self.elementwise_affine = elementwise_affine
if self.elementwise_affine:
self.weight = Parameter(
torch.empty(self.normalized_shape, **factory_kwargs)
)
if bias:
self.bias = Parameter(
torch.empty(self.normalized_shape, **factory_kwargs)
)
else:
self.register_parameter("bias", None)
else:
self.register_parameter("weight", None)
self.register_parameter("bias", None)

self.reset_parameters()

def reset_parameters(self) -> None:
if self.elementwise_affine:
init.ones_(self.weight)
if self.bias is not None:
init.zeros_(self.bias)

def forward(self, input: Tensor) -> Tensor:
return F.layer_norm(
input, self.normalized_shape, self.weight, self.bias, self.eps
)

def extra_repr(self) -> str:
return (
"{normalized_shape}, eps={eps}, "
"elementwise_affine={elementwise_affine}".format(**self.__dict__)
)

[docs]class GroupNorm(Module):
r"""Applies Group Normalization over a mini-batch of inputs.

This layer implements the operation as described in
the paper Group Normalization <https://arxiv.org/abs/1803.08494>__

.. math::
y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

The input channels are separated into :attr:num_groups groups, each containing
num_channels / num_groups channels. :attr:num_channels must be divisible by
:attr:num_groups. The mean and standard-deviation are calculated
separately over the each group. :math:\gamma and :math:\beta are learnable
per-channel affine transform parameter vectors of size :attr:num_channels if
:attr:affine is True.
The standard-deviation is calculated via the biased estimator, equivalent to
torch.var(input, unbiased=False).

This layer uses statistics computed from input data in both training and
evaluation modes.

Args:
num_groups (int): number of groups to separate the channels into
num_channels (int): number of channels expected in input
eps: a value added to the denominator for numerical stability. Default: 1e-5
affine: a boolean value that when set to True, this module
has learnable per-channel affine parameters initialized to ones (for weights)
and zeros (for biases). Default: True.

Shape:
- Input: :math:(N, C, *) where :math:C=\text{num\_channels}
- Output: :math:(N, C, *) (same shape as input)

Examples::

>>> input = torch.randn(20, 6, 10, 10)
>>> # Separate 6 channels into 3 groups
>>> m = nn.GroupNorm(3, 6)
>>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
>>> m = nn.GroupNorm(6, 6)
>>> # Put all 6 channels into a single group (equivalent with LayerNorm)
>>> m = nn.GroupNorm(1, 6)
>>> # Activating the module
>>> output = m(input)
"""

__constants__ = ["num_groups", "num_channels", "eps", "affine"]
num_groups: int
num_channels: int
eps: float
affine: bool

def __init__(
self,
num_groups: int,
num_channels: int,
eps: float = 1e-5,
affine: bool = True,
device=None,
dtype=None,
) -> None:
factory_kwargs = {"device": device, "dtype": dtype}
super().__init__()
if num_channels % num_groups != 0:
raise ValueError("num_channels must be divisible by num_groups")

self.num_groups = num_groups
self.num_channels = num_channels
self.eps = eps
self.affine = affine
if self.affine:
self.weight = Parameter(torch.empty(num_channels, **factory_kwargs))
self.bias = Parameter(torch.empty(num_channels, **factory_kwargs))
else:
self.register_parameter("weight", None)
self.register_parameter("bias", None)

self.reset_parameters()

def reset_parameters(self) -> None:
if self.affine:
init.ones_(self.weight)
init.zeros_(self.bias)

def forward(self, input: Tensor) -> Tensor:
return F.group_norm(input, self.num_groups, self.weight, self.bias, self.eps)

def extra_repr(self) -> str:
return "{num_groups}, {num_channels}, eps={eps}, " "affine={affine}".format(
**self.__dict__
)

[docs]class RMSNorm(Module):
r"""Applies Root Mean Square Layer Normalization over a mini-batch of inputs.

This layer implements the operation as described in
the paper Root Mean Square Layer Normalization <https://arxiv.org/pdf/1910.07467.pdf>__

.. math::
y = \frac{x}{\sqrt{\mathrm{RMS}[x] + \epsilon}} * \gamma

The root mean squared norm is taken over the last D dimensions, where D
is the dimension of :attr:normalized_shape. For example, if :attr:normalized_shape
is (3, 5) (a 2-dimensional shape), the rms norm is computed over
the last 2 dimensions of the input.

Args:
normalized_shape (int or list or torch.Size): input shape from an expected input
of size

.. math::
[* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
\times \ldots \times \text{normalized\_shape}[-1]]

If a single integer is used, it is treated as a singleton list, and this module will
normalize over the last dimension which is expected to be of that specific size.
eps: a value added to the denominator for numerical stability. Default: :func:torch.finfo(x.dtype).eps
elementwise_affine: a boolean value that when set to True, this module
has learnable per-element affine parameters initialized to ones (for weights)
and zeros (for biases). Default: True.

Shape:
- Input: :math:(N, *)
- Output: :math:(N, *) (same shape as input)

Examples::

>>> rms_norm = nn.RMSNorm([2, 3])
>>> input = torch.randn(2, 2, 3)
>>> rms_norm(input)

"""
__constants__ = ["normalized_shape", "eps", "elementwise_affine"]
normalized_shape: Tuple[int, ...]
eps: Optional[float]
elementwise_affine: bool

def __init__(
self,
normalized_shape: _shape_t,
eps: Optional[float] = None,
elementwise_affine: bool = True,
device=None,
dtype=None,
) -> None:
factory_kwargs = {"device": device, "dtype": dtype}
super().__init__()
if isinstance(normalized_shape, numbers.Integral):
# mypy error: incompatible types in assignment
normalized_shape = (normalized_shape,)  # type: ignore[assignment]
self.normalized_shape = tuple(normalized_shape)  # type: ignore[arg-type]
self.eps = eps
self.elementwise_affine = elementwise_affine
if self.elementwise_affine:
self.weight = Parameter(
torch.empty(self.normalized_shape, **factory_kwargs)
)
else:
self.register_parameter("weight", None)
self.reset_parameters()

[docs]    def reset_parameters(self) -> None:
"""
Resets parameters based on their initialization used in __init__.
"""
if self.elementwise_affine:
init.ones_(self.weight)

[docs]    def forward(self, x: torch.Tensor) -> torch.Tensor:
"""
Runs forward pass.
"""
return F.rms_norm(x, self.normalized_shape, self.weight, self.eps)

[docs]    def extra_repr(self) -> str:
"""
"""
return (
"{normalized_shape}, eps={eps}, "
"elementwise_affine={elementwise_affine}".format(**self.__dict__)
)

# TODO: ContrastiveNorm2d
# TODO: DivisiveNorm2d
# TODO: SubtractiveNorm2d


## Docs

Access comprehensive developer documentation for PyTorch

View Docs

## Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials