pytorch-image-models/timm/models/layers/norm.py

""" Normalization layers and wrappers

Norm layer definitions that support fast norm and consistent channel arg order (always first arg).

Hacked together by / Copyright 2022 Ross Wightman
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

from .fast_norm import is_fast_norm, fast_group_norm, fast_layer_norm


class GroupNorm(nn.GroupNorm):
    """ GroupNorm w/ channel-first constructor args and an optional fast norm path.

    The constructor takes `num_channels` before `num_groups` (the reverse of nn.GroupNorm)
    so this layer can be swapped in where a BN-style `norm_layer(num_channels)` call is used.
    """
    def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True):
        # hand the args to nn.GroupNorm by keyword, its positional order is (num_groups, num_channels)
        super().__init__(num_groups=num_groups, num_channels=num_channels, eps=eps, affine=affine)
        # snapshot the fast norm setting as a module attribute at construction time,
        # scripting can't read a global flag from inside forward()
        self.fast_norm = is_fast_norm()

    def forward(self, x):
        # default path first: stock functional group norm
        if not self.fast_norm:
            return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps)
        return fast_group_norm(x, self.num_groups, self.weight, self.bias, self.eps)


class GroupNorm1(nn.GroupNorm):
    """ GroupNorm with the group count fixed to 1.

    Expected input: tensor of shape [B, C, *]
    """

    def __init__(self, num_channels, **kwargs):
        # group count is pinned to 1, any remaining kwargs are forwarded to nn.GroupNorm untouched
        super().__init__(1, num_channels, **kwargs)
        # fast norm setting is stored on the module at construction time since
        # scripting can't look up a global flag inside forward()
        self.fast_norm = is_fast_norm()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.fast_norm:
            return fast_group_norm(x, self.num_groups, self.weight, self.bias, self.eps)
        # fast norm disabled, use the stock functional implementation
        return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps)


class LayerNorm(nn.LayerNorm):
    """ LayerNorm that dispatches to the fast norm impl when it is enabled.

    `affine` maps onto nn.LayerNorm's `elementwise_affine`, and eps defaults to 1e-6 here.
    """
    def __init__(self, num_channels, eps=1e-6, affine=True):
        super().__init__(normalized_shape=num_channels, eps=eps, elementwise_affine=affine)
        # fast norm setting is stored on the module at construction time since
        # scripting can't look up a global flag inside forward()
        self._fast_norm = is_fast_norm()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not self._fast_norm:
            # default path, stock functional layer norm
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        return fast_layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)


class LayerNorm2d(nn.LayerNorm):
    """ LayerNorm applied over the channel dim of '2D' spatial NCHW tensors.

    The input is permuted to NHWC, normalized over the trailing (channel) dim, and
    permuted back to NCHW before being returned.
    """
    def __init__(self, num_channels, eps=1e-6, affine=True):
        super().__init__(normalized_shape=num_channels, eps=eps, elementwise_affine=affine)
        # fast norm setting is stored on the module at construction time since
        # scripting can't look up a global flag inside forward()
        self._fast_norm = is_fast_norm()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # NCHW -> NHWC so channels sit in the last dim that layer norm reduces over
        x_nhwc = x.permute(0, 2, 3, 1)
        if self._fast_norm:
            y = fast_layer_norm(x_nhwc, self.normalized_shape, self.weight, self.bias, self.eps)
        else:
            y = F.layer_norm(x_nhwc, self.normalized_shape, self.weight, self.bias, self.eps)
        # NHWC -> NCHW, restore the caller's layout
        return y.permute(0, 3, 1, 2)


def _is_contiguous(tensor: torch.Tensor) -> bool:
    """ Return True if `tensor` is contiguous in the default (torch.contiguous_format) layout.
    """
    if not torch.jit.is_scripting():
        # eager mode: request the default memory format explicitly
        return tensor.is_contiguous(memory_format=torch.contiguous_format)
    # jit quirk: while scripting, fall back to the argument-free form of the call
    return tensor.is_contiguous()


@torch.jit.script
def _layer_norm_cf(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, eps: float):
    """ Manual layer norm over dim 1 (channels) of a channels-first tensor.

    Uses the biased variance over dim 1. `weight` and `bias` are indexed as
    [:, None, None] so they broadcast against the two trailing dims of `x`.
    """
    var, mean = torch.var_mean(x, dim=1, unbiased=False, keepdim=True)
    inv_std = torch.rsqrt(var + eps)
    normed = (x - mean) * inv_std
    scale = weight[:, None, None]
    shift = bias[:, None, None]
    return normed * scale + shift


def _layer_norm_cf_sqm(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, eps: float):
    """ Manual channels-first layer norm w/ variance computed as E[x^2] - E[x]^2 over dim 1.

    The variance estimate is clamped at 0 before adding eps. `weight` and `bias` are
    viewed as (1, C, 1, 1) so they broadcast against a 4-dim `x`.
    """
    mean = x.mean(dim=1, keepdim=True)
    mean_of_sq = (x * x).mean(dim=1, keepdim=True)
    # the difference of means can dip slightly below zero from rounding, so clamp it
    var = (mean_of_sq - (mean * mean)).clamp(0)
    normed = (x - mean) * torch.rsqrt(var + eps)
    scale = weight.view(1, -1, 1, 1)
    shift = bias.view(1, -1, 1, 1)
    return normed * scale + shift


class LayerNormExp2d(nn.LayerNorm):
    """ LayerNorm for channels_first tensors with 2d spatial dimensions (ie N, C, H, W).

    Experimental implementation w/ a manual norm for non-contiguous tensors.

    Contiguous inputs go through F.layer_norm via an NHWC permute, everything else is
    normalized in place over dim 1 by the scripted manual kernel.

    This improves throughput in some scenarios (tested on Ampere GPU), esp w/ channels_last
    layout. However, benefits are not always clear and can perform worse on other GPUs.
    """

    def __init__(self, num_channels, eps=1e-6):
        super().__init__(num_channels, eps=eps)

    def forward(self, x) -> torch.Tensor:
        if not _is_contiguous(x):
            # non-contiguous (eg channels_last) input, normalize over dim 1 w/o permuting
            return _layer_norm_cf(x, self.weight, self.bias, self.eps)
        # contiguous NCHW input: permute to NHWC, norm over the last dim, permute back
        x_nhwc = x.permute(0, 2, 3, 1)
        y = F.layer_norm(x_nhwc, self.normalized_shape, self.weight, self.bias, self.eps)
        return y.permute(0, 3, 1, 2)
Missed norm.py 4 years ago			`""" Normalization layers and wrappers`
Add norm/norm_act header comments 2 years ago
			`Norm layer definitions that support fast norm and consistent channel arg order (always first arg).`

			`Hacked together by / Copyright 2022 Ross Wightman`
Missed norm.py 4 years ago			`"""`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago
Missed norm.py 4 years ago			`import torch`
			`import torch.nn as nn`
			`import torch.nn.functional as F`

Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`from .fast_norm import is_fast_norm, fast_group_norm, fast_layer_norm`

Missed norm.py 4 years ago
			`class GroupNorm(nn.GroupNorm):`
Add ResNet-50 w/ GN (resnet50_gn) and SEBotNet-33-TS (sebotnet33ts_256) model defs and weights. Update halonet50ts weights w/ slightly better variant in1k val, more robust to test sets. 3 years ago			`def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True):`
Missed norm.py 4 years ago			`# NOTE num_channels is swapped to first arg for consistency in swapping norm layers with BN`
			`super().__init__(num_groups, num_channels, eps=eps, affine=affine)`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`self.fast_norm = is_fast_norm() # can't script unless we have these flags here (no globals)`
Missed norm.py 4 years ago
			`def forward(self, x):`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`if self.fast_norm:`
			`return fast_group_norm(x, self.num_groups, self.weight, self.bias, self.eps)`
			`else:`
			`return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps)`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 4 years ago

Add MobileVitV2 support. Fix #1332. Move GroupNorm1 to common layers (used in poolformer + mobilevitv2). Keep ol custom ConvNeXt LayerNorm2d impl as LayerNormExp2d for reference. 2 years ago			`class GroupNorm1(nn.GroupNorm):`
			`""" Group Normalization with 1 group.`
			`Input: tensor in shape [B, C, *]`
			`"""`

			`def __init__(self, num_channels, **kwargs):`
			`super().__init__(1, num_channels, **kwargs)`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`self.fast_norm = is_fast_norm() # can't script unless we have these flags here (no globals)`

			`def forward(self, x: torch.Tensor) -> torch.Tensor:`
			`if self.fast_norm:`
			`return fast_group_norm(x, self.num_groups, self.weight, self.bias, self.eps)`
			`else:`
			`return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps)`


			`class LayerNorm(nn.LayerNorm):`
			`""" LayerNorm w/ fast norm option`
			`"""`
			`def __init__(self, num_channels, eps=1e-6, affine=True):`
			`super().__init__(num_channels, eps=eps, elementwise_affine=affine)`
			`self._fast_norm = is_fast_norm() # can't script unless we have these flags here (no globals)`

			`def forward(self, x: torch.Tensor) -> torch.Tensor:`
			`if self._fast_norm:`
Fix spacing misalignment for fast norm path in LayerNorm modules 2 years ago			`x = fast_layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`else:`
			`x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)`
			`return x`
Add MobileVitV2 support. Fix #1332. Move GroupNorm1 to common layers (used in poolformer + mobilevitv2). Keep ol custom ConvNeXt LayerNorm2d impl as LayerNormExp2d for reference. 2 years ago

Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 4 years ago			`class LayerNorm2d(nn.LayerNorm):`
Add MobileVitV2 support. Fix #1332. Move GroupNorm1 to common layers (used in poolformer + mobilevitv2). Keep ol custom ConvNeXt LayerNorm2d impl as LayerNormExp2d for reference. 2 years ago			`""" LayerNorm for channels of '2D' spatial NCHW tensors """`
			`def __init__(self, num_channels, eps=1e-6, affine=True):`
			`super().__init__(num_channels, eps=eps, elementwise_affine=affine)`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`self._fast_norm = is_fast_norm() # can't script unless we have these flags here (no globals)`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 4 years ago
			`def forward(self, x: torch.Tensor) -> torch.Tensor:`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`x = x.permute(0, 2, 3, 1)`
			`if self._fast_norm:`
Fix spacing misalignment for fast norm path in LayerNorm modules 2 years ago			`x = fast_layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`else:`
			`x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)`
			`x = x.permute(0, 3, 1, 2)`
			`return x`
Add MobileVitV2 support. Fix #1332. Move GroupNorm1 to common layers (used in poolformer + mobilevitv2). Keep ol custom ConvNeXt LayerNorm2d impl as LayerNormExp2d for reference. 2 years ago

			`def _is_contiguous(tensor: torch.Tensor) -> bool:`
			`# jit is oh so lovely :/`
			`if torch.jit.is_scripting():`
			`return tensor.is_contiguous()`
			`else:`
			`return tensor.is_contiguous(memory_format=torch.contiguous_format)`


			`@torch.jit.script`
			`def _layer_norm_cf(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, eps: float):`
			`s, u = torch.var_mean(x, dim=1, unbiased=False, keepdim=True)`
			`x = (x - u) * torch.rsqrt(s + eps)`
			`x = x * weight[:, None, None] + bias[:, None, None]`
			`return x`


Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`def _layer_norm_cf_sqm(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, eps: float):`
			`u = x.mean(dim=1, keepdim=True)`
			`s = ((x * x).mean(dim=1, keepdim=True) - (u * u)).clamp(0)`
			`x = (x - u) * torch.rsqrt(s + eps)`
			`x = x * weight.view(1, -1, 1, 1) + bias.view(1, -1, 1, 1)`
			`return x`


Add MobileVitV2 support. Fix #1332. Move GroupNorm1 to common layers (used in poolformer + mobilevitv2). Keep ol custom ConvNeXt LayerNorm2d impl as LayerNormExp2d for reference. 2 years ago			`class LayerNormExp2d(nn.LayerNorm):`
			`""" LayerNorm for channels_first tensors with 2d spatial dimensions (ie N, C, H, W).`

			`Experimental implementation w/ manual norm for tensors non-contiguous tensors.`

			`This improves throughput in some scenarios (tested on Ampere GPU), esp w/ channels_last`
			`layout. However, benefits are not always clear and can perform worse on other GPUs.`
			`"""`

			`def __init__(self, num_channels, eps=1e-6):`
			`super().__init__(num_channels, eps=eps)`

			`def forward(self, x) -> torch.Tensor:`
			`if _is_contiguous(x):`
			`x = F.layer_norm(`
			`x.permute(0, 2, 3, 1), self.normalized_shape, self.weight, self.bias, self.eps).permute(0, 3, 1, 2)`
			`else:`
			`x = _layer_norm_cf(x, self.weight, self.bias, self.eps)`
			`return x`