pytorch-image-models/timm/models/layers/fast_norm.py

""" 'Fast' Normalization Functions

For GroupNorm and LayerNorm these functions bypass typical AMP upcast to float32.

Additionally, for LayerNorm, the APEX fused LN is used if available (which also does not upcast)

Hacked together by / Copyright 2022 Ross Wightman
"""
from typing import List, Optional

import torch
from torch.nn import functional as F

# APEX is an optional dependency: remember whether its fused LayerNorm function
# could be imported so fast_layer_norm() can decide at call time whether to use it.
try:
    from apex.normalization.fused_layer_norm import fused_layer_norm_affine
    has_apex = True
except ImportError:
    has_apex = False


# Module-wide switch for fast (i.e. lower precision) norm; it can be turned off if issues crop up.
# It is reported by is_fast_norm() and changed by set_fast_norm(); the fast_* functions
# in this file do not consult it themselves.
_USE_FAST_NORM = False  # defaulting to False for now


def is_fast_norm():
    """Report the current value of the module-level fast-norm switch."""
    fast_norm_enabled = _USE_FAST_NORM
    return fast_norm_enabled


def set_fast_norm(enable=True):
    """Store `enable` in the module-level fast-norm switch.

    Calling with no argument turns the switch on.
    """
    global _USE_FAST_NORM
    requested_state = enable
    _USE_FAST_NORM = requested_state


def _active_autocast_dtype(device_type: str) -> Optional[torch.dtype]:
    """Return the autocast dtype in effect for `device_type`, or None when autocast is off.

    The device-generic query functions are tried first; on PyTorch builds where they
    do not accept a device argument (or reject the device type) the cpu / cuda specific
    functions are used instead. Any other device type is reported as "autocast off".
    """
    try:
        if torch.is_autocast_enabled(device_type):
            return torch.get_autocast_dtype(device_type)
        return None
    except (AttributeError, TypeError, RuntimeError):
        # no device-generic API available, or device type not supported by autocast
        pass

    if device_type == 'cuda':
        return torch.get_autocast_gpu_dtype() if torch.is_autocast_enabled() else None
    if device_type == 'cpu':
        return torch.get_autocast_cpu_dtype() if torch.is_autocast_cpu_enabled() else None
    return None


def _to_dtype_or_none(t: Optional[torch.Tensor], dtype: torch.dtype) -> Optional[torch.Tensor]:
    """Cast `t` to `dtype`, passing None through untouched (affine params are optional)."""
    return None if t is None else t.to(dtype)


def fast_group_norm(
    x: torch.Tensor,
    num_groups: int,
    weight: Optional[torch.Tensor] = None,
    bias: Optional[torch.Tensor] = None,
    eps: float = 1e-5
) -> torch.Tensor:
    """Group norm that stays in the autocast dtype instead of being upcast by AMP.

    Args:
        x: Input tensor.
        num_groups: Number of groups, forwarded to `F.group_norm`.
        weight: Optional affine scale; may be None.
        bias: Optional affine shift; may be None.
        eps: Epsilon forwarded to `F.group_norm`.

    Returns:
        Result of `F.group_norm`. When autocast is active for the device of `x`, the
        inputs are cast to the autocast dtype and the op runs with autocast disabled.
    """
    if torch.jit.is_scripting():
        # currently cannot use is_autocast_enabled within torchscript
        return F.group_norm(x, num_groups, weight, bias, eps)

    device_type = x.device.type
    dt = _active_autocast_dtype(device_type)
    if dt is None:
        # autocast is not active for this device, so there is no upcast to bypass
        return F.group_norm(x, num_groups, weight, bias, eps)

    # normally native AMP casts GN inputs to float32
    # here we use the low precision autocast dtype (weight / bias may be absent)
    x = x.to(dt)
    weight = _to_dtype_or_none(weight, dt)
    bias = _to_dtype_or_none(bias, dt)

    # autocast must be off for the call, otherwise it would re-cast the inputs
    with torch.autocast(device_type=device_type, enabled=False):
        return F.group_norm(x, num_groups, weight, bias, eps)


def fast_layer_norm(
    x: torch.Tensor,
    normalized_shape: List[int],
    weight: Optional[torch.Tensor] = None,
    bias: Optional[torch.Tensor] = None,
    eps: float = 1e-5
) -> torch.Tensor:
    """Layer norm that stays in the autocast dtype instead of being upcast by AMP.

    Uses the APEX fused affine layer norm when it was importable and applicable,
    otherwise the native `F.layer_norm` with inputs cast to the autocast dtype.

    Args:
        x: Input tensor.
        normalized_shape: Shape forwarded to the layer norm implementation.
        weight: Optional affine scale; may be None.
        bias: Optional affine shift; may be None.
        eps: Epsilon forwarded to the layer norm implementation.

    Returns:
        The normalized tensor.
    """
    if torch.jit.is_scripting():
        # currently cannot use is_autocast_enabled within torchscript
        return F.layer_norm(x, normalized_shape, weight, bias, eps)

    # The fused APEX function is only attempted for CUDA inputs with both affine
    # params present (it is expected to require them); anything else goes native.
    if has_apex and x.is_cuda and weight is not None and bias is not None:
        return fused_layer_norm_affine(x, weight, bias, normalized_shape, eps)

    device_type = x.device.type
    dt = _active_autocast_dtype(device_type)
    if dt is None:
        # autocast is not active for this device, so there is no upcast to bypass
        return F.layer_norm(x, normalized_shape, weight, bias, eps)

    # normally native AMP casts LN inputs to float32
    # apex LN does not, this is behaving like Apex (weight / bias may be absent)
    x = x.to(dt)
    weight = _to_dtype_or_none(weight, dt)
    bias = _to_dtype_or_none(bias, dt)

    # autocast must be off for the call, otherwise it would re-cast the inputs
    with torch.autocast(device_type=device_type, enabled=False):
        return F.layer_norm(x, normalized_shape, weight, bias, eps)
Add norm/norm_act header comments 2 years ago			`""" 'Fast' Normalization Functions`

			`For GroupNorm and LayerNorm these functions bypass typical AMP upcast to float32.`

			`Additionally, for LayerNorm, the APEX fused LN is used if available (which also does not upcast)`

			`Hacked together by / Copyright 2022 Ross Wightman`
			`"""`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`from typing import List, Optional`

			`import torch`
			`from torch.nn import functional as F`

			`try:`
			`from apex.normalization.fused_layer_norm import fused_layer_norm_affine`
			`has_apex = True`
			`except ImportError:`
			`has_apex = False`


			`# fast (ie lower precision LN) can be disabled with this flag if issues crop up`
			`_USE_FAST_NORM = False # defaulting to False for now`


			`def is_fast_norm():`
			`return _USE_FAST_NORM`


			`def set_fast_norm(enable=True):`
			`global _USE_FAST_NORM`
			`_USE_FAST_NORM = enable`


			`def fast_group_norm(`
			`x: torch.Tensor,`
			`num_groups: int,`
			`weight: Optional[torch.Tensor] = None,`
			`bias: Optional[torch.Tensor] = None,`
			`eps: float = 1e-5`
			`) -> torch.Tensor:`
			`if torch.jit.is_scripting():`
			`# currently cannot use is_autocast_enabled within torchscript`
			`return F.group_norm(x, num_groups, weight, bias, eps)`

			`if torch.is_autocast_enabled():`
			`# normally native AMP casts GN inputs to float32`
			`# here we use the low precision autocast dtype`
Add norm/norm_act header comments 2 years ago			`# FIXME what to do re CPU autocast?`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`dt = torch.get_autocast_gpu_dtype()`
			`x, weight, bias = x.to(dt), weight.to(dt), bias.to(dt)`

			`with torch.cuda.amp.autocast(enabled=False):`
			`return F.group_norm(x, num_groups, weight, bias, eps)`


			`def fast_layer_norm(`
			`x: torch.Tensor,`
			`normalized_shape: List[int],`
			`weight: Optional[torch.Tensor] = None,`
			`bias: Optional[torch.Tensor] = None,`
			`eps: float = 1e-5`
			`) -> torch.Tensor:`
			`if torch.jit.is_scripting():`
			`# currently cannot use is_autocast_enabled within torchscript`
			`return F.layer_norm(x, normalized_shape, weight, bias, eps)`

			`if has_apex:`
			`return fused_layer_norm_affine(x, weight, bias, normalized_shape, eps)`

			`if torch.is_autocast_enabled():`
			`# normally native AMP casts LN inputs to float32`
			`# apex LN does not, this is behaving like Apex`
			`dt = torch.get_autocast_gpu_dtype()`
Add norm/norm_act header comments 2 years ago			`# FIXME what to do re CPU autocast?`
Add 'fast' layer norm that doesn't cast to float32, support APEX LN impl for slight speed gain, update norm and act factories, tweak SE for ability to disable bias (needed by GCVit) 2 years ago			`x, weight, bias = x.to(dt), weight.to(dt), bias.to(dt)`

			`with torch.cuda.amp.autocast(enabled=False):`
			`return F.layer_norm(x, normalized_shape, weight, bias, eps)`