You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
73 lines
2.5 KiB
73 lines
2.5 KiB
""" Normalization layers and wrappers
|
|
"""
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
|
|
|
|
class GroupNorm(nn.GroupNorm):
    """ GroupNorm variant whose constructor takes `num_channels` as the first argument.

    Args:
        num_channels: channel count, forwarded to nn.GroupNorm as `num_channels`.
        num_groups: group count, forwarded to nn.GroupNorm as `num_groups` (default 32).
        eps: forwarded to nn.GroupNorm (default 1e-5).
        affine: forwarded to nn.GroupNorm (default True).
    """

    def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True):
        # NOTE nn.GroupNorm takes (num_groups, num_channels); the order is flipped in this
        # signature for consistency when swapping norm layers with BN.
        super().__init__(num_groups=num_groups, num_channels=num_channels, eps=eps, affine=affine)

    def forward(self, x):
        # Apply the functional group norm with this module's own parameters.
        groups, scale, shift = self.num_groups, self.weight, self.bias
        return F.group_norm(x, groups, scale, shift, self.eps)
|
|
|
|
|
|
class GroupNorm1(nn.GroupNorm):
    """ Group Normalization fixed to a single group.

    Input: tensor in shape [B, C, *]

    Args:
        num_channels: channel count, forwarded to nn.GroupNorm.
        **kwargs: any remaining keyword arguments, forwarded to nn.GroupNorm unchanged.
    """

    def __init__(self, num_channels, **kwargs):
        # The group count is pinned to 1; callers only choose the channel count.
        super().__init__(1, num_channels, **kwargs)
|
|
|
|
|
|
class LayerNorm2d(nn.LayerNorm):
    """ LayerNorm applied over the channel dim of '2D' spatial NCHW tensors.

    Args:
        num_channels: passed to nn.LayerNorm as `normalized_shape`.
        eps: forwarded to nn.LayerNorm (default 1e-6).
        affine: forwarded to nn.LayerNorm as `elementwise_affine` (default True).
    """

    def __init__(self, num_channels, eps=1e-6, affine=True):
        super().__init__(normalized_shape=num_channels, eps=eps, elementwise_affine=affine)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Move dim 1 to the end, normalize over it, then restore the original dim order.
        channels_last = x.permute(0, 2, 3, 1)
        normed = F.layer_norm(channels_last, self.normalized_shape, self.weight, self.bias, self.eps)
        return normed.permute(0, 3, 1, 2)
|
|
|
|
|
|
def _is_contiguous(tensor: torch.Tensor) -> bool:
    """ Return whether `tensor` is contiguous, using a call form that depends on scripting.

    When `torch.jit.is_scripting()` is true the argument-free `is_contiguous()` is used;
    otherwise `torch.contiguous_format` is passed explicitly as the memory format.
    """
    # jit is oh so lovely :/
    # if torch.jit.is_tracing():
    #     return True
    if torch.jit.is_scripting():
        # NOTE(review): presumably the memory_format kwarg is unavailable when scripted -- confirm
        return tensor.is_contiguous()
    else:
        return tensor.is_contiguous(memory_format=torch.contiguous_format)
|
|
|
|
|
|
@torch.jit.script
def _layer_norm_cf(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, eps: float):
    """ Manual layer norm over dim 1 of `x`, followed by a per-channel scale and shift.

    `weight` and `bias` are indexed as `[:, None, None]`, so they broadcast against the
    two trailing dims of `x`.
    """
    # Biased (population) variance and mean along dim 1, kept for broadcasting.
    var, mean = torch.var_mean(x, dim=1, unbiased=False, keepdim=True)
    normed = (x - mean) * torch.rsqrt(var + eps)
    scale = weight[:, None, None]
    shift = bias[:, None, None]
    return normed * scale + shift
|
|
|
|
|
|
class LayerNormExp2d(nn.LayerNorm):
    """ LayerNorm for channels_first tensors with 2d spatial dimensions (ie N, C, H, W).

    Experimental implementation that uses a manual norm for non-contiguous tensors.

    This improves throughput in some scenarios (tested on Ampere GPU), esp w/ channels_last
    layout. However, benefits are not always clear and can perform worse on other GPUs.
    """

    def __init__(self, num_channels, eps=1e-6):
        super().__init__(normalized_shape=num_channels, eps=eps)

    def forward(self, x) -> torch.Tensor:
        if not _is_contiguous(x):
            # Non-contiguous input: normalize over dim 1 in place, without permuting.
            return _layer_norm_cf(x, self.weight, self.bias, self.eps)

        # Contiguous input: permute so channels are last, use F.layer_norm, permute back.
        channels_last = x.permute(0, 2, 3, 1)
        normed = F.layer_norm(channels_last, self.normalized_shape, self.weight, self.bias, self.eps)
        return normed.permute(0, 3, 1, 2)
|