Monster commit, activation refactor, VoVNet, norm_act improvements, more
* refactor activations into basic PyTorch, jit scripted, and memory efficient custom auto
* implement hard-mish, better grad for hard-swish
* add initial VovNet V1/V2 impl, fix #151
* VovNet and DenseNet first models to use NormAct layers (support BatchNormAct2d, EvoNorm, InplaceIABN)
* Wrap IABN for any models that use it
* make more models torchscript compatible (DPN, PNasNet, Res2Net, SelecSLS) and add tests
4 years ago
|
|
|
""" Activations

A collection of jit-scripted activation fns and modules with a common interface so that they can
easily be swapped. All have an `inplace` arg even if not used.

All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not
currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted
versions if they contain in-place ops.

Hacked together by / Copyright 2020 Ross Wightman
"""
|
|
|
|
|
|
|
|
import torch
|
|
|
|
from torch import nn as nn
|
|
|
|
from torch.nn import functional as F
|
|
|
|
|
|
|
|
|
|
|
|
@torch.jit.script
def swish_jit(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
    """Swish - Described in: https://arxiv.org/abs/1710.05941

    Computes ``x * sigmoid(x)`` out-of-place.

    Args:
        x: Input tensor.
        inplace: Accepted for interface compatibility only; it is ignored and
            the input is never modified.

    Returns:
        A new tensor holding the activation of ``x``.
    """
    return x.mul(x.sigmoid())
|
|
|
|
|
|
|
|
|
|
|
|
@torch.jit.script
def mish_jit(x: torch.Tensor, _inplace: bool = False) -> torch.Tensor:
    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

    Computes ``x * tanh(softplus(x))`` out-of-place.

    Args:
        x: Input tensor.
        _inplace: Accepted for interface compatibility only; it is ignored and
            the input is never modified.

    Returns:
        A new tensor holding the activation of ``x``.
    """
    return x.mul(F.softplus(x).tanh())
|
|
|
|
|
|
|
|
|
|
|
|
class SwishJit(nn.Module):
    """Module wrapper around the jit-scripted ``swish_jit`` function."""

    def __init__(self, inplace: bool = False):
        # `inplace` is accepted so the constructor matches other activation
        # modules; it is not stored or used.
        super().__init__()

    def forward(self, x):
        """Return ``swish_jit(x)``."""
        return swish_jit(x)
|
|
|
|
|
|
|
|
|
|
|
|
class MishJit(nn.Module):
    """Module wrapper around the jit-scripted ``mish_jit`` function."""

    def __init__(self, inplace: bool = False):
        # `inplace` is accepted so the constructor matches other activation
        # modules; it is not stored or used.
        super().__init__()

    def forward(self, x):
        """Return ``mish_jit(x)``."""
        return mish_jit(x)
|
|
|
|
|
|
|
|
|
|
|
|
@torch.jit.script
def hard_sigmoid_jit(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
    """Hard sigmoid: ``clamp(x + 3, 0, 6) / 6``, computed out-of-place.

    Args:
        x: Input tensor.
        inplace: Accepted for interface compatibility only; it is ignored and
            the input is never modified.

    Returns:
        A new tensor with values in ``[0, 1]``.
    """
    # Alternative formulation: F.relu6(x + 3.) / 6.
    # clamp seems ever so slightly faster?
    return (x + 3).clamp(min=0, max=6).div(6.)
|
|
|
|
|
|
|
|
|
|
|
|
class HardSigmoidJit(nn.Module):
    """Module wrapper around the jit-scripted ``hard_sigmoid_jit`` function."""

    def __init__(self, inplace: bool = False):
        # `inplace` is accepted so the constructor matches other activation
        # modules; it is not stored or used.
        super().__init__()

    def forward(self, x):
        """Return ``hard_sigmoid_jit(x)``."""
        return hard_sigmoid_jit(x)
|
|
|
|
|
|
|
|
|
|
|
|
@torch.jit.script
def hard_swish_jit(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
    """Hard swish: ``x * clamp(x + 3, 0, 6) / 6``, computed out-of-place.

    Args:
        x: Input tensor.
        inplace: Accepted for interface compatibility only; it is ignored and
            the input is never modified.

    Returns:
        A new tensor holding the activation of ``x``.
    """
    # Alternative formulation: x * (F.relu6(x + 3.) / 6)
    # clamp seems ever so slightly faster?
    return x * (x + 3).clamp(min=0, max=6).div(6.)
|
|
|
|
|
|
|
|
|
|
|
|
class HardSwishJit(nn.Module):
    """Module wrapper around the jit-scripted ``hard_swish_jit`` function."""

    def __init__(self, inplace: bool = False):
        # `inplace` is accepted so the constructor matches other activation
        # modules; it is not stored or used.
        super().__init__()

    def forward(self, x):
        """Return ``hard_swish_jit(x)``."""
        return hard_swish_jit(x)
|
|
|
|
|
|
|
|
|
|
|
|
@torch.jit.script
def hard_mish_jit(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
    """ Hard Mish

    Experimental, based on notes by Mish author Diganta Misra at
      https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md

    Computes ``0.5 * x * clamp(x + 2, 0, 2)`` out-of-place.

    Args:
        x: Input tensor.
        inplace: Accepted for interface compatibility only; it is ignored and
            the input is never modified.

    Returns:
        A new tensor holding the activation of ``x``.
    """
    return 0.5 * x * (x + 2).clamp(min=0, max=2)
|
|
|
|
|
|
|
|
|
|
|
|
class HardMishJit(nn.Module):
    """Module wrapper around the jit-scripted ``hard_mish_jit`` function."""

    def __init__(self, inplace: bool = False):
        # `inplace` is accepted so the constructor matches other activation
        # modules; it is not stored or used.
        super().__init__()

    def forward(self, x):
        """Return ``hard_mish_jit(x)``."""
        return hard_mish_jit(x)
|