pytorch-image-models/timm/models/layers/activations_jit.py

""" Activations

A collection of jit-scripted activations fn and modules with a common interface so that they can
easily be swapped. All have an `inplace` arg even if not used.

All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not
currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted
versions if they contain in-place ops.

Hacked together by Ross Wightman
"""

import torch
from torch import nn as nn
from torch.nn import functional as F


@torch.jit.script
def swish_jit(x, inplace: bool = False):
    """Scripted Swish activation: ``x * sigmoid(x)``.

    Reference: https://arxiv.org/abs/1710.05941

    `inplace` exists only so the signature matches the other activations in
    this file; it is never read and a new tensor is always returned.
    """
    gate = torch.sigmoid(x)
    return x * gate


@torch.jit.script
def mish_jit(x, _inplace: bool = False):
    """Scripted Mish activation: ``x * tanh(softplus(x))``.

    Mish: A Self Regularized Non-Monotonic Neural Activation Function
    https://arxiv.org/abs/1908.08681

    `_inplace` is a placeholder flag and is never read; a new tensor is
    always returned.
    """
    gate = torch.tanh(F.softplus(x))
    return x * gate


class SwishJit(nn.Module):
    """`nn.Module` wrapper that applies the scripted `swish_jit` function.

    The constructor takes `inplace` so it can be built with the same
    arguments as other activation modules, but the flag is not stored or used.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        # Delegate to the scripted function; the in-place flag is not forwarded.
        return swish_jit(x)


class MishJit(nn.Module):
    """`nn.Module` wrapper that applies the scripted `mish_jit` function.

    The constructor takes `inplace` so it can be built with the same
    arguments as other activation modules, but the flag is not stored or used.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        # Delegate to the scripted function; the in-place flag is not forwarded.
        return mish_jit(x)


@torch.jit.script
def hard_sigmoid_jit(x, inplace: bool = False):
    """Scripted hard sigmoid: ``clamp(x + 3, 0, 6) / 6``.

    `inplace` is accepted for signature parity with the other activations
    and is never read.
    """
    # Equivalent to F.relu6(x + 3.) / 6.; clamp seemed ever so slightly faster.
    shifted = x + 3
    return torch.clamp(shifted, min=0, max=6) / 6.


class HardSigmoidJit(nn.Module):
    """`nn.Module` wrapper that applies the scripted `hard_sigmoid_jit` function.

    The constructor takes `inplace` so it can be built with the same
    arguments as other activation modules, but the flag is not stored or used.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        # Delegate to the scripted function; the in-place flag is not forwarded.
        return hard_sigmoid_jit(x)


@torch.jit.script
def hard_swish_jit(x, inplace: bool = False):
    """Scripted hard swish: ``x * clamp(x + 3, 0, 6) / 6``.

    `inplace` is accepted for signature parity with the other activations
    and is never read.
    """
    # Equivalent to x * (F.relu6(x + 3.) / 6); clamp seemed ever so slightly faster.
    gate = torch.clamp(x + 3, min=0, max=6) / 6.
    return x * gate


class HardSwishJit(nn.Module):
    """`nn.Module` wrapper that applies the scripted `hard_swish_jit` function.

    The constructor takes `inplace` so it can be built with the same
    arguments as other activation modules, but the flag is not stored or used.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        # Delegate to the scripted function; the in-place flag is not forwarded.
        return hard_swish_jit(x)


@torch.jit.script
def hard_mish_jit(x, inplace: bool = False):
    """Scripted Hard Mish: ``0.5 * x * clamp(x + 2, 0, 2)``.

    Experimental, based on notes by Mish author Diganta Misra at
      https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md

    `inplace` is accepted for signature parity with the other activations
    and is never read.
    """
    half_x = 0.5 * x
    return half_x * torch.clamp(x + 2, min=0, max=2)


class HardMishJit(nn.Module):
    """`nn.Module` wrapper that applies the scripted `hard_mish_jit` function.

    The constructor takes `inplace` so it can be built with the same
    arguments as other activation modules, but the flag is not stored or used.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        # Delegate to the scripted function; the in-place flag is not forwarded.
        return hard_mish_jit(x)
Monster commit, activation refactor, VoVNet, norm_act improvements, more * refactor activations into basic PyTorch, jit scripted, and memory efficient custom auto * implement hard-mish, better grad for hard-swish * add initial VovNet V1/V2 impl, fix #151 * VovNet and DenseNet first models to use NormAct layers (support BatchNormAct2d, EvoNorm, InplaceIABN) * Wrap IABN for any models that use it * make more models torchscript compatible (DPN, PNasNet, Res2Net, SelecSLS) and add tests 5 years ago			`""" Activations`

			`A collection of jit-scripted activations fn and modules with a common interface so that they can`
			easily be swapped. All have an `inplace` arg even if not used.

			`All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not`
			`currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted`
			`versions if they contain in-place ops.`

			`Hacked together by Ross Wightman`
			`"""`

			`import torch`
			`from torch import nn as nn`
			`from torch.nn import functional as F`


			`@torch.jit.script`
			`def swish_jit(x, inplace: bool = False):`
			`"""Swish - Described in: https://arxiv.org/abs/1710.05941`
			`"""`
			`return x.mul(x.sigmoid())`


			`@torch.jit.script`
			`def mish_jit(x, _inplace: bool = False):`
			`"""Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681`
			`"""`
			`return x.mul(F.softplus(x).tanh())`


			`class SwishJit(nn.Module):`
			`def __init__(self, inplace: bool = False):`
			`super(SwishJit, self).__init__()`

			`def forward(self, x):`
			`return swish_jit(x)`


			`class MishJit(nn.Module):`
			`def __init__(self, inplace: bool = False):`
			`super(MishJit, self).__init__()`

			`def forward(self, x):`
			`return mish_jit(x)`


			`@torch.jit.script`
			`def hard_sigmoid_jit(x, inplace: bool = False):`
			`# return F.relu6(x + 3.) / 6.`
			`return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster?`


			`class HardSigmoidJit(nn.Module):`
			`def __init__(self, inplace: bool = False):`
			`super(HardSigmoidJit, self).__init__()`

			`def forward(self, x):`
			`return hard_sigmoid_jit(x)`


			`@torch.jit.script`
			`def hard_swish_jit(x, inplace: bool = False):`
			`# return x * (F.relu6(x + 3.) / 6)`
			`return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster?`


			`class HardSwishJit(nn.Module):`
			`def __init__(self, inplace: bool = False):`
			`super(HardSwishJit, self).__init__()`

			`def forward(self, x):`
			`return hard_swish_jit(x)`


			`@torch.jit.script`
			`def hard_mish_jit(x, inplace: bool = False):`
			`""" Hard Mish`
			`Experimental, based on notes by Mish author Diganta Misra at`
			`https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md`
			`"""`
			`return 0.5 * x * (x + 2).clamp(min=0, max=2)`


			`class HardMishJit(nn.Module):`
			`def __init__(self, inplace: bool = False):`
			`super(HardMishJit, self).__init__()`

			`def forward(self, x):`
			`return hard_mish_jit(x)`