# pytorch-image-models/timm/models/layers/mlp.py

""" MLP module w/ dropout and configurable activation layer

Hacked together by / Copyright 2020 Ross Wightman
"""
from torch import nn as nn

from .helpers import to_2tuple


class Mlp(nn.Module):
    """ Feed-forward block as used in Vision Transformer, MLP-Mixer and related networks

    Pipeline: fc1 (Linear) -> act -> drop1 -> fc2 (Linear) -> drop2.

    Args:
        in_features: input width of the first Linear layer.
        hidden_features: output width of fc1 / input width of fc2; ``in_features``
            is used when this is None (or any other falsy value).
        out_features: output width of fc2; ``in_features`` is used when this is
            None (or any other falsy value).
        act_layer: callable invoked with no arguments to build the activation module.
        drop: dropout setting, passed through ``to_2tuple``; element 0 configures
            drop1 and element 1 configures drop2.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_dim = hidden_features if hidden_features else in_features
        out_dim = out_features if out_features else in_features
        rates = to_2tuple(drop)

        # Submodules are registered in the same order forward() applies them.
        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.act = act_layer()
        self.drop1 = nn.Dropout(rates[0])
        self.fc2 = nn.Linear(hidden_dim, out_dim)
        self.drop2 = nn.Dropout(rates[1])

    def forward(self, x):
        """ Project, activate, drop, project back, drop; returns the resulting tensor. """
        hidden = self.drop1(self.act(self.fc1(x)))
        return self.drop2(self.fc2(hidden))


class GluMlp(nn.Module):
    """ Feed-forward block with GLU style gating
    See: https://arxiv.org/abs/1612.08083, https://arxiv.org/abs/2002.05202

    fc1 produces ``hidden_features`` channels which forward() splits in two along
    the last dim: the first half carries values, the second half is passed through
    ``act`` and multiplies the values. fc2 therefore consumes ``hidden_features // 2``.

    Args:
        in_features: input width of fc1.
        hidden_features: output width of fc1 (must be even); ``in_features`` is
            used when this is None (or any other falsy value).
        out_features: output width of fc2; ``in_features`` is used when this is
            None (or any other falsy value).
        act_layer: callable invoked with no arguments to build the gate activation.
        drop: dropout setting, passed through ``to_2tuple``; element 0 configures
            drop1 and element 1 configures drop2.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.Sigmoid, drop=0.):
        super().__init__()
        hidden_dim = hidden_features if hidden_features else in_features
        out_dim = out_features if out_features else in_features
        # The fc1 output is split into two equal halves, so its width has to be even.
        assert hidden_dim % 2 == 0
        rates = to_2tuple(drop)

        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.act = act_layer()
        self.drop1 = nn.Dropout(rates[0])
        self.fc2 = nn.Linear(hidden_dim // 2, out_dim)
        self.drop2 = nn.Dropout(rates[1])

    def init_weights(self):
        """ Re-initialise the gate half of fc1: bias set to 1, weights ~ N(0, 1e-6). """
        gate_start = self.fc1.bias.shape[0] // 2
        # Rows [gate_start:] of fc1 produce the second chunk in forward(), i.e. the gates.
        gate_bias = self.fc1.bias[gate_start:]
        gate_weight = self.fc1.weight[gate_start:]
        nn.init.ones_(gate_bias)
        nn.init.normal_(gate_weight, std=1e-6)

    def forward(self, x):
        """ Apply fc1, gate the first half by act(second half), then drop1 -> fc2 -> drop2. """
        values, gates = self.fc1(x).chunk(2, dim=-1)
        gated = self.drop1(values * self.act(gates))
        return self.drop2(self.fc2(gated))


class GatedMlp(nn.Module):
    """ Feed-forward block as used in gMLP

    Pipeline: fc1 (Linear) -> act -> drop1 -> gate -> fc2 (Linear) -> drop2.

    Args:
        in_features: input width of fc1.
        hidden_features: output width of fc1; ``in_features`` is used when this is
            None (or any other falsy value). Must be even when a gate is supplied.
        out_features: output width of fc2; ``in_features`` is used when this is
            None (or any other falsy value).
        act_layer: callable invoked with no arguments to build the activation module.
        gate_layer: optional callable invoked with the hidden width to build the gate
            module. When given, fc2 is sized for half the hidden width; when None the
            gate is ``nn.Identity`` and fc2 takes the full hidden width.
        drop: dropout setting, passed through ``to_2tuple``; element 0 configures
            drop1 and element 1 configures drop2.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU,
                 gate_layer=None, drop=0.):
        super().__init__()
        hidden_dim = hidden_features if hidden_features else in_features
        out_dim = out_features if out_features else in_features
        rates = to_2tuple(drop)

        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.act = act_layer()
        self.drop1 = nn.Dropout(rates[0])
        if gate_layer is None:
            # No gating: features pass through unchanged at full width.
            self.gate = nn.Identity()
            fc2_in = hidden_dim
        else:
            assert hidden_dim % 2 == 0
            self.gate = gate_layer(hidden_dim)
            fc2_in = hidden_dim // 2  # FIXME base reduction on gate property?
        self.fc2 = nn.Linear(fc2_in, out_dim)
        self.drop2 = nn.Dropout(rates[1])

    def forward(self, x):
        """ Project, activate, drop, gate, project back, drop; returns the resulting tensor. """
        hidden = self.drop1(self.act(self.fc1(x)))
        return self.drop2(self.fc2(self.gate(hidden)))


class ConvMlp(nn.Module):
    """ Feed-forward block built from 1x1 convs, so spatial dims are kept

    Pipeline: fc1 (1x1 Conv2d) -> norm -> act -> drop -> fc2 (1x1 Conv2d).

    Args:
        in_features: input channels of fc1.
        hidden_features: output channels of fc1 / input channels of fc2;
            ``in_features`` is used when this is None (or any other falsy value).
        out_features: output channels of fc2; ``in_features`` is used when this is
            None (or any other falsy value).
        act_layer: callable invoked with no arguments to build the activation module.
        norm_layer: optional callable invoked with the hidden channel count to build
            the norm module; ``nn.Identity`` is used when it is None / falsy.
        drop: value handed to the single ``nn.Dropout`` applied before fc2.
    """
    def __init__(
            self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU, norm_layer=None, drop=0.):
        super().__init__()
        hidden_dim = hidden_features if hidden_features else in_features
        out_dim = out_features if out_features else in_features

        self.fc1 = nn.Conv2d(in_features, hidden_dim, kernel_size=1, bias=True)
        if norm_layer:
            self.norm = norm_layer(hidden_dim)
        else:
            self.norm = nn.Identity()
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_dim, out_dim, kernel_size=1, bias=True)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """ Run the conv / norm / act / drop / conv pipeline and return the result. """
        hidden = self.act(self.norm(self.fc1(x)))
        return self.fc2(self.drop(hidden))
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`""" MLP module w/ dropout and configurable activation layer`

			`Hacked together by / Copyright 2020 Ross Wightman`
			`"""`
			`from torch import nn as nn`

Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`from .helpers import to_2tuple`

Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago
			`class Mlp(nn.Module):`
			`""" MLP as used in Vision Transformer, MLP-Mixer and related networks`
			`"""`
			`def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):`
			`super().__init__()`
			`out_features = out_features or in_features`
			`hidden_features = hidden_features or in_features`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`drop_probs = to_2tuple(drop)`

Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`self.fc1 = nn.Linear(in_features, hidden_features)`
			`self.act = act_layer()`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`self.drop1 = nn.Dropout(drop_probs[0])`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`self.fc2 = nn.Linear(hidden_features, out_features)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`self.drop2 = nn.Dropout(drop_probs[1])`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago
			`def forward(self, x):`
			`x = self.fc1(x)`
			`x = self.act(x)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`x = self.drop1(x)`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`x = self.fc2(x)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`x = self.drop2(x)`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`return x`


			`class GluMlp(nn.Module):`
			`""" MLP w/ GLU style gating`
			`See: https://arxiv.org/abs/1612.08083, https://arxiv.org/abs/2002.05202`
			`"""`
			`def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.Sigmoid, drop=0.):`
			`super().__init__()`
			`out_features = out_features or in_features`
			`hidden_features = hidden_features or in_features`
Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago			`assert hidden_features % 2 == 0`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`drop_probs = to_2tuple(drop)`

Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago			`self.fc1 = nn.Linear(in_features, hidden_features)`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`self.act = act_layer()`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`self.drop1 = nn.Dropout(drop_probs[0])`
Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago			`self.fc2 = nn.Linear(hidden_features // 2, out_features)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`self.drop2 = nn.Dropout(drop_probs[1])`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago
Refactoring, cleanup, improved test coverage. * Add eca_nfnet_l2 weights, 84.7 @ 384x384 * All 'non-std' (ie transformer / mlp) models have classifier / default_cfg test added * Fix #694 reset_classifer / num_features / forward_features / num_classes=0 consistency for transformer / mlp models * Add direct loading of npz to vision transformer (pure transformer so far, hybrid to come) * Rename vit_deit* to deit_* * Remove some deprecated vit hybrid model defs * Clean up classifier flatten for conv classifiers and unusual cases (mobilenetv3/ghostnet) * Remove explicit model fns for levit conv, just pass in arg 3 years ago			`def init_weights(self):`
			`# override init of fc1 w/ gate portion set to weight near zero, bias=1`
			`fc1_mid = self.fc1.bias.shape[0] // 2`
			`nn.init.ones_(self.fc1.bias[fc1_mid:])`
			`nn.init.normal_(self.fc1.weight[fc1_mid:], std=1e-6)`

Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`def forward(self, x):`
			`x = self.fc1(x)`
			`x, gates = x.chunk(2, dim=-1)`
			`x = x * self.act(gates)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`x = self.drop1(x)`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`x = self.fc2(x)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`x = self.drop2(x)`
Move Mlp and PatchEmbed modules into layers. Being used in lots of models now... 4 years ago			`return x`
Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago

			`class GatedMlp(nn.Module):`
			`""" MLP as used in gMLP`
			`"""`
			`def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU,`
			`gate_layer=None, drop=0.):`
			`super().__init__()`
			`out_features = out_features or in_features`
			`hidden_features = hidden_features or in_features`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`drop_probs = to_2tuple(drop)`

Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago			`self.fc1 = nn.Linear(in_features, hidden_features)`
			`self.act = act_layer()`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`self.drop1 = nn.Dropout(drop_probs[0])`
Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago			`if gate_layer is not None:`
			`assert hidden_features % 2 == 0`
			`self.gate = gate_layer(hidden_features)`
			`hidden_features = hidden_features // 2 # FIXME base reduction on gate property?`
			`else:`
			`self.gate = nn.Identity()`
			`self.fc2 = nn.Linear(hidden_features, out_features)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`self.drop2 = nn.Dropout(drop_probs[1])`
Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago
			`def forward(self, x):`
			`x = self.fc1(x)`
			`x = self.act(x)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`x = self.drop1(x)`
Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago			`x = self.gate(x)`
			`x = self.fc2(x)`
Cleanup re-use of Dropout modules in Mlp modules after some twitter feedback :p 3 years ago			`x = self.drop2(x)`
Add preliminary gMLP and ResMLP impl to Mlp-Mixer 4 years ago			`return x`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago

			`class ConvMlp(nn.Module):`
			`""" MLP using 1x1 convs that keeps spatial dims`
			`"""`
			`def __init__(`
			`self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU, norm_layer=None, drop=0.):`
			`super().__init__()`
			`out_features = out_features or in_features`
			`hidden_features = hidden_features or in_features`
			`self.fc1 = nn.Conv2d(in_features, hidden_features, kernel_size=1, bias=True)`
			`self.norm = norm_layer(hidden_features) if norm_layer else nn.Identity()`
			`self.act = act_layer()`
			`self.fc2 = nn.Conv2d(hidden_features, out_features, kernel_size=1, bias=True)`
			`self.drop = nn.Dropout(drop)`

			`def forward(self, x):`
			`x = self.fc1(x)`
			`x = self.norm(x)`
			`x = self.act(x)`
			`x = self.drop(x)`
			`x = self.fc2(x)`
			`return x`