pytorch-image-models/timm/models/layers/cbam.py

""" CBAM (sort-of) Attention

Experimental impl of CBAM: Convolutional Block Attention Module: https://arxiv.org/abs/1807.06521

WARNING: Results with these attention layers have been mixed. They can significantly reduce performance on
some tasks, especially fine-grained ones, it seems. I may end up removing this impl.

Hacked together by / Copyright 2020 Ross Wightman
"""
import torch
from torch import nn as nn
import torch.nn.functional as F

from .conv_bn_act import ConvBnAct
from .create_act import create_act_layer, get_act_layer
from .helpers import make_divisible


class ChannelAttn(nn.Module):
    """ CBAM channel attention, avg + max pool variant.

    The input is reduced over dims (2, 3) with both a mean and a max. Each pooled
    descriptor goes through the same two 1x1 convs (fc1 -> act -> fc2), the two
    results are summed, gated, and multiplied back onto the input.

    Args:
        channels: in-channels of fc1 and out-channels of fc2.
        rd_ratio: ratio applied to `channels` to derive the bottleneck width when
            `rd_channels` is not given.
        rd_channels: explicit bottleneck width; when falsy it is derived via
            `make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)`.
        rd_divisor: divisor forwarded to `make_divisible`.
        act_layer: activation constructor, called as `act_layer(inplace=True)`.
        gate_layer: gate spec forwarded to `create_act_layer`.
        mlp_bias: whether the two 1x1 convs carry a bias term.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super().__init__()
        # Fall back to the ratio-derived width whenever no usable width was supplied.
        rd_channels = rd_channels or make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)
        self.fc1 = nn.Conv2d(channels, rd_channels, kernel_size=1, bias=mlp_bias)
        self.act = act_layer(inplace=True)
        self.fc2 = nn.Conv2d(rd_channels, channels, kernel_size=1, bias=mlp_bias)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        # Both pooled descriptors share the same fc1 / act / fc2 weights.
        avg_desc = x.mean((2, 3), keepdim=True)
        attn_avg = self.fc2(self.act(self.fc1(avg_desc)))
        max_desc = x.amax((2, 3), keepdim=True)
        attn_max = self.fc2(self.act(self.fc1(max_desc)))
        return x * self.gate(attn_avg + attn_max)


class LightChannelAttn(ChannelAttn):
    """ An experimental 'lightweight' channel attention variant that combines avg + max pool first.

    The mean and max over dims (2, 3) are blended with equal 0.5 weights into a single
    descriptor, so the shared fc1 -> act -> fc2 stack runs once instead of twice.
    Constructor arguments are forwarded unchanged to `ChannelAttn`.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super(LightChannelAttn, self).__init__(
            channels, rd_ratio, rd_channels, rd_divisor, act_layer, gate_layer, mlp_bias)

    def forward(self, x):
        x_pool = 0.5 * x.mean((2, 3), keepdim=True) + 0.5 * x.amax((2, 3), keepdim=True)
        x_attn = self.fc2(self.act(self.fc1(x_pool)))
        # Apply the gate built from `gate_layer` by the parent constructor rather than a
        # hard-coded (and deprecated) F.sigmoid, so a non-default gate is honoured.
        return x * self.gate(x_attn)


class SpatialAttn(nn.Module):
    """ CBAM spatial attention.

    A mean map and a max map are taken over dim 1 and stacked into a 2-channel
    tensor. `ConvBnAct` (built with `act_layer=None`) reduces that to one channel,
    which is gated and multiplied back onto the input.

    Args:
        kernel_size: kernel size forwarded to `ConvBnAct`.
        gate_layer: gate spec forwarded to `create_act_layer`.
    """
    def __init__(self, kernel_size=7, gate_layer='sigmoid'):
        super().__init__()
        # 2 input channels: one for the mean map, one for the max map.
        self.conv = ConvBnAct(2, 1, kernel_size, act_layer=None)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        stacked = torch.cat([mean_map, max_map], dim=1)
        attn = self.conv(stacked)
        return x * self.gate(attn)


class LightSpatialAttn(nn.Module):
    """ An experimental 'lightweight' spatial attention variant.

    The mean and max over dim 1 are blended with equal 0.5 weights into a single
    one-channel map, so `ConvBnAct` takes 1 input channel here. Its output is
    gated and multiplied back onto the input.

    Args:
        kernel_size: kernel size forwarded to `ConvBnAct`.
        gate_layer: gate spec forwarded to `create_act_layer`.
    """
    def __init__(self, kernel_size=7, gate_layer='sigmoid'):
        super().__init__()
        self.conv = ConvBnAct(1, 1, kernel_size, act_layer=None)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        # Each term is scaled before the sum, as in the original formulation.
        blended = 0.5 * mean_map + 0.5 * max_map
        attn = self.conv(blended)
        return x * self.gate(attn)


class CbamModule(nn.Module):
    """ Full CBAM block: `ChannelAttn` followed by `SpatialAttn`.

    Args:
        channels, rd_ratio, rd_channels, rd_divisor, act_layer, mlp_bias:
            forwarded to `ChannelAttn`.
        spatial_kernel_size: kernel size forwarded to `SpatialAttn`.
        gate_layer: gate spec forwarded to both attention branches.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            spatial_kernel_size=7, act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super().__init__()
        channel_kwargs = dict(
            rd_ratio=rd_ratio,
            rd_channels=rd_channels,
            rd_divisor=rd_divisor,
            act_layer=act_layer,
            gate_layer=gate_layer,
            mlp_bias=mlp_bias,
        )
        self.channel = ChannelAttn(channels, **channel_kwargs)
        self.spatial = SpatialAttn(spatial_kernel_size, gate_layer=gate_layer)

    def forward(self, x):
        # Channel attention runs first; spatial attention sees its output.
        return self.spatial(self.channel(x))


class LightCbamModule(nn.Module):
    """ Lightweight CBAM block: `LightChannelAttn` followed by `LightSpatialAttn`.

    Args:
        channels, rd_ratio, rd_channels, rd_divisor, act_layer, mlp_bias:
            forwarded to `LightChannelAttn`.
        spatial_kernel_size: kernel size forwarded to `LightSpatialAttn`.
        gate_layer: gate spec forwarded to both attention branches.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            spatial_kernel_size=7, act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super(LightCbamModule, self).__init__()
        self.channel = LightChannelAttn(
            channels, rd_ratio=rd_ratio, rd_channels=rd_channels,
            rd_divisor=rd_divisor, act_layer=act_layer, gate_layer=gate_layer, mlp_bias=mlp_bias)
        # Forward gate_layer to the spatial branch as well (as CbamModule does), so a
        # non-default gate applies to both halves instead of only the channel branch.
        self.spatial = LightSpatialAttn(spatial_kernel_size, gate_layer=gate_layer)

    def forward(self, x):
        # Channel attention runs first; spatial attention sees its output.
        x = self.channel(x)
        x = self.spatial(x)
        return x
Add CBAM for experimentation 5 years ago			`""" CBAM (sort-of) Attention`

			`Experimental impl of CBAM: Convolutional Block Attention Module: https://arxiv.org/abs/1807.06521`

Tweak some comments, add SKNet models with weights to sotabench, remove an unused branch 5 years ago			`WARNING: Results with these attention layers have been mixed. They can significantly reduce performance on`
			`some tasks, especially fine-grained it seems. I may end up removing this impl.`

Fix some attributions, add copyrights to some file docstrings 4 years ago			`Hacked together by / Copyright 2020 Ross Wightman`
Add CBAM for experimentation 5 years ago			`"""`
			`import torch`
			`from torch import nn as nn`
AdaptiveAvgPool2d -> mean((2,3)) for all SE/attn layers to avoid NaN with AMP + channels_last layout. See https://github.com/pytorch/pytorch/issues/43992 4 years ago			`import torch.nn.functional as F`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago
Add CBAM for experimentation 5 years ago			`from .conv_bn_act import ConvBnAct`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`from .create_act import create_act_layer, get_act_layer`
			`from .helpers import make_divisible`
Add CBAM for experimentation 5 years ago

			`class ChannelAttn(nn.Module):`
			`""" Original CBAM channel attention module, currently avg + max pool variant only.`
			`"""`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`def __init__(`
			`self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,`
			`act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):`
Add CBAM for experimentation 5 years ago			`super(ChannelAttn, self).__init__()`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`if not rd_channels:`
			`rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)`
			`self.fc1 = nn.Conv2d(channels, rd_channels, 1, bias=mlp_bias)`
Add CBAM for experimentation 5 years ago			`self.act = act_layer(inplace=True)`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`self.fc2 = nn.Conv2d(rd_channels, channels, 1, bias=mlp_bias)`
			`self.gate = create_act_layer(gate_layer)`
Add CBAM for experimentation 5 years ago
			`def forward(self, x):`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`x_avg = self.fc2(self.act(self.fc1(x.mean((2, 3), keepdim=True))))`
			`x_max = self.fc2(self.act(self.fc1(x.amax((2, 3), keepdim=True))))`
			`return x * self.gate(x_avg + x_max)`
Add CBAM for experimentation 5 years ago

			`class LightChannelAttn(ChannelAttn):`
			`"""An experimental 'lightweight' that sums avg + max pool first`
			`"""`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`def __init__(`
			`self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,`
			`act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):`
			`super(LightChannelAttn, self).__init__(`
			`channels, rd_ratio, rd_channels, rd_divisor, act_layer, gate_layer, mlp_bias)`
Add CBAM for experimentation 5 years ago
			`def forward(self, x):`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`x_pool = 0.5 * x.mean((2, 3), keepdim=True) + 0.5 * x.amax((2, 3), keepdim=True)`
Add CBAM for experimentation 5 years ago			`x_attn = self.fc2(self.act(self.fc1(x_pool)))`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`return x * F.sigmoid(x_attn)`
Add CBAM for experimentation 5 years ago

			`class SpatialAttn(nn.Module):`
			`""" Original CBAM spatial attention module`
			`"""`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`def __init__(self, kernel_size=7, gate_layer='sigmoid'):`
Add CBAM for experimentation 5 years ago			`super(SpatialAttn, self).__init__()`
			`self.conv = ConvBnAct(2, 1, kernel_size, act_layer=None)`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`self.gate = create_act_layer(gate_layer)`
Add CBAM for experimentation 5 years ago
			`def forward(self, x):`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`x_attn = torch.cat([x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)], dim=1)`
Add CBAM for experimentation 5 years ago			`x_attn = self.conv(x_attn)`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`return x * self.gate(x_attn)`
Add CBAM for experimentation 5 years ago

			`class LightSpatialAttn(nn.Module):`
			`"""An experimental 'lightweight' variant that sums avg_pool and max_pool results.`
			`"""`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`def __init__(self, kernel_size=7, gate_layer='sigmoid'):`
Add CBAM for experimentation 5 years ago			`super(LightSpatialAttn, self).__init__()`
			`self.conv = ConvBnAct(1, 1, kernel_size, act_layer=None)`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`self.gate = create_act_layer(gate_layer)`
Add CBAM for experimentation 5 years ago
			`def forward(self, x):`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`x_attn = 0.5 * x.mean(dim=1, keepdim=True) + 0.5 * x.amax(dim=1, keepdim=True)`
Add CBAM for experimentation 5 years ago			`x_attn = self.conv(x_attn)`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`return x * self.gate(x_attn)`
Add CBAM for experimentation 5 years ago

			`class CbamModule(nn.Module):`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`def __init__(`
			`self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,`
			`spatial_kernel_size=7, act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):`
Add CBAM for experimentation 5 years ago			`super(CbamModule, self).__init__()`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`self.channel = ChannelAttn(`
			`channels, rd_ratio=rd_ratio, rd_channels=rd_channels,`
			`rd_divisor=rd_divisor, act_layer=act_layer, gate_layer=gate_layer, mlp_bias=mlp_bias)`
			`self.spatial = SpatialAttn(spatial_kernel_size, gate_layer=gate_layer)`
Add CBAM for experimentation 5 years ago
			`def forward(self, x):`
			`x = self.channel(x)`
			`x = self.spatial(x)`
			`return x`


			`class LightCbamModule(nn.Module):`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`def __init__(`
			`self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,`
			`spatial_kernel_size=7, act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):`
Add CBAM for experimentation 5 years ago			`super(LightCbamModule, self).__init__()`
Add Gather-Excite and Global Context attn modules. Refactor existing SE-like attn for consistency and refactor byob/byoanet for less redundancy. 3 years ago			`self.channel = LightChannelAttn(`
			`channels, rd_ratio=rd_ratio, rd_channels=rd_channels,`
			`rd_divisor=rd_divisor, act_layer=act_layer, gate_layer=gate_layer, mlp_bias=mlp_bias)`
Add CBAM for experimentation 5 years ago			`self.spatial = LightSpatialAttn(spatial_kernel_size)`

			`def forward(self, x):`
			`x = self.channel(x)`
			`x = self.spatial(x)`
			`return x`