""" PyTorch Involution Layer Official impl: https://github.com/d-li14/involution/blob/main/cls/mmcls/models/utils/involution_naive.py Paper: `Involution: Inverting the Inherence of Convolution for Visual Recognition` - https://arxiv.org/abs/2103.06255 """ import torch.nn as nn from .conv_bn_act import ConvBnAct from .create_conv2d import create_conv2d class Involution(nn.Module): def __init__( self, channels, kernel_size=3, stride=1, group_size=16, rd_ratio=4, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, ): super(Involution, self).__init__() self.kernel_size = kernel_size self.stride = stride self.channels = channels self.group_size = group_size self.groups = self.channels // self.group_size self.conv1 = ConvBnAct( in_channels=channels, out_channels=channels // rd_ratio, kernel_size=1, norm_layer=norm_layer, act_layer=act_layer) self.conv2 = self.conv = create_conv2d( in_channels=channels // rd_ratio, out_channels=kernel_size**2 * self.groups, kernel_size=1, stride=1) self.avgpool = nn.AvgPool2d(stride, stride) if stride == 2 else nn.Identity() self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride) def forward(self, x): weight = self.conv2(self.conv1(self.avgpool(x))) B, C, H, W = weight.shape KK = int(self.kernel_size ** 2) weight = weight.view(B, self.groups, KK, H, W).unsqueeze(2) out = self.unfold(x).view(B, self.groups, self.group_size, KK, H, W) out = (weight * out).sum(dim=3).view(B, self.channels, H, W) return out