diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py
index fdbe9368..880220b8 100644
--- a/timm/models/efficientnet.py
+++ b/timm/models/efficientnet.py
@@ -829,11 +829,16 @@ def _gen_efficientnet_v2s(variant, channel_multiplier=1.0, depth_multiplier=1.0,
     and weights
 
     Ref impl:
-    Paper: https://arxiv.org/abs/2104.00298
+    Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298
     """
 
     arch_def = [
-        ['er_r2_k3_s1_e1_c24_noskip'],
+        # FIXME it's not clear if the FusedMBConv layers have SE enabled for the Small variant,
+        # Table 4 suggests no. 23.94M params w/o, 23.98M with, which is closer to 24M.
+        # ['er_r2_k3_s1_e1_c24_se0.25'],
+        # ['er_r4_k3_s2_e4_c48_se0.25'],
+        # ['er_r4_k3_s2_e4_c64_se0.25'],
+        ['er_r2_k3_s1_e1_c24'],
         ['er_r4_k3_s2_e4_c48'],
         ['er_r4_k3_s2_e4_c64'],
         ['ir_r6_k3_s2_e4_c128_se0.25'],
@@ -846,7 +851,7 @@ def _gen_efficientnet_v2s(variant, channel_multiplier=1.0, depth_multiplier=1.0,
         stem_size=24,
         channel_multiplier=channel_multiplier,
         norm_kwargs=resolve_bn_args(kwargs),
-        act_layer=resolve_act_layer(kwargs, 'silu'),
+        act_layer=resolve_act_layer(kwargs, 'silu'),  # FIXME this is an assumption, paper does not specify
         **kwargs,
     )
     model = _create_effnet(variant, pretrained, **model_kwargs)
diff --git a/timm/models/efficientnet_blocks.py b/timm/models/efficientnet_blocks.py
index 5d9141fb..114533cf 100644
--- a/timm/models/efficientnet_blocks.py
+++ b/timm/models/efficientnet_blocks.py
@@ -205,7 +205,14 @@ class DepthwiseSeparableConv(nn.Module):
 
 
 class InvertedResidual(nn.Module):
-    """ Inverted residual block w/ optional SE and CondConv routing"""
+    """ Inverted residual block w/ optional SE
+
+    Originally used in MobileNet-V2 - https://arxiv.org/abs/1801.04381v4, this layer is often
+    referred to as 'MBConv' (Mobile inverted Bottleneck Conv) and is also used in
+      * MNasNet - https://arxiv.org/abs/1807.11626
+      * EfficientNet - https://arxiv.org/abs/1905.11946
+      * MobileNet-V3 - https://arxiv.org/abs/1905.02244
+    """
 
     def __init__(self, in_chs, out_chs, dw_kernel_size=3,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
@@ -333,7 +340,16 @@ class CondConvResidual(InvertedResidual):
 
 
 class EdgeResidual(nn.Module):
-    """ Residual block with expansion convolution followed by pointwise-linear w/ stride"""
+    """ Residual block with expansion convolution followed by pointwise-linear w/ stride
+
+    Originally introduced in `EfficientNet-EdgeTPU: Creating Accelerator-Optimized Neural Networks with AutoML`
+        - https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html
+
+    This layer is also called FusedMBConv in the MobileDet, EfficientNet-X, and EfficientNet-V2 papers
+      * MobileDet - https://arxiv.org/abs/2004.14525
+      * EfficientNet-X - https://arxiv.org/abs/2102.05610
+      * EfficientNet-V2 - https://arxiv.org/abs/2104.00298
+    """
 
     def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1,
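
Note: the block pattern the new EdgeResidual docstring describes can be sketched in a few lines
of PyTorch. The sketch below is an illustration only, not the timm implementation: the class and
argument names are hypothetical, and it omits SE, CondConv routing, drop-path, and the
fake_in_chs handling of the real block. A FusedMBConv replaces the 1x1 expansion + kxk depthwise
pair of an MBConv with a single full kxk expansion conv, followed by the same pointwise-linear
projection. For reading the arch_def strings above: 'er' selects EdgeResidual, 'ir'
InvertedResidual, and r/k/s/e/c give repeats, kernel size, stride, expansion ratio, and output
channels.

    import torch
    import torch.nn as nn

    class FusedMBConvSketch(nn.Module):
        # Fused kxk expansion conv does the spatial + channel work in one op,
        # then a 1x1 'pointwise-linear' projection with no activation after it.
        def __init__(self, in_chs, out_chs, exp_ratio=4.0, kernel_size=3, stride=1):
            super().__init__()
            mid_chs = int(in_chs * exp_ratio)
            self.has_skip = stride == 1 and in_chs == out_chs
            self.conv_exp = nn.Conv2d(in_chs, mid_chs, kernel_size, stride=stride,
                                      padding=kernel_size // 2, bias=False)
            self.bn1 = nn.BatchNorm2d(mid_chs)
            self.act = nn.SiLU(inplace=True)  # SiLU matches the patch's (assumed) act_layer
            self.conv_pwl = nn.Conv2d(mid_chs, out_chs, 1, bias=False)
            self.bn2 = nn.BatchNorm2d(out_chs)

        def forward(self, x):
            shortcut = x
            x = self.act(self.bn1(self.conv_exp(x)))
            x = self.bn2(self.conv_pwl(x))
            if self.has_skip:  # residual add only when stride 1 and channels match
                x = x + shortcut
            return x

    # e.g. the first V2-S stage entry 'er_r2_k3_s1_e1_c24' -> k=3, s=1, e=1, c=24
    blk = FusedMBConvSketch(24, 24, exp_ratio=1.0, kernel_size=3, stride=1)
    print(blk(torch.randn(1, 24, 56, 56)).shape)  # torch.Size([1, 24, 56, 56])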