diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py
index fdbe9368..880220b8 100644
--- a/timm/models/efficientnet.py
+++ b/timm/models/efficientnet.py
@@ -829,11 +829,16 @@ def _gen_efficientnet_v2s(variant, channel_multiplier=1.0, depth_multiplier=1.0,
     and weights
 
     Ref impl:
-    Paper: https://arxiv.org/abs/2104.00298
+    Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298
     """
 
     arch_def = [
-        ['er_r2_k3_s1_e1_c24_noskip'],
+        # FIXME it's not clear if the FusedMBConv layers have SE enabled for the Small variant,
+        # Table 4 suggests no. 23.94M params w/o, 23.98M with, which is closer to 24M.
+        # ['er_r2_k3_s1_e1_c24_se0.25'],
+        # ['er_r4_k3_s2_e4_c48_se0.25'],
+        # ['er_r4_k3_s2_e4_c64_se0.25'],
+        ['er_r2_k3_s1_e1_c24'],
         ['er_r4_k3_s2_e4_c48'],
         ['er_r4_k3_s2_e4_c64'],
         ['ir_r6_k3_s2_e4_c128_se0.25'],
@@ -846,7 +851,7 @@ def _gen_efficientnet_v2s(variant, channel_multiplier=1.0, depth_multiplier=1.0,
         stem_size=24,
         channel_multiplier=channel_multiplier,
         norm_kwargs=resolve_bn_args(kwargs),
-        act_layer=resolve_act_layer(kwargs, 'silu'),
+        act_layer=resolve_act_layer(kwargs, 'silu'),  # FIXME this is an assumption, paper does not specify
         **kwargs,
     )
     model = _create_effnet(variant, pretrained, **model_kwargs)
diff --git a/timm/models/efficientnet_blocks.py b/timm/models/efficientnet_blocks.py
index 5d9141fb..114533cf 100644
--- a/timm/models/efficientnet_blocks.py
+++ b/timm/models/efficientnet_blocks.py
@@ -205,7 +205,14 @@ class DepthwiseSeparableConv(nn.Module):
 
 
 class InvertedResidual(nn.Module):
-    """ Inverted residual block w/ optional SE and CondConv routing"""
+    """ Inverted residual block w/ optional SE
+
+    Originally used in MobileNet-V2 - https://arxiv.org/abs/1801.04381v4, this layer is often
+    referred to as 'MBConv' (Mobile inverted Bottleneck Conv) and is also used in
+      * MNasNet - https://arxiv.org/abs/1807.11626
+      * EfficientNet - https://arxiv.org/abs/1905.11946
+      * MobileNet-V3 - https://arxiv.org/abs/1905.02244
+    """
 
     def __init__(self, in_chs, out_chs, dw_kernel_size=3,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
@@ -333,7 +340,16 @@ class CondConvResidual(InvertedResidual):
 
 
 class EdgeResidual(nn.Module):
-    """ Residual block with expansion convolution followed by pointwise-linear w/ stride"""
+    """ Residual block with expansion convolution followed by pointwise-linear w/ stride
+
+    Originally introduced in `EfficientNet-EdgeTPU: Creating Accelerator-Optimized Neural Networks with AutoML`
+        - https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html
+
+    This layer is also called FusedMBConv in the MobileDet, EfficientNet-X, and EfficientNet-V2 papers
+      * MobileDet - https://arxiv.org/abs/2004.14525
+      * EfficientNet-X - https://arxiv.org/abs/2102.05610
+      * EfficientNet-V2 - https://arxiv.org/abs/2104.00298
+    """
 
     def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1,
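
Note: the block pattern the new EdgeResidual docstring describes can be sketched in a few lines
of PyTorch. The sketch below is an illustration only, not the timm implementation: the class and
argument names are hypothetical, and it omits SE, CondConv routing, drop-path, and the
fake_in_chs handling of the real block. A FusedMBConv replaces the 1x1 expansion + kxk depthwise
pair of an MBConv with a single full kxk expansion conv, followed by the same pointwise-linear
projection. For reading the arch_def strings above: 'er' selects EdgeResidual, 'ir'
InvertedResidual, and r/k/s/e/c give repeats, kernel size, stride, expansion ratio, and output
channels.

    import torch
    import torch.nn as nn

    class FusedMBConvSketch(nn.Module):
        # Fused kxk expansion conv does the spatial + channel work in one op,
        # then a 1x1 'pointwise-linear' projection with no activation after it.
        def __init__(self, in_chs, out_chs, exp_ratio=4.0, kernel_size=3, stride=1):
            super().__init__()
            mid_chs = int(in_chs * exp_ratio)
            self.has_skip = stride == 1 and in_chs == out_chs
            self.conv_exp = nn.Conv2d(in_chs, mid_chs, kernel_size, stride=stride,
                                      padding=kernel_size // 2, bias=False)
            self.bn1 = nn.BatchNorm2d(mid_chs)
            self.act = nn.SiLU(inplace=True)  # SiLU matches the patch's (assumed) act_layer
            self.conv_pwl = nn.Conv2d(mid_chs, out_chs, 1, bias=False)
            self.bn2 = nn.BatchNorm2d(out_chs)

        def forward(self, x):
            shortcut = x
            x = self.act(self.bn1(self.conv_exp(x)))
            x = self.bn2(self.conv_pwl(x))
            if self.has_skip:  # residual add only when stride 1 and channels match
                x = x + shortcut
            return x

    # e.g. the first V2-S stage entry 'er_r2_k3_s1_e1_c24' -> k=3, s=1, e=1, c=24
    blk = FusedMBConvSketch(24, 24, exp_ratio=1.0, kernel_size=3, stride=1)
    print(blk(torch.randn(1, 24, 56, 56)).shape)  # torch.Size([1, 24, 56, 56])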