diff --git a/timm/models/byobnet.py b/timm/models/byobnet.py
index 8214b490..8ec8690a 100644
--- a/timm/models/byobnet.py
+++ b/timm/models/byobnet.py
@@ -90,7 +90,7 @@ default_cfgs = {
     # experimental configs
     'resnet51q': _cfg(
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet51q_ra2-d47dcc76.pth',
-        first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8),
+        first_conv='stem.conv1', input_size=(3, 256, 256), pool_size=(8, 8),
         test_input_size=(3, 288, 288), crop_pct=1.0),
     'resnet61q': _cfg(
         first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'),
diff --git a/timm/models/efficientnet_blocks.py b/timm/models/efficientnet_blocks.py
index 7853db0e..ea0c791e 100644
--- a/timm/models/efficientnet_blocks.py
+++ b/timm/models/efficientnet_blocks.py
@@ -22,18 +22,16 @@ class SqueezeExcite(nn.Module):
         se_ratio (float): ratio of squeeze reduction
         act_layer (nn.Module): activation layer of containing block
         gate_fn (Callable): attention gate function
-        block_in_chs (int): input channels of containing block (for calculating reduction from)
-        reduce_from_block (bool): calculate reduction from block input channels if True
         force_act_layer (nn.Module): override block's activation fn if this is set/bound
-        divisor (int): make reduction channels divisible by this
+        round_chs_fn (Callable): specify a fn to calculate rounding of reduced chs
     """
 
     def __init__(
             self, in_chs, se_ratio=0.25, act_layer=nn.ReLU, gate_fn=sigmoid,
-            block_in_chs=None, reduce_from_block=True, force_act_layer=None, divisor=1):
+            force_act_layer=None, round_chs_fn=None):
         super(SqueezeExcite, self).__init__()
-        reduced_chs = (block_in_chs or in_chs) if reduce_from_block else in_chs
-        reduced_chs = make_divisible(reduced_chs * se_ratio, divisor)
+        round_chs_fn = round_chs_fn or round
+        reduced_chs = round_chs_fn(in_chs * se_ratio)
         act_layer = force_act_layer or act_layer
         self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
         self.act1 = create_act_layer(act_layer, inplace=True)
@@ -168,8 +166,7 @@ class InvertedResidual(nn.Module):
         self.act2 = act_layer(inplace=True)
 
         # Squeeze-and-excitation
-        self.se = se_layer(
-            mid_chs, se_ratio=se_ratio, act_layer=act_layer, block_in_chs=in_chs) if has_se else nn.Identity()
+        self.se = se_layer(mid_chs, se_ratio=se_ratio, act_layer=act_layer) if has_se else nn.Identity()
 
         # Point-wise linear projection
         self.conv_pwl = create_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type, **conv_kwargs)
@@ -292,8 +289,7 @@ class EdgeResidual(nn.Module):
         self.act1 = act_layer(inplace=True)
 
         # Squeeze-and-excitation
-        self.se = SqueezeExcite(
-            mid_chs, se_ratio=se_ratio, act_layer=act_layer, block_in_chs=in_chs) if has_se else nn.Identity()
+        self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, act_layer=act_layer) if has_se else nn.Identity()
 
         # Point-wise linear projection
         self.conv_pwl = create_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type)
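
Reviewer note: the SqueezeExcite change above folds the old `block_in_chs` / `reduce_from_block` / `divisor` knobs into a single `round_chs_fn` hook that rounds the reduced (squeeze) width; the GhostNet and MobileNetV3/HardCoReNAS generators below pass their own rounding fns into it. A minimal sketch of the arithmetic, with made-up channel counts (not from the patch):

    from functools import partial

    from timm.models.efficientnet_builder import round_channels
    from timm.models.layers.helpers import make_divisible

    in_chs, se_ratio = 120, 0.25   # hypothetical block, for illustration only

    # default: round_chs_fn falls back to plain round() in the new __init__
    print(round(in_chs * se_ratio))                               # 30

    # GhostNet path: partial(make_divisible, divisor=4)
    print(partial(make_divisible, divisor=4)(in_chs * se_ratio))  # 32

    # MobileNetV3 / HardCoReNAS path: round_channels (divisor=8), matching the old divisor=8 behaviour
    print(round_channels(in_chs * se_ratio))                      # 32
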
diff --git a/timm/models/efficientnet_builder.py b/timm/models/efficientnet_builder.py
index 57e2039b..35019747 100644
--- a/timm/models/efficientnet_builder.py
+++ b/timm/models/efficientnet_builder.py
@@ -265,11 +265,12 @@ class EfficientNetBuilder:
     https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py
     """
 
-    def __init__(self, output_stride=32, pad_type='', round_chs_fn=round_channels,
+    def __init__(self, output_stride=32, pad_type='', round_chs_fn=round_channels, se_from_exp=False,
                  act_layer=None, norm_layer=None, se_layer=None, drop_path_rate=0., feature_location=''):
         self.output_stride = output_stride
         self.pad_type = pad_type
         self.round_chs_fn = round_chs_fn
+        self.se_from_exp = se_from_exp  # calculate se channel reduction from expanded (mid) chs
         self.act_layer = act_layer
         self.norm_layer = norm_layer
         self.se_layer = se_layer
@@ -301,6 +302,8 @@ class EfficientNetBuilder:
         ba['norm_layer'] = self.norm_layer
         if bt != 'cn':
             ba['se_layer'] = self.se_layer
+            if not self.se_from_exp and ba['se_ratio']:
+                ba['se_ratio'] /= ba.get('exp_ratio', 1.0)
             ba['drop_path_rate'] = drop_path_rate
 
         if bt == 'ir':
@@ -418,28 +421,28 @@ def _init_weight_goog(m, n='', fix_group_fanout=True):
         if fix_group_fanout:
             fan_out //= m.groups
         init_weight_fn = get_condconv_initializer(
-            lambda w: w.data.normal_(0, math.sqrt(2.0 / fan_out)), m.num_experts, m.weight_shape)
+            lambda w: nn.init.normal_(w, 0, math.sqrt(2.0 / fan_out)), m.num_experts, m.weight_shape)
         init_weight_fn(m.weight)
         if m.bias is not None:
-            m.bias.data.zero_()
+            nn.init.zeros_(m.bias)
     elif isinstance(m, nn.Conv2d):
         fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
         if fix_group_fanout:
             fan_out //= m.groups
-        m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+        nn.init.normal_(m.weight, 0, math.sqrt(2.0 / fan_out))
         if m.bias is not None:
-            m.bias.data.zero_()
+            nn.init.zeros_(m.bias)
     elif isinstance(m, nn.BatchNorm2d):
-        m.weight.data.fill_(1.0)
-        m.bias.data.zero_()
+        nn.init.ones_(m.weight)
+        nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Linear):
         fan_out = m.weight.size(0)  # fan-out
         fan_in = 0
         if 'routing_fn' in n:
             fan_in = m.weight.size(1)
         init_range = 1.0 / math.sqrt(fan_in + fan_out)
-        m.weight.data.uniform_(-init_range, init_range)
-        m.bias.data.zero_()
+        nn.init.uniform_(m.weight, -init_range, init_range)
+        nn.init.zeros_(m.bias)
 
 
 def efficientnet_init_weights(model: nn.Module, init_fn=None):
diff --git a/timm/models/ghostnet.py b/timm/models/ghostnet.py
index 1783ff7a..d82a91b4 100644
--- a/timm/models/ghostnet.py
+++ b/timm/models/ghostnet.py
@@ -40,7 +40,7 @@ default_cfgs = {
 }
 
 
-_SE_LAYER = partial(SqueezeExcite, gate_fn='hard_sigmoid', divisor=4)
+_SE_LAYER = partial(SqueezeExcite, gate_fn='hard_sigmoid', round_chs_fn=partial(make_divisible, divisor=4))
 
 
 class GhostModule(nn.Module):
diff --git a/timm/models/hardcorenas.py b/timm/models/hardcorenas.py
index 231bb4b6..16b9c4bc 100644
--- a/timm/models/hardcorenas.py
+++ b/timm/models/hardcorenas.py
@@ -4,7 +4,7 @@ import torch.nn as nn
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .efficientnet_blocks import SqueezeExcite
-from .efficientnet_builder import decode_arch_def, resolve_act_layer, resolve_bn_args
+from .efficientnet_builder import decode_arch_def, resolve_act_layer, resolve_bn_args, round_channels
 from .helpers import build_model_with_cfg, default_cfg_for_features
 from .layers import get_act_fn
 from .mobilenetv3 import MobileNetV3, MobileNetV3Features
@@ -40,7 +40,7 @@ def _gen_hardcorenas(pretrained, variant, arch_def, **kwargs):
     """
     num_features = 1280
     se_layer = partial(
-        SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), force_act_layer=nn.ReLU, reduce_from_block=False, divisor=8)
+        SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), force_act_layer=nn.ReLU, round_chs_fn=round_channels)
     model_kwargs = dict(
         block_args=decode_arch_def(arch_def),
         num_features=num_features,
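
Reviewer note: `se_from_exp` controls whether the SE squeeze width is computed from the expanded (mid) channels or from the block input channels. When it is False, the builder divides `se_ratio` by `exp_ratio` before handing it to the block, which is equivalent to sizing the squeeze from the block input. A rough sketch of the arithmetic with hypothetical numbers (not from the patch):

    in_chs, exp_ratio, se_ratio = 64, 4.0, 0.25     # made-up example block
    mid_chs = int(in_chs * exp_ratio)               # 256 expanded channels

    # se_from_exp=True (default for the MobileNetV3 classes below): squeeze sized from expanded chs
    print(round(mid_chs * se_ratio))                # 64

    # se_from_exp=False (builder default, used by FBNetV3 below): ratio rescaled first,
    # so the squeeze ends up sized from the block input chs instead
    print(round(mid_chs * (se_ratio / exp_ratio)))  # 16 == round(in_chs * se_ratio)
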
diff --git a/timm/models/layers/helpers.py b/timm/models/layers/helpers.py
index 64573ef6..cc54ca7f 100644
--- a/timm/models/layers/helpers.py
+++ b/timm/models/layers/helpers.py
@@ -28,4 +28,4 @@ def make_divisible(v, divisor=8, min_value=None, round_limit=.9):
     # Make sure that round down does not go down by more than 10%.
     if new_v < round_limit * v:
         new_v += divisor
-    return new_v
\ No newline at end of file
+    return new_v
diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py
index 9afa3d75..fad88aa7 100644
--- a/timm/models/mobilenetv3.py
+++ b/timm/models/mobilenetv3.py
@@ -72,6 +72,10 @@ default_cfgs = {
     'tf_mobilenetv3_small_minimal_100': _cfg(
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth',
         mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
+
+    'fbnetv3_b': _cfg(),
+    'fbnetv3_d': _cfg(),
+    'fbnetv3_g': _cfg(),
 }
 
 
@@ -86,7 +90,7 @@ class MobileNetV3(nn.Module):
     """
 
     def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=16, num_features=1280, head_bias=True,
-                 pad_type='', act_layer=None, norm_layer=None, se_layer=None,
+                 pad_type='', act_layer=None, norm_layer=None, se_layer=None, se_from_exp=True,
                  round_chs_fn=round_channels, drop_rate=0., drop_path_rate=0., global_pool='avg'):
         super(MobileNetV3, self).__init__()
         act_layer = act_layer or nn.ReLU
@@ -104,7 +108,7 @@ class MobileNetV3(nn.Module):
 
         # Middle stages (IR/ER/DS Blocks)
         builder = EfficientNetBuilder(
-            output_stride=32, pad_type=pad_type, round_chs_fn=round_chs_fn,
+            output_stride=32, pad_type=pad_type, round_chs_fn=round_chs_fn, se_from_exp=se_from_exp,
             act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer, drop_path_rate=drop_path_rate)
         self.blocks = nn.Sequential(*builder(stem_size, block_args))
         self.feature_info = builder.features
@@ -161,8 +165,8 @@ class MobileNetV3Features(nn.Module):
     and object detection models.
""" - def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='bottleneck', - in_chans=3, stem_size=16, output_stride=32, pad_type='', round_chs_fn=round_channels, + def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='bottleneck', in_chans=3, + stem_size=16, output_stride=32, pad_type='', round_chs_fn=round_channels, se_from_exp=True, act_layer=None, norm_layer=None, se_layer=None, drop_rate=0., drop_path_rate=0.): super(MobileNetV3Features, self).__init__() act_layer = act_layer or nn.ReLU @@ -178,7 +182,7 @@ class MobileNetV3Features(nn.Module): # Middle stages (IR/ER/DS Blocks) builder = EfficientNetBuilder( - output_stride=output_stride, pad_type=pad_type, round_chs_fn=round_chs_fn, + output_stride=output_stride, pad_type=pad_type, round_chs_fn=round_chs_fn, se_from_exp=se_from_exp, act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer, drop_path_rate=drop_path_rate, feature_location=feature_location) self.blocks = nn.Sequential(*builder(stem_size, block_args)) @@ -262,7 +266,7 @@ def _gen_mobilenet_v3_rw(variant, channel_multiplier=1.0, pretrained=False, **kw round_chs_fn=partial(round_channels, multiplier=channel_multiplier), norm_layer=partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), act_layer=resolve_act_layer(kwargs, 'hard_swish'), - se_layer=partial(SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), reduce_from_block=False), + se_layer=partial(SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid')), **kwargs, ) model = _create_mnv3(variant, pretrained, **model_kwargs) @@ -351,7 +355,7 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwarg ['cn_r1_k1_s1_c960'], # hard-swish ] se_layer = partial( - SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), force_act_layer=nn.ReLU, reduce_from_block=False, divisor=8) + SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), force_act_layer=nn.ReLU, round_chs_fn=round_channels) model_kwargs = dict( block_args=decode_arch_def(arch_def), num_features=num_features, @@ -366,6 +370,86 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwarg return model +def _gen_fbnetv3(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """ FBNetV3 + FIXME untested, this is a preliminary impl of some FBNet-V3 variants. 
+ """ + vl = variant.split('_')[-1] + if vl in ('a', 'b'): + stem_size = 16 + arch_def = [ + # stage 0, 112x112 in + ['ds_r2_k3_s1_e1_c16'], + # stage 1, 112x112 in + ['ir_r1_k5_s2_e4_c24', 'ir_r3_k5_s1_e2_c24'], + # stage 2, 56x56 in + ['ir_r1_k5_s2_e5_c40_se0.25', 'ir_r4_k5_s1_e3_c40_se0.25'], + # stage 3, 28x28 in + ['ir_r1_k5_s2_e5_c72', 'ir_r4_k3_s1_e3_c72'], + # stage 4, 14x14in + ['ir_r1_k3_s1_e5_c120_se0.25', 'ir_r5_k5_s1_e3_c120_se0.25'], + # stage 5, 14x14in + ['ir_r1_k3_s2_e6_c184_se0.25', 'ir_r5_k5_s1_e4_c184_se0.25', 'ir_r1_k5_s1_e6_c224_se0.25'], + # stage 6, 7x7 in + ['cn_r1_k1_s1_c1344'], + ] + elif vl == 'd': + stem_size = 24 + arch_def = [ + # stage 0, 112x112 in + ['ds_r2_k3_s1_e1_c16'], + # stage 1, 112x112 in + ['ir_r1_k3_s2_e5_c24', 'ir_r5_k3_s1_e2_c24'], + # stage 2, 56x56 in + ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r4_k3_s1_e3_c40_se0.25'], + # stage 3, 28x28 in + ['ir_r1_k3_s2_e5_c72', 'ir_r4_k3_s1_e3_c72'], + # stage 4, 14x14in + ['ir_r1_k3_s1_e5_c128_se0.25', 'ir_r6_k5_s1_e3_c128_se0.25'], + # stage 5, 14x14in + ['ir_r1_k3_s2_e6_c208_se0.25', 'ir_r5_k5_s1_e5_c208_se0.25', 'ir_r1_k5_s1_e6_c240_se0.25'], + # stage 6, 7x7 in + ['cn_r1_k1_s1_c1440'], + ] + elif vl == 'g': + stem_size = 32 + arch_def = [ + # stage 0, 112x112 in + ['ds_r3_k3_s1_e1_c24'], + # stage 1, 112x112 in + ['ir_r1_k5_s2_e4_c40', 'ir_r4_k5_s1_e2_c40'], + # stage 2, 56x56 in + ['ir_r1_k5_s2_e4_c56_se0.25', 'ir_r4_k5_s1_e3_c56_se0.25'], + # stage 3, 28x28 in + ['ir_r1_k5_s2_e5_c104', 'ir_r4_k3_s1_e3_c104'], + # stage 4, 14x14in + ['ir_r1_k3_s1_e5_c160_se0.25', 'ir_r8_k5_s1_e3_c160_se0.25'], + # stage 5, 14x14in + ['ir_r1_k3_s2_e6_c264_se0.25', 'ir_r6_k5_s1_e5_c264_se0.25', 'ir_r2_k5_s1_e6_c288_se0.25'], + # stage 6, 7x7 in + ['cn_r1_k1_s1_c1728'], # hard-swish + ] + else: + raise NotImplemented + round_chs_fn = partial(round_channels, multiplier=channel_multiplier, round_limit=0.95) + se_layer = partial(SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), round_chs_fn=round_chs_fn) + act_layer = resolve_act_layer(kwargs, 'hard_swish') + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + num_features=1984, + head_bias=False, + stem_size=stem_size, + round_chs_fn=round_chs_fn, + se_from_exp=False, + norm_layer=partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)), + act_layer=act_layer, + se_layer=se_layer, + **kwargs, + ) + model = _create_mnv3(variant, pretrained, **model_kwargs) + return model + + @register_model def mobilenetv3_large_075(pretrained=False, **kwargs): """ MobileNet V3 """ @@ -474,3 +558,24 @@ def tf_mobilenetv3_small_minimal_100(pretrained=False, **kwargs): kwargs['pad_type'] = 'same' model = _gen_mobilenet_v3('tf_mobilenetv3_small_minimal_100', 1.0, pretrained=pretrained, **kwargs) return model + + +@register_model +def fbnetv3_b(pretrained=False, **kwargs): + """ FBNetV3-B """ + model = _gen_fbnetv3('fbnetv3_b', pretrained=pretrained, **kwargs) + return model + + +@register_model +def fbnetv3_d(pretrained=False, **kwargs): + """ FBNetV3-D """ + model = _gen_fbnetv3('fbnetv3_d', pretrained=pretrained, **kwargs) + return model + + +@register_model +def fbnetv3_g(pretrained=False, **kwargs): + """ FBNetV3-G """ + model = _gen_fbnetv3('fbnetv3_g', pretrained=pretrained, **kwargs) + return model