From 9811e229f74c1a0e151a45041a35598025f7125d Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 9 Feb 2021 15:58:39 -0800 Subject: [PATCH 1/7] Fix regression in models with 1001 class pretrained weights. Improve batchnorm arg and BatchNormAct layer handling in several models. --- tests/test_models.py | 12 +++- timm/models/dpn.py | 8 ++- timm/models/gluon_xception.py | 66 +++++++------------ timm/models/helpers.py | 94 ++++++++++++++------------- timm/models/inception_resnet_v2.py | 8 ++- timm/models/inception_v4.py | 5 +- timm/models/layers/__init__.py | 2 +- timm/models/layers/conv_bn_act.py | 12 ++-- timm/models/layers/create_attn.py | 2 + timm/models/layers/create_norm_act.py | 25 ++++--- timm/models/layers/norm_act.py | 13 ++-- timm/models/layers/separable_conv.py | 11 ++-- timm/models/nasnet.py | 10 ++- timm/models/pnasnet.py | 8 ++- timm/models/xception_aligned.py | 28 ++++---- 15 files changed, 157 insertions(+), 147 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index dee4fbe7..3f1c4cda 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -83,7 +83,6 @@ def test_model_default_cfgs(model_name, batch_size): cfg = model.default_cfg classifier = cfg['classifier'] - first_conv = cfg['first_conv'] pool_size = cfg['pool_size'] input_size = model.default_cfg['input_size'] @@ -111,9 +110,16 @@ def test_model_default_cfgs(model_name, batch_size): # FIXME mobilenetv3 forward_features vs removed pooling differ assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2] - # check classifier and first convolution names match those in default_cfg + # check classifier name matches default_cfg assert classifier + ".weight" in state_dict.keys(), f'{classifier} not in model params' - assert first_conv + ".weight" in state_dict.keys(), f'{first_conv} not in model params' + + # check first conv(s) names match default_cfg + first_conv = cfg['first_conv'] + if isinstance(first_conv, str): + first_conv = (first_conv,) + assert isinstance(first_conv, (tuple, list)) + for fc in first_conv: + assert fc + ".weight" in state_dict.keys(), f'{fc} not in model params' if 'GITHUB_ACTIONS' not in os.environ: diff --git a/timm/models/dpn.py b/timm/models/dpn.py index 045d634c..ac9c7755 100644 --- a/timm/models/dpn.py +++ b/timm/models/dpn.py @@ -7,6 +7,7 @@ This implementation is compatible with the pretrained weights from cypw's MXNet Hacked together by / Copyright 2020 Ross Wightman """ from collections import OrderedDict +from functools import partial from typing import Tuple import torch @@ -173,12 +174,14 @@ class DPN(nn.Module): self.drop_rate = drop_rate self.b = b assert output_stride == 32 # FIXME look into dilation support + norm_layer = partial(BatchNormAct2d, eps=.001) + fc_norm_layer = partial(BatchNormAct2d, eps=.001, act_layer=fc_act, inplace=False) bw_factor = 1 if small else 4 blocks = OrderedDict() # conv1 blocks['conv1_1'] = ConvBnAct( - in_chans, num_init_features, kernel_size=3 if small else 7, stride=2, norm_kwargs=dict(eps=.001)) + in_chans, num_init_features, kernel_size=3 if small else 7, stride=2, norm_layer=norm_layer) blocks['conv1_pool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.feature_info = [dict(num_chs=num_init_features, reduction=2, module='features.conv1_1')] @@ -226,8 +229,7 @@ class DPN(nn.Module): in_chs += inc self.feature_info += [dict(num_chs=in_chs, reduction=32, module=f'features.conv5_{k_sec[3]}')] - def _fc_norm(f, eps): return BatchNormAct2d(f, eps=eps, act_layer=fc_act, inplace=False) - 
blocks['conv5_bn_ac'] = CatBnAct(in_chs, norm_layer=_fc_norm) + blocks['conv5_bn_ac'] = CatBnAct(in_chs, norm_layer=fc_norm_layer) self.num_features = in_chs self.features = nn.Sequential(blocks) diff --git a/timm/models/gluon_xception.py b/timm/models/gluon_xception.py index 3782c500..8fc398d6 100644 --- a/timm/models/gluon_xception.py +++ b/timm/models/gluon_xception.py @@ -42,10 +42,8 @@ for Tensorflow 'SAME' padding. PyTorch symmetric padding behaves the way we'd w class SeparableConv2d(nn.Module): - def __init__(self, inplanes, planes, kernel_size=3, stride=1, - dilation=1, bias=False, norm_layer=None, norm_kwargs=None): + def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False, norm_layer=None): super(SeparableConv2d, self).__init__() - norm_kwargs = norm_kwargs if norm_kwargs is not None else {} self.kernel_size = kernel_size self.dilation = dilation @@ -54,7 +52,7 @@ class SeparableConv2d(nn.Module): self.conv_dw = nn.Conv2d( inplanes, inplanes, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=inplanes, bias=bias) - self.bn = norm_layer(num_features=inplanes, **norm_kwargs) + self.bn = norm_layer(num_features=inplanes) # pointwise convolution self.conv_pw = nn.Conv2d(inplanes, planes, kernel_size=1, bias=bias) @@ -66,10 +64,8 @@ class SeparableConv2d(nn.Module): class Block(nn.Module): - def __init__(self, inplanes, planes, stride=1, dilation=1, start_with_relu=True, - norm_layer=None, norm_kwargs=None, ): + def __init__(self, inplanes, planes, stride=1, dilation=1, start_with_relu=True, norm_layer=None): super(Block, self).__init__() - norm_kwargs = norm_kwargs if norm_kwargs is not None else {} if isinstance(planes, (list, tuple)): assert len(planes) == 3 else: @@ -80,7 +76,7 @@ class Block(nn.Module): self.skip = nn.Sequential() self.skip.add_module('conv1', nn.Conv2d( inplanes, outplanes, 1, stride=stride, bias=False)), - self.skip.add_module('bn1', norm_layer(num_features=outplanes, **norm_kwargs)) + self.skip.add_module('bn1', norm_layer(num_features=outplanes)) else: self.skip = None @@ -88,9 +84,8 @@ class Block(nn.Module): for i in range(3): rep['act%d' % (i + 1)] = nn.ReLU(inplace=True) rep['conv%d' % (i + 1)] = SeparableConv2d( - inplanes, planes[i], 3, stride=stride if i == 2 else 1, dilation=dilation, - norm_layer=norm_layer, norm_kwargs=norm_kwargs) - rep['bn%d' % (i + 1)] = norm_layer(planes[i], **norm_kwargs) + inplanes, planes[i], 3, stride=stride if i == 2 else 1, dilation=dilation, norm_layer=norm_layer) + rep['bn%d' % (i + 1)] = norm_layer(planes[i]) inplanes = planes[i] if not start_with_relu: @@ -115,74 +110,63 @@ class Xception65(nn.Module): """ def __init__(self, num_classes=1000, in_chans=3, output_stride=32, norm_layer=nn.BatchNorm2d, - norm_kwargs=None, drop_rate=0., global_pool='avg'): + drop_rate=0., global_pool='avg'): super(Xception65, self).__init__() self.num_classes = num_classes self.drop_rate = drop_rate - norm_kwargs = norm_kwargs if norm_kwargs is not None else {} if output_stride == 32: entry_block3_stride = 2 exit_block20_stride = 2 - middle_block_dilation = 1 - exit_block_dilations = (1, 1) + middle_dilation = 1 + exit_dilation = (1, 1) elif output_stride == 16: entry_block3_stride = 2 exit_block20_stride = 1 - middle_block_dilation = 1 - exit_block_dilations = (1, 2) + middle_dilation = 1 + exit_dilation = (1, 2) elif output_stride == 8: entry_block3_stride = 1 exit_block20_stride = 1 - middle_block_dilation = 2 - exit_block_dilations = (2, 4) + middle_dilation = 2 + exit_dilation = (2, 4) 
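# The three output_stride branches above map to (entry_block3_stride,
# exit_block20_stride, middle_dilation, exit_dilation) as follows -- a
# restatement of the code for reference, not new logic:
#   32 -> (2, 2, 1, (1, 1))  # standard classification setting
#   16 -> (2, 1, 1, (1, 2))  # keep 1/16 resolution features via dilation
#    8 -> (1, 1, 2, (2, 4))  # keep 1/8 resolution features via dilation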
else: raise NotImplementedError # Entry flow self.conv1 = nn.Conv2d(in_chans, 32, kernel_size=3, stride=2, padding=1, bias=False) - self.bn1 = norm_layer(num_features=32, **norm_kwargs) + self.bn1 = norm_layer(num_features=32) self.act1 = nn.ReLU(inplace=True) self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False) self.bn2 = norm_layer(num_features=64) self.act2 = nn.ReLU(inplace=True) - self.block1 = Block( - 64, 128, stride=2, start_with_relu=False, norm_layer=norm_layer, norm_kwargs=norm_kwargs) + self.block1 = Block(64, 128, stride=2, start_with_relu=False, norm_layer=norm_layer) self.block1_act = nn.ReLU(inplace=True) - self.block2 = Block( - 128, 256, stride=2, start_with_relu=False, norm_layer=norm_layer, norm_kwargs=norm_kwargs) - self.block3 = Block( - 256, 728, stride=entry_block3_stride, norm_layer=norm_layer, norm_kwargs=norm_kwargs) + self.block2 = Block(128, 256, stride=2, start_with_relu=False, norm_layer=norm_layer) + self.block3 = Block(256, 728, stride=entry_block3_stride, norm_layer=norm_layer) # Middle flow self.mid = nn.Sequential(OrderedDict([('block%d' % i, Block( - 728, 728, stride=1, dilation=middle_block_dilation, - norm_layer=norm_layer, norm_kwargs=norm_kwargs)) for i in range(4, 20)])) + 728, 728, stride=1, dilation=middle_dilation, norm_layer=norm_layer)) for i in range(4, 20)])) # Exit flow self.block20 = Block( - 728, (728, 1024, 1024), stride=exit_block20_stride, dilation=exit_block_dilations[0], - norm_layer=norm_layer, norm_kwargs=norm_kwargs) + 728, (728, 1024, 1024), stride=exit_block20_stride, dilation=exit_dilation[0], norm_layer=norm_layer) self.block20_act = nn.ReLU(inplace=True) - self.conv3 = SeparableConv2d( - 1024, 1536, 3, stride=1, dilation=exit_block_dilations[1], - norm_layer=norm_layer, norm_kwargs=norm_kwargs) - self.bn3 = norm_layer(num_features=1536, **norm_kwargs) + self.conv3 = SeparableConv2d(1024, 1536, 3, stride=1, dilation=exit_dilation[1], norm_layer=norm_layer) + self.bn3 = norm_layer(num_features=1536) self.act3 = nn.ReLU(inplace=True) - self.conv4 = SeparableConv2d( - 1536, 1536, 3, stride=1, dilation=exit_block_dilations[1], - norm_layer=norm_layer, norm_kwargs=norm_kwargs) - self.bn4 = norm_layer(num_features=1536, **norm_kwargs) + self.conv4 = SeparableConv2d(1536, 1536, 3, stride=1, dilation=exit_dilation[1], norm_layer=norm_layer) + self.bn4 = norm_layer(num_features=1536) self.act4 = nn.ReLU(inplace=True) self.num_features = 2048 self.conv5 = SeparableConv2d( - 1536, self.num_features, 3, stride=1, dilation=exit_block_dilations[1], - norm_layer=norm_layer, norm_kwargs=norm_kwargs) - self.bn5 = norm_layer(num_features=self.num_features, **norm_kwargs) + 1536, self.num_features, 3, stride=1, dilation=exit_dilation[1], norm_layer=norm_layer) + self.bn5 = norm_layer(num_features=self.num_features) self.act5 = nn.ReLU(inplace=True) self.feature_info = [ dict(num_chs=64, reduction=2, module='act2'), diff --git a/timm/models/helpers.py b/timm/models/helpers.py index 562a01c5..d56cdc57 100644 --- a/timm/models/helpers.py +++ b/timm/models/helpers.py @@ -148,6 +148,31 @@ def load_custom_pretrained(model, cfg=None, load_fn=None, progress=False, check_ _logger.warning("Valid function to load pretrained weights is not available, using random initialization.") +def adapt_input_conv(in_chans, conv_weight): + conv_type = conv_weight.dtype + conv_weight = conv_weight.float() # Some weights are in torch.half, ensure it's float for sum on CPU + O, I, J, K = conv_weight.shape + if in_chans == 1: + if I > 3: + assert 
conv_weight.shape[1] % 3 == 0 + # For models with space2depth stems + conv_weight = conv_weight.reshape(O, I // 3, 3, J, K) + conv_weight = conv_weight.sum(dim=2, keepdim=False) + else: + conv_weight = conv_weight.sum(dim=1, keepdim=True) + elif in_chans != 3: + if I != 3: + raise NotImplementedError('Weight format not supported by conversion.') + else: + # NOTE this strategy should be better than random init, but there could be other combinations of + # the original RGB input layer weights that'd work better for specific cases. + repeat = int(math.ceil(in_chans / 3)) + conv_weight = conv_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :] + conv_weight *= (3 / float(in_chans)) + conv_weight = conv_weight.to(conv_type) + return conv_weight + + def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=None, strict=True, progress=False): if cfg is None: cfg = getattr(model, 'default_cfg') @@ -159,56 +184,35 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non if filter_fn is not None: state_dict = filter_fn(state_dict) - if in_chans == 1: - conv1_name = cfg['first_conv'] - _logger.info('Converting first conv (%s) pretrained weights from 3 to 1 channel' % conv1_name) - conv1_weight = state_dict[conv1_name + '.weight'] - # Some weights are in torch.half, ensure it's float for sum on CPU - conv1_type = conv1_weight.dtype - conv1_weight = conv1_weight.float() - O, I, J, K = conv1_weight.shape - if I > 3: - assert conv1_weight.shape[1] % 3 == 0 - # For models with space2depth stems - conv1_weight = conv1_weight.reshape(O, I // 3, 3, J, K) - conv1_weight = conv1_weight.sum(dim=2, keepdim=False) - else: - conv1_weight = conv1_weight.sum(dim=1, keepdim=True) - conv1_weight = conv1_weight.to(conv1_type) - state_dict[conv1_name + '.weight'] = conv1_weight - elif in_chans != 3: - conv1_name = cfg['first_conv'] - conv1_weight = state_dict[conv1_name + '.weight'] - conv1_type = conv1_weight.dtype - conv1_weight = conv1_weight.float() - O, I, J, K = conv1_weight.shape - if I != 3: - _logger.warning('Deleting first conv (%s) from pretrained weights.' % conv1_name) - del state_dict[conv1_name + '.weight'] - strict = False - else: - # NOTE this strategy should be better than random init, but there could be other combinations of - # the original RGB input layer weights that'd work better for specific cases. - _logger.info('Repeating first conv (%s) weights in channel dim.' 
% conv1_name) - repeat = int(math.ceil(in_chans / 3)) - conv1_weight = conv1_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :] - conv1_weight *= (3 / float(in_chans)) - conv1_weight = conv1_weight.to(conv1_type) - state_dict[conv1_name + '.weight'] = conv1_weight + input_convs = cfg.get('first_conv', None) + if input_convs is not None: + if isinstance(input_convs, str): + input_convs = (input_convs,) + for input_conv_name in input_convs: + weight_name = input_conv_name + '.weight' + try: + state_dict[weight_name] = adapt_input_conv(in_chans, state_dict[weight_name]) + _logger.info( + f'Converted input conv {input_conv_name} pretrained weights from 3 to {in_chans} channel(s)') + except NotImplementedError as e: + del state_dict[weight_name] + strict = False + _logger.warning( + f'Unable to convert pretrained {input_conv_name} weights, using random init for this layer.') classifier_name = cfg['classifier'] - if num_classes == 1000 and cfg['num_classes'] == 1001: - # FIXME this special case is problematic as number of pretrained weight sources increases - # special case for imagenet trained models with extra background class in pretrained weights - classifier_weight = state_dict[classifier_name + '.weight'] - state_dict[classifier_name + '.weight'] = classifier_weight[1:] - classifier_bias = state_dict[classifier_name + '.bias'] - state_dict[classifier_name + '.bias'] = classifier_bias[1:] - elif num_classes != cfg['num_classes']: - # completely discard fully connected for all other differences between pretrained and created model + label_offset = cfg.get('label_offset', 0) + if num_classes != cfg['num_classes']: + # completely discard fully connected if model num_classes doesn't match pretrained weights del state_dict[classifier_name + '.weight'] del state_dict[classifier_name + '.bias'] strict = False + elif label_offset > 0: + # special case for pretrained weights with an extra background class in pretrained weights + classifier_weight = state_dict[classifier_name + '.weight'] + state_dict[classifier_name + '.weight'] = classifier_weight[label_offset:] + classifier_bias = state_dict[classifier_name + '.bias'] + state_dict[classifier_name + '.bias'] = classifier_bias[label_offset:] model.load_state_dict(state_dict, strict=strict) diff --git a/timm/models/inception_resnet_v2.py b/timm/models/inception_resnet_v2.py index a5efa330..adfe330e 100644 --- a/timm/models/inception_resnet_v2.py +++ b/timm/models/inception_resnet_v2.py @@ -17,18 +17,20 @@ default_cfgs = { # ported from http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz 'inception_resnet_v2': { 'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/inception_resnet_v2-940b1cd6.pth', - 'num_classes': 1001, 'input_size': (3, 299, 299), 'pool_size': (8, 8), + 'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8), 'crop_pct': 0.8975, 'interpolation': 'bicubic', 'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD, 'first_conv': 'conv2d_1a.conv', 'classifier': 'classif', + 'label_offset': 1, # 1001 classes in pretrained weights }, # ported from http://download.tensorflow.org/models/ens_adv_inception_resnet_v2_2017_08_18.tar.gz 'ens_adv_inception_resnet_v2': { 'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ens_adv_inception_resnet_v2-2592a550.pth', - 'num_classes': 1001, 'input_size': (3, 299, 299), 'pool_size': (8, 8), + 'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8), 'crop_pct': 0.8975, 
'interpolation': 'bicubic', 'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD, 'first_conv': 'conv2d_1a.conv', 'classifier': 'classif', + 'label_offset': 1, # 1001 classes in pretrained weights } } @@ -222,7 +224,7 @@ class Block8(nn.Module): class InceptionResnetV2(nn.Module): - def __init__(self, num_classes=1001, in_chans=3, drop_rate=0., output_stride=32, global_pool='avg'): + def __init__(self, num_classes=1000, in_chans=3, drop_rate=0., output_stride=32, global_pool='avg'): super(InceptionResnetV2, self).__init__() self.drop_rate = drop_rate self.num_classes = num_classes diff --git a/timm/models/inception_v4.py b/timm/models/inception_v4.py index 40a0f291..69f9ff5a 100644 --- a/timm/models/inception_v4.py +++ b/timm/models/inception_v4.py @@ -16,10 +16,11 @@ __all__ = ['InceptionV4'] default_cfgs = { 'inception_v4': { 'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-cadene/inceptionv4-8e4777a0.pth', - 'num_classes': 1001, 'input_size': (3, 299, 299), 'pool_size': (8, 8), + 'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8), 'crop_pct': 0.875, 'interpolation': 'bicubic', 'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD, 'first_conv': 'features.0.conv', 'classifier': 'last_linear', + 'label_offset': 1, # 1001 classes in pretrained weights } } @@ -241,7 +242,7 @@ class InceptionC(nn.Module): class InceptionV4(nn.Module): - def __init__(self, num_classes=1001, in_chans=3, output_stride=32, drop_rate=0., global_pool='avg'): + def __init__(self, num_classes=1000, in_chans=3, output_stride=32, drop_rate=0., global_pool='avg'): super(InceptionV4, self).__init__() assert output_stride == 32 self.drop_rate = drop_rate diff --git a/timm/models/layers/__init__.py b/timm/models/layers/__init__.py index 8f52099f..6eb9f8a1 100644 --- a/timm/models/layers/__init__.py +++ b/timm/models/layers/__init__.py @@ -12,7 +12,7 @@ from .conv_bn_act import ConvBnAct from .create_act import create_act_layer, get_act_layer, get_act_fn from .create_attn import get_attn, create_attn from .create_conv2d import create_conv2d -from .create_norm_act import create_norm_act, get_norm_act_layer +from .create_norm_act import get_norm_act_layer, create_norm_act, convert_norm_act from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path from .eca import EcaModule, CecaModule from .evo_norm import EvoNormBatch2d, EvoNormSample2d diff --git a/timm/models/layers/conv_bn_act.py b/timm/models/layers/conv_bn_act.py index 90735357..33005c37 100644 --- a/timm/models/layers/conv_bn_act.py +++ b/timm/models/layers/conv_bn_act.py @@ -5,23 +5,23 @@ Hacked together by / Copyright 2020 Ross Wightman from torch import nn as nn from .create_conv2d import create_conv2d -from .create_norm_act import convert_norm_act_type +from .create_norm_act import convert_norm_act class ConvBnAct(nn.Module): def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding='', dilation=1, groups=1, - norm_layer=nn.BatchNorm2d, norm_kwargs=None, act_layer=nn.ReLU, apply_act=True, - drop_block=None, aa_layer=None): + bias=False, apply_act=True, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, aa_layer=None, + drop_block=None): super(ConvBnAct, self).__init__() use_aa = aa_layer is not None self.conv = create_conv2d( in_channels, out_channels, kernel_size, stride=1 if use_aa else stride, - padding=padding, dilation=dilation, groups=groups, bias=False) + padding=padding, dilation=dilation, groups=groups, bias=bias) # NOTE for backwards compatibility with models 
that use separate norm and act layer definitions - norm_act_layer, norm_act_args = convert_norm_act_type(norm_layer, act_layer, norm_kwargs) - self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block, **norm_act_args) + norm_act_layer = convert_norm_act(norm_layer, act_layer) + self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) self.aa = aa_layer(channels=out_channels) if stride == 2 and use_aa else None @property diff --git a/timm/models/layers/create_attn.py b/timm/models/layers/create_attn.py index f4a4c2c9..ff20e5df 100644 --- a/timm/models/layers/create_attn.py +++ b/timm/models/layers/create_attn.py @@ -9,6 +9,8 @@ from .cbam import CbamModule, LightCbamModule def get_attn(attn_type): + if isinstance(attn_type, torch.nn.Module): + return attn_type module_cls = None if attn_type is not None: if isinstance(attn_type, str): diff --git a/timm/models/layers/create_norm_act.py b/timm/models/layers/create_norm_act.py index 9e7e529e..5b562945 100644 --- a/timm/models/layers/create_norm_act.py +++ b/timm/models/layers/create_norm_act.py @@ -19,6 +19,7 @@ from .inplace_abn import InplaceAbn _NORM_ACT_TYPES = {BatchNormAct2d, GroupNormAct, EvoNormBatch2d, EvoNormSample2d, InplaceAbn} _NORM_ACT_REQUIRES_ARG = {BatchNormAct2d, GroupNormAct, InplaceAbn} # requires act_layer arg to define act type + def get_norm_act_layer(layer_class): layer_class = layer_class.replace('_', '').lower() if layer_class.startswith("batchnorm"): @@ -47,16 +48,22 @@ def create_norm_act(layer_type, num_features, apply_act=True, jit=False, **kwarg return layer_instance -def convert_norm_act_type(norm_layer, act_layer, norm_kwargs=None): +def convert_norm_act(norm_layer, act_layer): assert isinstance(norm_layer, (type, str, types.FunctionType, functools.partial)) assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, functools.partial)) - norm_act_args = norm_kwargs.copy() if norm_kwargs else {} + norm_act_kwargs = {} + + # unbind partial fn, so args can be rebound later + if isinstance(norm_layer, functools.partial): + norm_act_kwargs.update(norm_layer.keywords) + norm_layer = norm_layer.func + if isinstance(norm_layer, str): norm_act_layer = get_norm_act_layer(norm_layer) elif norm_layer in _NORM_ACT_TYPES: norm_act_layer = norm_layer - elif isinstance(norm_layer, (types.FunctionType, functools.partial)): - # assuming this is a lambda/fn/bound partial that creates norm_act layer + elif isinstance(norm_layer, types.FunctionType): + # if function type, must be a lambda/fn that creates a norm_act layer norm_act_layer = norm_layer else: type_name = norm_layer.__name__.lower() @@ -66,9 +73,11 @@ def convert_norm_act_type(norm_layer, act_layer, norm_kwargs=None): norm_act_layer = GroupNormAct else: assert False, f"No equivalent norm_act layer for {type_name}" + if norm_act_layer in _NORM_ACT_REQUIRES_ARG: - # Must pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. + # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types - # It is intended that functions/partial does not trigger this, they should define act. 
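The net effect of convert_norm_act is easiest to see with a concrete call; a minimal sketch (the layer choice and eps value here are illustrative, not from the patch):

import functools
from torch import nn
from timm.models.layers import BatchNormAct2d, convert_norm_act

# configs in this patch now bind non-default norm args via functools.partial
norm_layer = functools.partial(BatchNormAct2d, eps=1e-3)

# the partial is unwrapped, the captured kwargs (plus act_layer) are rebound,
# and a single ready-to-call constructor comes back
norm_act_layer = convert_norm_act(norm_layer, act_layer=nn.ReLU)
bn_act = norm_act_layer(64)  # == BatchNormAct2d(64, eps=1e-3, act_layer=nn.ReLU)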
- norm_act_args.update(dict(act_layer=act_layer)) - return norm_act_layer, norm_act_args + norm_act_kwargs.setdefault('act_layer', act_layer) + if norm_act_kwargs: + norm_act_layer = functools.partial(norm_act_layer, **norm_act_kwargs) # bind/rebind args + return norm_act_layer diff --git a/timm/models/layers/norm_act.py b/timm/models/layers/norm_act.py index e3fe3940..02cabe88 100644 --- a/timm/models/layers/norm_act.py +++ b/timm/models/layers/norm_act.py @@ -24,7 +24,7 @@ class BatchNormAct2d(nn.BatchNorm2d): act_args = dict(inplace=True) if inplace else {} self.act = act_layer(**act_args) else: - self.act = None + self.act = nn.Identity() def _forward_jit(self, x): """ A cut & paste of the contents of the PyTorch BatchNorm2d forward function @@ -62,8 +62,7 @@ class BatchNormAct2d(nn.BatchNorm2d): x = self._forward_jit(x) else: x = self._forward_python(x) - if self.act is not None: - x = self.act(x) + x = self.act(x) return x @@ -75,12 +74,12 @@ class GroupNormAct(nn.GroupNorm): if isinstance(act_layer, str): act_layer = get_act_layer(act_layer) if act_layer is not None and apply_act: - self.act = act_layer(inplace=inplace) + act_args = dict(inplace=True) if inplace else {} + self.act = act_layer(**act_args) else: - self.act = None + self.act = nn.Identity() def forward(self, x): x = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) - if self.act is not None: - x = self.act(x) + x = self.act(x) return x diff --git a/timm/models/layers/separable_conv.py b/timm/models/layers/separable_conv.py index e949ea43..1ddcb4e6 100644 --- a/timm/models/layers/separable_conv.py +++ b/timm/models/layers/separable_conv.py @@ -8,17 +8,16 @@ Hacked together by / Copyright 2020 Ross Wightman from torch import nn as nn from .create_conv2d import create_conv2d -from .create_norm_act import convert_norm_act_type +from .create_norm_act import convert_norm_act class SeparableConvBnAct(nn.Module): """ Separable Conv w/ trailing Norm and Activation """ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, - channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, norm_kwargs=None, - act_layer=nn.ReLU, apply_act=True, drop_block=None): + channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, + apply_act=True, drop_block=None): super(SeparableConvBnAct, self).__init__() - norm_kwargs = norm_kwargs or {} self.conv_dw = create_conv2d( in_channels, int(in_channels * channel_multiplier), kernel_size, @@ -27,8 +26,8 @@ class SeparableConvBnAct(nn.Module): self.conv_pw = create_conv2d( int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) - norm_act_layer, norm_act_args = convert_norm_act_type(norm_layer, act_layer, norm_kwargs) - self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block, **norm_act_args) + norm_act_layer = convert_norm_act(norm_layer, act_layer) + self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) @property def in_channels(self): diff --git a/timm/models/nasnet.py b/timm/models/nasnet.py index 60e1a276..1f1a3b75 100644 --- a/timm/models/nasnet.py +++ b/timm/models/nasnet.py @@ -1,6 +1,9 @@ +""" NasNet-A (Large) + nasnetalarge implementation grabbed from Cadene's pretrained models + https://github.com/Cadene/pretrained-models.pytorch """ +from functools import partial -""" import torch import torch.nn as nn import torch.nn.functional as F @@ -20,9 +23,10 @@ default_cfgs = { 'interpolation': 
'bicubic', 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), - 'num_classes': 1001, + 'num_classes': 1000, 'first_conv': 'conv0.conv', 'classifier': 'last_linear', + 'label_offset': 1, # 1001 classes in pretrained weights }, } @@ -418,7 +422,7 @@ class NASNetALarge(nn.Module): self.conv0 = ConvBnAct( in_channels=in_chans, out_channels=self.stem_size, kernel_size=3, padding=0, stride=2, - norm_kwargs=dict(eps=0.001, momentum=0.1), act_layer=None) + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.1), apply_act=False) self.cell_stem_0 = CellStem0( self.stem_size, num_channels=channels // (channel_multiplier ** 2), pad_type=pad_type) diff --git a/timm/models/pnasnet.py b/timm/models/pnasnet.py index 5f1e177f..73073009 100644 --- a/timm/models/pnasnet.py +++ b/timm/models/pnasnet.py @@ -6,6 +6,7 @@ """ from collections import OrderedDict +from functools import partial import torch import torch.nn as nn @@ -26,9 +27,10 @@ default_cfgs = { 'interpolation': 'bicubic', 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), - 'num_classes': 1001, + 'num_classes': 1000, 'first_conv': 'conv_0.conv', 'classifier': 'last_linear', + 'label_offset': 1, # 1001 classes in pretrained weights }, } @@ -234,7 +236,7 @@ class Cell(CellBase): class PNASNet5Large(nn.Module): - def __init__(self, num_classes=1001, in_chans=3, output_stride=32, drop_rate=0., global_pool='avg', pad_type=''): + def __init__(self, num_classes=1000, in_chans=3, output_stride=32, drop_rate=0., global_pool='avg', pad_type=''): super(PNASNet5Large, self).__init__() self.num_classes = num_classes self.drop_rate = drop_rate @@ -243,7 +245,7 @@ class PNASNet5Large(nn.Module): self.conv_0 = ConvBnAct( in_chans, 96, kernel_size=3, stride=2, padding=0, - norm_kwargs=dict(eps=0.001, momentum=0.1), act_layer=None) + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.1), apply_act=False) self.cell_stem_0 = CellStem0( in_chs_left=96, out_chs_left=54, in_chs_right=96, out_chs_right=54, pad_type=pad_type) diff --git a/timm/models/xception_aligned.py b/timm/models/xception_aligned.py index e6b21576..dd7a7a86 100644 --- a/timm/models/xception_aligned.py +++ b/timm/models/xception_aligned.py @@ -5,7 +5,7 @@ https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/model_zo Hacked together by / Copyright 2020 Ross Wightman """ -from collections import OrderedDict +from functools import partial import torch.nn as nn import torch.nn.functional as F @@ -43,9 +43,8 @@ default_cfgs = dict( class SeparableConv2d(nn.Module): def __init__( self, inplanes, planes, kernel_size=3, stride=1, dilation=1, padding='', - act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, norm_kwargs=None): + act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d): super(SeparableConv2d, self).__init__() - norm_kwargs = norm_kwargs if norm_kwargs is not None else {} self.kernel_size = kernel_size self.dilation = dilation @@ -53,7 +52,7 @@ class SeparableConv2d(nn.Module): self.conv_dw = create_conv2d( inplanes, inplanes, kernel_size, stride=stride, padding=padding, dilation=dilation, depthwise=True) - self.bn_dw = norm_layer(inplanes, **norm_kwargs) + self.bn_dw = norm_layer(inplanes) if act_layer is not None: self.act_dw = act_layer(inplace=True) else: @@ -61,7 +60,7 @@ class SeparableConv2d(nn.Module): # pointwise convolution self.conv_pw = create_conv2d(inplanes, planes, kernel_size=1) - self.bn_pw = norm_layer(planes, **norm_kwargs) + self.bn_pw = norm_layer(planes) if act_layer is not None: self.act_pw = act_layer(inplace=True) else: @@ -82,17 +81,15 @@ class 
SeparableConv2d(nn.Module): class XceptionModule(nn.Module): def __init__( self, in_chs, out_chs, stride=1, dilation=1, pad_type='', - start_with_relu=True, no_skip=False, act_layer=nn.ReLU, norm_layer=None, norm_kwargs=None): + start_with_relu=True, no_skip=False, act_layer=nn.ReLU, norm_layer=None): super(XceptionModule, self).__init__() - norm_kwargs = norm_kwargs if norm_kwargs is not None else {} out_chs = to_3tuple(out_chs) self.in_channels = in_chs self.out_channels = out_chs[-1] self.no_skip = no_skip if not no_skip and (self.out_channels != self.in_channels or stride != 1): self.shortcut = ConvBnAct( - in_chs, self.out_channels, 1, stride=stride, - norm_layer=norm_layer, norm_kwargs=norm_kwargs, act_layer=None) + in_chs, self.out_channels, 1, stride=stride, norm_layer=norm_layer, act_layer=None) else: self.shortcut = None @@ -103,7 +100,7 @@ class XceptionModule(nn.Module): self.stack.add_module(f'act{i + 1}', nn.ReLU(inplace=i > 0)) self.stack.add_module(f'conv{i + 1}', SeparableConv2d( in_chs, out_chs[i], 3, stride=stride if i == 2 else 1, dilation=dilation, padding=pad_type, - act_layer=separable_act_layer, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) + act_layer=separable_act_layer, norm_layer=norm_layer)) in_chs = out_chs[i] def forward(self, x): @@ -121,14 +118,13 @@ class XceptionAligned(nn.Module): """ def __init__(self, block_cfg, num_classes=1000, in_chans=3, output_stride=32, - act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_rate=0., global_pool='avg'): + act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, drop_rate=0., global_pool='avg'): super(XceptionAligned, self).__init__() self.num_classes = num_classes self.drop_rate = drop_rate assert output_stride in (8, 16, 32) - norm_kwargs = norm_kwargs if norm_kwargs is not None else {} - layer_args = dict(act_layer=act_layer, norm_layer=norm_layer, norm_kwargs=norm_kwargs) + layer_args = dict(act_layer=act_layer, norm_layer=norm_layer) self.stem = nn.Sequential(*[ ConvBnAct(in_chans, 32, kernel_size=3, stride=2, **layer_args), ConvBnAct(32, 64, kernel_size=3, stride=1, **layer_args) @@ -196,7 +192,7 @@ def xception41(pretrained=False, **kwargs): dict(in_chs=728, out_chs=(728, 1024, 1024), stride=2), dict(in_chs=1024, out_chs=(1536, 1536, 2048), stride=1, no_skip=True, start_with_relu=False), ] - model_args = dict(block_cfg=block_cfg, norm_kwargs=dict(eps=.001, momentum=.1), **kwargs) + model_args = dict(block_cfg=block_cfg, norm_layer=partial(nn.BatchNorm2d, eps=.001, momentum=.1), **kwargs) return _xception('xception41', pretrained=pretrained, **model_args) @@ -215,7 +211,7 @@ def xception65(pretrained=False, **kwargs): dict(in_chs=728, out_chs=(728, 1024, 1024), stride=2), dict(in_chs=1024, out_chs=(1536, 1536, 2048), stride=1, no_skip=True, start_with_relu=False), ] - model_args = dict(block_cfg=block_cfg, norm_kwargs=dict(eps=.001, momentum=.1), **kwargs) + model_args = dict(block_cfg=block_cfg, norm_layer=partial(nn.BatchNorm2d, eps=.001, momentum=.1), **kwargs) return _xception('xception65', pretrained=pretrained, **model_args) @@ -236,5 +232,5 @@ def xception71(pretrained=False, **kwargs): dict(in_chs=728, out_chs=(728, 1024, 1024), stride=2), dict(in_chs=1024, out_chs=(1536, 1536, 2048), stride=1, no_skip=True, start_with_relu=False), ] - model_args = dict(block_cfg=block_cfg, norm_kwargs=dict(eps=.001, momentum=.1), **kwargs) + model_args = dict(block_cfg=block_cfg, norm_layer=partial(nn.BatchNorm2d, eps=.001, momentum=.1), **kwargs) return _xception('xception71', pretrained=pretrained, 
**model_args) From 1bcc69e0ad8adbd3c31202394415e4bfdbcc62d0 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 9 Feb 2021 16:00:19 -0800 Subject: [PATCH 2/7] Use in_channels for depthwise groups, allows using `out_channels=N * in_channels` (does not impact existing models). Fix #354. --- timm/models/layers/create_conv2d.py | 3 ++- timm/models/layers/mixed_conv2d.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/timm/models/layers/create_conv2d.py b/timm/models/layers/create_conv2d.py index 0134b05c..3a0cc03a 100644 --- a/timm/models/layers/create_conv2d.py +++ b/timm/models/layers/create_conv2d.py @@ -22,7 +22,8 @@ def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs) else: depthwise = kwargs.pop('depthwise', False) - groups = out_channels if depthwise else kwargs.pop('groups', 1) + # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0 + groups = in_channels if depthwise else kwargs.pop('groups', 1) if 'num_experts' in kwargs and kwargs['num_experts'] > 0: m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs) else: diff --git a/timm/models/layers/mixed_conv2d.py b/timm/models/layers/mixed_conv2d.py index 53d650cd..fa0ce565 100644 --- a/timm/models/layers/mixed_conv2d.py +++ b/timm/models/layers/mixed_conv2d.py @@ -34,7 +34,7 @@ class MixedConv2d(nn.ModuleDict): self.in_channels = sum(in_splits) self.out_channels = sum(out_splits) for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)): - conv_groups = out_ch if depthwise else 1 + conv_groups = in_ch if depthwise else 1 # use add_module to keep key space clean self.add_module( str(idx), From dc85e5a237753b149f72caa4b9c351f46d257200 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 9 Feb 2021 16:21:47 -0800 Subject: [PATCH 3/7] Add ByobNet w/ GPU-EfficientNets and RepVGG. Also add classic vgg models. --- timm/models/__init__.py | 2 + timm/models/byobnet.py | 707 ++++++++++++++++++++++++++++++++++++++++ timm/models/nfnet.py | 7 +- timm/models/vgg.py | 261 +++++++++++++++ 4 files changed, 975 insertions(+), 2 deletions(-) create mode 100644 timm/models/byobnet.py create mode 100644 timm/models/vgg.py diff --git a/timm/models/__init__.py b/timm/models/__init__.py index a7e85084..dc56848e 100644 --- a/timm/models/__init__.py +++ b/timm/models/__init__.py @@ -1,3 +1,4 @@ +from .byobnet import * from .cspnet import * from .densenet import * from .dla import * @@ -23,6 +24,7 @@ from .selecsls import * from .senet import * from .sknet import * from .tresnet import * +from .vgg import * from .vision_transformer import * from .vovnet import * from .xception import * diff --git a/timm/models/byobnet.py b/timm/models/byobnet.py new file mode 100644 index 00000000..680edd22 --- /dev/null +++ b/timm/models/byobnet.py @@ -0,0 +1,707 @@ +""" Bring-Your-Own-Blocks Network + +A flexible network w/ dataclass based config for stacking those NN blocks. + +This model is currently used to implement the following networks: + +GPU Efficient (ResNets) - gernet_l/m/s (original versions called genet, but this was already used (by SENet author)). 
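The depthwise change in patch 2 is simplest to verify directly; a small check, assuming this branch of timm is importable (shapes are illustrative):

import torch
from timm.models.layers import create_conv2d

# groups now follows in_channels, so out_channels can be any multiple of
# in_channels, e.g. a 32 -> 64 depthwise conv (previously an error, see #354)
conv = create_conv2d(32, 64, kernel_size=3, depthwise=True)
print(conv.groups)  # 32
print(conv(torch.randn(2, 32, 56, 56)).shape)  # torch.Size([2, 64, 56, 56])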
+Paper: `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090 +Code and weights: https://github.com/idstcv/GPU-Efficient-Networks, licensed Apache 2.0 + +RepVGG - repvgg_* +Paper: `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697 +Code and weights: https://github.com/DingXiaoH/RepVGG, licensed MIT + +In all cases the models have been modified to fit within the design of ByobNet. I've remapped +the original weights and verified accuracies. + +For GPU Efficient nets, I used the original names for the blocks since they were for the most part +the same as original residual blocks in ResNe(X)t, DarkNet, and other existing models. Note also some +changes introduced in RegNet were also present in the stem and bottleneck blocks for this model. + +A significant number of different network archs can be implemented here, including variants of the +above nets that include attention. + +Hacked together by / copyright Ross Wightman, 2021. +""" +import math +from dataclasses import dataclass, field +from collections import OrderedDict +from typing import Tuple, Dict, Optional, Union, Any, Callable +from functools import partial + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .helpers import build_model_with_cfg +from .layers import ClassifierHead, ConvBnAct, DropPath, AvgPool2dSame, \ + create_conv2d, get_act_layer, get_attn, convert_norm_act, make_divisible +from .registry import register_model + +__all__ = ['ByobNet', 'ByobCfg', 'BlocksCfg'] + + +def _cfg(url='', **kwargs): + return { + 'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7), + 'crop_pct': 0.875, 'interpolation': 'bilinear', + 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD, + 'first_conv': 'stem.conv', 'classifier': 'head.fc', + **kwargs + } + + +default_cfgs = { + # GPU-Efficient (ResNet) weights + 'gernet_s': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_s-756b4751.pth'), + 'gernet_m': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_m-0873c53a.pth'), + 'gernet_l': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_l-f31e2e8d.pth', + input_size=(3, 256, 256), pool_size=(8, 8)), + + # RepVGG weights + 'repvgg_a2': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_a2-c1ee6d2b.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), + 'repvgg_b0': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b0-80ac3f1b.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), + 'repvgg_b1': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b1-77ca2989.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), + 'repvgg_b1g4': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b1g4-abde5d92.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), + 'repvgg_b2': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b2-25b7494e.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), + 'repvgg_b2g4': _cfg( + 
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b2g4-165a85f2.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), + 'repvgg_b3': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b3-199bc50d.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), + 'repvgg_b3g4': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b3g4-73c370bf.pth', + first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')), +} + + +@dataclass +class BlocksCfg: + type: Union[str, nn.Module] + d: int # block depth (number of block repeats in stage) + c: int # number of output channels for each block in stage + s: int = 2 # stride of stage (first block) + gs: Optional[Union[int, Callable]] = None # group-size of blocks in stage, conv is depthwise if gs == 1 + br: float = 1. # bottleneck-ratio of blocks in stage + + +@dataclass +class ByobCfg: + blocks: Tuple[BlocksCfg, ...] + downsample: str = 'conv1x1' + stem_type: str = '3x3' + stem_chs: int = 32 + width_factor: float = 1.0 + num_features: int = 0 # num out_channels for final conv, no final 1x1 conv if 0 + zero_init_last_bn: bool = True + + act_layer: str = 'relu' + norm_layer: nn.Module = nn.BatchNorm2d + attn_layer: Optional[str] = None + attn_kwargs: dict = field(default_factory=lambda: dict()) + + +def _rep_vgg_bcfg(d=(4, 6, 16, 1), wf=(1., 1., 1., 1.), groups=0): + c = (64, 128, 256, 512) + group_size = 0 + if groups > 0: + group_size = lambda chs, idx: chs // groups if (idx + 1) % 2 == 0 else 0 + bcfg = tuple([BlocksCfg(type='rep', d=d, c=c * wf, gs=group_size) for d, c, wf in zip(d, c, wf)]) + return bcfg + + +model_cfgs = dict( + + gernet_l=ByobCfg( + blocks=( + BlocksCfg(type='basic', d=1, c=128, s=2, gs=0, br=1.), + BlocksCfg(type='basic', d=2, c=192, s=2, gs=0, br=1.), + BlocksCfg(type='bottle', d=6, c=640, s=2, gs=0, br=1 / 4), + BlocksCfg(type='bottle', d=5, c=640, s=2, gs=1, br=3.), + BlocksCfg(type='bottle', d=4, c=640, s=1, gs=1, br=3.), + ), + stem_chs=32, + num_features=2560, + ), + gernet_m=ByobCfg( + blocks=( + BlocksCfg(type='basic', d=1, c=128, s=2, gs=0, br=1.), + BlocksCfg(type='basic', d=2, c=192, s=2, gs=0, br=1.), + BlocksCfg(type='bottle', d=6, c=640, s=2, gs=0, br=1 / 4), + BlocksCfg(type='bottle', d=4, c=640, s=2, gs=1, br=3.), + BlocksCfg(type='bottle', d=1, c=640, s=1, gs=1, br=3.), + ), + stem_chs=32, + num_features=2560, + ), + gernet_s=ByobCfg( + blocks=( + BlocksCfg(type='basic', d=1, c=48, s=2, gs=0, br=1.), + BlocksCfg(type='basic', d=3, c=48, s=2, gs=0, br=1.), + BlocksCfg(type='bottle', d=7, c=384, s=2, gs=0, br=1 / 4), + BlocksCfg(type='bottle', d=2, c=560, s=2, gs=1, br=3.), + BlocksCfg(type='bottle', d=1, c=256, s=1, gs=1, br=3.), + ), + stem_chs=13, + num_features=1920, + ), + + repvgg_a2=ByobCfg( + blocks=_rep_vgg_bcfg(d=(2, 4, 14, 1), wf=(1.5, 1.5, 1.5, 2.75)), + stem_type='rep', + stem_chs=64, + ), + repvgg_b0=ByobCfg( + blocks=_rep_vgg_bcfg(wf=(1., 1., 1., 2.5)), + stem_type='rep', + stem_chs=64, + ), + repvgg_b1=ByobCfg( + blocks=_rep_vgg_bcfg(wf=(2., 2., 2., 4.)), + stem_type='rep', + stem_chs=64, + ), + repvgg_b1g4=ByobCfg( + blocks=_rep_vgg_bcfg(wf=(2., 2., 2., 4.), groups=4), + stem_type='rep', + stem_chs=64, + ), + repvgg_b2=ByobCfg( + blocks=_rep_vgg_bcfg(wf=(2.5, 2.5, 2.5, 5.)), + stem_type='rep', + stem_chs=64, + ), + repvgg_b2g4=ByobCfg( + blocks=_rep_vgg_bcfg(wf=(2.5, 2.5, 2.5, 5.), groups=4), + stem_type='rep', + stem_chs=64, 
+ ), + repvgg_b3=ByobCfg( + blocks=_rep_vgg_bcfg(wf=(3., 3., 3., 5.)), + stem_type='rep', + stem_chs=64, + ), + repvgg_b3g4=ByobCfg( + blocks=_rep_vgg_bcfg(wf=(3., 3., 3., 5.), groups=4), + stem_type='rep', + stem_chs=64, + ), +) + + +def _na_args(cfg: dict): + return dict( + norm_layer=cfg.get('norm_layer', nn.BatchNorm2d), + act_layer=cfg.get('act_layer', nn.ReLU)) + + +def _ex_tuple(cfg: dict, *names): + return tuple([cfg.get(n, None) for n in names]) + + +def num_groups(group_size, channels): + if not group_size: # 0 or None + return 1 # normal conv with 1 group + else: + # NOTE group_size == 1 -> depthwise conv + assert channels % group_size == 0 + return channels // group_size + + +class DownsampleAvg(nn.Module): + def __init__(self, in_chs, out_chs, stride=1, dilation=1, apply_act=False, norm_layer=None, act_layer=None): + """ AvgPool Downsampling as in 'D' ResNet variants.""" + super(DownsampleAvg, self).__init__() + avg_stride = stride if dilation == 1 else 1 + if stride > 1 or dilation > 1: + avg_pool_fn = AvgPool2dSame if avg_stride == 1 and dilation > 1 else nn.AvgPool2d + self.pool = avg_pool_fn(2, avg_stride, ceil_mode=True, count_include_pad=False) + else: + self.pool = nn.Identity() + self.conv = ConvBnAct(in_chs, out_chs, 1, apply_act=apply_act, norm_layer=norm_layer, act_layer=act_layer) + + def forward(self, x): + return self.conv(self.pool(x)) + + +def create_downsample(type, **kwargs): + if type == 'avg': + return DownsampleAvg(**kwargs) + else: + return ConvBnAct(kwargs.pop('in_chs'), kwargs.pop('out_chs'), kernel_size=1, **kwargs) + + +class BasicBlock(nn.Module): + """ ResNet Basic Block - kxk + kxk + """ + + def __init__( + self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), group_size=None, bottle_ratio=1.0, + downsample='avg', linear_out=False, layer_cfg=None, drop_block=None, drop_path_rate=0.): + super(BasicBlock, self).__init__() + layer_cfg = layer_cfg or {} + act_layer, attn_layer = _ex_tuple(layer_cfg, 'act_layer', 'attn_layer') + layer_args = _na_args(layer_cfg) + mid_chs = make_divisible(out_chs * bottle_ratio) + groups = num_groups(group_size, mid_chs) + + if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]: + self.shortcut = create_downsample( + downsample, in_chs=in_chs, out_chs=out_chs, stride=stride, dilation=dilation[0], + apply_act=False, **layer_args) + else: + self.shortcut = nn.Identity() + + self.conv1_kxk = ConvBnAct(in_chs, mid_chs, kernel_size, stride=stride, dilation=dilation[0], **layer_args) + self.conv2_kxk = ConvBnAct( + mid_chs, out_chs, kernel_size, dilation=dilation[1], groups=groups, + drop_block=drop_block, apply_act=False, **layer_args) + self.attn = nn.Identity() if attn_layer is None else attn_layer(out_chs) + self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. 
else nn.Identity()
+        self.act = nn.Identity() if linear_out else act_layer(inplace=True)
+
+    def init_weights(self, zero_init_last_bn=False):
+        if zero_init_last_bn:
+            nn.init.zeros_(self.conv2_kxk.bn.weight)
+
+    def forward(self, x):
+        shortcut = self.shortcut(x)
+
+        # residual path
+        x = self.conv1_kxk(x)
+        x = self.conv2_kxk(x)
+        x = self.attn(x)
+        x = self.drop_path(x)
+
+        x = self.act(x + shortcut)
+        return x
+
+
+class BottleneckBlock(nn.Module):
+    """ ResNet-like Bottleneck Block - 1x1 - kxk - 1x1
+    """
+
+    def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1., group_size=None,
+                 downsample='avg', linear_out=False, layer_cfg=None, drop_block=None, drop_path_rate=0.):
+        super(BottleneckBlock, self).__init__()
+        layer_cfg = layer_cfg or {}
+        act_layer, attn_layer = _ex_tuple(layer_cfg, 'act_layer', 'attn_layer')
+        layer_args = _na_args(layer_cfg)
+        mid_chs = make_divisible(out_chs * bottle_ratio)
+        groups = num_groups(group_size, mid_chs)
+
+        if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]:
+            self.shortcut = create_downsample(
+                downsample, in_chs=in_chs, out_chs=out_chs, stride=stride, dilation=dilation[0],
+                apply_act=False, **layer_args)
+        else:
+            self.shortcut = nn.Identity()
+
+        self.conv1_1x1 = ConvBnAct(in_chs, mid_chs, 1, **layer_args)
+        self.conv2_kxk = ConvBnAct(
+            mid_chs, mid_chs, kernel_size, stride=stride, dilation=dilation[0],
+            groups=groups, drop_block=drop_block, **layer_args)
+        self.attn = nn.Identity() if attn_layer is None else attn_layer(mid_chs)
+        self.conv3_1x1 = ConvBnAct(mid_chs, out_chs, 1, apply_act=False, **layer_args)
+        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+        self.act = nn.Identity() if linear_out else act_layer(inplace=True)
+
+    def init_weights(self, zero_init_last_bn=False):
+        if zero_init_last_bn:
+            nn.init.zeros_(self.conv3_1x1.bn.weight)
+
+    def forward(self, x):
+        shortcut = self.shortcut(x)
+
+        x = self.conv1_1x1(x)
+        x = self.conv2_kxk(x)
+        x = self.attn(x)
+        x = self.conv3_1x1(x)
+        x = self.drop_path(x)
+
+        x = self.act(x + shortcut)
+        return x
+
+
+class DarkBlock(nn.Module):
+    """ DarkNet-like (1x1 + 3x3 w/ stride) block
+
+    The GE-Net impl included a 1x1 + 3x3 block in their search space. It was not used in the feature models.
+    This block is pretty much a DarkNet block (also DenseNet) hence the name. Neither DarkNet nor DenseNet
+    uses strides within the block (external 3x3 or maxpool downsampling is done in front of the block repeats).
+
+    If one does want to use a lot of these blocks w/ stride, I'd recommend using the EdgeBlock (3x3 w/ stride + 1x1)
+    for more optimal compute.
+    """
+
+    def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+                 downsample='avg', linear_out=False, layer_cfg=None, drop_block=None, drop_path_rate=0.):
+        super(DarkBlock, self).__init__()
+        layer_cfg = layer_cfg or {}
+        act_layer, attn_layer = _ex_tuple(layer_cfg, 'act_layer', 'attn_layer')
+        layer_args = _na_args(layer_cfg)
+        mid_chs = make_divisible(out_chs * bottle_ratio)
+        groups = num_groups(group_size, mid_chs)
+
+        if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]:
+            self.shortcut = create_downsample(
+                downsample, in_chs=in_chs, out_chs=out_chs, stride=stride, dilation=dilation[0],
+                apply_act=False, **layer_args)
+        else:
+            self.shortcut = nn.Identity()
+
+        self.conv1_1x1 = ConvBnAct(in_chs, mid_chs, 1, **layer_args)
+        self.conv2_kxk = ConvBnAct(
+            mid_chs, out_chs, kernel_size, stride=stride, dilation=dilation[0],
+            groups=groups, drop_block=drop_block, apply_act=False, **layer_args)
+        self.attn = nn.Identity() if attn_layer is None else attn_layer(out_chs)
+        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+        self.act = nn.Identity() if linear_out else act_layer(inplace=True)
+
+    def init_weights(self, zero_init_last_bn=False):
+        if zero_init_last_bn:
+            nn.init.zeros_(self.conv2_kxk.bn.weight)
+
+    def forward(self, x):
+        shortcut = self.shortcut(x)
+
+        x = self.conv1_1x1(x)
+        x = self.conv2_kxk(x)
+        x = self.attn(x)
+        x = self.drop_path(x)
+        x = self.act(x + shortcut)
+        return x
+
+
+class EdgeBlock(nn.Module):
+    """ EdgeResidual-like (3x3 + 1x1) block
+
+    A two layer block like DarkBlock, but with the order of the 3x3 and 1x1 convs reversed.
+    Very similar to the EfficientNet Edge-Residual block, but this block ends with activations, is
+    intended to be used with either expansion or bottleneck contraction, and can use DW/group/non-grouped convs.
+
+    FIXME is there a more common 3x3 + 1x1 conv block to name this after?
+    """
+
+    def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+                 downsample='avg', linear_out=False, layer_cfg=None, drop_block=None, drop_path_rate=0.):
+        super(EdgeBlock, self).__init__()
+        layer_cfg = layer_cfg or {}
+        act_layer, attn_layer = _ex_tuple(layer_cfg, 'act_layer', 'attn_layer')
+        layer_args = _na_args(layer_cfg)
+        mid_chs = make_divisible(out_chs * bottle_ratio)
+        groups = num_groups(group_size, mid_chs)
+
+        if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]:
+            self.shortcut = create_downsample(
+                downsample, in_chs=in_chs, out_chs=out_chs, stride=stride, dilation=dilation[0],
+                apply_act=False, **layer_args)
+        else:
+            self.shortcut = nn.Identity()
+
+        self.conv1_kxk = ConvBnAct(
+            in_chs, mid_chs, kernel_size, stride=stride, dilation=dilation[0],
+            groups=groups, drop_block=drop_block, **layer_args)
+        self.attn = nn.Identity() if attn_layer is None else attn_layer(out_chs)
+        self.conv2_1x1 = ConvBnAct(mid_chs, out_chs, 1, apply_act=False, **layer_args)
+        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+        self.act = nn.Identity() if linear_out else act_layer(inplace=True)
+
+    def init_weights(self, zero_init_last_bn=False):
+        if zero_init_last_bn:
+            nn.init.zeros_(self.conv2_1x1.bn.weight)
+
+    def forward(self, x):
+        shortcut = self.shortcut(x)
+
+        x = self.conv1_kxk(x)
+        x = self.attn(x)
+        x = self.conv2_1x1(x)
+        x = self.drop_path(x)
+        x = self.act(x + shortcut)
+        return x
+
+
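All of the blocks above resolve their conv groups through the num_groups helper defined earlier in this file; restating the convention as a sketch:

# group_size (the `gs` field in BlocksCfg) -> conv groups for a given width:
#   None or 0 -> groups = 1             (normal conv)
#   1         -> groups = channels      (depthwise conv)
#   g         -> groups = channels // g (g channels per group)
assert num_groups(None, 64) == 1
assert num_groups(1, 64) == 64
assert num_groups(16, 64) == 4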
+class RepVggBlock(nn.Module):
+    """ RepVGG Block.
+
+    Adapted from impl at https://github.com/DingXiaoH/RepVGG
+
+    This version does not currently support the deploy optimization. It is currently fixed in 'train' mode.
+    """
+
+    def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+                 downsample='', layer_cfg=None, drop_block=None, drop_path_rate=0.):
+        super(RepVggBlock, self).__init__()
+        layer_cfg = layer_cfg or {}
+        act_layer, norm_layer, attn_layer = _ex_tuple(layer_cfg, 'act_layer', 'norm_layer', 'attn_layer')
+        norm_layer = convert_norm_act(norm_layer=norm_layer, act_layer=act_layer)
+        layer_args = _na_args(layer_cfg)
+        groups = num_groups(group_size, in_chs)
+
+        use_ident = in_chs == out_chs and stride == 1 and dilation[0] == dilation[1]
+        self.identity = norm_layer(out_chs, apply_act=False) if use_ident else None
+        self.conv_kxk = ConvBnAct(
+            in_chs, out_chs, kernel_size, stride=stride, dilation=dilation[0],
+            groups=groups, drop_block=drop_block, apply_act=False, **layer_args)
+        self.conv_1x1 = ConvBnAct(in_chs, out_chs, 1, stride=stride, groups=groups, apply_act=False, **layer_args)
+        self.attn = None if attn_layer is None else attn_layer(out_chs)
+        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+        self.act = act_layer(inplace=True)
+
+    def init_weights(self, zero_init_last_bn=False):
+        # NOTE this init overrides that base model init with specific changes for the block type
+        for m in self.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                nn.init.normal_(m.weight, .1, .1)
+                nn.init.normal_(m.bias, 0, .1)
+
+    def forward(self, x):
+        if self.identity is None:
+            identity = 0
+        else:
+            identity = self.identity(x)
+        x = self.conv_1x1(x) + self.conv_kxk(x)
+        if self.attn is not None:
+            x = self.attn(x)
+        x = self.drop_path(x)
+        x = self.act(x + identity)
+        return x
+
+
+_block_registry = dict(
+    basic=BasicBlock,
+    bottle=BottleneckBlock,
+    dark=DarkBlock,
+    edge=EdgeBlock,
+    rep=RepVggBlock,
+)
+
+
+def register_block(block_type: str, block_fn: nn.Module):
+    _block_registry[block_type] = block_fn
+
+
+def create_block(block: Union[str, nn.Module], **kwargs):
+    if isinstance(block, (nn.Module, partial)):
+        return block(**kwargs)
+    assert block in _block_registry, f'Unknown block type ({block})'
+    return _block_registry[block](**kwargs)
+
+
+def create_stem(in_chs, out_chs, stem_type='', layer_cfg=None):
+    layer_cfg = layer_cfg or {}
+    layer_args = _na_args(layer_cfg)
+    assert stem_type in ('', 'deep', 'deep_tiered', '3x3', '7x7', 'rep')
+    if 'deep' in stem_type:
+        # 3 deep 3x3 conv stack
+        stem = OrderedDict()
+        stem_chs = (out_chs // 2, out_chs // 2)
+        if 'tiered' in stem_type:
+            stem_chs = (3 * stem_chs[0] // 4, stem_chs[1])
+        norm_layer, act_layer = _ex_tuple(layer_args, 'norm_layer', 'act_layer')
+        stem['conv1'] = create_conv2d(in_chs, stem_chs[0], kernel_size=3, stride=2)
+        stem['conv2'] = create_conv2d(stem_chs[0], stem_chs[1], kernel_size=3, stride=1)
+        stem['conv3'] = create_conv2d(stem_chs[1], out_chs, kernel_size=3, stride=1)
+        norm_act_layer = convert_norm_act(norm_layer=norm_layer, act_layer=act_layer)
+        stem['na'] = norm_act_layer(out_chs)
+        stem = nn.Sequential(stem)
+    elif '7x7' in stem_type:
+        # 7x7 stem conv as in ResNet
+        stem = ConvBnAct(in_chs, out_chs, 7, stride=2, **layer_args)
+    elif 'rep' in stem_type:
+        stem = RepVggBlock(in_chs, out_chs, stride=2, layer_cfg=layer_cfg)
+    else:
+        # 3x3 stem conv as in RegNet
+        stem = ConvBnAct(in_chs, out_chs, 3, stride=2, **layer_args)
+
+    return stem
+
+
+class ByobNet(nn.Module):
""" 'Bring-your-own-blocks' Net + + A flexible network backbone that allows building model stem + blocks via + dataclass cfg definition w/ factory functions for module instantiation. + + Current assumption is that both stem and blocks are in conv-bn-act order (w/ block ending in act). + """ + def __init__(self, cfg: ByobCfg, num_classes=1000, in_chans=3, global_pool='avg', output_stride=32, + zero_init_last_bn=True, drop_rate=0., drop_path_rate=0.): + super().__init__() + self.num_classes = num_classes + self.drop_rate = drop_rate + norm_layer = cfg.norm_layer + act_layer = get_act_layer(cfg.act_layer) + attn_layer = partial(get_attn(cfg.attn_layer), **cfg.attn_kwargs) if cfg.attn_layer else None + layer_cfg = dict(norm_layer=norm_layer, act_layer=act_layer, attn_layer=attn_layer) + + stem_chs = int(round((cfg.stem_chs or cfg.blocks[0].c) * cfg.width_factor)) + self.stem = create_stem(in_chans, stem_chs, cfg.stem_type, layer_cfg=layer_cfg) + + self.feature_info = [] + depths = [bc.d for bc in cfg.blocks] + dpr = [x.tolist() for x in torch.linspace(0, drop_path_rate, sum(depths)).split(depths)] + prev_name = 'stem' + prev_chs = stem_chs + net_stride = 2 + dilation = 1 + stages = [] + for stage_idx, block_cfg in enumerate(cfg.blocks): + stride = block_cfg.s + if stride != 1: + self.feature_info.append(dict(num_chs=prev_chs, reduction=net_stride, module=prev_name)) + if net_stride >= output_stride and stride > 1: + dilation *= stride + stride = 1 + net_stride *= stride + first_dilation = 1 if dilation in (1, 2) else 2 + + blocks = [] + for block_idx in range(block_cfg.d): + out_chs = make_divisible(block_cfg.c * cfg.width_factor) + group_size = block_cfg.gs + if isinstance(group_size, Callable): + group_size = group_size(out_chs, block_idx) + block_kwargs = dict( # Blocks used in this model must accept these arguments + in_chs=prev_chs, + out_chs=out_chs, + stride=stride if block_idx == 0 else 1, + dilation=(first_dilation, dilation), + group_size=group_size, + bottle_ratio=block_cfg.br, + downsample=cfg.downsample, + drop_path_rate=dpr[stage_idx][block_idx], + layer_cfg=layer_cfg, + ) + blocks += [create_block(block_cfg.type, **block_kwargs)] + first_dilation = dilation + prev_chs = out_chs + stages += [nn.Sequential(*blocks)] + prev_name = f'stages.{stage_idx}' + self.stages = nn.Sequential(*stages) + + if cfg.num_features: + self.num_features = int(round(cfg.width_factor * cfg.num_features)) + self.final_conv = ConvBnAct(prev_chs, self.num_features, 1, **_na_args(layer_cfg)) + else: + self.num_features = prev_chs + self.final_conv = nn.Identity() + self.feature_info += [dict(num_chs=self.num_features, reduction=net_stride, module='final_conv')] + + self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=self.drop_rate) + + for n, m in self.named_modules(): + if isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, mean=0.0, std=0.01) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + for m in self.modules(): + # call each block's weight init for block-specific overrides to init above + if hasattr(m, 'init_weights'): + m.init_weights(zero_init_last_bn=zero_init_last_bn) + + def get_classifier(self): + return self.head.fc + + def reset_classifier(self, num_classes, 
global_pool='avg'): + self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=self.drop_rate) + + def forward_features(self, x): + x = self.stem(x) + x = self.stages(x) + x = self.final_conv(x) + return x + + def forward(self, x): + x = self.forward_features(x) + x = self.head(x) + return x + + +def _create_byobnet(variant, pretrained=False, **kwargs): + return build_model_with_cfg( + ByobNet, variant, pretrained, + default_cfg=default_cfgs[variant], + model_cfg=model_cfgs[variant], + feature_cfg=dict(flatten_sequential=True), + **kwargs) + + +@register_model +def gernet_l(pretrained=False, **kwargs): + return _create_byobnet('gernet_l', pretrained=pretrained, **kwargs) + + +@register_model +def gernet_m(pretrained=False, **kwargs): + return _create_byobnet('gernet_m', pretrained=pretrained, **kwargs) + + +@register_model +def gernet_s(pretrained=False, **kwargs): + return _create_byobnet('gernet_s', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_a2(pretrained=False, **kwargs): + return _create_byobnet('repvgg_a2', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_b0(pretrained=False, **kwargs): + return _create_byobnet('repvgg_b0', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_b1(pretrained=False, **kwargs): + return _create_byobnet('repvgg_b1', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_b1g4(pretrained=False, **kwargs): + return _create_byobnet('repvgg_b1g4', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_b2(pretrained=False, **kwargs): + return _create_byobnet('repvgg_b2', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_b2g4(pretrained=False, **kwargs): + return _create_byobnet('repvgg_b2g4', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_b3(pretrained=False, **kwargs): + return _create_byobnet('repvgg_b3', pretrained=pretrained, **kwargs) + + +@register_model +def repvgg_b3g4(pretrained=False, **kwargs): + return _create_byobnet('repvgg_b3g4', pretrained=pretrained, **kwargs) diff --git a/timm/models/nfnet.py b/timm/models/nfnet.py index 74c4fe33..c56c5780 100644 --- a/timm/models/nfnet.py +++ b/timm/models/nfnet.py @@ -395,8 +395,11 @@ def _create_normfreenet(variant, pretrained=False, **kwargs): feature_cfg['out_indices'] = (1, 2, 3, 4) # no stride 2, 0 level feat for stride 4 maxpool stems in ResNet return build_model_with_cfg( - NormalizerFreeNet, variant, pretrained, model_cfg=model_cfg, default_cfg=default_cfgs[variant], - feature_cfg=feature_cfg, **kwargs) + NormalizerFreeNet, variant, pretrained, + default_cfg=default_cfgs[variant], + model_cfg=model_cfg, + feature_cfg=feature_cfg, + **kwargs) @register_model diff --git a/timm/models/vgg.py b/timm/models/vgg.py new file mode 100644 index 00000000..ceede650 --- /dev/null +++ b/timm/models/vgg.py @@ -0,0 +1,261 @@ +"""VGG + +Adapted from https://github.com/pytorch/vision 'vgg.py' (BSD-3-Clause) with a few changes for +timm functionality. 
+ +Copyright 2021 Ross Wightman +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Union, List, Dict, Any, cast + +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .helpers import build_model_with_cfg +from .layers import ClassifierHead, ConvBnAct +from .registry import register_model + +__all__ = [ + 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', + 'vgg19_bn', 'vgg19', +] + + +def _cfg(url='', **kwargs): + return { + 'url': url, + 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (1, 1), + 'crop_pct': 0.875, 'interpolation': 'bilinear', + 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD, + 'first_conv': 'features.0', 'classifier': 'head.fc', + **kwargs + } + + +default_cfgs = { + 'vgg11': _cfg(url='https://download.pytorch.org/models/vgg11-bbd30ac9.pth'), + 'vgg13': _cfg(url='https://download.pytorch.org/models/vgg13-c768596a.pth'), + 'vgg16': _cfg(url='https://download.pytorch.org/models/vgg16-397923af.pth'), + 'vgg19': _cfg(url='https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'), + 'vgg11_bn': _cfg(url='https://download.pytorch.org/models/vgg11_bn-6002323d.pth'), + 'vgg13_bn': _cfg(url='https://download.pytorch.org/models/vgg13_bn-abd245e5.pth'), + 'vgg16_bn': _cfg(url='https://download.pytorch.org/models/vgg16_bn-6c64b313.pth'), + 'vgg19_bn': _cfg(url='https://download.pytorch.org/models/vgg19_bn-c79401a0.pth'), +} + + +cfgs: Dict[str, List[Union[str, int]]] = { + 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], + 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], +} + + +class ConvMlp(nn.Module): + + def __init__(self, in_features=512, out_features=4096, kernel_size=7, mlp_ratio=1.0, + drop_rate: float = 0.2, act_layer: nn.Module = None, conv_layer: nn.Module = None): + super(ConvMlp, self).__init__() + self.input_kernel_size = kernel_size + mid_features = int(out_features * mlp_ratio) + self.fc1 = conv_layer(in_features, mid_features, kernel_size, bias=True) + self.act1 = act_layer(True) + self.drop = nn.Dropout(drop_rate) + self.fc2 = conv_layer(mid_features, out_features, 1, bias=True) + self.act2 = act_layer(True) + + def forward(self, x): + if x.shape[-2] < self.input_kernel_size or x.shape[-1] < self.input_kernel_size: + # keep the input size >= 7x7 + output_size = (max(self.input_kernel_size, x.shape[-2]), max(self.input_kernel_size, x.shape[-1])) + x = F.adaptive_avg_pool2d(x, output_size) + x = self.fc1(x) + x = self.act1(x) + x = self.drop(x) + x = self.fc2(x) + x = self.act2(x) + return x + + +class VGG(nn.Module): + + def __init__( + self, + cfg: List[Any], + num_classes: int = 1000, + in_chans: int = 3, + output_stride: int = 32, + mlp_ratio: float = 1.0, + act_layer: nn.Module = nn.ReLU, + conv_layer: nn.Module = nn.Conv2d, + norm_layer: nn.Module = None, + global_pool: str = 'avg', + drop_rate: float = 0., + ) -> None: + super(VGG, self).__init__() + assert output_stride == 32 + self.num_classes = num_classes + self.num_features = 4096 + self.drop_rate = drop_rate + self.feature_info = [] + prev_chs = in_chans + net_stride = 1 + pool_layer = nn.MaxPool2d + layers: List[nn.Module] = [] + for v in cfg: + last_idx = len(layers) - 1 + if v == 'M': + 
self.feature_info.append(dict(num_chs=prev_chs, reduction=net_stride, module=f'features.{last_idx}'))
+                layers += [pool_layer(kernel_size=2, stride=2)]
+                net_stride *= 2
+            else:
+                v = cast(int, v)
+                conv2d = conv_layer(prev_chs, v, kernel_size=3, padding=1)
+                if norm_layer is not None:
+                    layers += [conv2d, norm_layer(v), act_layer(inplace=True)]
+                else:
+                    layers += [conv2d, act_layer(inplace=True)]
+                prev_chs = v
+        self.features = nn.Sequential(*layers)
+        self.feature_info.append(dict(num_chs=prev_chs, reduction=net_stride, module=f'features.{len(layers) - 1}'))
+        self.pre_logits = ConvMlp(
+            prev_chs, self.num_features, 7, mlp_ratio=mlp_ratio,
+            drop_rate=drop_rate, act_layer=act_layer, conv_layer=conv_layer)
+        self.head = ClassifierHead(
+            self.num_features, num_classes, pool_type=global_pool, drop_rate=drop_rate)
+
+        self._initialize_weights()
+
+    def get_classifier(self):
+        return self.head.fc
+
+    def reset_classifier(self, num_classes, global_pool='avg'):
+        self.num_classes = num_classes
+        self.head = ClassifierHead(
+            self.num_features, self.num_classes, pool_type=global_pool, drop_rate=self.drop_rate)
+
+    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.features(x)
+        x = self.pre_logits(x)
+        return x
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+    def _initialize_weights(self) -> None:
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)
+
+
+def _filter_fn(state_dict):
+    """ convert torchvision VGG classifier (Linear) weights to timm's ConvMlp + head layout """
+    out_dict = {}
+    for k, v in state_dict.items():
+        k_r = k
+        k_r = k_r.replace('classifier.0', 'pre_logits.fc1')
+        k_r = k_r.replace('classifier.3', 'pre_logits.fc2')
+        k_r = k_r.replace('classifier.6', 'head.fc')
+        if 'classifier.0.weight' in k:
+            v = v.reshape(-1, 512, 7, 7)
+        if 'classifier.3.weight' in k:
+            v = v.reshape(-1, 4096, 1, 1)
+        out_dict[k_r] = v
+    return out_dict
+
+
+def _create_vgg(variant: str, pretrained: bool, **kwargs: Any) -> VGG:
+    cfg = variant.split('_')[0]
+    # NOTE: VGG is one of the only models with stride==1 features, so indices are offset from other models
+    out_indices = kwargs.get('out_indices', (0, 1, 2, 3, 4, 5))
+    model = build_model_with_cfg(
+        VGG, variant, pretrained=pretrained,
+        model_cfg=cfgs[cfg],
+        default_cfg=default_cfgs[variant],
+        feature_cfg=dict(flatten_sequential=True, out_indices=out_indices),
+        pretrained_filter_fn=_filter_fn,
+        **kwargs)
+    return model
+
+
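The `_filter_fn` above is easiest to follow in terms of checkpoint tensor shapes: torchvision stores the VGG classifier as `Linear` layers over flattened 7x7x512 features, while this port keeps them as convolutions inside `ConvMlp`. A small sketch of the reshapes involved, with dummy tensors standing in for the torchvision weights:

```python
import torch

# torchvision: classifier.0 is Linear(512 * 7 * 7, 4096); timm: pre_logits.fc1 is Conv2d(512, 4096, 7)
lin_w = torch.zeros(4096, 512 * 7 * 7)        # classifier.0.weight
conv_w = lin_w.reshape(-1, 512, 7, 7)         # -> pre_logits.fc1.weight
assert conv_w.shape == (4096, 512, 7, 7)

# torchvision: classifier.3 is Linear(4096, 4096); timm: pre_logits.fc2 is Conv2d(4096, 4096, 1)
lin_w2 = torch.zeros(4096, 4096)              # classifier.3.weight
conv_w2 = lin_w2.reshape(-1, 4096, 1, 1)      # -> pre_logits.fc2.weight

# classifier.6 (the final Linear) maps straight to head.fc with no reshape
```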
+@register_model
+def vgg11(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 11-layer model (configuration "A") from
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(**kwargs)
+    return _create_vgg('vgg11', pretrained=pretrained, **model_args)
+
+
+@register_model
+def vgg11_bn(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 11-layer model (configuration "A") with batch normalization
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(norm_layer=nn.BatchNorm2d, **kwargs)
+    return _create_vgg('vgg11_bn', pretrained=pretrained, **model_args)
+
+
+@register_model
+def vgg13(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 13-layer model (configuration "B")
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(**kwargs)
+    return _create_vgg('vgg13', pretrained=pretrained, **model_args)
+
+
+@register_model
+def vgg13_bn(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 13-layer model (configuration "B") with batch normalization
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(norm_layer=nn.BatchNorm2d, **kwargs)
+    return _create_vgg('vgg13_bn', pretrained=pretrained, **model_args)
+
+
+@register_model
+def vgg16(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 16-layer model (configuration "D")
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(**kwargs)
+    return _create_vgg('vgg16', pretrained=pretrained, **model_args)
+
+
+@register_model
+def vgg16_bn(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 16-layer model (configuration "D") with batch normalization
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(norm_layer=nn.BatchNorm2d, **kwargs)
+    return _create_vgg('vgg16_bn', pretrained=pretrained, **model_args)
+
+
+@register_model
+def vgg19(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 19-layer model (configuration "E")
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(**kwargs)
+    return _create_vgg('vgg19', pretrained=pretrained, **model_args)
+
+
+@register_model
+def vgg19_bn(pretrained: bool = False, **kwargs: Any) -> VGG:
+    r"""VGG 19-layer model (configuration "E") with batch normalization
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
+    """
+    model_args = dict(norm_layer=nn.BatchNorm2d, **kwargs)
+    return _create_vgg('vgg19_bn', pretrained=pretrained, **model_args)
\ No newline at end of file

From b4e216e377cd748cafc3fd24722bbece9779d1af Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Tue, 9 Feb 2021 17:33:43 -0800
Subject: [PATCH 4/7] Fix a few small things.
--- timm/models/helpers.py | 2 +- timm/models/inception_v3.py | 4 ++-- validate.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/timm/models/helpers.py b/timm/models/helpers.py index d56cdc57..33744eb5 100644 --- a/timm/models/helpers.py +++ b/timm/models/helpers.py @@ -185,7 +185,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non state_dict = filter_fn(state_dict) input_convs = cfg.get('first_conv', None) - if input_convs is not None: + if input_convs is not None and in_chans != 3: if isinstance(input_convs, str): input_convs = (input_convs,) for input_conv_name in input_convs: diff --git a/timm/models/inception_v3.py b/timm/models/inception_v3.py index 9ae7105f..cdb1f1c0 100644 --- a/timm/models/inception_v3.py +++ b/timm/models/inception_v3.py @@ -32,12 +32,12 @@ default_cfgs = { # my port of Tensorflow SLIM weights (http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz) 'tf_inception_v3': _cfg( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_inception_v3-e0069de4.pth', - num_classes=1001, has_aux=False), + num_classes=1000, has_aux=False, label_offset=1), # my port of Tensorflow adversarially trained Inception V3 from # http://download.tensorflow.org/models/adv_inception_v3_2017_08_18.tar.gz 'adv_inception_v3': _cfg( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/adv_inception_v3-9e27bd63.pth', - num_classes=1001, has_aux=False), + num_classes=1000, has_aux=False, label_offset=1), # from gluon pretrained models, best performing in terms of accuracy/loss metrics # https://gluon-cv.mxnet.io/model_zoo/classification.html 'gluon_inception_v3': _cfg( diff --git a/validate.py b/validate.py index 8ad9cb1f..83f66fa5 100755 --- a/validate.py +++ b/validate.py @@ -284,7 +284,7 @@ def main(): if args.model == 'all': # validate all models in a list of names with pretrained checkpoints args.pretrained = True - model_names = list_models(pretrained=True) + model_names = list_models(pretrained=True, exclude_filters=['*in21k']) model_cfgs = [(n, '') for n in model_names] elif not is_model(args.model): # model name doesn't exist, try as wildcard filter From 0356e773f5405eea1032e5c4c0be528128e5684e Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 10 Feb 2021 14:31:18 -0800 Subject: [PATCH 5/7] Default to native PyTorch AMP instead of APEX amp. Too many APEX issues cropping up lately. --- timm/models/helpers.py | 2 +- train.py | 8 ++++---- validate.py | 13 +++++++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/timm/models/helpers.py b/timm/models/helpers.py index 33744eb5..d9b501da 100644 --- a/timm/models/helpers.py +++ b/timm/models/helpers.py @@ -177,7 +177,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non if cfg is None: cfg = getattr(model, 'default_cfg') if cfg is None or 'url' not in cfg or not cfg['url']: - _logger.warning("Pretrained model URL does not exist, using random initialization.") + _logger.warning("No pretrained weights exist for this model. 
Using random initialization.")
         return
 
     state_dict = load_state_dict_from_url(cfg['url'], progress=progress, map_location='cpu')
diff --git a/train.py b/train.py
index f0fcd2af..2f9c3744 100755
--- a/train.py
+++ b/train.py
@@ -310,11 +310,11 @@ def main():
     # resolve AMP arguments based on PyTorch / Apex availability
     use_amp = None
     if args.amp:
-        # for backwards compat, `--amp` arg tries apex before native amp
-        if has_apex:
-            args.apex_amp = True
-        elif has_native_amp:
+        # `--amp` chooses native amp before apex (APEX ver not actively maintained)
+        if has_native_amp:
             args.native_amp = True
+        elif has_apex:
+            args.apex_amp = True
     if args.apex_amp and has_apex:
         use_amp = 'apex'
     elif args.native_amp and has_native_amp:
diff --git a/validate.py b/validate.py
index 83f66fa5..ca69df08 100755
--- a/validate.py
+++ b/validate.py
@@ -116,15 +116,20 @@ def validate(args):
     args.prefetcher = not args.no_prefetcher
     amp_autocast = suppress  # do nothing
     if args.amp:
-        if has_apex:
-            args.apex_amp = True
-        elif has_native_amp:
+        if has_native_amp:
             args.native_amp = True
+        elif has_apex:
+            args.apex_amp = True
         else:
-            _logger.warning("Neither APEX or Native Torch AMP is available, using FP32.")
+            _logger.warning("Neither APEX nor native Torch AMP is available.")
     assert not args.apex_amp or not args.native_amp, "Only one AMP mode should be set."
     if args.native_amp:
         amp_autocast = torch.cuda.amp.autocast
+        _logger.info('Validating in mixed precision with native PyTorch AMP.')
+    elif args.apex_amp:
+        _logger.info('Validating in mixed precision with NVIDIA APEX AMP.')
+    else:
+        _logger.info('Validating in float32. AMP not enabled.')
 
     if args.legacy_jit:
         set_jit_legacy()
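For reference, the native AMP path that train.py and validate.py now prefer follows the standard `torch.cuda.amp` pattern (PyTorch 1.6+). A minimal training-step sketch, assuming `model`, `criterion`, `optimizer`, and `loader` are already set up; this is generic PyTorch usage, not code from this repo:

```python
import torch

scaler = torch.cuda.amp.GradScaler()
for images, targets in loader:
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():          # forward pass runs in mixed precision
        loss = criterion(model(images), targets)
    scaler.scale(loss).backward()            # scale loss to avoid fp16 gradient underflow
    scaler.step(optimizer)                   # unscales grads, skips step if inf/nan found
    scaler.update()
```

Inference-only code, as in validate.py above, needs only the `autocast` context.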
From 6853b07bbdaacaffaa23613399cdaf823621d688 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Wed, 10 Feb 2021 14:40:29 -0800
Subject: [PATCH 6/7] Improve RepVGG block identity vs non-identity path
 handling for clarity and fix attn usage. Add comments.

---
 timm/models/byobnet.py | 52 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/timm/models/byobnet.py b/timm/models/byobnet.py
index 680edd22..c5ccb70b 100644
--- a/timm/models/byobnet.py
+++ b/timm/models/byobnet.py
@@ -32,7 +32,6 @@ from functools import partial
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .helpers import build_model_with_cfg
@@ -443,7 +442,7 @@ class RepVggBlock(nn.Module):
 
     Adapted from impl at https://github.com/DingXiaoH/RepVGG
 
-    This version does not currently support the deploy optimization. It is currently fixed in 'train' model.
+    This version does not currently support the deploy optimization. It is currently fixed in 'train' mode.
 
     def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
@@ -461,8 +460,8 @@ class RepVggBlock(nn.Module):
             in_chs, out_chs, kernel_size, stride=stride, dilation=dilation[0],
             groups=groups, drop_block=drop_block, apply_act=False, **layer_args)
         self.conv_1x1 = ConvBnAct(in_chs, out_chs, 1, stride=stride, groups=groups, apply_act=False, **layer_args)
-        self.attn = None if attn_layer is None else attn_layer(out_chs)
-        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+        self.attn = nn.Identity() if attn_layer is None else attn_layer(out_chs)
+        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. and use_ident else nn.Identity()
         self.act = act_layer(inplace=True)
 
     def init_weights(self, zero_init_last_bn=False):
@@ -474,14 +473,14 @@ class RepVggBlock(nn.Module):
 
     def forward(self, x):
         if self.identity is None:
-            identity = 0
+            x = self.conv_1x1(x) + self.conv_kxk(x)
         else:
             identity = self.identity(x)
-        x = self.conv_1x1(x) + self.conv_kxk(x)
-        if self.attn is not None:
-            x = self.attn(x)
-        x = self.drop_path(x)
-        x = self.act(x + identity)
+            x = self.conv_1x1(x) + self.conv_kxk(x)
+            x = self.drop_path(x)  # not in the paper / official impl, experimental
+            x = x + identity
+        x = self.attn(x)  # no attn in the paper / official impl, experimental
+        x = self.act(x)
         return x
 
 
@@ -654,54 +653,87 @@ def _create_byobnet(variant, pretrained=False, **kwargs):
 
 @register_model
 def gernet_l(pretrained=False, **kwargs):
+    """ GEResNet-Large (GENet-Large from official impl)
+    `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
+    """
     return _create_byobnet('gernet_l', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def gernet_m(pretrained=False, **kwargs):
+    """ GEResNet-Medium (GENet-Normal from official impl)
+    `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
+    """
     return _create_byobnet('gernet_m', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def gernet_s(pretrained=False, **kwargs):
+    """ GEResNet-Small (GENet-Small from official impl)
+    `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
+    """
     return _create_byobnet('gernet_s', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_a2(pretrained=False, **kwargs):
+    """ RepVGG-A2
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_a2', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_b0(pretrained=False, **kwargs):
+    """ RepVGG-B0
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_b0', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_b1(pretrained=False, **kwargs):
+    """ RepVGG-B1
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_b1', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_b1g4(pretrained=False, **kwargs):
+    """ RepVGG-B1g4
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_b1g4', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_b2(pretrained=False, **kwargs):
+    """ RepVGG-B2
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_b2', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_b2g4(pretrained=False, **kwargs):
+    """ RepVGG-B2g4
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_b2g4', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_b3(pretrained=False, **kwargs):
+    """ RepVGG-B3
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_b3', pretrained=pretrained, **kwargs)
 
 
 @register_model
 def repvgg_b3g4(pretrained=False, **kwargs):
+    """ RepVGG-B3g4
+    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
+    """
     return _create_byobnet('repvgg_b3g4', pretrained=pretrained, **kwargs)
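The 'deploy optimization' that RepVggBlock still lacks is the re-parameterization step from the RepVGG paper: after training, each conv+BN branch folds into a single conv, the 1x1 kernel is zero-padded to 3x3, and the branches sum into one 3x3 conv for inference. A rough sketch of the fusion math on plain `Conv2d`/`BatchNorm2d` pairs; `conv3x3`, `bn3`, `conv1x1`, `bn1` are assumed stand-ins, not this repo's `ConvBnAct` internals, and the identity-BN branch folds the same way using an identity kernel:

```python
import torch
import torch.nn.functional as F

def fuse_conv_bn(conv_w, bn):
    # BN(W * x) == (gamma / std) * W * x + (beta - gamma * mean / std)
    std = (bn.running_var + bn.eps).sqrt()
    scale = (bn.weight / std).reshape(-1, 1, 1, 1)
    return conv_w * scale, bn.bias - bn.running_mean * bn.weight / std

w3, b3 = fuse_conv_bn(conv3x3.weight, bn3)
w1, b1 = fuse_conv_bn(conv1x1.weight, bn1)
w = w3 + F.pad(w1, [1, 1, 1, 1])   # pad 1x1 kernel to 3x3 so the kernels can be summed
b = b3 + b1                        # a single Conv2d with (w, b) now replaces both branches
```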
From ca9b078ac7b5058d6d8d3db0fa6b30916b8fa113 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Wed, 10 Feb 2021 14:46:07 -0800
Subject: [PATCH 7/7] Update README.md and docs. Version bumped to 0.4.3

---
 README.md                | 50 +++++++++++++++------------------------
 docs/archived_changes.md | 13 +++++++++++
 docs/changes.md          | 50 ++++++++++++++++++++++++++++++++++++++++
 docs/models.md           | 12 ++++++++++
 docs/scripts.md          |  6 ++---
 timm/version.py          |  2 +-
 6 files changed, 97 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 5448a5ad..a5b4b536 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,15 @@
 
 ## What's New
 
+### Feb 10, 2021
+* More model archs, incl a flexible ByobNet backbone ('Bring-your-own-blocks')
+  * GPU-Efficient-Networks (https://github.com/idstcv/GPU-Efficient-Networks), impl in `byobnet.py`
+  * RepVGG (https://github.com/DingXiaoH/RepVGG), impl in `byobnet.py`
+  * classic VGG (from torchvision, impl in `vgg.py`)
+* Refinements to normalizer layer arg handling and normalizer+act layer handling in some models
+* Default AMP mode changed to native PyTorch AMP instead of APEX; APEX issues are not being fixed. Native AMP works with `--channels-last` and `--torchscript` model training, APEX does not.
+* Fix a few bugs introduced since last pypi release
+
 ### Feb 8, 2021
 * Add several ResNet weights with ECA attention. 26t & 50t trained @ 256, test @ 320. 269d train @ 256, fine-tune @320, test @ 352.
   * `ecaresnet26t` - 79.88 top-1 @ 320x320, 79.08 @ 256x256
@@ -118,30 +127,6 @@ Bunch of changes:
 * Some import cleanup and classifier reset changes, all models will have classifier reset to nn.Identity on reset_classifer(0) call
 * Prep for 0.1.28 pip release
 
-### May 12, 2020
-* Add ResNeSt models (code adapted from https://github.com/zhanghang1989/ResNeSt, paper https://arxiv.org/abs/2004.08955))
-
-### May 3, 2020
-* Pruned EfficientNet B1, B2, and B3 (https://arxiv.org/abs/2002.08258) contributed by [Yonathan Aflalo](https://github.com/yoniaflalo)
-
-### May 1, 2020
-* Merged a number of execellent contributions in the ResNet model family over the past month
-  * BlurPool2D and resnetblur models initiated by [Chris Ha](https://github.com/VRandme), I trained resnetblur50 to 79.3.
-  * TResNet models and SpaceToDepth, AntiAliasDownsampleLayer layers by [mrT23](https://github.com/mrT23)
-  * ecaresnet (50d, 101d, light) models and two pruned variants using pruning as per (https://arxiv.org/abs/2002.08258) by [Yonathan Aflalo](https://github.com/yoniaflalo)
-* 200 pretrained models in total now with updated results csv in results folder
-
-### April 5, 2020
-* Add some newly trained MobileNet-V2 models trained with latest h-params, rand augment. They compare quite favourably to EfficientNet-Lite
-  * 3.5M param MobileNet-V2 100 @ 73%
-  * 4.5M param MobileNet-V2 110d @ 75%
-  * 6.1M param MobileNet-V2 140 @ 76.5%
-  * 5.8M param MobileNet-V2 120d @ 77.3%
-
-### March 18, 2020
-* Add EfficientNet-Lite models w/ weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
-* Add RandAugment trained ResNeXt-50 32x4d weights with 79.8 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
-
 ## Introduction
 
 Py**T**orch **Im**age **M**odels (`timm`) is a collection of image models, layers, utilities, optimizers, schedulers, data-loaders / augmentations, and reference training / validation scripts that aim to pull together a wide variety of SOTA models with ability to reproduce ImageNet training results.
 
@@ -150,7 +135,7 @@ The work of many others is present here. I've tried to make sure all source mate
 ## Models
 
-All model architecture families include variants with pretrained weights. The are variants without any weights. Help training new or better weights is always appreciated. Here are some example [training hparams](https://rwightman.github.io/pytorch-image-models/training_hparam_examples) to get you started.
+All model architecture families include variants with pretrained weights. There are specific model variants without any weights; it is NOT a bug. Help training new or better weights is always appreciated. Here are some example [training hparams](https://rwightman.github.io/pytorch-image-models/training_hparam_examples) to get you started.
 
 A full version of the list below with source links can be found in the [documentation](https://rwightman.github.io/pytorch-image-models/models/).
 
@@ -170,6 +155,7 @@ A full version of the list below with source links can be found in the [document
 * MNASNet B1, A1 (Squeeze-Excite), and Small - https://arxiv.org/abs/1807.11626
 * MobileNet-V2 - https://arxiv.org/abs/1801.04381
 * Single-Path NAS - https://arxiv.org/abs/1904.02877
+* GPU-Efficient Networks - https://arxiv.org/abs/2006.14090
 * HRNet - https://arxiv.org/abs/1908.07919
 * Inception-V3 - https://arxiv.org/abs/1512.00567
 * Inception-ResNet-V2 and Inception-V4 - https://arxiv.org/abs/1602.07261
@@ -178,6 +164,7 @@ A full version of the list below with source links can be found in the [document
 * NF-RegNet / NF-ResNet - https://arxiv.org/abs/2101.08692
 * PNasNet - https://arxiv.org/abs/1712.00559
 * RegNet - https://arxiv.org/abs/2003.13678
+* RepVGG - https://arxiv.org/abs/2101.03697
 * ResNet/ResNeXt
   * ResNet (v1b/v1.5) - https://arxiv.org/abs/1512.03385
   * ResNeXt - https://arxiv.org/abs/1611.05431
@@ -261,9 +248,10 @@ The root folder of the repository contains reference train, validation, and infe
 
 One of the greatest assets of PyTorch is the community and their contributions. A few of my favourite resources that pair well with the models and components here are listed below.
 
-### Training / Frameworks
-* PyTorch Lightning - https://github.com/PyTorchLightning/pytorch-lightning
-* fastai - https://github.com/fastai/fastai
+### Object Detection, Instance and Semantic Segmentation
+* Detectron2 - https://github.com/facebookresearch/detectron2
+* Segmentation Models (Semantic) - https://github.com/qubvel/segmentation_models.pytorch
+* EfficientDet (Obj Det, Semantic soon) - https://github.com/rwightman/efficientdet-pytorch
 
 ### Computer Vision / Image Augmentation
 * Albumentations - https://github.com/albumentations-team/albumentations
 ### Metric Learning
 * PyTorch Metric Learning - https://github.com/KevinMusgrave/pytorch-metric-learning
 
-### Object Detection, Instance and Semantic Segmentation
-* Detectron2 - https://github.com/facebookresearch/detectron2
-* Segmentation Models (Semantic) - https://github.com/qubvel/segmentation_models.pytorch
-* EfficientDet (Obj Det, Semantic soon) - https://github.com/rwightman/efficientdet-pytorch
+### Training / Frameworks
+* fastai - https://github.com/fastai/fastai
 
 ## Licenses
 
diff --git a/docs/archived_changes.md b/docs/archived_changes.md
index baad8e01..add51b53 100644
--- a/docs/archived_changes.md
+++ b/docs/archived_changes.md
@@ -1,5 +1,18 @@
 # Archived Changes
 
+### May 12, 2020
+* Add ResNeSt models (code adapted from https://github.com/zhanghang1989/ResNeSt, paper https://arxiv.org/abs/2004.08955)
+
+### May 3, 2020
+* Pruned EfficientNet B1, B2, and B3 (https://arxiv.org/abs/2002.08258) contributed by [Yonathan Aflalo](https://github.com/yoniaflalo)
+
+### May 1, 2020
+* Merged a number of excellent contributions in the ResNet model family over the past month
+  * BlurPool2D and resnetblur models initiated by [Chris Ha](https://github.com/VRandme), I trained resnetblur50 to 79.3.
+  * TResNet models and SpaceToDepth, AntiAliasDownsampleLayer layers by [mrT23](https://github.com/mrT23)
+  * ecaresnet (50d, 101d, light) models and two pruned variants using pruning as per (https://arxiv.org/abs/2002.08258) by [Yonathan Aflalo](https://github.com/yoniaflalo)
+* 200 pretrained models in total now with updated results csv in results folder
+
 ### April 5, 2020
 * Add some newly trained MobileNet-V2 models trained with latest h-params, rand augment. They compare quite favourably to EfficientNet-Lite
 * 3.5M param MobileNet-V2 100 @ 73%
diff --git a/docs/changes.md b/docs/changes.md
index 5e696a11..3bdb92d8 100644
--- a/docs/changes.md
+++ b/docs/changes.md
@@ -1,5 +1,55 @@
 # Recent Changes
 
+### Feb 10, 2021
+* More model archs, incl a flexible ByobNet backbone ('Bring-your-own-blocks')
+  * GPU-Efficient-Networks (https://github.com/idstcv/GPU-Efficient-Networks), impl in `byobnet.py`
+  * RepVGG (https://github.com/DingXiaoH/RepVGG), impl in `byobnet.py`
+  * classic VGG (from torchvision, impl in `vgg.py`)
+* Refinements to normalizer layer arg handling and normalizer+act layer handling in some models
+* Default AMP mode changed to native PyTorch AMP instead of APEX; APEX issues are not being fixed. Native AMP works with `--channels-last` and `--torchscript` model training, APEX does not.
+* Fix a few bugs introduced since last pypi release
+
+### Feb 8, 2021
+* Add several ResNet weights with ECA attention. 26t & 50t trained @ 256, test @ 320. 269d train @ 256, fine-tune @320, test @ 352.
+ * `ecaresnet26t` - 79.88 top-1 @ 320x320, 79.08 @ 256x256 + * `ecaresnet50t` - 82.35 top-1 @ 320x320, 81.52 @ 256x256 + * `ecaresnet269d` - 84.93 top-1 @ 352x352, 84.87 @ 320x320 +* Remove separate tiered (`t`) vs tiered_narrow (`tn`) ResNet model defs, all `tn` changed to `t` and `t` models removed (`seresnext26t_32x4d` only model w/ weights that was removed). +* Support model default_cfgs with separate train vs test resolution `test_input_size` and remove extra `_320` suffix ResNet model defs that were just for test. + +### Jan 30, 2021 +* Add initial "Normalization Free" NF-RegNet-B* and NF-ResNet model definitions based on [paper](https://arxiv.org/abs/2101.08692) + +### Jan 25, 2021 +* Add ResNetV2 Big Transfer (BiT) models w/ ImageNet-1k and 21k weights from https://github.com/google-research/big_transfer +* Add official R50+ViT-B/16 hybrid models + weights from https://github.com/google-research/vision_transformer +* ImageNet-21k ViT weights are added w/ model defs and representation layer (pre logits) support + * NOTE: ImageNet-21k classifier heads were zero'd in original weights, they are only useful for transfer learning +* Add model defs and weights for DeiT Vision Transformer models from https://github.com/facebookresearch/deit +* Refactor dataset classes into ImageDataset/IterableImageDataset + dataset specific parser classes +* Add Tensorflow-Datasets (TFDS) wrapper to allow use of TFDS image classification sets with train script + * Ex: `train.py /data/tfds --dataset tfds/oxford_iiit_pet --val-split test --model resnet50 -b 256 --amp --num-classes 37 --opt adamw --lr 3e-4 --weight-decay .001 --pretrained -j 2` +* Add improved .tar dataset parser that reads images from .tar, folder of .tar files, or .tar within .tar + * Run validation on full ImageNet-21k directly from tar w/ BiT model: `validate.py /data/fall11_whole.tar --model resnetv2_50x1_bitm_in21k --amp` +* Models in this update should be stable w/ possible exception of ViT/BiT, possibility of some regressions with train/val scripts and dataset handling + +### Jan 3, 2021 +* Add SE-ResNet-152D weights + * 256x256 val, 0.94 crop top-1 - 83.75 + * 320x320 val, 1.0 crop - 84.36 +* Update results files + +### Dec 18, 2020 +* Add ResNet-101D, ResNet-152D, and ResNet-200D weights trained @ 256x256 + * 256x256 val, 0.94 crop (top-1) - 101D (82.33), 152D (83.08), 200D (83.25) + * 288x288 val, 1.0 crop - 101D (82.64), 152D (83.48), 200D (83.76) + * 320x320 val, 1.0 crop - 101D (83.00), 152D (83.66), 200D (84.01) + +### Dec 7, 2020 +* Simplify EMA module (ModelEmaV2), compatible with fully torchscripted models +* Misc fixes for SiLU ONNX export, default_cfg missing from Feature extraction models, Linear layer w/ AMP + torchscript +* PyPi release @ 0.3.2 (needed by EfficientDet) + ### Oct 30, 2020 * Test with PyTorch 1.7 and fix a small top-n metric view vs reshape issue. * Convert newly added 224x224 Vision Transformer weights from official JAX repo. 81.8 top-1 for B/16, 83.1 L/16. 
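As a quick orientation for the additions catalogued in this changelog, the new architecture families are reachable through the usual timm registry calls (standard `timm` API; the listed output is illustrative, not exhaustive):

```python
import timm

print(timm.list_models('gernet*'))    # expect the GPU-Efficient variants: gernet_l / gernet_m / gernet_s
print(timm.list_models('repvgg*'))    # the RepVGG a2 / b0..b3g4 variants from byobnet.py
print(timm.list_models('vgg*'))       # classic VGG, with and without BN

model = timm.create_model('repvgg_b2', pretrained=False, num_classes=10)
```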
diff --git a/docs/models.md b/docs/models.md index 5522eb2d..ffae7321 100644 --- a/docs/models.md +++ b/docs/models.md @@ -31,6 +31,10 @@ The validation results for the pretrained weights can be found [here](results.md * My PyTorch code: https://github.com/rwightman/pytorch-dpn-pretrained * Reference code: https://github.com/cypw/DPNs +## GPU-Efficient Networks [[byobnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/byobnet.py)] +* Paper: `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090 +* Reference code: https://github.com/idstcv/GPU-Efficient-Networks + ## HRNet [[hrnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hrnet.py)] * Paper: `Deep High-Resolution Representation Learning for Visual Recognition` - https://arxiv.org/abs/1908.07919 * Code: https://github.com/HRNet/HRNet-Image-Classification @@ -82,6 +86,10 @@ The validation results for the pretrained weights can be found [here](results.md * Paper: `Designing Network Design Spaces` - https://arxiv.org/abs/2003.13678 * Reference code: https://github.com/facebookresearch/pycls/blob/master/pycls/models/regnet.py +## RepVGG [[byobnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/byobnet.py)] +* Paper: `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697 +* Reference code: https://github.com/DingXiaoH/RepVGG + ## ResNet, ResNeXt [[resnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/resnet.py)] * ResNet (V1B) @@ -136,6 +144,10 @@ NOTE: I am deprecating this version of the networks, the new ones are part of `r * Paper: `TResNet: High Performance GPU-Dedicated Architecture` - https://arxiv.org/abs/2003.13630 * Code: https://github.com/mrT23/TResNet +## VGG [[vgg.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vgg.py)] +* Paper: `Very Deep Convolutional Networks For Large-Scale Image Recognition` - https://arxiv.org/pdf/1409.1556.pdf +* Reference code: https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py + ## Vision Transformer [[vision_transformer.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py)] * Paper: `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 * Reference code and pretrained weights: https://github.com/google-research/vision_transformer diff --git a/docs/scripts.md b/docs/scripts.md index 48f123af..f48eec0d 100644 --- a/docs/scripts.md +++ b/docs/scripts.md @@ -10,9 +10,9 @@ The variety of training args is large and not all combinations of options (or ev To train an SE-ResNet34 on ImageNet, locally distributed, 4 GPUs, one process per GPU w/ cosine schedule, random-erasing prob of 50% and per-pixel random value: -`./distributed_train.sh 4 /data/imagenet --model seresnet34 --sched cosine --epochs 150 --warmup-epochs 5 --lr 0.4 --reprob 0.5 --remode pixel --batch-size 256 -j 4` +`./distributed_train.sh 4 /data/imagenet --model seresnet34 --sched cosine --epochs 150 --warmup-epochs 5 --lr 0.4 --reprob 0.5 --remode pixel --batch-size 256 --amp -j 4` -NOTE: NVIDIA APEX should be installed to run in per-process distributed via DDP or to enable AMP mixed precision with the --amp flag +NOTE: It is recommended to use PyTorch 1.7+ w/ PyTorch native AMP and DDP instead of APEX AMP. `--amp` defaults to native AMP as of timm ver 0.4.3. 
`--apex-amp` will force use of APEX components if they are installed. ## Validation / Inference Scripts @@ -24,4 +24,4 @@ To validate with the model's pretrained weights (if they exist): To run inference from a checkpoint: -`python inference.py /imagenet/validation/ --model mobilenetv3_large_100 --checkpoint ./output/model_best.pth.tar` \ No newline at end of file +`python inference.py /imagenet/validation/ --model mobilenetv3_large_100 --checkpoint ./output/train/model_best.pth.tar` \ No newline at end of file diff --git a/timm/version.py b/timm/version.py index a9873473..908c0bb7 100644 --- a/timm/version.py +++ b/timm/version.py @@ -1 +1 @@ -__version__ = '0.4.2' +__version__ = '0.4.3'